/**
 * {}
 * @author yutent
 * @date 2023/10/30 16:41:59
 */

// Parser states, numbered consecutively from 0 in declaration order.
let s = 0
const STATE_DICT = {
  PARSER_UNINITIALIZED: s++,
  START: s++,
  START_BOUNDARY: s++,
  HEADER_FIELD_START: s++,
  HEADER_FIELD: s++,
  HEADER_VALUE_START: s++,
  HEADER_VALUE: s++,
  HEADER_VALUE_ALMOST_DONE: s++,
  HEADERS_ALMOST_DONE: s++,
  PART_DATA_START: s++,
  PART_DATA: s++,
  PART_END: s++,
  END: s++
}

// Bit flags remembering which byte followed a fully matched boundary:
// CR (another part follows) or HYPHEN (closing boundary).
let f = 1
const FLAG_DICT = {
  PART_BOUNDARY: f,
  LAST_BOUNDARY: (f *= 2)
}

// Byte values the state machine compares against.
const LF = 10
const CR = 13
const SPACE = 32
const HYPHEN = 45
const COLON = 58
const LETTER_A = 97
const LETTER_Z = 122

/**
 * Sets bit 0x20 of a byte value, which maps ASCII 'A'-'Z' onto 'a'-'z'.
 * @param {number} c byte value
 * @returns {number}
 */
function lower(c) {
  return c | 0x20
}

/**
 * Reverse lookup of a state number in STATE_DICT.
 * @param {number} value
 * @returns {string|undefined} the state name, or undefined when no state matches
 */
function stateToString(value) {
  return Object.keys(STATE_DICT).find(key => STATE_DICT[key] === value)
}

/**
 * Incremental (chunk by chunk) multipart body parser.
 *
 * The parser reports progress by calling methods on itself that this class
 * does not define: `$partBegin()`, `$headerField(buf)`, `$headerValue(buf)`,
 * `$headerEnd()`, `$headersEnd()`, `$partData(buf)`, `$partEnd()` and
 * `$end()`. They must be assigned on the instance before `write()` is called.
 * A header name, header value or part body may be delivered in several
 * pieces, so handlers have to concatenate what they receive.
 */
export class MultipartParser {
  boundary = null
  boundaryChars = null
  lookbehind = null
  state = STATE_DICT.PARSER_UNINITIALIZED
  index = null
  flags = 0

  /**
   * @param {string} str boundary token, without the leading "--"
   */
  constructor(str) {
    // Size by encoded byte length, not by UTF-16 length, so a boundary with
    // multi-byte characters is stored completely instead of being truncated.
    this.boundary = Buffer.alloc(Buffer.byteLength(str) + 4)
    this.boundary.write('\r\n--', 0)
    this.boundary.write(str, 4)
    // Scratch space for bytes that tentatively match the boundary.
    this.lookbehind = Buffer.alloc(this.boundary.length + 8)
    this.state = STATE_DICT.START

    // Set of byte values occurring in the boundary, used to skip part data.
    this.boundaryChars = {}
    for (let i = 0; i < this.boundary.length; i++) {
      this.boundaryChars[this.boundary[i]] = true
    }
  }

  /**
   * Remembers offset `v` as the start of the token named `k`
   * (stored on the instance as `<k>Mark`).
   */
  #mark(k, v) {
    this[k + 'Mark'] = v
  }

  /**
   * Delivers the marked token `name` to the `$<name>` handler, if a mark is set.
   *
   * With `cleanup` the token ends at `idx` and the mark is removed. Without it
   * the token runs to the end of `buf` and the mark is reset to 0, so that the
   * token continues at the start of the next chunk. Empty slices are not
   * delivered.
   */
  #emit(name, buf, idx, cleanup) {
    const mark = name + 'Mark'
    if (this[mark] === void 0) {
      return
    }

    const start = this[mark]
    let end = buf.length
    if (cleanup) {
      end = idx
      delete this[mark]
    } else {
      this[mark] = 0
    }

    if (start === end) {
      return
    }
    this['$' + name](buf.slice(start, end))
  }

  /**
   * Feeds the next chunk of the body into the state machine.
   *
   * On malformed input the method returns early without storing the updated
   * index/state/flags back on the instance. It never returns a value.
   *
   * @param {Buffer} buffer
   */
  write(buffer) {
    const len = buffer.length
    const { lookbehind, boundary, boundaryChars } = this
    const boundaryLength = boundary.length
    const boundaryEnd = boundaryLength - 1
    let prevIndex = this.index
    let index = this.index
    let state = this.state
    let flags = this.flags
    let idx
    let c
    let cl

    for (idx = 0; idx < len; idx++) {
      c = buffer[idx]

      // NOTE: several cases deliberately fall through to the next one.
      switch (state) {
        case STATE_DICT.PARSER_UNINITIALIZED:
          return

        case STATE_DICT.START:
          index = 0
          state = STATE_DICT.START_BOUNDARY
        // falls through
        case STATE_DICT.START_BOUNDARY:
          // The first boundary has no leading CRLF, hence the "+ 2" / "- 2".
          if (index === boundary.length - 2) {
            if (c === HYPHEN) {
              flags |= FLAG_DICT.LAST_BOUNDARY
            } else if (c !== CR) {
              return
            }
            index++
            break
          } else if (index - 1 === boundary.length - 2) {
            if (flags & FLAG_DICT.LAST_BOUNDARY && c === HYPHEN) {
              this.$end()
              state = STATE_DICT.END
              flags = 0
            } else if (!(flags & FLAG_DICT.LAST_BOUNDARY) && c === LF) {
              index = 0
              this.$partBegin()
              state = STATE_DICT.HEADER_FIELD_START
            } else {
              return
            }
            break
          }

          if (c !== boundary[index + 2]) {
            // mismatch: restart so that the next comparison is against boundary[0]
            index = -2
          }
          if (c === boundary[index + 2]) {
            index++
          }
          break

        case STATE_DICT.HEADER_FIELD_START:
          state = STATE_DICT.HEADER_FIELD
          this.#mark('headerField', idx)
          index = 0
        // falls through
        case STATE_DICT.HEADER_FIELD:
          if (c === CR) {
            // a CR where a header name would start ends the header block
            delete this.headerFieldMark
            state = STATE_DICT.HEADERS_ALMOST_DONE
            break
          }

          index++
          if (c === HYPHEN) {
            break
          }

          if (c === COLON) {
            if (index === 1) {
              // empty header field
              return
            }
            this.#emit('headerField', buffer, idx, true)
            state = STATE_DICT.HEADER_VALUE_START
            break
          }

          // only letters (and the hyphen handled above) are accepted
          cl = lower(c)
          if (cl < LETTER_A || cl > LETTER_Z) {
            return
          }
          break

        case STATE_DICT.HEADER_VALUE_START:
          if (c === SPACE) {
            break
          }
          this.#mark('headerValue', idx)
          state = STATE_DICT.HEADER_VALUE
        // falls through
        case STATE_DICT.HEADER_VALUE:
          if (c === CR) {
            this.#emit('headerValue', buffer, idx, true)
            this.$headerEnd()
            state = STATE_DICT.HEADER_VALUE_ALMOST_DONE
          }
          break

        case STATE_DICT.HEADER_VALUE_ALMOST_DONE:
          if (c !== LF) {
            return
          }
          state = STATE_DICT.HEADER_FIELD_START
          break

        case STATE_DICT.HEADERS_ALMOST_DONE:
          if (c !== LF) {
            return
          }
          this.$headersEnd()
          state = STATE_DICT.PART_DATA_START
          break

        case STATE_DICT.PART_DATA_START:
          state = STATE_DICT.PART_DATA
          this.#mark('partData', idx)
        // falls through
        case STATE_DICT.PART_DATA:
          prevIndex = index

          if (index === 0) {
            // boyer-moore derived algorithm to safely skip non-boundary data
            idx += boundaryEnd
            while (idx < len && !(buffer[idx] in boundaryChars)) {
              idx += boundaryLength
            }
            idx -= boundaryEnd
            c = buffer[idx]
          }

          if (index < boundaryLength) {
            if (boundary[index] === c) {
              if (index === 0) {
                // possible boundary start: flush the data seen so far
                this.#emit('partData', buffer, idx, true)
              }
              index++
            } else {
              index = 0
            }
          } else if (index === boundaryLength) {
            index++
            if (c === CR) {
              // CR = part boundary
              flags |= FLAG_DICT.PART_BOUNDARY
            } else if (c === HYPHEN) {
              // HYPHEN = end boundary
              flags |= FLAG_DICT.LAST_BOUNDARY
            } else {
              index = 0
            }
          } else if (index - 1 === boundaryLength) {
            if (flags & FLAG_DICT.PART_BOUNDARY) {
              index = 0
              // Always unset the flag, also when the candidate is rejected;
              // a stale PART_BOUNDARY would make a later closing boundary
              // ("--" terminator) be evaluated by this branch and rejected.
              flags &= ~FLAG_DICT.PART_BOUNDARY
              if (c === LF) {
                this.$partEnd()
                this.$partBegin()
                state = STATE_DICT.HEADER_FIELD_START
                break
              }
            } else if (flags & FLAG_DICT.LAST_BOUNDARY) {
              if (c === HYPHEN) {
                this.$partEnd()
                this.$end()
                state = STATE_DICT.END
                flags = 0
              } else {
                index = 0
                // rejected candidate: do not carry the flag over
                flags &= ~FLAG_DICT.LAST_BOUNDARY
              }
            } else {
              index = 0
            }
          }

          if (index > 0) {
            // when matching a possible boundary, keep a lookbehind reference
            // in case it turns out to be a false lead
            lookbehind[index - 1] = c
          } else if (prevIndex > 0) {
            // if our boundary turned out to be rubbish, the captured lookbehind
            // belongs to partData. Hand out a copy: lookbehind is reused for
            // the next candidate and would change under a handler that keeps
            // the chunk.
            this.$partData(Buffer.from(lookbehind.subarray(0, prevIndex)))
            prevIndex = 0
            this.#mark('partData', idx)

            // reconsider the current character even though it interrupted the
            // sequence; it could be the beginning of a new sequence
            idx--
          }
          break

        case STATE_DICT.END:
          // bytes after the closing boundary are ignored
          break

        default:
          return
      }
    }

    // Deliver whatever token is still open at the end of this chunk.
    this.#emit('headerField', buffer, idx)
    this.#emit('headerValue', buffer, idx)
    this.#emit('partData', buffer, idx)

    this.index = index
    this.state = state
    this.flags = flags
  }

  /**
   * Signals that no more data will be written.
   *
   * Calls `$end()` when the parser stopped right after a part boundary or
   * right after the boundary token inside part data.
   *
   * @returns {Error|undefined} an Error (returned, not thrown) when the stream
   *   stopped in any other state than END
   */
  end() {
    const afterPartBoundary =
      this.state === STATE_DICT.HEADER_FIELD_START && this.index === 0
    const afterBoundaryToken =
      this.state === STATE_DICT.PART_DATA &&
      this.index === this.boundary.length

    if (afterPartBoundary || afterBoundaryToken) {
      this.$end()
    } else if (this.state !== STATE_DICT.END) {
      return new Error(
        'MultipartParser.end(): stream ended unexpectedly: ' + this.explain()
      )
    }
  }

  /**
   * @returns {string} human readable description of the current state
   */
  explain() {
    return 'state = ' + stateToString(this.state)
  }
}