request/lib/multipart_parser.js

336 lines
8.1 KiB
JavaScript
Raw Permalink Normal View History

2023-10-30 16:59:54 +08:00
/**
* Streaming multipart body parser: a byte-level state machine fed via write()
* @author yutent<yutent.io@gmail.com>
* @date 2023/10/30 16:41:59
*/
2023-10-27 19:16:32 +08:00
2023-10-30 16:41:37 +08:00
// Parser states, numbered consecutively from 0 in the order listed below.
// `s` is the running counter; it ends up equal to the number of states.
let s = 0
const STATE_DICT = {}
for (const stateName of [
  'PARSER_UNINITIALIZED',
  'START',
  'START_BOUNDARY',
  'HEADER_FIELD_START',
  'HEADER_FIELD',
  'HEADER_VALUE_START',
  'HEADER_VALUE',
  'HEADER_VALUE_ALMOST_DONE',
  'HEADERS_ALMOST_DONE',
  'PART_DATA_START',
  'PART_DATA',
  'PART_END',
  'END'
]) {
  STATE_DICT[stateName] = s++
}
// Bit flags recorded while a boundary candidate is being matched.
// `f` is the running bit value; each flag takes the next power of two.
let f = 1
const FLAG_DICT = {}
FLAG_DICT.PART_BOUNDARY = f
f <<= 1
FLAG_DICT.LAST_BOUNDARY = f
2020-09-16 20:07:28 +08:00
2023-10-30 16:41:37 +08:00
// Byte values of the ASCII characters the parser compares against.
const LF = '\n'.charCodeAt(0)
const CR = '\r'.charCodeAt(0)
const SPACE = ' '.charCodeAt(0)
const HYPHEN = '-'.charCodeAt(0)
const COLON = ':'.charCodeAt(0)
const LETTER_A = 'a'.charCodeAt(0)
const LETTER_Z = 'z'.charCodeAt(0)
/**
 * Sets bit 0x20 of a byte value, which maps ASCII 'A'-'Z' onto 'a'-'z'.
 * Other values are returned with that bit set as well, so the result is
 * only meaningful as a lowercase letter after a range check.
 *
 * @param {number} c byte value
 * @returns {number} `c` with bit 0x20 set
 */
function lower(c) {
  const ASCII_CASE_BIT = 0x20
  const folded = c | ASCII_CASE_BIT
  return folded
}
/**
 * Reverse lookup in STATE_DICT: finds the state name for a numeric state.
 *
 * @param {number} value numeric state as stored in STATE_DICT
 * @returns {string|undefined} the matching key, or undefined when no state
 *   has that value
 */
function stateToString(value) {
  return Object.keys(STATE_DICT).find(key => STATE_DICT[key] === value)
}
2023-10-27 19:16:32 +08:00
2023-10-30 16:41:37 +08:00
/**
 * Incremental, byte-level tokenizer for multipart bodies.
 *
 * Chunks are fed through write(); parser position is kept on the instance
 * between calls, so a body may be split at arbitrary byte offsets.
 *
 * Results are reported by calling handler properties on the instance:
 *   $partBegin(), $headerField(buf), $headerValue(buf), $headerEnd(),
 *   $headersEnd(), $partData(buf), $partEnd(), $end()
 * A handler that has not been assigned is skipped. Data handlers may be
 * called several times for one logical value when it spans chunks.
 */
export class MultipartParser {
  // Delimiter bytes: CR LF "--" followed by the boundary string.
  boundary = null
  // Lookup keyed by byte value for every byte that occurs in `boundary`.
  boundaryChars = null
  // Scratch buffer holding the bytes of a boundary candidate in progress.
  lookbehind = null
  state = STATE_DICT.PARSER_UNINITIALIZED

  // Progress counter whose meaning depends on the current state.
  index = null
  flags = 0

  /**
   * @param {string} str boundary string, without the leading "--"
   */
  constructor(str) {
    this.boundary = Buffer.alloc(str.length + 4)
    this.boundary.write('\r\n--', 0)
    this.boundary.write(str, 4)
    this.lookbehind = Buffer.alloc(this.boundary.length + 8)
    this.state = STATE_DICT.START

    this.boundaryChars = {}
    for (let i = 0; i < this.boundary.length; i++) {
      this.boundaryChars[this.boundary[i]] = true
    }
  }

  /** Remembers offset `v` as the start of the data item named `k`. */
  #mark(k, v) {
    this[k + 'Mark'] = v
  }

  /** Calls the `$<name>` handler with `args` if one has been assigned. */
  #notify(name, ...args) {
    const handler = this['$' + name]
    if (typeof handler === 'function') {
      handler.apply(this, args)
    }
  }

  /**
   * Delivers the marked range of `buff` for the data item `name`.
   *
   * With `cleanup` the range ends at `idx` and the mark is removed (the item
   * is complete). Without it the range runs to the end of `buff` and the mark
   * is reset to 0 so the item continues at the start of the next chunk.
   * Nothing happens when no mark is set or the range is empty.
   */
  #emit(name, buff, idx, cleanup) {
    const mark = name + 'Mark'
    if (this[mark] === void 0) {
      return
    }

    const start = this[mark]
    let end = buff.length
    if (cleanup) {
      end = idx
      delete this[mark]
    } else {
      this[mark] = 0
    }

    if (start === end) {
      return
    }
    this.#notify(name, buff.slice(start, end))
  }

  /**
   * Parses one chunk of the body.
   *
   * Returns nothing. On input the state machine cannot accept, the method
   * returns early without reporting an error and without storing the
   * progress made in this call.
   *
   * @param {Buffer} buffer next chunk of the multipart body
   */
  write(buffer) {
    const len = buffer.length
    const lookbehind = this.lookbehind
    const boundary = this.boundary
    const boundaryChars = this.boundaryChars
    const boundaryLength = boundary.length
    const boundaryEnd = boundaryLength - 1
    let prevIndex = this.index
    let index = this.index
    let state = this.state
    let flags = this.flags
    let idx = 0
    let c

    for (idx = 0; idx < len; idx++) {
      c = buffer[idx]

      switch (state) {
        case STATE_DICT.PARSER_UNINITIALIZED:
          return

        case STATE_DICT.START:
          index = 0
          state = STATE_DICT.START_BOUNDARY
        // falls through
        case STATE_DICT.START_BOUNDARY:
          // The first delimiter has no leading CR LF, hence the +2 offset
          // into `boundary` and the -2 in the length checks.
          if (index === boundaryLength - 2) {
            if (c === HYPHEN) {
              flags |= FLAG_DICT.LAST_BOUNDARY
            } else if (c !== CR) {
              return
            }
            index++
            break
          } else if (index - 1 === boundaryLength - 2) {
            if (flags & FLAG_DICT.LAST_BOUNDARY && c === HYPHEN) {
              this.#notify('end')
              state = STATE_DICT.END
              flags = 0
            } else if (!(flags & FLAG_DICT.LAST_BOUNDARY) && c === LF) {
              index = 0
              this.#notify('partBegin')
              state = STATE_DICT.HEADER_FIELD_START
            } else {
              return
            }
            break
          }

          if (c !== boundary[index + 2]) {
            index = -2
          }
          if (c === boundary[index + 2]) {
            index++
          }
          break

        case STATE_DICT.HEADER_FIELD_START:
          state = STATE_DICT.HEADER_FIELD
          this.#mark('headerField', idx)
          index = 0
        // falls through
        case STATE_DICT.HEADER_FIELD:
          if (c === CR) {
            // A CR here is treated as the blank line that ends the headers.
            delete this.headerFieldMark
            state = STATE_DICT.HEADERS_ALMOST_DONE
            break
          }

          index++
          if (c === HYPHEN) {
            break
          }

          if (c === COLON) {
            if (index === 1) {
              // empty header field
              return
            }
            this.#emit('headerField', buffer, idx, true)
            state = STATE_DICT.HEADER_VALUE_START
            break
          }

          // Apart from '-' and ':', only ASCII letters are accepted.
          {
            const cl = lower(c)
            if (cl < LETTER_A || cl > LETTER_Z) {
              return
            }
          }
          break

        case STATE_DICT.HEADER_VALUE_START:
          if (c === SPACE) {
            break
          }

          this.#mark('headerValue', idx)
          state = STATE_DICT.HEADER_VALUE
        // falls through
        case STATE_DICT.HEADER_VALUE:
          if (c === CR) {
            this.#emit('headerValue', buffer, idx, true)
            this.#notify('headerEnd')
            state = STATE_DICT.HEADER_VALUE_ALMOST_DONE
          }
          break

        case STATE_DICT.HEADER_VALUE_ALMOST_DONE:
          if (c !== LF) {
            return
          }
          state = STATE_DICT.HEADER_FIELD_START
          break

        case STATE_DICT.HEADERS_ALMOST_DONE:
          if (c !== LF) {
            return
          }

          this.#notify('headersEnd')
          state = STATE_DICT.PART_DATA_START
          break

        case STATE_DICT.PART_DATA_START:
          state = STATE_DICT.PART_DATA
          this.#mark('partData', idx)
        // falls through
        case STATE_DICT.PART_DATA:
          prevIndex = index

          if (index === 0) {
            // Boyer-Moore derived skip: jump ahead in boundary-sized steps
            // while the probed byte cannot be part of the boundary.
            idx += boundaryEnd
            while (idx < len && !(buffer[idx] in boundaryChars)) {
              idx += boundaryLength
            }
            idx -= boundaryEnd
            c = buffer[idx]
          }

          if (index < boundaryLength) {
            if (boundary[index] === c) {
              if (index === 0) {
                // Payload seen so far ends where the candidate starts.
                this.#emit('partData', buffer, idx, true)
              }
              index++
            } else {
              index = 0
            }
          } else if (index === boundaryLength) {
            index++
            if (c === CR) {
              // CR = part boundary
              flags |= FLAG_DICT.PART_BOUNDARY
            } else if (c === HYPHEN) {
              // HYPHEN = end boundary
              flags |= FLAG_DICT.LAST_BOUNDARY
            } else {
              index = 0
            }
          } else if (index - 1 === boundaryLength) {
            if (flags & FLAG_DICT.PART_BOUNDARY) {
              index = 0
              if (c === LF) {
                // unset the PART_BOUNDARY flag
                flags &= ~FLAG_DICT.PART_BOUNDARY
                this.#notify('partEnd')
                this.#notify('partBegin')
                state = STATE_DICT.HEADER_FIELD_START
                break
              }
            } else if (flags & FLAG_DICT.LAST_BOUNDARY) {
              if (c === HYPHEN) {
                this.#notify('partEnd')
                this.#notify('end')
                state = STATE_DICT.END
                flags = 0
              } else {
                index = 0
              }
            } else {
              index = 0
            }
          }

          if (index > 0) {
            // While a candidate boundary is being matched, keep its bytes
            // in case it turns out to be a false lead.
            lookbehind[index - 1] = c
          } else if (prevIndex > 0) {
            // The candidate was a false lead, so the buffered bytes are
            // payload. Pass a copy: lookbehind is overwritten by the next
            // candidate, which would corrupt a chunk the consumer retained.
            this.#notify(
              'partData',
              Buffer.from(lookbehind.subarray(0, prevIndex))
            )
            prevIndex = 0
            // Forget flags recorded for the abandoned candidate; a leftover
            // PART_BOUNDARY would hide a later closing delimiter.
            flags = 0
            this.#mark('partData', idx)

            // Re-examine the current byte: it broke this sequence but may
            // be the first byte of a new one.
            idx--
          }

          break

        case STATE_DICT.END:
          break

        default:
          return
      }
    }

    // Flush whatever data item is still open at the end of this chunk.
    this.#emit('headerField', buffer, idx)
    this.#emit('headerValue', buffer, idx)
    this.#emit('partData', buffer, idx)

    this.index = index
    this.state = state
    this.flags = flags
  }

  /**
   * Signals that no more input will arrive.
   *
   * @returns {Error|undefined} an Error (returned, not thrown) when the
   *   parser is not at a position where the body may end; otherwise undefined
   */
  end() {
    const atPartStart =
      this.state === STATE_DICT.HEADER_FIELD_START && this.index === 0
    const atBoundary =
      this.state === STATE_DICT.PART_DATA &&
      this.index === this.boundary.length

    if (atPartStart || atBoundary) {
      this.#notify('end')
    } else if (this.state !== STATE_DICT.END) {
      return new Error(
        'MultipartParser.end(): stream ended unexpectedly: ' + this.explain()
      )
    }
  }

  /** @returns {string} description of the current state, for error messages */
  explain() {
    return 'state = ' + stateToString(this.state)
  }
}