From 30f0ca48e317b6a4bb558fc866b93bb8e99108f9 Mon Sep 17 00:00:00 2001 From: yutent Date: Mon, 30 Oct 2023 16:41:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=902.0=E7=89=88=E9=87=8D?= =?UTF-8?q?=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 +- index.js | 8 +- lib/index.js | 213 ++++++++++++++++++------------------ lib/json_parser.js | 13 ++- lib/multipart_parser.js | 231 ++++++++++++++++++++------------------- lib/urlencoded_parser.js | 5 +- 6 files changed, 240 insertions(+), 232 deletions(-) diff --git a/.gitignore b/.gitignore index 6975b91..9bac4aa 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ ._* .idea .vscode -.tmp +.tmp/ node_modules/ \ No newline at end of file diff --git a/index.js b/index.js index e5309bc..632edde 100644 --- a/index.js +++ b/index.js @@ -39,6 +39,8 @@ export default class Request { #req = null #res = null + #opts = {} + #query = null #body = null #cookies = Object.create(null) @@ -49,7 +51,7 @@ export default class Request { url = '' host = '127.0.0.1' - constructor(req, res) { + constructor(req, res, opts = {}) { this.method = req.method.toUpperCase() this.#req = req @@ -58,6 +60,8 @@ export default class Request { this.host = req.headers['host'] this.#cookies = parseCookie(this.headers['cookie'] || '') + Object.assign(this.#opts, opts) + this.#init() } @@ -114,7 +118,7 @@ export default class Request { contentType = this.header('content-type') || DEFAULT_FORM_TYPE - form = new Parser(this.#req, { uploadDir: tmpdir }) + form = new Parser(this.#req, { ...this.#opts, uploadDir: tmpdir }) form .on('field', (name, value) => { diff --git a/lib/index.js b/lib/index.js index 30a5812..a524bf4 100644 --- a/lib/index.js +++ b/lib/index.js @@ -16,6 +16,20 @@ function randomPath(uploadDir) { return join(uploadDir, name) } +function parseFilename(headerValue) { + let matches = headerValue.match(/\bfilename="(.*?)"($|; )/i) + if (!matches) { + return + } + + let filename = matches[1].slice(matches[1].lastIndexOf('\\') + 1) + filename = filename.replace(/%22/g, '"') + filename = filename.replace(/&#([\d]{4});/g, function (m, code) { + return String.fromCharCode(code) + }) + return filename +} + /* ------------------------------------- */ export default class IncomingForm extends EventEmitter { @@ -31,7 +45,7 @@ export default class IncomingForm extends EventEmitter { bytesExpected = null #parser = null - #pending = true + #pending = 0 #openedFiles = [] @@ -42,7 +56,6 @@ export default class IncomingForm extends EventEmitter { this.uploadDir = opts.uploadDir this.encoding = opts.encoding || 'utf-8' - this.multiples = opts.multiples || false // Parse headers and setup the parser, ready to start listening for data. this.writeHeaders(req.headers) @@ -134,8 +147,8 @@ export default class IncomingForm extends EventEmitter { file.open() this.#openedFiles.push(file) - - this.#pending = true + // 表单解析完的时候文件写入不一定完成了, 所以需要加入pending计数 + this.#pending++ part .on('data', buffer => { @@ -145,12 +158,13 @@ export default class IncomingForm extends EventEmitter { file.write(buffer) }) .on('end', () => { - console.log('file part end...') + if (part.ended) { + return + } + part.ended = true file.end(() => { - console.log('<><><><>', part.name, file) this.emit('file', part.name, file) - this.#pending = false - // this.#handleEnd() + this.#pending-- }) }) } @@ -199,118 +213,99 @@ export default class IncomingForm extends EventEmitter { } #createMultipartParser(boundary) { - let parser = new MultipartParser(boundary) let headerField, headerValue, part - parser - .on('partBegin', function () { - part = new Stream() - part.readable = true - part.headers = {} - part.name = null - part.filename = null - part.mime = null + this.#parser = new MultipartParser(boundary) - part.transferEncoding = 'binary' - part.transferBuffer = '' + this.#parser.$partBegin = function () { + part = new Stream() + part.readable = true + part.headers = {} + part.name = null + part.filename = null + part.mime = null - headerField = '' - headerValue = '' - }) - .on('headerField', (b, start, end) => { - headerField += b.toString(this.encoding, start, end) - }) - .on('headerValue', (b, start, end) => { - headerValue += b.toString(this.encoding, start, end) - }) - .on('headerEnd', () => { - headerField = headerField.toLowerCase() - part.headers[headerField] = headerValue + part.transferEncoding = 'binary' + part.transferBuffer = '' - var m = headerValue.match(/\bname="([^"]+)"/i) - if (headerField == 'content-disposition') { - if (m) { - part.name = m[1] + headerField = '' + headerValue = '' + } + + this.#parser.$headerField = b => { + headerField += b.toString(this.encoding) + } + + this.#parser.$headerValue = b => { + headerValue += b.toString(this.encoding) + } + + this.#parser.$headerEnd = () => { + headerField = headerField.toLowerCase() + part.headers[headerField] = headerValue + + let matches = headerValue.match(/\bname="([^"]+)"/i) + if (headerField == 'content-disposition') { + if (matches) { + part.name = matches[1] + } + + part.filename = parseFilename(headerValue) + } else if (headerField == 'content-type') { + part.mime = headerValue + } else if (headerField == 'content-transfer-encoding') { + part.transferEncoding = headerValue.toLowerCase() + } + + headerField = '' + headerValue = '' + } + + this.#parser.$headersEnd = () => { + switch (part.transferEncoding) { + case 'binary': + case '7bit': + case '8bit': + this.#parser.$partData = function (b) { + part.emit('data', b) } + this.#parser.$partEnd = function () { + part.emit('end') + } + break - part.filename = this._fileName(headerValue) - } else if (headerField == 'content-type') { - part.mime = headerValue - } else if (headerField == 'content-transfer-encoding') { - part.transferEncoding = headerValue.toLowerCase() - } + case 'base64': + this.#parser.$partData = function (b) { + part.transferBuffer += b.toString('ascii') - headerField = '' - headerValue = '' - }) - .on('headersEnd', () => { - switch (part.transferEncoding) { - case 'binary': - case '7bit': - case '8bit': - parser - .on('partData', function (b, start, end) { - part.emit('data', b.slice(start, end)) - }) - .on('partEnd', function () { - part.emit('end') - }) - break + // 确保offset的值能被4整除 + let offset = ~~(part.transferBuffer.length / 4) * 4 + part.emit( + 'data', + Buffer.from(part.transferBuffer.slice(0, offset), 'base64') + ) + part.transferBuffer = part.transferBuffer.slice(offset) + } + this.#parser.$partEnd = function () { + part.emit('data', Buffer.from(part.transferBuffer, 'base64')) + part.emit('end') + } + break - case 'base64': - parser - .on('partData', function (b, start, end) { - part.transferBuffer += b.slice(start, end).toString('ascii') + default: + return this.#handleError(new Error('unknown transfer-encoding')) + } - /* - four bytes (chars) in base64 converts to three bytes in binary - encoding. So we should always work with a number of bytes that - can be divided by 4, it will result in a number of buytes that - can be divided vy 3. - */ - var offset = parseInt(part.transferBuffer.length / 4, 10) * 4 - part.emit( - 'data', - Buffer.from( - part.transferBuffer.substring(0, offset), - 'base64' - ) - ) - part.transferBuffer = part.transferBuffer.substring(offset) - }) - .on('partEnd', function () { - part.emit('data', Buffer.from(part.transferBuffer, 'base64')) - part.emit('end') - }) - break + this.#handlePart(part) + } - default: - return this.#handleError(new Error('unknown transfer-encoding')) - } - - this.#handlePart(part) - }) - .on('end', () => { - if (this.#pending) { - setTimeout(_ => parser.emit('end')) - } else { - this.#handleEnd() - } - }) - - this.#parser = parser - } - - _fileName(headerValue) { - var m = headerValue.match(/\bfilename="(.*?)"($|; )/i) - if (!m) return - - var filename = m[1].substr(m[1].lastIndexOf('\\') + 1) - filename = filename.replace(/%22/g, '"') - filename = filename.replace(/&#([\d]{4});/g, function (m, code) { - return String.fromCharCode(code) - }) - return filename + this.#parser.$end = () => { + if (this.#pending > 0) { + setTimeout(_ => this.#parser.$end()) + } else { + this.#handleEnd() + } + } } #createUrlencodedParser() { diff --git a/lib/json_parser.js b/lib/json_parser.js index 664f5e9..779a2ad 100644 --- a/lib/json_parser.js +++ b/lib/json_parser.js @@ -19,10 +19,10 @@ export class JSONParser extends EventEmitter { try { fields = JSON.parse(data) } catch (e) { - try{ + try { // 非标准的json语法,尝试用 Function 解析 fields = Function(`try{return ${data}}catch(e){}`)() - }catch(err){} + } catch (err) {} } this.emit('field', false, fields) @@ -30,7 +30,14 @@ export class JSONParser extends EventEmitter { this.#buff = null } else { - this.emit('error', new Error(`The uploaded data is incomplete. Expected ${this.#byteLen}, Received ${this.#buff.length} .`)) + this.emit( + 'error', + new Error( + `The uploaded data is incomplete. Expected ${ + this.#byteLen + }, Received ${this.#buff.length} .` + ) + ) } } } diff --git a/lib/multipart_parser.js b/lib/multipart_parser.js index cea3494..9909bbf 100644 --- a/lib/multipart_parser.js +++ b/lib/multipart_parser.js @@ -1,47 +1,49 @@ import { EventEmitter } from 'node:events' -var s = 0, - STATE_DICT = { - PARSER_UNINITIALIZED: s++, - START: s++, - START_BOUNDARY: s++, - HEADER_FIELD_START: s++, - HEADER_FIELD: s++, - HEADER_VALUE_START: s++, - HEADER_VALUE: s++, - HEADER_VALUE_ALMOST_DONE: s++, - HEADERS_ALMOST_DONE: s++, - PART_DATA_START: s++, - PART_DATA: s++, - PART_END: s++, - END: s++ - }, - f = 1, - F = { - PART_BOUNDARY: f, - LAST_BOUNDARY: (f *= 2) - }, - LF = 10, - CR = 13, - SPACE = 32, - HYPHEN = 45, - COLON = 58, - A = 97, - Z = 122, - lower = function (c) { - return c | 0x20 - } +let s = 0 +const STATE_DICT = { + PARSER_UNINITIALIZED: s++, + START: s++, + START_BOUNDARY: s++, + HEADER_FIELD_START: s++, + HEADER_FIELD: s++, + HEADER_VALUE_START: s++, + HEADER_VALUE: s++, + HEADER_VALUE_ALMOST_DONE: s++, + HEADERS_ALMOST_DONE: s++, + PART_DATA_START: s++, + PART_DATA: s++, + PART_END: s++, + END: s++ +} +let f = 1 +const FLAG_DICT = { + PART_BOUNDARY: f, + LAST_BOUNDARY: (f *= 2) +} - function stateToString(stateNumber) { - for (let state in STATE_DICT) { - let number = STATE_DICT[state] - if (number === stateNumber) { - return state - } +const LF = 10 +const CR = 13 +const SPACE = 32 +const HYPHEN = 45 +const COLON = 58 +const LETTER_A = 97 +const LETTER_Z = 122 + +function lower(c) { + return c | 0x20 +} + +function stateToString(value) { + for (let key in STATE_DICT) { + let number = STATE_DICT[key] + if (number === value) { + return key } } +} -export class MultipartParser extends EventEmitter { +export class MultipartParser { boundary = null boundaryChars = null lookbehind = null @@ -50,11 +52,7 @@ export class MultipartParser extends EventEmitter { index = null flags = 0 - - constructor(str) { - super() - this.boundary = Buffer.alloc(str.length + 4) this.boundary.write('\r\n--', 0) this.boundary.write(str, 4) @@ -67,8 +65,32 @@ export class MultipartParser extends EventEmitter { } } + #mark(k, v) { + this[k + 'Mark'] = v + } + + #emit(name, buff, idx, cleanup) { + let mark = name + 'Mark' + if (this[mark] !== void 0) { + let start = this[mark] + let end = buff.length + + if (cleanup) { + end = idx + delete this[mark] + } else { + this[mark] = 0 + } + + if (start === end) { + return + } + this['$' + name](buff.slice(start, end)) + } + } + write(buffer) { - var i = 0, + let idx = 0, len = buffer.length, prevIndex = this.index, index = this.index, @@ -83,55 +105,37 @@ export class MultipartParser extends EventEmitter { c, cl - let mark = (name) => { - this[name + 'Mark'] = i - }, - dataCallback = (name, clear) => { - var markSymbol = name + 'Mark' - if ((markSymbol in this)) { - if (clear) { - this.emit(name, buffer, this[markSymbol], i) - delete this[markSymbol] - } else { - this.emit(name, buffer, this[markSymbol], buffer.length) - this[markSymbol] = 0 - } - } + for (idx = 0; idx < len; idx++) { + c = buffer[idx] - } - - // console.log('???? ', state, 'len: ', len); - for (i = 0; i < len; i++) { - c = buffer[i] switch (state) { case STATE_DICT.PARSER_UNINITIALIZED: - return i + return case STATE_DICT.START: index = 0 state = STATE_DICT.START_BOUNDARY case STATE_DICT.START_BOUNDARY: - // console.log('=====>>>', index, c, boundary); if (index == boundary.length - 2) { if (c == HYPHEN) { - flags |= F.LAST_BOUNDARY + flags |= FLAG_DICT.LAST_BOUNDARY } else if (c != CR) { - return i + return } index++ break } else if (index - 1 == boundary.length - 2) { - if (flags & F.LAST_BOUNDARY && c == HYPHEN) { - this.emit('end') + if (flags & FLAG_DICT.LAST_BOUNDARY && c == HYPHEN) { + this.$end() state = STATE_DICT.END flags = 0 - } else if (!(flags & F.LAST_BOUNDARY) && c == LF) { + } else if (!(flags & FLAG_DICT.LAST_BOUNDARY) && c == LF) { index = 0 - this.emit('partBegin') + this.$partBegin() state = STATE_DICT.HEADER_FIELD_START } else { - return i + return } break } @@ -143,10 +147,10 @@ export class MultipartParser extends EventEmitter { index++ } break - + case STATE_DICT.HEADER_FIELD_START: state = STATE_DICT.HEADER_FIELD - mark('headerField') + this.#mark('headerField', idx) index = 0 case STATE_DICT.HEADER_FIELD: @@ -164,72 +168,72 @@ export class MultipartParser extends EventEmitter { if (c == COLON) { if (index == 1) { // empty header field - return i + return } - dataCallback('headerField', true) + this.#emit('headerField', buffer, idx, true) state = STATE_DICT.HEADER_VALUE_START break } cl = lower(c) - if (cl < A || cl > Z) { - return i + if (cl < LETTER_A || cl > LETTER_Z) { + return } break - + case STATE_DICT.HEADER_VALUE_START: if (c == SPACE) { break } - mark('headerValue') + this.#mark('headerValue', idx) state = STATE_DICT.HEADER_VALUE - + case STATE_DICT.HEADER_VALUE: if (c == CR) { - dataCallback('headerValue', true) - this.emit('headerEnd') + this.#emit('headerValue', buffer, idx, true) + this.$headerEnd() state = STATE_DICT.HEADER_VALUE_ALMOST_DONE } break case STATE_DICT.HEADER_VALUE_ALMOST_DONE: if (c != LF) { - return i + return } state = STATE_DICT.HEADER_FIELD_START break case STATE_DICT.HEADERS_ALMOST_DONE: if (c != LF) { - return i + return } - this.emit('headersEnd') + this.$headersEnd() state = STATE_DICT.PART_DATA_START break case STATE_DICT.PART_DATA_START: state = STATE_DICT.PART_DATA - mark('partData') + this.#mark('partData', idx) case STATE_DICT.PART_DATA: prevIndex = index if (index === 0) { // boyer-moore derrived algorithm to safely skip non-boundary data - i += boundaryEnd - while (i < bufferLength && !(buffer[i] in boundaryChars)) { - i += boundaryLength + idx += boundaryEnd + while (idx < bufferLength && !(buffer[idx] in boundaryChars)) { + idx += boundaryLength } - i -= boundaryEnd - c = buffer[i] + idx -= boundaryEnd + c = buffer[idx] } if (index < boundary.length) { if (boundary[index] == c) { if (index === 0) { - dataCallback('partData', true) + this.#emit('partData', buffer, idx, true) } index++ } else { @@ -239,28 +243,28 @@ export class MultipartParser extends EventEmitter { index++ if (c == CR) { // CR = part boundary - flags |= F.PART_BOUNDARY + flags |= FLAG_DICT.PART_BOUNDARY } else if (c == HYPHEN) { // HYPHEN = end boundary - flags |= F.LAST_BOUNDARY + flags |= FLAG_DICT.LAST_BOUNDARY } else { index = 0 } } else if (index - 1 == boundary.length) { - if (flags & F.PART_BOUNDARY) { + if (flags & FLAG_DICT.PART_BOUNDARY) { index = 0 if (c == LF) { // unset the PART_BOUNDARY flag - flags &= ~F.PART_BOUNDARY - this.emit('partEnd') - this.emit('partBegin') + flags &= ~FLAG_DICT.PART_BOUNDARY + this.$partEnd() + this.$partBegin() state = STATE_DICT.HEADER_FIELD_START break } - } else if (flags & F.LAST_BOUNDARY) { + } else if (flags & FLAG_DICT.LAST_BOUNDARY) { if (c == HYPHEN) { - this.emit('partEnd') - this.emit('end') + this.$partEnd() + this.$end() state = STATE_DICT.END flags = 0 } else { @@ -278,43 +282,42 @@ export class MultipartParser extends EventEmitter { } else if (prevIndex > 0) { // if our boundary turned out to be rubbish, the captured lookbehind // belongs to partData - this.emit('partData', lookbehind, 0, prevIndex) + + this.$partData(lookbehind.slice(0, prevIndex)) prevIndex = 0 - mark('partData') + this.#mark('partData', idx) // reconsider the current character even so it interrupted the sequence // it could be the beginning of a new sequence - i-- + idx-- } break - + case STATE_DICT.END: break default: - return i + return } } - dataCallback('headerField') - dataCallback('headerValue') - dataCallback('partData') + this.#emit('headerField', buffer, idx) + this.#emit('headerValue', buffer, idx) + this.#emit('partData', buffer, idx) this.index = index this.state = state this.flags = flags - - } end() { - if ( (this.state === STATE_DICT.HEADER_FIELD_START && this.index === 0) || - (this.state === STATE_DICT.PART_DATA && this.index == this.boundary.length) + (this.state === STATE_DICT.PART_DATA && + this.index == this.boundary.length) ) { - this.emit('end') + this.$end() } else if (this.state !== STATE_DICT.END) { return new Error( 'MultipartParser.end(): stream ended unexpectedly: ' + this.explain() diff --git a/lib/urlencoded_parser.js b/lib/urlencoded_parser.js index 1da0c39..78c61d0 100644 --- a/lib/urlencoded_parser.js +++ b/lib/urlencoded_parser.js @@ -17,11 +17,10 @@ export class UrlencodedParser extends EventEmitter { end() { let data = this.#buff.toString() let fields = parse(data) - + this.#buff = null - + this.emit('field', fields) this.emit('end') - } }