From c6e5fbb662e16bb2c1ea9440e791342e932dd4ee Mon Sep 17 00:00:00 2001 From: yutent Date: Thu, 13 Apr 2023 01:18:11 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E7=9B=AE=E5=BD=95=E7=BB=93?= =?UTF-8?q?=E6=9E=84;=20=E7=B2=BE=E7=AE=80=E5=AD=97=E7=AC=A6=E4=B8=B2?= =?UTF-8?q?=E5=A4=84=E7=90=86;=E8=B0=83=E6=95=B4=E7=BB=93=E6=9E=9C?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 9 +- index.js | 2 - lib/utils/strings.js | 197 -------------------------------- package.json | 22 ++-- lib/deflate.js => src/gzip.js | 4 +- src/index.js | 2 + lib/inflate.js => src/ungzip.js | 11 +- {lib => src}/utils/common.js | 0 src/utils/strings.js | 75 ++++++++++++ {lib => src}/zlib/adler32.js | 2 - {lib => src}/zlib/constants.js | 0 {lib => src}/zlib/crc32.js | 2 - {lib => src}/zlib/deflate.js | 2 - {lib => src}/zlib/gzheader.js | 2 - {lib => src}/zlib/inffast.js | 2 - {lib => src}/zlib/inflate.js | 2 - {lib => src}/zlib/inftrees.js | 2 - {lib => src}/zlib/messages.js | 2 - {lib => src}/zlib/trees.js | 2 - {lib => src}/zlib/zstream.js | 2 - 20 files changed, 107 insertions(+), 235 deletions(-) delete mode 100644 index.js delete mode 100644 lib/utils/strings.js rename lib/deflate.js => src/gzip.js (98%) create mode 100644 src/index.js rename lib/inflate.js => src/ungzip.js (98%) rename {lib => src}/utils/common.js (100%) create mode 100644 src/utils/strings.js rename {lib => src}/zlib/adler32.js (99%) rename {lib => src}/zlib/constants.js (100%) rename {lib => src}/zlib/crc32.js (99%) rename {lib => src}/zlib/deflate.js (99%) rename {lib => src}/zlib/gzheader.js (99%) rename {lib => src}/zlib/inffast.js (99%) rename {lib => src}/zlib/inflate.js (99%) rename {lib => src}/zlib/inftrees.js (99%) rename {lib => src}/zlib/messages.js (99%) rename {lib => src}/zlib/trees.js (99%) rename {lib => src}/zlib/zstream.js (99%) diff --git a/README.md b/README.md index c123770..8db0b92 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ ### 版本同步状态: pako: ![pako](https://img.shields.io/npm/v/pako.svg) + @bytedo/gzip: ![@bytedo/gzip](https://img.shields.io/npm/v/@bytedo/gzip.svg) @@ -22,10 +23,14 @@ import { gzip , ungzip } from '@bytedo/gzip' // use importmap // or import { gzip , ungzip } from '//jscdn.ink/@bytedo/gzip/latest/index.js' +// 也可以单独引入 +import { gzip } from '//jscdn.ink/@bytedo/gzip/latest/gzip.js' +import { ungzip } from '//jscdn.ink/@bytedo/gzip/latest/ungzip.js' -let res = gzip('hello world') // return Uint8Array object -let txt = ungzip(res) // return Uint8Array object +let base64Str = gzip('hello world') // return base64 string + +let txt = ungzip(base64Str) // return hello world ``` diff --git a/index.js b/index.js deleted file mode 100644 index 4d753f9..0000000 --- a/index.js +++ /dev/null @@ -1,2 +0,0 @@ -export { gzip } from './lib/deflate.js' -export { ungzip } from './lib/inflate.js' diff --git a/lib/utils/strings.js b/lib/utils/strings.js deleted file mode 100644 index 7f8583d..0000000 --- a/lib/utils/strings.js +++ /dev/null @@ -1,197 +0,0 @@ -// Quick check if we can use fast array to bin string conversion -// -// - apply(Array) can fail on Android 2.2 -// - apply(Uint8Array) can fail on iOS 5.1 Safari -// -let STR_APPLY_UIA_OK = true - -try { - String.fromCharCode.apply(null, new Uint8Array(1)) -} catch (__) { - STR_APPLY_UIA_OK = false -} - -// Table with utf8 lengths (calculated by first byte of sequence) -// Note, that 5 & 6-byte values and some 4-byte values can not be represented in JS, -// because max possible codepoint is 0x10ffff -const _utf8len = new Uint8Array(256) -for (let q = 0; q < 256; q++) { - _utf8len[q] = - q >= 252 ? 6 : q >= 248 ? 5 : q >= 240 ? 4 : q >= 224 ? 3 : q >= 192 ? 2 : 1 -} -_utf8len[254] = _utf8len[254] = 1 // Invalid sequence start - -// convert string to array (typed, when possible) -export const string2buf = str => { - if (typeof TextEncoder === 'function' && TextEncoder.prototype.encode) { - return new TextEncoder().encode(str) - } - - let buf, - c, - c2, - m_pos, - i, - str_len = str.length, - buf_len = 0 - - // count binary size - for (m_pos = 0; m_pos < str_len; m_pos++) { - c = str.charCodeAt(m_pos) - if ((c & 0xfc00) === 0xd800 && m_pos + 1 < str_len) { - c2 = str.charCodeAt(m_pos + 1) - if ((c2 & 0xfc00) === 0xdc00) { - c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00) - m_pos++ - } - } - buf_len += c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : 4 - } - - // allocate buffer - buf = new Uint8Array(buf_len) - - // convert - for (i = 0, m_pos = 0; i < buf_len; m_pos++) { - c = str.charCodeAt(m_pos) - if ((c & 0xfc00) === 0xd800 && m_pos + 1 < str_len) { - c2 = str.charCodeAt(m_pos + 1) - if ((c2 & 0xfc00) === 0xdc00) { - c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00) - m_pos++ - } - } - if (c < 0x80) { - /* one byte */ - buf[i++] = c - } else if (c < 0x800) { - /* two bytes */ - buf[i++] = 0xc0 | (c >>> 6) - buf[i++] = 0x80 | (c & 0x3f) - } else if (c < 0x10000) { - /* three bytes */ - buf[i++] = 0xe0 | (c >>> 12) - buf[i++] = 0x80 | ((c >>> 6) & 0x3f) - buf[i++] = 0x80 | (c & 0x3f) - } else { - /* four bytes */ - buf[i++] = 0xf0 | (c >>> 18) - buf[i++] = 0x80 | ((c >>> 12) & 0x3f) - buf[i++] = 0x80 | ((c >>> 6) & 0x3f) - buf[i++] = 0x80 | (c & 0x3f) - } - } - - return buf -} - -// Helper -const buf2binstring = (buf, len) => { - // On Chrome, the arguments in a function call that are allowed is `65534`. - // If the length of the buffer is smaller than that, we can use this optimization, - // otherwise we will take a slower path. - if (len < 65534) { - if (buf.subarray && STR_APPLY_UIA_OK) { - return String.fromCharCode.apply( - null, - buf.length === len ? buf : buf.subarray(0, len) - ) - } - } - - let result = '' - for (let i = 0; i < len; i++) { - result += String.fromCharCode(buf[i]) - } - return result -} - -// convert array to string -export const buf2string = (buf, max) => { - const len = max || buf.length - - if (typeof TextDecoder === 'function' && TextDecoder.prototype.decode) { - return new TextDecoder().decode(buf.subarray(0, max)) - } - - let i, out - - // Reserve max possible length (2 words per char) - // NB: by unknown reasons, Array is significantly faster for - // String.fromCharCode.apply than Uint16Array. - const utf16buf = new Array(len * 2) - - for (out = 0, i = 0; i < len; ) { - let c = buf[i++] - // quick process ascii - if (c < 0x80) { - utf16buf[out++] = c - continue - } - - let c_len = _utf8len[c] - // skip 5 & 6 byte codes - if (c_len > 4) { - utf16buf[out++] = 0xfffd - i += c_len - 1 - continue - } - - // apply mask on first byte - c &= c_len === 2 ? 0x1f : c_len === 3 ? 0x0f : 0x07 - // join the rest - while (c_len > 1 && i < len) { - c = (c << 6) | (buf[i++] & 0x3f) - c_len-- - } - - // terminated by end of string? - if (c_len > 1) { - utf16buf[out++] = 0xfffd - continue - } - - if (c < 0x10000) { - utf16buf[out++] = c - } else { - c -= 0x10000 - utf16buf[out++] = 0xd800 | ((c >> 10) & 0x3ff) - utf16buf[out++] = 0xdc00 | (c & 0x3ff) - } - } - - return buf2binstring(utf16buf, out) -} - -// Calculate max possible position in utf8 buffer, -// that will not break sequence. If that's not possible -// - (very small limits) return max size as is. -// -// buf[] - utf8 bytes array -// max - length limit (mandatory); -export const utf8border = (buf, max) => { - max = max || buf.length - if (max > buf.length) { - max = buf.length - } - - // go back from last position, until start of sequence found - let pos = max - 1 - while (pos >= 0 && (buf[pos] & 0xc0) === 0x80) { - pos-- - } - - // Very small and broken sequence, - // return max, because we should return something anyway. - if (pos < 0) { - return max - } - - // If we came to start of buffer - that means buffer is too small, - // return max too. - if (pos === 0) { - return max - } - - return pos + _utf8len[buf[pos]] > max ? pos : max -} diff --git a/package.json b/package.json index f886a8a..76ee879 100644 --- a/package.json +++ b/package.json @@ -1,23 +1,27 @@ { "name": "@bytedo/gzip", "type": "module", - "description": "zlib port to javascript - fast, modularized, with browser support", + "description": "浏览器端的gzip库, fork于pako, 仅调整语法为esm, 并只保留gzip的导出。", "version": "2.1.0", "keywords": [ - "zlib", - "deflate", - "inflate", - "gzip" + "gzip", + "ungzip" ], "files": [ "dist/*" ], - "license": "(MIT AND Zlib)", - "repository": "bytedo/gzip", + "repository": { + "type": "git", + "url": "git+https://github.com/bytedo/gzip.git" + }, "scripts": { - "build": "esbuild index.js --minify --bundle --format=esm --target=esnext --outfile=dist/index.js" + "build:gzip": "esbuild src/gzip.js --minify --bundle --format=esm --target=esnext --outfile=dist/gzip.js", + "build:ungzip": "esbuild src/ungzip.js --minify --bundle --format=esm --target=esnext --outfile=dist/ungzip.js", + "build:all": "esbuild src/index.js --format=esm --outfile=dist/index.js", + "build": "npm run build:gzip && npm run build:ungzip && npm run build:all" }, "devDependencies": { "esbuild": "^0.17.16" - } + }, + "license": "(MIT AND Zlib)" } diff --git a/lib/deflate.js b/src/gzip.js similarity index 98% rename from lib/deflate.js rename to src/gzip.js index 32bcec3..15936d6 100644 --- a/lib/deflate.js +++ b/src/gzip.js @@ -1,6 +1,6 @@ import zlib_deflate from './zlib/deflate.js' import { flattenChunks } from './utils/common.js' -import { string2buf } from './utils/strings.js' +import { string2buf, buf2base64 } from './utils/strings.js' import msg from './zlib/messages.js' import ZStream from './zlib/zstream.js' @@ -357,7 +357,7 @@ function deflate(input, options) { function gzip(input, options) { options = options || {} options.gzip = true - return deflate(input, options) + return buf2base64(deflate(input, options)) } export { gzip } diff --git a/src/index.js b/src/index.js new file mode 100644 index 0000000..982ad20 --- /dev/null +++ b/src/index.js @@ -0,0 +1,2 @@ +export { gzip } from './gzip.js' +export { ungzip } from './ungzip.js' diff --git a/lib/inflate.js b/src/ungzip.js similarity index 98% rename from lib/inflate.js rename to src/ungzip.js index ee7129c..87681fd 100644 --- a/lib/inflate.js +++ b/src/ungzip.js @@ -1,6 +1,11 @@ import zlib_inflate from './zlib/inflate.js' import { flattenChunks } from './utils/common.js' -import { string2buf, utf8border, buf2string } from './utils/strings.js' +import { + utf8border, + string2buf, + buf2string, + base642buf +} from './utils/strings.js' import msg from './zlib/messages.js' import ZStream from './zlib/zstream.js' import GZheader from './zlib/gzheader.js' @@ -388,12 +393,12 @@ Inflate.prototype.onEnd = function (status) { function ungzip(input, options) { const inflator = new Inflate(options) - inflator.push(input) + inflator.push(base642buf(input)) // That will never happens, if you don't cheat with options :) if (inflator.err) throw inflator.msg || msg[inflator.err] - return inflator.result + return buf2string(inflator.result) } /** diff --git a/lib/utils/common.js b/src/utils/common.js similarity index 100% rename from lib/utils/common.js rename to src/utils/common.js diff --git a/src/utils/strings.js b/src/utils/strings.js new file mode 100644 index 0000000..3e6f261 --- /dev/null +++ b/src/utils/strings.js @@ -0,0 +1,75 @@ +const encoder = new TextEncoder() +const decoder = new TextDecoder() + +// Table with utf8 lengths (calculated by first byte of sequence) +// Note, that 5 & 6-byte values and some 4-byte values can not be represented in JS, +// because max possible codepoint is 0x10ffff +const _utf8len = new Uint8Array(256) +for (let q = 0; q < 256; q++) { + _utf8len[q] = + q >= 252 ? 6 : q >= 248 ? 5 : q >= 240 ? 4 : q >= 224 ? 3 : q >= 192 ? 2 : 1 +} +_utf8len[254] = _utf8len[254] = 1 // Invalid sequence start + +export function buf2base64(uint8) { + var bin = '' + for (var i = 0; i < uint8.length; i++) { + bin += String.fromCharCode(uint8[i]) + } + return btoa(bin) +} + +export function base642buf(base64) { + let bin = atob(base64) + let u8 = new Uint8Array(bin.length) + + for (let i = 0; i < bin.length; i++) { + u8[i] = bin[i].codePointAt(0) + } + + return u8 +} + +// convert string to array (typed, when possible) +export const string2buf = str => { + return encoder.encode(str) +} + +// convert array to string +export const buf2string = (buf, max) => { + let len = max || buf.length + return decoder.decode(buf.subarray(0, max)) +} + +// Calculate max possible position in utf8 buffer, +// that will not break sequence. If that's not possible +// - (very small limits) return max size as is. +// +// buf[] - utf8 bytes array +// max - length limit (mandatory); +export const utf8border = (buf, max) => { + max = max || buf.length + if (max > buf.length) { + max = buf.length + } + + // go back from last position, until start of sequence found + let pos = max - 1 + while (pos >= 0 && (buf[pos] & 0xc0) === 0x80) { + pos-- + } + + // Very small and broken sequence, + // return max, because we should return something anyway. + if (pos < 0) { + return max + } + + // If we came to start of buffer - that means buffer is too small, + // return max too. + if (pos === 0) { + return max + } + + return pos + _utf8len[buf[pos]] > max ? pos : max +} diff --git a/lib/zlib/adler32.js b/src/zlib/adler32.js similarity index 99% rename from lib/zlib/adler32.js rename to src/zlib/adler32.js index 58c03c3..852fc4b 100644 --- a/lib/zlib/adler32.js +++ b/src/zlib/adler32.js @@ -1,5 +1,3 @@ -'use strict' - // Note: adler32 takes 12% for level 0 and 2% for level 6. // It isn't worth it to make additional optimizations as in original. // Small size is preferable. diff --git a/lib/zlib/constants.js b/src/zlib/constants.js similarity index 100% rename from lib/zlib/constants.js rename to src/zlib/constants.js diff --git a/lib/zlib/crc32.js b/src/zlib/crc32.js similarity index 99% rename from lib/zlib/crc32.js rename to src/zlib/crc32.js index b7805b9..7ce2947 100644 --- a/lib/zlib/crc32.js +++ b/src/zlib/crc32.js @@ -1,5 +1,3 @@ -'use strict' - // Note: we can't get significant speed boost here. // So write code to minimize size - no pregenerated tables // and array tools dependencies. diff --git a/lib/zlib/deflate.js b/src/zlib/deflate.js similarity index 99% rename from lib/zlib/deflate.js rename to src/zlib/deflate.js index 5cd9bf5..ef2069a 100644 --- a/lib/zlib/deflate.js +++ b/src/zlib/deflate.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/gzheader.js b/src/zlib/gzheader.js similarity index 99% rename from lib/zlib/gzheader.js rename to src/zlib/gzheader.js index d1796f4..c8b6564 100644 --- a/lib/zlib/gzheader.js +++ b/src/zlib/gzheader.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/inffast.js b/src/zlib/inffast.js similarity index 99% rename from lib/zlib/inffast.js rename to src/zlib/inffast.js index fc383cc..13fd9e7 100644 --- a/lib/zlib/inffast.js +++ b/src/zlib/inffast.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/inflate.js b/src/zlib/inflate.js similarity index 99% rename from lib/zlib/inflate.js rename to src/zlib/inflate.js index 4322c8d..ac52015 100644 --- a/lib/zlib/inflate.js +++ b/src/zlib/inflate.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/inftrees.js b/src/zlib/inftrees.js similarity index 99% rename from lib/zlib/inftrees.js rename to src/zlib/inftrees.js index a4b8271..bd7f002 100644 --- a/lib/zlib/inftrees.js +++ b/src/zlib/inftrees.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/messages.js b/src/zlib/messages.js similarity index 99% rename from lib/zlib/messages.js rename to src/zlib/messages.js index 2c5240c..2c550a7 100644 --- a/lib/zlib/messages.js +++ b/src/zlib/messages.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/trees.js b/src/zlib/trees.js similarity index 99% rename from lib/zlib/trees.js rename to src/zlib/trees.js index 0c2f492..581df86 100644 --- a/lib/zlib/trees.js +++ b/src/zlib/trees.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // diff --git a/lib/zlib/zstream.js b/src/zlib/zstream.js similarity index 99% rename from lib/zlib/zstream.js rename to src/zlib/zstream.js index 380ee36..76b905d 100644 --- a/lib/zlib/zstream.js +++ b/src/zlib/zstream.js @@ -1,5 +1,3 @@ -'use strict' - // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin //