完成词库下载

master
yutent 2022-03-21 16:46:49 +08:00
parent de40e1a905
commit ecbb76a48f
11 changed files with 24207 additions and 49692 deletions

View File

@ -19,6 +19,10 @@ wc-switch {
.flex {
display: flex;
&.wrap {
flex-wrap: wrap;
}
&.column {
flex-direction: column;
}

View File

@ -1 +1 @@
body{line-height:1.5;font-size:14px;color:var(--color-dark-1)}a{color:inherit;text-decoration:none}wc-switch{cursor:pointer}.app{width:100%;height:100vh}.flex{display:flex}.flex.column{flex-direction:column}.flex.ac{justify-content:center}.flex.alc{align-items:center}.flex.acc{justify-content:center;align-items:center}.flex.asc{justify-content:space-between;align-items:center}.wrapper{width:1024px}.topbar{width:100%;height:64px;background:#fff;box-shadow:0 6px 12px rgba(0,0,0,.05)}.topbar .logo{font-size:24px;color:var(--color-red-1)}.topbar .logo span{font-size:14px}.topbar .navs{font-size:16px}.topbar .navs .nav{margin-left:32px;cursor:pointer;transition:color .2s linear}.topbar .navs .nav.active,.topbar .navs .nav:hover{text-decoration:underline;color:var(--color-red-1)}.footer{width:100%;height:64px;border-top:1px solid var(--color-plain-2)}.main{overflow:hidden;flex:1}.main .wrapper{height:100%}.main .table-info{width:100%;height:64px}.main .table-info b{color:var(--color-red-1)}.main .table-info .download{margin-left:32px}.main .search{width:100%}.main .search .field{margin-top:16px}.main .search .field.result{line-height:2;font-family:Menlo;word-wrap:break-word;white-space:pre-wrap}.main .search .field wc-input{flex:1}.main .panel{overflow:hidden;flex:1;margin-top:32px}.main .panel .custom-file{position:relative;width:160px}.main .panel .custom-file input{position:absolute;width:100%;height:100%;opacity:0}.main .panel .tips{color:var(--color-orange-3)}.main .panel .scroll-view{overflow:hidden;flex:1}.main .panel .preview{margin-top:16px;font-family:Menlo;word-break:keep-all;white-space:pre-wrap}.main .about{padding:32px 16px}.main .about h2{font-size:24px}.main .about .logs .version{margin-top:32px;line-height:2;font-weight:bold;font-size:18px}.main .about .logs dd{color:var(--color-grey-3)}.main .about .logs 
ol{padding-left:2em;list-style:decimal}.github{position:fixed;right:-30px;top:20px;z-index:9;width:120px;height:22px;line-height:22px;text-align:center;color:#fff;background:var(--color-dark-1);transform:rotate(45deg)}.download-layer{width:640px;padding-bottom:64px;background:#fff}.download-layer .field{margin-top:16px;padding:0 16px}.download-layer .field .label{min-width:120px;padding-right:16px;line-height:32px;text-align:right;font-weight:bold;color:var(--color-grey-3)}.download-layer .field .label::after{content:" : "}.download-layer .field .tips{margin-left:16px;line-height:32px;color:var(--color-orange-3)}.download-layer .field.ctrol{padding:0 128px;margin-top:64px}@media screen and (max-width: 1024px){.topbar,.main{padding:0 16px}.wrapper{width:100%}.footer .wrapper{flex-direction:column;align-items:center;justify-content:center}}
body{line-height:1.5;font-size:14px;color:var(--color-dark-1)}a{color:inherit;text-decoration:none}wc-switch{cursor:pointer}.app{width:100%;height:100vh}.flex{display:flex}.flex.wrap{flex-wrap:wrap}.flex.column{flex-direction:column}.flex.ac{justify-content:center}.flex.alc{align-items:center}.flex.acc{justify-content:center;align-items:center}.flex.asc{justify-content:space-between;align-items:center}.wrapper{width:1024px}.topbar{width:100%;height:64px;background:#fff;box-shadow:0 6px 12px rgba(0,0,0,.05)}.topbar .logo{font-size:24px;color:var(--color-red-1)}.topbar .logo span{font-size:14px}.topbar .navs{font-size:16px}.topbar .navs .nav{margin-left:32px;cursor:pointer;transition:color .2s linear}.topbar .navs .nav.active,.topbar .navs .nav:hover{text-decoration:underline;color:var(--color-red-1)}.footer{width:100%;height:64px;border-top:1px solid var(--color-plain-2)}.main{overflow:hidden;flex:1}.main .wrapper{height:100%}.main .table-info{width:100%;height:64px}.main .table-info b{color:var(--color-red-1)}.main .table-info .download{margin-left:32px}.main .search{width:100%}.main .search .field{margin-top:16px}.main .search .field.result{line-height:2;font-family:Menlo;word-wrap:break-word;white-space:pre-wrap}.main .search .field.result b{letter-spacing:2px;color:var(--color-blue-1)}.main .search .field wc-input{flex:1}.main .panel{overflow:hidden;flex:1;margin-top:32px}.main .panel .custom-file{position:relative;width:160px}.main .panel .custom-file input{position:absolute;width:100%;height:100%;opacity:0}.main .panel .tips{color:var(--color-orange-3)}.main .panel .scroll-view{overflow:hidden;flex:1}.main .panel .preview{margin-top:16px;font-family:Menlo;word-break:keep-all;white-space:pre-wrap}.main .about{padding:32px 16px}.main .about h2{font-size:24px}.main .about .logs .version{margin-top:32px;line-height:2;font-weight:bold;font-size:18px}.main .about .logs dd{color:var(--color-grey-3)}.main .about .logs 
ol{padding-left:2em;list-style:decimal}.github{position:fixed;right:-30px;top:20px;z-index:9;width:120px;height:22px;line-height:22px;text-align:center;color:#fff;background:var(--color-dark-1);transform:rotate(45deg)}.download-layer{width:640px;padding-bottom:64px;background:#fff}.download-layer .field{margin-top:16px;padding:0 16px}.download-layer .field .label{min-width:120px;padding-right:16px;line-height:32px;text-align:right;font-weight:bold;color:var(--color-grey-3)}.download-layer .field .label::after{content:" : "}.download-layer .field .value{line-height:32px}.download-layer .field .tips{margin-left:16px;line-height:32px;color:var(--color-orange-3)}.download-layer .field.ctrol{padding:0 128px;margin-top:64px}@media screen and (max-width: 1024px){.topbar,.main{padding:0 16px}.wrapper{width:100%}.footer .wrapper{flex-direction:column;align-items:center;justify-content:center}}

View File

@ -32,6 +32,11 @@
font-family: Menlo;
word-wrap: break-word;
white-space: pre-wrap;
b {
letter-spacing: 2px;
color: var(--color-blue-1);
}
}
wc-input {
@ -138,6 +143,10 @@
}
}
.value {
line-height: 32px;
}
.tips {
margin-left: 16px;
line-height: 32px;

View File

@ -212,7 +212,6 @@
字典
字段
字符串
字符串
字符型
字节
自动回收

File diff suppressed because it is too large Load Diff

186
data/nethot.txt Normal file
View File

@ -0,0 +1,186 @@
俺也一样
保安在哪里
背刺
被邻居大妈支配的恐惧
比博燃
茶艺大师
车友车行
陈养鱼
吃席
抽烟抽骆驼
创创子梗
此处张新成演技炸裂
打工人
打开了,但没完全打开
大肠包小肠
大庆油田
淡了淡了
当思维运行速度超过智力
迪迦来接我了
跌妈不认
懂王
逗鹅冤
对线
多啦AV梦
夺笋
凡尔赛
凡尔赛文学
粉球侠
疯狂星期四
佛媛病媛
干点正事吧巴巴托斯
干饭人
干饭人干饭魂
哥谭噩梦
胳膊抬一下我瘾犯了
狗妈
官宣人先欠着
耗子尾汁
耗子喂汁
和平精英五大灵童
黑茶青茶菊花茶
画圣诞树
欢迎来到对抗路
鸡蛋挂面不锈钢盆
鸡汤来了
鸡娃
吉吉国王梗大全
剑谱最终页无爱即是神
健达奇趣蛋
江爽体操服
脚艺人
街溜子华子
杰哥不要
金钱豹
劲夫
精致
韭零后
就是玩儿
拒签吧我的姐姐
绝绝子
堪忧踹
看夜光手表
孔雀感冒
夸戳
快乐星球
来根华子
岚语
拦个女的折磨
蓝桉已遇释槐鸟
蓝精灵竟是我自己
老母猪电源
老尼姑看剑
老潘森已经开始哦哦哦哦哦哦
老色批
冷少下班了吗
李雨晴的妈妈
立刻有
梁志超奶奶
林黛玉倒拔垂杨柳
妈妈爱你
麦乐鸡侠
卖圣诞帽
毛毛歌
没文化可以学开车吗
每个人身上都有毛毛
迷路的小灰灰
拿来吧
拿来吧你
那我走
内卷
泥头车创死
你礼貌吗
你喜欢银杏吗
你要永远相信光
你要永远相信光的存在世上
年轻人不讲武德
您看我还有机会吗
女生送的十根金条
派蒙
潘嘎之交
泡过可乐的小皮筋
貔貅体质
破防
破天元神丶狗霸天
普信男
七夕取消
气氛组
请我吃个外卖吗
秋天的第一杯奶
秋天的第一杯奶茶和雪花
秋天的第一个柚子
秋天太适合恋爱
雀食蟀
人类高质量男性
三十倍胡桃摇
砂糖桔
闪电五连鞭
社会性死亡
社恐,社死
谁来帮帮胡桃
神兽
十根烤肠
十三香
室内流浪汉
四次元少女
苏州科技大学青钢影
笋都被你夺完了
塔姆带福,思路全无
糖果超甜
躺平
天龙人
天水的苹果
舔狗
贴贴
完了芭比Q了
玩得好就是挂吗
王德发
王思聪舔狗
王友梅
王者荣耀不孝有三
网络公主
网抑云时间到
喂三点几了饮茶先啦
我辈义不容辞
我不是本地的
我大意了啊没有闪
我命油我不油天
我能去你家过年吗
乌鸡哥
想你的液
小白船警告
小丑竟是我自己
谢谢你米哈游
谢谢你宁人
谢谢泡泡
心机之蛙一直摸你肚子
修狗
秀粉
杨颖霉霉
药酱
椰羊甘雨
爷青回
一百块钱是我腰疼的分界线
一起爬山吗
依萍找她爸要钱那天的雨
已回未支付
异世相遇,尽享美味
勇敢牛牛不怕困难
油麦
御三家
元宇宙
原来你也玩原神
原年人
原神
原神芭芭拉肉身解咒
在瓜田里迷了路
咱就是说
召唤神龙
这里是缅甸北部
抓鸭子
赘婿吉吉国王
EMO
JOJO
LSP
Siri型社交
YYDS

File diff suppressed because it is too large Load Diff

View File

@ -29,8 +29,8 @@
<main class="main flex ac">
<div class="wrapper flex column">
<header class="table-info flex alc">
现有单字GB2312(<b>{{gb2312}}</b>)个 + GBK(<b>{{gbk - gb2312}}</b>)个、异形字<b>{{dy}}</b>个、词组<b>{{words}}</b>个、emoji<b>{{emoji}}</b>个 !
<header class="table-info flex alc wrap">
现有单字GB2312(<b>{{gb2312}}</b>)个 + GBK(<b>{{gbk - gb2312}}</b>)个、异形字<b>{{dy}}</b>个、词组<b>{{words}}</b>个、网络热词<b>{{nethot}}</b>个、计算机术语<b>{{code}}</b>个、emoji<b>{{emoji}}</b>个 !
<wc-link class="download" type="primary" underline @click="openDownloadPanel">下载词库</wc-link>
</header>
@ -46,7 +46,7 @@
</wc-radio-group>
</section>
<section class="field result">{{result}}</section>
<section class="field result" :html="result"></section>
</div>
@ -60,7 +60,7 @@
</div>
<wc-scroll class="scroll-view">
<div class="preview">{{preview}}</div>
<div class="preview" :text="preview"></div>
</wc-scroll>
</div>
@ -99,7 +99,7 @@
<wc-checkbox readonly value="dy">异形字库</wc-checkbox>
<wc-checkbox value="emoji">emoji</wc-checkbox>
<wc-checkbox value="nethot">网络热词</wc-checkbox>
<wc-checkbox value="tech">计算机术语</wc-checkbox>
<wc-checkbox value="code">计算机术语</wc-checkbox>
<wc-checkbox value="personal" type="info">个人词库(自主上传的)</wc-checkbox>
</wc-checkbox-group>
</section>
@ -121,8 +121,13 @@
<section class="field flex">
<span class="label">是否生成拼音</span>
<wc-switch :duplex="dlOpt.pinyin"></wc-switch>
<span class="tips">(输入法支持临时拼音时, 请勾选)</span>
<wc-switch :duplex="dlOpt.pinyin" disabled></wc-switch>
<span class="tips">(输入法支持临时拼音时, 请勾选)(功能暂未支持, 多单字词库生成中...)</span>
</section>
<section class="field flex">
<span class="label">总计</span>
<span class="value">{{total | number(0)}} 个</span>
</section>
<section class="field flex asc ctrol">

View File

@ -14,7 +14,7 @@ import '//unpkg.yutent.top/@bytedo/wcui/dist/form/checkbox.js'
import '//unpkg.yutent.top/@bytedo/wcui/dist/form/switch.js'
import fetch from '//unpkg.yutent.top/@bytedo/fetch/dist/index.js'
import { Enum, saveFile, SString } from './lib/core.js'
import { Enum, saveFile, SString, createCode } from './lib/core.js'
import FIXED_86F from './lib/86_fixed.js'
const VER_86 = '86'
@ -27,6 +27,8 @@ const WB_TABLE_86F = new Enum(FIXED_86F)
const WB_WORDS = new Enum()
const WB_DY = new Enum()
const WB_EMOJI = new Enum()
const WB_NET = new Enum()
const WB_CODE = new Enum()
Anot({
$id: 'app',
@ -36,6 +38,8 @@ Anot({
words: 0,
dy: 0,
emoji: 0,
nethot: 0,
code: 0,
result: '',
filter: {
text: '',
@ -45,20 +49,29 @@ Anot({
pos: 'front',
version: VER_86,
reverse: true,
pinyin: true,
pinyin: false,
tables: ['2312', 'words', 'dy']
},
total: 0,
preview: ''
},
watch: {
'dlOpt.tables'() {
this.calculate()
}
},
mounted() {
Promise.all([
fetch('./data/gb2312.txt').then(r => r.text()),
fetch('./data/gbk.txt').then(r => r.text()),
fetch('./data/words.txt').then(r => r.text()),
fetch('./data/dy.txt').then(r => r.text()),
fetch('./data/extra.txt').then(r => r.text()),
fetch('./data/emoji.txt').then(r => r.text())
]).then(([gb2312, gbk, words, dy, extra, emoji]) => {
fetch('./data/emoji.txt').then(r => r.text()),
fetch('./data/nethot.txt').then(r => r.text()),
fetch('./data/code.txt').then(r => r.text())
]).then(([gb2312, gbk, words, dy, emoji, nethot, code]) => {
//
gb2312.split('\n').forEach(it => {
@ -80,19 +93,20 @@ Anot({
WB_TABLE_GBK.add(k, it)
}
})
WB_TABLE_GBK.concat(WB_TABLE_2312)
//
;(words + extra).split('\n').forEach(it => {
words.split('\n').forEach(it => {
it = it.split(' ')
let k = it.shift()
if (k) {
WB_WORDS.add(k, it)
WB_WORDS.add(k, createCode(WB_TABLE_GBK, k))
}
})
console.log(WB_WORDS)
dy.split('\n').forEach(it => {
it = it.split(' ')
@ -113,13 +127,38 @@ Anot({
}
})
WB_TABLE_GBK.concat(WB_TABLE_2312)
nethot.split('\n').forEach(it => {
it = it.split(' ')
let k = it.shift()
if (k) {
WB_NET.add(k, createCode(WB_TABLE_GBK, k))
}
})
code.split('\n').forEach(it => {
it = it.split(' ')
let k = it.shift()
if (k) {
WB_CODE.add(k, createCode(WB_TABLE_GBK, k))
}
})
this.gb2312 = WB_TABLE_2312.length
this.gbk = WB_TABLE_GBK.length
this.words = WB_WORDS.length
this.dy = WB_DY.length
this.emoji = WB_EMOJI.length
this.nethot = WB_NET.length
this.code = WB_CODE.length
window.WB_WORDS = WB_WORDS
window.WB_NET = WB_NET
window.WB_CODE = WB_CODE
this.calculate()
})
},
@ -142,15 +181,18 @@ Anot({
text = text.replace(/[\sa-z]/g, '')
}
text = new SString(text)
if (reverse || text.length === 1) {
console.log(text, text + '')
res = [WB_TABLE_GBK.get(text)]
if (version === VER_86F) {
resf = [WB_TABLE_86F.get(text)]
}
} else {
res = text.split('').map(t => WB_TABLE_GBK.get(t))
res = text.split().map(t => WB_TABLE_GBK.get(t))
if (version === VER_86F) {
resf = text.split('').map(t => WB_TABLE_86F.get(t))
resf = text.split().map(t => WB_TABLE_86F.get(t))
}
}
@ -164,9 +206,9 @@ Anot({
res = `${text} 】👉\t${res[0]
.map(
t =>
`${t}(${(resf && resf[0] ? WB_TABLE_86F.get(t) : WB_TABLE_GBK.get(t))
`${t}(<b>${(resf && resf[0] ? WB_TABLE_86F.get(t) : WB_TABLE_GBK.get(t))
.join('、')
.toUpperCase()})`
.toUpperCase()}</b>)`
)
.join('\t\t')}`
} else {
@ -183,17 +225,58 @@ Anot({
res = res
.map((it, i) => {
if (it) {
return `${text[i]} 】👉\t${it
.map(t => `${WB_CODE_NAME[t.length]}: ${t.toUpperCase()}`)
return `${text.at(i)} 】👉\t${it
.map(t => `${WB_CODE_NAME[t.length]}: <b>${t.toUpperCase()}</b>`)
.join('\t\t')}`
} else {
return `${text[i]} 】👉\t无结果, 请检查你的输入是否正确, 如果确认无误, 可以反馈缺失字库。`
return `${text.at(
i
)} 👉\t无结果, 请检查你的输入是否正确, 如果确认无误, 可以反馈缺失字库`
}
})
.join('\n')
// 词库查询
{
let extra =
WB_WORDS.get(text) ||
WB_DY.get(text) ||
WB_EMOJI.get(text) ||
WB_NET.get(text) ||
WB_CODE.get(text)
if (extra) {
let t = extra.shift()
res += `\n\n${'-'.repeat(6)} 词库查询结果 ${'-'.repeat(32)}\n${text} 】👉\t${
WB_CODE_NAME[t.length]
}: <b>${t.toUpperCase()}</b>`
}
}
}
this.result = `查询结果: \n${res}`
this.result = `${'-'.repeat(6)} 字库查询结果 ${'-'.repeat(32)}\n${res}`
},
calculate() {
var num = this.gb2312
if (this.dlOpt.tables.includes('gbk')) {
num = this.gbk
}
num += this.words
num += this.dy
if (this.dlOpt.tables.includes('emoji')) {
num += this.emoji
}
if (this.dlOpt.tables.includes('nethot')) {
num += this.nethot
}
if (this.dlOpt.tables.includes('code')) {
num += this.code
}
this.total = num
},
fileChange(ev) {
@ -245,7 +328,50 @@ Anot({
},
download() {
//
var opt = { ...this.dlOpt }
var temp = new Enum()
// 生成反查字库
if (opt.reverse) {
if (opt.tables.includes('gbk')) {
temp.concat(WB_TABLE_GBK)
let bin = new Blob([WB_TABLE_GBK.toString()], { type: 'text/plain' })
saveFile(bin, 'wb_table_gbk_reverse.txt')
} else {
temp.concat(WB_TABLE_2312)
let bin = new Blob([WB_TABLE_2312.toString()], { type: 'text/plain' })
saveFile(bin, 'wb_table_gb2312_reverse.txt')
}
}
// 默认词库
temp.concat(WB_WORDS)
// 异形字库
temp.concat(WB_DY)
// emoji表情
if (opt.tables.includes('emoji')) {
temp.concat(WB_EMOJI)
}
// 网络热词
if (opt.tables.includes('nethot')) {
temp.concat(WB_NET)
}
// 计算机术语
if (opt.tables.includes('code')) {
temp.concat(WB_CODE)
}
// 暂未支持
// if (opt.tables.includes('personal')) {
// temp.concat(WB_PERSONAL)
// }
let bin = new Blob([temp.toString(opt.pos === 'front')], { type: 'text/plain' })
saveFile(bin, `wb_table_${opt.pos}.txt`)
}
}
})

View File

@ -1,3 +1,6 @@
/**
* 保存词组
*/
export function saveFile(bin, fileName) {
var link = document.createElement('a')
link.href = URL.createObjectURL(bin)
@ -9,6 +12,44 @@ export function saveFile(bin, fileName) {
document.body.removeChild(link)
}
/**
 * Build the Wubi code for a single character or a phrase.
 *
 * Rules applied:
 * - a word that starts with Latin letters is encoded as that leading run of letters,
 *   lower-cased;
 * - one character: the first code the dictionary lists for it;
 * - two characters: the first two keys of each character's code;
 * - three characters: the first key of characters 1 and 2 plus the first two keys of
 *   character 3;
 * - four or more characters: the first key of characters 1, 2, 3 and of the last one.
 *   When character 3 has no code (e.g. punctuation or a Latin letter inside the phrase)
 *   the three-character rule is used instead, so such phrases stay encodable.
 *
 * For phrases the LAST code listed for each character is used (presumably the full
 * code — confirm against the table data).
 *
 * The arrays returned by `dict.get` are only read, never mutated, and the word is split
 * by code point rather than by UTF-16 unit.
 *
 * @param {{ get(key: string): string[] | undefined }} dict - single-character lookup table
 * @param {string} word - character or phrase to encode
 * @returns {string | undefined} the code, or `undefined` when the word is empty or a
 *   required character is missing from `dict`
 */
export function createCode(dict, word) {
  const text = String(word ?? '')

  const latin = /^[a-zA-Z]+/.exec(text)
  if (latin) {
    return latin[0].toLowerCase()
  }

  const chars = [...text]
  if (chars.length === 0) {
    return undefined
  }

  if (chars.length === 1) {
    return dict.get(chars[0])?.[0]
  }

  // Last code listed for a character, read without touching the returned array.
  const lastCode = ch => {
    const codes = dict.get(ch)
    return codes?.[codes.length - 1]
  }

  const first = lastCode(chars[0])
  const second = lastCode(chars[1])
  const last = lastCode(chars[chars.length - 1])

  if (!first || !second || !last) {
    console.error(word, '词组中存在未收录单字, 请到github上提交issues')
    return undefined
  }

  if (chars.length === 2) {
    return first.slice(0, 2) + second.slice(0, 2)
  }

  if (chars.length >= 4) {
    const third = lastCode(chars[2])
    if (third) {
      return first.slice(0, 1) + second.slice(0, 1) + third.slice(0, 1) + last.slice(0, 1)
    }
    // Third character is not encodable: fall through to the three-character rule.
  }

  return first.slice(0, 1) + second.slice(0, 1) + last.slice(0, 2)
}
/**
* 增强版 String
*/
@ -28,6 +69,14 @@ export class SString {
return this.#list.length
}
toLowerCase() {
return this.#origin.toLowerCase()
}
toUpperCase() {
return this.#origin.toUpperCase()
}
at(index = 0) {
if (this.length > 0) {
while (index < 0) {
@ -106,10 +155,13 @@ export class Enum {
}
get(k) {
// k += ''
k = k.toString()
if (this.#dict_k[k]) {
return this.#dict_k[k]
return [...this.#dict_k[k]]
} else if (this.#dict_v[k]) {
return this.#dict_v[k]
return [...this.#dict_v[k]]
}
}
@ -121,6 +173,27 @@ export class Enum {
}
}
slice(f, t) {
var res = []
var n = 0
if (t === void 0) {
t = this.length
}
if (f < 0) {
f += this.length
}
this.forEach((v, k) => {
if (n >= t) {
return false
}
if (n >= f) {
res.push({ [k]: [...v] })
}
n++
})
return res
}
forEach(callback, forV) {
var dict = forV ? this.#dict_v : this.#dict_k
for (let k in dict) {