优化markd解析器

master
yutent 2023-04-21 19:15:21 +08:00
parent f3a8723b03
commit 1d691ccac7
1 changed files with 50 additions and 57 deletions

View File

@ -15,6 +15,8 @@ const CODEBLOCK_RE = /```(.*?)([\w\W]*?)```/g
// Paired element <tag attrs>content</tag>. Captures: (1) tag name, (2) raw attribute text,
// (3) inner content, which may span lines because [\w\W] also matches newlines.
// The \1 backreference requires the closing tag to repeat the opening tag name.
const BLOCK_RE = /<([\w\-]+)([^>]*?)>([\w\W]*?)<\/\1>/g
// A string that consists of exactly one paired element on a single line
// (no `s` flag, so `.` does not cross a newline).
const IS_DOM_RE = /^<([\w\-]+)[^>]*?>.*?<\/\1>$/
// A <style> element; capture (1) is the text between the opening and closing tags.
const STYLE_RE = /<style[^>]*?>([\w\W]*?)<\/style>/g
// Same shape as IS_DOM_RE, but tolerates whitespace before and after the element.
const SINGLE_LINE_HTML_RE = /^\s*?<([\w\-]+)[^>]*?>.*?<\/\1>\s*?$/
// A line that begins (after optional whitespace) with an opening or closing tag;
// anything may follow the first `>`.
const MULTI_LINE_HTML_RE = /^\s*?<\/?[^>]*?>/
const INLINE = {
code: /`([^`]*?[^`\\\s])`/g,
@ -27,11 +29,6 @@ const INLINE = {
qlist: /((<blockquote class="md\-quote">)*?)([\+\-\*]|\d+\.) (.*)/ // 引用中的列表
}
const ATTR_BR_SYMBOL = '⨨☇'
const NODE_BR_SYMBOL = '⨨⤶'
const ATTR_BR_EXP = new RegExp(ATTR_BR_SYMBOL, 'g')
const NODE_BR_EXP = new RegExp(NODE_BR_SYMBOL, 'g')
const Helper = {
// 是否分割线
isHr(str) {
@ -127,7 +124,7 @@ const Decoder = {
},
// Horizontal rule.
// Returns the markup for a divider: a <fieldset class="md-hr"> whose <legend> carries
// the given name in its name attribute (empty string by default).
// NOTE(review): two return statements follow each other here. As written only the first
// one runs and the newline-padded variant is unreachable. This reads like the before/after
// lines of a diff whose +/- markers were lost; confirm which one is intended.
hr(name = '') {
return `<fieldset class="md-hr"><legend name="${name}"></legend></fieldset>`
return `\n\n<fieldset class="md-hr"><legend name="${name}"></legend></fieldset>\n\n`
},
// 标题
head(str) {
@ -140,16 +137,13 @@ const Decoder = {
if (level === 1) {
return `<h1>${m2}</h1>`
} else {
return `<h${level}><a href="#${hash}" id="${hash}" class="md-head-link">${m2}</a></h${level}>`
return `\n\n<h${level}><a href="#${hash}" id="${hash}" class="md-head-link">${m2}</a></h${level}>\n`
}
})
}
return false
},
// Blockquote module.
// Empty stub: the body contains no statements, so calling it returns undefined
// and the str argument is ignored.
blockquote(str) {
// (no implementation)
},
// 任务
task(str) {
var todoChecked = Helper.isTodo(str)
@ -158,7 +152,7 @@ const Decoder = {
var stat = todoChecked === 1 ? 'checked' : ''
var txt = todoChecked === 1 ? `<del>${word}</del>` : word
return `<section><wc-checkbox readonly ${stat}>${txt}</wc-checkbox></section>`
return `\n<section><wc-checkbox readonly ${stat}>${txt}</wc-checkbox></section>\n`
}
return false
}
@ -171,23 +165,10 @@ function fixed(str) {
.replace(/\t/g, ' ')
.replace(/\u00a0/g, ' ')
.replace(/\u2424/g, '\n')
.replace(TAG_RE, (m, name, attr) => {
// 标签内的换行, 转为一组特殊字符, 方便后面还原
return `<${name + attr.replace(/\n/g, ATTR_BR_SYMBOL)}>`
})
.replace(BLOCK_RE, (m, tag, attr, txt) => {
return `<${tag + attr}>${txt.replace(/\n/g, NODE_BR_SYMBOL)}</${tag}>`
})
.replace(CODEBLOCK_RE, (m, lang, txt) => {
// 还原换行
let rollback = txt.replace(NODE_BR_EXP, '\n').replace(ATTR_BR_EXP, '\n')
return '```' + lang + rollback + '```'
})
.replace(BLOCK_RE, (m, tag, attr, txt) => {
return `<${tag + attr.replace(ATTR_BR_EXP, ' ')}>${txt
.replace(NODE_BR_EXP, '\n')
.replace(ATTR_BR_EXP, ' ')}</${tag}>`
})
}
/**
 * Remove a single trailing `<br>` from an HTML string.
 *
 * Only the last occurrence is stripped, and only when the string ends with it;
 * any other input is returned unchanged.
 *
 * @param {string} str - HTML text to inspect.
 * @returns {string} The text without its final `<br>`, or the original text.
 */
function trimBr(str) {
  const suffix = '<br>'
  if (!str.endsWith(suffix)) {
    return str
  }
  // The suffix is known to be present, so cutting its length off the end is safe.
  return str.slice(0, str.length - suffix.length)
}
class Tool {
@ -213,10 +194,10 @@ class Tool {
emptyLineLength = 0
if (tmp.startsWith('```')) {
if (isCodeBlock) {
list.push('</xmp></wc-code>')
list.push('\n</xmp></wc-code>\n\n')
} else {
list.push(
tmp.replace(/^```([\w\#\-]*?)$/, `<wc-code lang="$1"><xmp>`)
tmp.replace(/^```([\w\#\-]*?)$/, `\n<wc-code lang="$1"><xmp>`)
)
}
isCodeBlock = !isCodeBlock
@ -233,9 +214,9 @@ class Tool {
thead.shift()
thead.pop()
list.push(
`<table><thead><tr>${thead
`\n<table>\n<thead><tr>${thead
.map(_ => `<th>${_}</th>`)
.join('')}</tr></thead><tbody>`
.join('')}</tr></thead>\n<tbody>`
)
isTable = true
continue
@ -270,7 +251,7 @@ class Tool {
} else {
if (isTable) {
isTable = false
list.push('</tbody></table>')
list.push('</tbody>\n</table>\n')
continue
}
if (list.length === 0 || (!isCodeBlock && emptyLineLength > 1)) {
@ -299,6 +280,7 @@ class Tool {
var isQuoteList = false // 引用中的列表, 只支持一层级
var quoteListStyle = 0 // 1有序, 2 无序
var isHtmlBlock = false // 是否原生html代码块
//
for (let it of this.list) {
@ -343,10 +325,11 @@ class Tool {
// wc-code标签直接拼接, 判断时多拼一个 < 和 >,
// 是为了避免在 wc-markd嵌入代码块示例时, 将其内容编译为html
if (~it.indexOf('<wc-code') || ~it.indexOf('wc-code>')) {
if (it.includes('<wc-code') || it.includes('wc-code>')) {
// 如果之前未闭合的段落, 先闭合
if (isParagraph) {
isParagraph = false
html += '</p>'
html += '</p>\n'
}
html += it
isCodeBlock = !isCodeBlock
@ -374,9 +357,13 @@ class Tool {
let head = Decoder.head(it)
if (head) {
isParagraph = false
html = trimBr(html)
if (isParagraph) {
isParagraph = false
html += '</p>'
}
html += head
// console.log(html)
continue
}
@ -408,7 +395,7 @@ class Tool {
// Inside a nested quote: a list that was opened within the quote is closed before continuing.
if (innerQuote) {
// If a quote-list is currently open, end it right away.
// NOTE(review): the declaration comment for quoteListStyle says 1 = ordered, 2 = unordered,
// and the opening code emits <ol> for style 1. Neither line below closes with </ol> for
// style 1: the first always emits </ul>, the second maps 1 to ul and everything else to ol.
// The expected mapping looks like (quoteListStyle === 1 ? 'ol' : 'ul') - confirm and fix.
// NOTE(review): two appends in a row would emit two closing tags; they read like the
// before/after lines of a diff whose +/- markers were lost.
if (isQuoteList) {
html += `</${quoteListStyle === 1 ? 'ul' : 'ul'}>`
html += `</${quoteListStyle === 1 ? 'ul' : 'ol'}>\n`
isQuoteList = false
}
}
@ -423,9 +410,8 @@ class Tool {
var qlist = ''
// 已有列表
if (isQuoteList) {
// 因为只支持一层级的列表, 所以同一级别不区分有序无序, 强制统一
} else {
// 因为只支持一层级的列表, 所以同一级别不区分有序无序, 强制统一
if (isQuoteList === false) {
isQuoteList = true
if (currListStyle === 1) {
qlist += '<ol>'
@ -515,11 +501,18 @@ class Tool {
}
// Plain text line: append it to the open paragraph, pass it through as raw HTML,
// or start a new <p>.
// NOTE(review): some statements below come in near-duplicate pairs (a <br> variant next to
// a newline variant). They read like the before/after lines of a diff whose +/- markers
// were lost; confirm which of each pair is intended.
if (isParagraph) {
html += `${it}<br>`
html += `${it}\n`
} else {
html += `<p>${it}<br>`
// Already inside a raw-HTML block, or the line is one complete element: emit it untouched.
// NOTE(review): once isHtmlBlock is true this first branch always wins, so the toggle in
// the else-if below is never reached again. Nothing in the lines shown here switches
// isHtmlBlock back to false; verify that a closing tag line really ends the block.
if (isHtmlBlock || SINGLE_LINE_HTML_RE.test(it)) {
html += `${it}\n`
} else if (MULTI_LINE_HTML_RE.test(it)) {
// The line starts with a tag but is not a complete one-line element: flip the block flag.
isHtmlBlock = !isHtmlBlock
html += `${it}\n`
} else {
// Ordinary text: open a new paragraph and remember that it is open.
html += `\n<p>\n${it}`
isParagraph = true
}
}
isParagraph = true
} else {
// 如果是在代码中, 直接拼接, 并加上换行
if (isCodeBlock) {
@ -534,7 +527,7 @@ class Tool {
emptyLineLength = 0
while (blockquoteLevel > 0) {
blockquoteLevel--
html += '</blockquote>'
html += '</blockquote>\n'
}
}
continue
@ -544,10 +537,10 @@ class Tool {
if (emptyLineLength > 1) {
while (orderListLevel > -1 || unorderListLevel > -1) {
if (orderListLevel > unorderListLevel) {
html += '</ol>'
html += '</ol>\n'
orderListLevel--
} else {
html += '</ul>'
html += '</ul>\n'
unorderListLevel--
}
}
@ -560,24 +553,24 @@ class Tool {
//
if (isParagraph) {
if (emptyLineLength > 1) {
emptyLineLength = 0
isParagraph = false
html += '</p>'
html = trimBr(html)
html += '\n</p>\n'
} else {
html += '<br>'
}
}
}
}
}
// 修正内嵌样式
html = html.replace(STYLE_RE, (m, code) => {
return `<style>${code
.replace(/<br>/g, '')
.replace(/<p>/g, '')
.replace(/<\/p>/g, '')}</style>`
})
if (isParagraph) {
html += '</p>'
}
delete this.list
delete this.__LINKS__
return html
return html.trim()
}
}