优化markd解析器

master
yutent 2023-04-21 19:15:21 +08:00
parent f3a8723b03
commit 1d691ccac7
1 changed file with 50 additions and 57 deletions

View File

@ -15,6 +15,8 @@ const CODEBLOCK_RE = /```(.*?)([\w\W]*?)```/g
const BLOCK_RE = /<([\w\-]+)([^>]*?)>([\w\W]*?)<\/\1>/g const BLOCK_RE = /<([\w\-]+)([^>]*?)>([\w\W]*?)<\/\1>/g
const IS_DOM_RE = /^<([\w\-]+)[^>]*?>.*?<\/\1>$/ const IS_DOM_RE = /^<([\w\-]+)[^>]*?>.*?<\/\1>$/
const STYLE_RE = /<style[^>]*?>([\w\W]*?)<\/style>/g const STYLE_RE = /<style[^>]*?>([\w\W]*?)<\/style>/g
const SINGLE_LINE_HTML_RE = /^\s*?<([\w\-]+)[^>]*?>.*?<\/\1>\s*?$/
const MULTI_LINE_HTML_RE = /^\s*?<\/?[^>]*?>/
const INLINE = { const INLINE = {
code: /`([^`]*?[^`\\\s])`/g, code: /`([^`]*?[^`\\\s])`/g,
@ -27,11 +29,6 @@ const INLINE = {
qlist: /((<blockquote class="md\-quote">)*?)([\+\-\*]|\d+\.) (.*)/ // 引用中的列表 qlist: /((<blockquote class="md\-quote">)*?)([\+\-\*]|\d+\.) (.*)/ // 引用中的列表
} }
const ATTR_BR_SYMBOL = '⨨☇'
const NODE_BR_SYMBOL = '⨨⤶'
const ATTR_BR_EXP = new RegExp(ATTR_BR_SYMBOL, 'g')
const NODE_BR_EXP = new RegExp(NODE_BR_SYMBOL, 'g')
const Helper = { const Helper = {
// 是否分割线 // 是否分割线
isHr(str) { isHr(str) {
@ -127,7 +124,7 @@ const Decoder = {
}, },
// 分割线 // 分割线
hr(name = '') { hr(name = '') {
return `<fieldset class="md-hr"><legend name="${name}"></legend></fieldset>` return `\n\n<fieldset class="md-hr"><legend name="${name}"></legend></fieldset>\n\n`
}, },
// 标题 // 标题
head(str) { head(str) {
@ -140,16 +137,13 @@ const Decoder = {
if (level === 1) { if (level === 1) {
return `<h1>${m2}</h1>` return `<h1>${m2}</h1>`
} else { } else {
return `<h${level}><a href="#${hash}" id="${hash}" class="md-head-link">${m2}</a></h${level}>` return `\n\n<h${level}><a href="#${hash}" id="${hash}" class="md-head-link">${m2}</a></h${level}>\n`
} }
}) })
} }
return false return false
}, },
// 引用模块
blockquote(str) {
//
},
// 任务 // 任务
task(str) { task(str) {
var todoChecked = Helper.isTodo(str) var todoChecked = Helper.isTodo(str)
@ -158,7 +152,7 @@ const Decoder = {
var stat = todoChecked === 1 ? 'checked' : '' var stat = todoChecked === 1 ? 'checked' : ''
var txt = todoChecked === 1 ? `<del>${word}</del>` : word var txt = todoChecked === 1 ? `<del>${word}</del>` : word
return `<section><wc-checkbox readonly ${stat}>${txt}</wc-checkbox></section>` return `\n<section><wc-checkbox readonly ${stat}>${txt}</wc-checkbox></section>\n`
} }
return false return false
} }
@ -171,23 +165,10 @@ function fixed(str) {
.replace(/\t/g, ' ') .replace(/\t/g, ' ')
.replace(/\u00a0/g, ' ') .replace(/\u00a0/g, ' ')
.replace(/\u2424/g, '\n') .replace(/\u2424/g, '\n')
.replace(TAG_RE, (m, name, attr) => { }
// 标签内的换行, 转为一组特殊字符, 方便后面还原
return `<${name + attr.replace(/\n/g, ATTR_BR_SYMBOL)}>` function trimBr(str) {
}) return str.endsWith('<br>') ? str.slice(0, -4) : str
.replace(BLOCK_RE, (m, tag, attr, txt) => {
return `<${tag + attr}>${txt.replace(/\n/g, NODE_BR_SYMBOL)}</${tag}>`
})
.replace(CODEBLOCK_RE, (m, lang, txt) => {
// 还原换行
let rollback = txt.replace(NODE_BR_EXP, '\n').replace(ATTR_BR_EXP, '\n')
return '```' + lang + rollback + '```'
})
.replace(BLOCK_RE, (m, tag, attr, txt) => {
return `<${tag + attr.replace(ATTR_BR_EXP, ' ')}>${txt
.replace(NODE_BR_EXP, '\n')
.replace(ATTR_BR_EXP, ' ')}</${tag}>`
})
} }
class Tool { class Tool {
@ -213,10 +194,10 @@ class Tool {
emptyLineLength = 0 emptyLineLength = 0
if (tmp.startsWith('```')) { if (tmp.startsWith('```')) {
if (isCodeBlock) { if (isCodeBlock) {
list.push('</xmp></wc-code>') list.push('\n</xmp></wc-code>\n\n')
} else { } else {
list.push( list.push(
tmp.replace(/^```([\w\#\-]*?)$/, `<wc-code lang="$1"><xmp>`) tmp.replace(/^```([\w\#\-]*?)$/, `\n<wc-code lang="$1"><xmp>`)
) )
} }
isCodeBlock = !isCodeBlock isCodeBlock = !isCodeBlock
@ -233,9 +214,9 @@ class Tool {
thead.shift() thead.shift()
thead.pop() thead.pop()
list.push( list.push(
`<table><thead><tr>${thead `\n<table>\n<thead><tr>${thead
.map(_ => `<th>${_}</th>`) .map(_ => `<th>${_}</th>`)
.join('')}</tr></thead><tbody>` .join('')}</tr></thead>\n<tbody>`
) )
isTable = true isTable = true
continue continue
@ -270,7 +251,7 @@ class Tool {
} else { } else {
if (isTable) { if (isTable) {
isTable = false isTable = false
list.push('</tbody></table>') list.push('</tbody>\n</table>\n')
continue continue
} }
if (list.length === 0 || (!isCodeBlock && emptyLineLength > 1)) { if (list.length === 0 || (!isCodeBlock && emptyLineLength > 1)) {
@ -299,6 +280,7 @@ class Tool {
var isQuoteList = false // 引用中的列表, 只支持一层级 var isQuoteList = false // 引用中的列表, 只支持一层级
var quoteListStyle = 0 // 1有序, 2 无序 var quoteListStyle = 0 // 1有序, 2 无序
var isHtmlBlock = false // 是否原生html代码块
// //
for (let it of this.list) { for (let it of this.list) {
@ -343,10 +325,11 @@ class Tool {
// wc-code标签直接拼接, 判断时多拼一个 < 和 >, // wc-code标签直接拼接, 判断时多拼一个 < 和 >,
// 是为了避免在 wc-markd嵌入代码块示例时, 将其内容编译为html // 是为了避免在 wc-markd嵌入代码块示例时, 将其内容编译为html
if (~it.indexOf('<wc-code') || ~it.indexOf('wc-code>')) { if (it.includes('<wc-code') || it.includes('wc-code>')) {
// 如果之前未闭合的段落, 先闭合
if (isParagraph) { if (isParagraph) {
isParagraph = false isParagraph = false
html += '</p>' html += '</p>\n'
} }
html += it html += it
isCodeBlock = !isCodeBlock isCodeBlock = !isCodeBlock
@ -374,9 +357,13 @@ class Tool {
let head = Decoder.head(it) let head = Decoder.head(it)
if (head) { if (head) {
html = trimBr(html)
if (isParagraph) {
isParagraph = false isParagraph = false
html += '</p>'
}
html += head html += head
// console.log(html)
continue continue
} }
@ -408,7 +395,7 @@ class Tool {
if (innerQuote) { if (innerQuote) {
// 之前有引用的列表时, 直接结束列表 // 之前有引用的列表时, 直接结束列表
if (isQuoteList) { if (isQuoteList) {
html += `</${quoteListStyle === 1 ? 'ul' : 'ul'}>` html += `</${quoteListStyle === 1 ? 'ul' : 'ol'}>\n`
isQuoteList = false isQuoteList = false
} }
} }
@ -423,9 +410,8 @@ class Tool {
var qlist = '' var qlist = ''
// 已有列表 // 已有列表
if (isQuoteList) {
// 因为只支持一层级的列表, 所以同一级别不区分有序无序, 强制统一 // 因为只支持一层级的列表, 所以同一级别不区分有序无序, 强制统一
} else { if (isQuoteList === false) {
isQuoteList = true isQuoteList = true
if (currListStyle === 1) { if (currListStyle === 1) {
qlist += '<ol>' qlist += '<ol>'
@ -515,11 +501,18 @@ class Tool {
} }
if (isParagraph) { if (isParagraph) {
html += `${it}<br>` html += `${it}\n`
} else { } else {
html += `<p>${it}<br>` if (isHtmlBlock || SINGLE_LINE_HTML_RE.test(it)) {
} html += `${it}\n`
} else if (MULTI_LINE_HTML_RE.test(it)) {
isHtmlBlock = !isHtmlBlock
html += `${it}\n`
} else {
html += `\n<p>\n${it}`
isParagraph = true isParagraph = true
}
}
} else { } else {
// 如果是在代码中, 直接拼接, 并加上换行 // 如果是在代码中, 直接拼接, 并加上换行
if (isCodeBlock) { if (isCodeBlock) {
@ -534,7 +527,7 @@ class Tool {
emptyLineLength = 0 emptyLineLength = 0
while (blockquoteLevel > 0) { while (blockquoteLevel > 0) {
blockquoteLevel-- blockquoteLevel--
html += '</blockquote>' html += '</blockquote>\n'
} }
} }
continue continue
@ -544,10 +537,10 @@ class Tool {
if (emptyLineLength > 1) { if (emptyLineLength > 1) {
while (orderListLevel > -1 || unorderListLevel > -1) { while (orderListLevel > -1 || unorderListLevel > -1) {
if (orderListLevel > unorderListLevel) { if (orderListLevel > unorderListLevel) {
html += '</ol>' html += '</ol>\n'
orderListLevel-- orderListLevel--
} else { } else {
html += '</ul>' html += '</ul>\n'
unorderListLevel-- unorderListLevel--
} }
} }
@ -560,24 +553,24 @@ class Tool {
// //
if (isParagraph) { if (isParagraph) {
if (emptyLineLength > 1) { if (emptyLineLength > 1) {
emptyLineLength = 0
isParagraph = false isParagraph = false
html += '</p>' html = trimBr(html)
html += '\n</p>\n'
} else {
html += '<br>'
} }
} }
} }
} }
} }
// 修正内嵌样式 if (isParagraph) {
html = html.replace(STYLE_RE, (m, code) => { html += '</p>'
return `<style>${code }
.replace(/<br>/g, '')
.replace(/<p>/g, '')
.replace(/<\/p>/g, '')}</style>`
})
delete this.list delete this.list
delete this.__LINKS__ delete this.__LINKS__
return html return html.trim()
} }
} }