UNPKG

very-small-parser

Version:

A very small Markdown, HTML, and CSS parser.

195 lines (194 loc) 7.83 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.parsers = void 0;
const util_1 = require("../../util");
const regex_1 = require("../regex");
const html_1 = require("../../html");

/*
 * Inline Markdown parsers.
 *
 * Every parser below has the shape `(parser, value) => token | undefined`:
 * it tries to match its construct at the START of `value` (all patterns are
 * anchored with `^`) and returns `undefined` when there is no match. Tokens
 * are built with `util.token(matchedText, type, children, overrides[, length])`.
 * NOTE(review): the exact shape of the returned token is defined in
 * `../../util`, which is not visible from this file.
 */

// Opening run of backticks, optional padding, the code, and the SAME run of
// backticks (`\1`) not followed by another backtick.
const REG_INLINE_CODE = /^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)/;
/** Inline code span. `value` is the code, `wrap` is the backtick fence used. */
const inlineCode = (_, value) => {
    const matches = value.match(REG_INLINE_CODE);
    if (!matches)
        return;
    return (0, util_1.token)(matches[0], 'inlineCode', void 0, {
        value: matches[2],
        wrap: matches[1],
    });
};

// Alternatives: `__multi char__`, `**multi char**`, `__x__`, `**x**`.
const REG_STRONG = /^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)|^__([^\s])__(?!_)|^\*\*([^\s])\*\*(?!\*)/;
/** Strong emphasis; the inner text is parsed recursively into children. */
const strong = (parser, value) => {
    const matches = value.match(REG_STRONG);
    if (!matches)
        return;
    // Exactly one alternative matched, so at most one capture group is set.
    const subvalue = matches[4] || matches[3] || matches[2] || matches[1];
    return (0, util_1.token)(matches[0], 'strong', parser.parse(subvalue));
};

// Four multi-character alternatives (`_` and `*` delimited) followed by the
// two single-character forms `_x_` and `*x*`.
const REG_EMPHASIS = /^_([^\s][\s\S]*?[^\s_])_(?!_)|^_([^\s_][\s\S]*?[^\s])_(?!_)|^\*([^\s][\s\S]*?[^\s*])\*(?!\*)|^\*([^\s*][\s\S]*?[^\s])\*(?!\*)|^_([^\s_])_(?!_)|^\*([^\s*])\*(?!\*)/;
/** Emphasis; the inner text is parsed recursively into children. */
const emphasis = (parser, value) => {
    const matches = value.match(REG_EMPHASIS);
    if (!matches)
        return;
    const subvalue = matches[6] || matches[5] || matches[4] || matches[3] || matches[2] || matches[1];
    return (0, util_1.token)(matches[0], 'emphasis', parser.parse(subvalue));
};

const REG_DELETE = /^~~(?=\S)([\s\S]*?\S)~~/;
/** Strikethrough `~~text~~`; the inner text is parsed recursively. */
const deletedText = (parser, value) => {
    const matches = value.match(REG_DELETE);
    if (matches)
        return (0, util_1.token)(matches[0], 'delete', parser.parse(matches[1]));
};

// Lazy `+?` so the spoiler ends at the FIRST closing delimiter (a greedy match
// would merge `||a|| text ||b||` into a single spoiler) and so that the
// content is never empty (an empty capture would reach `parser.parse` as
// `undefined` through the `||` fallback below).
const REG_SPOILER = /^(?:(?:\|\|(?=\S)([\s\S]+?)\|\|)|(?:\>\!(?=\S)([\s\S]+?)\!\<))/;
/** Spoiler, written `||text||` or `>!text!<`; the inner text is parsed recursively. */
const spoiler = (parser, value) => {
    const matches = value.match(REG_SPOILER);
    if (!matches)
        return;
    const content = matches[1] || matches[2];
    return (0, util_1.token)(matches[0], 'spoiler', parser.parse(content));
};

// One or two `$` on each side; the opening and closing counts are not
// required to be equal.
const REG_INLINE_MATH = /^\${1,2}(?=\S)([\s\S]*?\S)\${1,2}/;
/** Inline math; the raw source between the dollar signs is kept as `value`. */
const inlineMath = (parser, value) => {
    const matches = value.match(REG_INLINE_MATH);
    if (matches)
        return (0, util_1.token)(matches[0], 'inlineMath', void 0, { value: matches[1] });
};

const REG_FOOTNOTE_REFERENCE = /^\[\^([a-zA-Z0-9\-_]{1,64})\]/;
/** Footnote reference `[^label]`; `identifier` is the lower-cased label. */
const footnoteReference = (parser, value) => {
    const matches = value.match(REG_FOOTNOTE_REFERENCE);
    if (!matches)
        return;
    const label = matches[1];
    const identifier = label.toLowerCase();
    return (0, util_1.token)(matches[0], 'footnoteReference', void 0, { label, identifier });
};

// NOTE(review): `regex.replace` presumably substitutes the `label` placeholder
// with the source of `regex.label` - confirm in `../regex`.
const REG_REFERENCE = (0, regex_1.replace)(/^!?\[(label)\]\s*(\[([^\]]*)\])?/, { label: regex_1.label });
/**
 * Link or image reference: `[text][id]` (full), `[text][]` (collapsed) or
 * `[text]` (shortcut). A leading `!` makes it an image reference.
 */
const reference = (parser, value) => {
    const matches = value.match(REG_REFERENCE);
    if (!matches)
        return;
    const subvalue = matches[0];
    const isImage = subvalue[0] === '!';
    const type = isImage ? 'imageReference' : 'linkReference';
    let identifier = matches[3];
    let referenceType = 'full';
    let children = void 0;
    if (!identifier) {
        // No explicit identifier: fall back to the label. An empty `[]` that
        // is present means "collapsed", its absence means "shortcut".
        identifier = matches[1];
        referenceType = matches[2] ? 'collapsed' : 'shortcut';
    }
    const overrides = { identifier, referenceType };
    if (isImage)
        overrides.alt = matches[1] || null;
    else
        children = parser.parse(matches[1]);
    return (0, util_1.token)(subvalue, type, children, overrides);
};

const REG_INLINE_LINK = new RegExp('^' + regex_1.urlInline.source);
/** Bare URL found in running text (pattern comes from `regex.urlInline`). */
const inlineLink = (_, value) => {
    const matches = value.match(REG_INLINE_LINK);
    if (!matches)
        return;
    const subvalue = matches[0];
    return (0, util_1.token)(subvalue, 'inlineLink', void 0, { value: subvalue });
};

// The next group of parsers is produced by `util.regexParser(type, regex, group)`.
// NOTE(review): the third argument looks like the capture-group index holding
// the inner content - confirm in `../../util`.
const REG_SUP = /^\^(?=\S)([\s\S]*?\S)\^/;
const sup = (0, util_1.regexParser)('sup', REG_SUP, 1);
const REG_SUB = /^~(?=\S)([\s\S]*?\S)~/;
const sub = (0, util_1.regexParser)('sub', REG_SUB, 1);
const REG_MARK = /^==(?=\S)([\s\S]*?\S)==/;
const mark = (0, util_1.regexParser)('mark', REG_MARK, 1);

// Prefix (`#`, `~` or `@`, not doubled) followed by either a bare handle or a
// `{braced handle}`; the braced form additionally allows `=` and spaces.
const REG_HANDLE = /^([#~@])(?![#~@])(([\w\-_\.\/#]{1,64})|(\{([\w\-_\.\/#=\/ ]{1,64})\}))/;
/** Handle such as `#tag`, `@user` or `~{some name}`; braces are stripped from `value`. */
const handle = (_, value) => {
    const matches = value.match(REG_HANDLE);
    if (!matches)
        return;
    // Group 5 is the text inside braces; group 2 is the whole bare handle.
    const subvalue = matches[5] || matches[2];
    return (0, util_1.token)(matches[0], 'handle', void 0, { value: subvalue, prefix: matches[1] });
};

const REG_UNDERLINE = /^\+\+(?=\S)([\s\S]*?\S)\+\+/;
const underline = (0, util_1.regexParser)('underline', REG_UNDERLINE, 1);

// Two or more whitespace characters before a newline, provided something
// other than whitespace follows.
const REG_BREAK1 = /^\s{2,}\r?\n(?!\s*$)/;
// Optional whitespace followed by a literal backslash and the letter `n`.
const REG_BREAK2 = /^\s*\\n/;
/** Hard line break. */
const inlineBreak = (_, value) => {
    const matches = value.match(REG_BREAK1) || value.match(REG_BREAK2);
    if (matches)
        return (0, util_1.token)(matches[0], 'break');
};

/**
 * Creates the icon parser for `::name::` and `:name:`.
 *
 * @param maxLength Maximum number of characters allowed in the name (default 32).
 * @returns A parser producing `icon` tokens whose `emoji` field is the name.
 */
const icon = (maxLength = 32) => {
    const REG_ICON1 = new RegExp(`^::([^'\\s:]{1,${maxLength}}?)::`);
    const REG_ICON2 = new RegExp(`^:([^'\\s:]{1,${maxLength}}?):`);
    return (_, value) => {
        const matches = value.match(REG_ICON1) || value.match(REG_ICON2);
        if (matches)
            return (0, util_1.token)(matches[0], 'icon', void 0, { emoji: matches[1] });
    };
};

// biome-ignore lint: allow control characters in regexp
const REG_URL = /\s*(<(?:\\[<>]?|[^\s<>\\])*>|(?:\\[()]?|\([^\s\x00-\x1f()\\]*\)|[^\s\x00-\x1f()\\])*?)/;
const REG_LINK = (0, regex_1.replace)(/^!?\[(r1)\]\(r2(?:\s+(title))?\s*\)/, { r1: regex_1.label, r2: REG_URL, title: regex_1.title });
/**
 * Inline link `[text](url "title")` or image `![alt](url "title")`.
 * Link text is parsed recursively; image alt text is kept as a plain string.
 */
const link = (parser, value) => {
    const matches = value.match(REG_LINK);
    if (!matches)
        return;
    const isImage = matches[0][0] === '!';
    let linkTitle = matches[3];
    // Drop the first and last character of the captured title (its delimiters).
    if (linkTitle)
        linkTitle = linkTitle.slice(1, -1);
    if (isImage)
        return (0, util_1.token)(matches[0], 'image', void 0, {
            url: matches[2],
            alt: matches[1],
            title: linkTitle,
        });
    return (0, util_1.token)(matches[0], 'link', parser.parse(matches[1]), {
        url: matches[2],
        title: linkTitle,
    });
};

/**
 * Typographic substitutions for plain text. The calls are nested, so the
 * INNERMOST replacement runs first: opening quotes, then the (tm)/(r)/(c)
 * symbols, then the remaining quotes, then `---` before `--`, and so on.
 * NOTE(review): `rep`/`repAll` are called as (pattern, replacement, input);
 * their exact semantics live in `../../util`.
 */
const smarttext = (text) =>
// biome-ignore format: keep functional formatting
(0, util_1.repAll)('...', '…',
(0, util_1.repAll)('(P)', '§',
(0, util_1.repAll)('+-', '±',
(0, util_1.repAll)('--', '–',
(0, util_1.repAll)('---', '—',
(0, util_1.repAll)("'", '’',
(0, util_1.repAll)('"', '”',
(0, util_1.rep)(/\(c\)/gi, '©',
(0, util_1.rep)(/\(r\)/gi, '®',
(0, util_1.rep)(/\(tm\)/gi, '™',
(0, util_1.rep)(/^'(?=\S)/, '\u2018', // opening singles
(0, util_1.rep)(/^"(?=\S)/, '\u201c', // opening doubles
text))))))))))));

const REG_NEWLINE = /\s{0,2}\r?\n/g;
// A newline preceded by two spaces is kept as '\n'; any other newline
// collapses to a single space.
const newlineReplacer = (newline) => (newline[0] === ' ' && newline[1] === ' ' ? '\n' : ' ');
// Consume at least one character, lazily, up to the next character that could
// start another inline construct, a hard break, an inline URL, an escaped
// `\n` or backtick, or the end of input.
const REG_TEXT = new RegExp('^[\\s\\S]+?(?=[\\<>!\\[_*`:~\\|#@\\$\\^=\\+]| {2,}\\n|(' + regex_1.urlInline.source + ')|\\\\n|\\\\`|$)');
/**
 * Creates the fallback plain-text parser.
 *
 * @param dhe Optional function applied to the final text value.
 *     NOTE(review): presumably an HTML-entity decoder - confirm with callers.
 * @returns A parser producing `text` tokens; the token length is the length
 *     of the raw match, not of the transformed value.
 */
const text = (dhe) => (eat, src) => {
    const matches = src.match(REG_TEXT);
    if (!matches)
        return;
    const match = matches[0];
    let value = match.replace(REG_NEWLINE, newlineReplacer);
    value = smarttext(value);
    if (dhe)
        value = dhe(value);
    return (0, util_1.token)(match, 'text', void 0, { value }, match.length);
};

const REG_ESCAPE = /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/;
/** Backslash escape of a punctuation character; emitted as literal text. */
const inlineEscape = (_, value) => {
    const matches = value.match(REG_ESCAPE);
    if (matches)
        return (0, util_1.token)(matches[0], 'text', void 0, { value: matches[1] });
};

/** Inline HTML element; delegates to the HTML parser's `el`. */
const html = (_, src) => html_1.html.el(src);

/**
 * Builds the list of inline parsers, in the order given by the array below.
 *
 * @param dhe Optional text post-processor forwarded to the `text` parser.
 * @returns Array of inline parser functions; `text` is the last entry.
 */
const parsers = (dhe) => [
    inlineEscape,
    inlineCode,
    strong,
    emphasis,
    spoiler,
    deletedText,
    inlineMath,
    footnoteReference,
    link,
    reference,
    inlineLink,
    sup,
    sub,
    mark,
    handle,
    underline,
    inlineBreak,
    icon(),
    html,
    text(dhe),
];
exports.parsers = parsers;