UNPKG

very-small-parser

Version:

A very small Markdown, HTML, and CSS parser.

191 lines (190 loc) 7.3 kB
import { regexParser, rep, repAll, token } from '../../util';
import { replace, label, urlInline, title } from '../regex';
import { html as htmlParser } from '../../html';

/*
 * Inline Markdown parsers.
 *
 * Every parser below has the shape `(parser, value) => token | undefined`:
 * it tests a regular expression anchored at the start of `value` and, on a
 * match, builds a node with the `token` helper, passing the full matched
 * text as the first argument. When nothing matches it returns `undefined`.
 */

// Inline code: a run of backticks, the content, then the same run of
// backticks (back-reference `\1`) not followed by another backtick.
const REG_INLINE_CODE = /^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)/;

/** Parses inline code; `value` is the content, `wrap` is the backtick fence. */
const inlineCode = (_, value) => {
  const matches = value.match(REG_INLINE_CODE);
  if (!matches) return;
  return token(matches[0], 'inlineCode', void 0, {
    value: matches[2],
    wrap: matches[1],
  });
};

// Strong: `__text__` or `**text**`. The last two alternatives cover a
// single non-whitespace character between the delimiters.
const REG_STRONG = /^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)|^__([^\s])__(?!_)|^\*\*([^\s])\*\*(?!\*)/;

/** Parses strong text and recursively parses its content as children. */
const strong = (parser, value) => {
  const matches = value.match(REG_STRONG);
  if (!matches) return;
  // Exactly one alternative matched, so at most one group is populated.
  const subvalue = matches[4] || matches[3] || matches[2] || matches[1];
  return token(matches[0], 'strong', parser.parse(subvalue));
};

// Emphasis: `_text_` or `*text*`, including the single-character forms.
const REG_EMPHASIS = /^_([^\s][\s\S]*?[^\s_])_(?!_)|^_([^\s_][\s\S]*?[^\s])_(?!_)|^\*([^\s][\s\S]*?[^\s*])\*(?!\*)|^\*([^\s*][\s\S]*?[^\s])\*(?!\*)|^_([^\s_])_(?!_)|^\*([^\s*])\*(?!\*)/;

/** Parses emphasized text and recursively parses its content as children. */
const emphasis = (parser, value) => {
  const matches = value.match(REG_EMPHASIS);
  if (!matches) return;
  const subvalue =
    matches[6] || matches[5] || matches[4] || matches[3] || matches[2] || matches[1];
  return token(matches[0], 'emphasis', parser.parse(subvalue));
};

// Strikethrough: `~~text~~`, content starts and ends with non-whitespace.
const REG_DELETE = /^~~(?=\S)([\s\S]*?\S)~~/;

/** Parses deleted (strikethrough) text with recursively parsed children. */
const deletedText = (parser, value) => {
  const matches = value.match(REG_DELETE);
  if (matches) return token(matches[0], 'delete', parser.parse(matches[1]));
};

// Spoiler: `||text||` or `>!text!<`.
// NOTE(review): the content quantifier is greedy, so the match extends to the
// LAST closing delimiter in `value` - confirm this is intended.
const REG_SPOILER = /^(?:(?:\|\|(?=\S)([\s\S]*)\|\|)|(?:\>\!(?=\S)([\s\S]*)\!\<))/;

/** Parses spoiler text with recursively parsed children. */
const spoiler = (parser, value) => {
  const matches = value.match(REG_SPOILER);
  if (!matches) return;
  const content = matches[1] || matches[2];
  return token(matches[0], 'spoiler', parser.parse(content));
};

// Inline math: one or two `$` on each side. The opening and closing
// delimiter lengths are matched independently of each other.
const REG_INLINE_MATH = /^\${1,2}(?=\S)([\s\S]*?\S)\${1,2}/;

/** Parses inline math; the content is kept verbatim in `value`. */
const inlineMath = (parser, value) => {
  const matches = value.match(REG_INLINE_MATH);
  if (matches) return token(matches[0], 'inlineMath', void 0, { value: matches[1] });
};

// Footnote reference: `[^label]`, label is 1-64 letters, digits, `-` or `_`.
const REG_FOOTNOTE_REFERENCE = /^\[\^([a-zA-Z0-9\-_]{1,64})\]/;

/** Parses a footnote reference; `identifier` is the lower-cased label. */
const footnoteReference = (parser, value) => {
  const matches = value.match(REG_FOOTNOTE_REFERENCE);
  if (!matches) return;
  // Named `rawLabel` so the imported `label` pattern is not shadowed.
  const rawLabel = matches[1];
  const identifier = rawLabel.toLowerCase();
  return token(matches[0], 'footnoteReference', void 0, { label: rawLabel, identifier });
};

// Link/image reference: `[label]`, `[label][]` or `[label][id]`, with an
// optional leading `!`. The literal word `label` in the pattern is filled in
// by `replace` from the imported `label` pattern.
// NOTE(review): the group indexes used below assume `label` itself adds no
// capture groups - confirm against ../regex.
const REG_REFERENCE = replace(/^!?\[(label)\]\s*(\[([^\]]*)\])?/, { label });

/**
 * Parses a link or image reference.
 *
 * `referenceType` is 'full' when the second bracket pair carries an
 * identifier, 'collapsed' when that pair is present but empty, and
 * 'shortcut' when it is absent. Images get an `alt` override; links get
 * recursively parsed children.
 */
const reference = (parser, value) => {
  const matches = value.match(REG_REFERENCE);
  if (!matches) return;
  const subvalue = matches[0];
  const isImage = subvalue[0] === '!';
  const type = isImage ? 'imageReference' : 'linkReference';
  let identifier = matches[3];
  let referenceType = 'full';
  let children = void 0;
  if (!identifier) {
    // No explicit identifier: fall back to the label text.
    identifier = matches[1];
    referenceType = matches[2] ? 'collapsed' : 'shortcut';
  }
  const overrides = { identifier, referenceType };
  if (isImage) overrides.alt = matches[1] || null;
  else children = parser.parse(matches[1]);
  return token(subvalue, type, children, overrides);
};

// Bare URL: the imported `urlInline` pattern, anchored at the start.
const REG_INLINE_LINK = new RegExp('^' + urlInline.source);

/** Parses a bare inline URL; the matched text is stored as `value`. */
const inlineLink = (_, value) => {
  const matches = value.match(REG_INLINE_LINK);
  if (!matches) return;
  const subvalue = matches[0];
  return token(subvalue, 'inlineLink', void 0, { value: subvalue });
};

// Superscript: `^text^`.
const REG_SUP = /^\^(?=\S)([\s\S]*?\S)\^/;
const sup = regexParser('sup', REG_SUP, 1);

// Subscript: `~text~`.
const REG_SUB = /^~(?=\S)([\s\S]*?\S)~/;
const sub = regexParser('sub', REG_SUB, 1);

// Highlight: `==text==`.
const REG_MARK = /^==(?=\S)([\s\S]*?\S)==/;
const mark = regexParser('mark', REG_MARK, 1);

// Handle: a single `#`, `~` or `@` prefix followed by either a bare name
// (1-64 characters) or a `{...}` form (1-64 characters inside the braces).
const REG_HANDLE = /^([#~@])(?![#~@])(([\w\-_\.\/#]{1,64})|(\{([\w\-_\.\/#=\/ ]{1,64})\}))/;

/** Parses a handle; `value` excludes the braces when the braced form is used. */
const handle = (_, value) => {
  const matches = value.match(REG_HANDLE);
  if (!matches) return;
  const subvalue = matches[5] || matches[2];
  return token(matches[0], 'handle', void 0, { value: subvalue, prefix: matches[1] });
};

// Underline: `++text++`.
const REG_UNDERLINE = /^\+\+(?=\S)([\s\S]*?\S)\+\+/;
const underline = regexParser('underline', REG_UNDERLINE, 1);

// Break, form 1: two or more whitespace characters, then a newline that is
// not followed by whitespace only up to the end of the input.
const REG_BREAK1 = /^\s{2,}\r?\n(?!\s*$)/;
// Break, form 2: optional whitespace, then a literal backslash and `n`.
const REG_BREAK2 = /^\s*\\n/;

/** Parses a hard line break in either of the two supported forms. */
const inlineBreak = (_, value) => {
  const matches = value.match(REG_BREAK1) || value.match(REG_BREAK2);
  if (matches) return token(matches[0], 'break');
};

/**
 * Creates the icon parser for `::name::` and `:name:`.
 *
 * @param {number} [maxLength=32] Maximum length of the icon name; the name
 *   may not contain `'`, whitespace or `:`.
 * @returns {Function} Parser that stores the name in the `emoji` override.
 */
const icon = (maxLength = 32) => {
  const REG_ICON1 = new RegExp(`^::([^'\\s:]{1,${maxLength}}?)::`);
  const REG_ICON2 = new RegExp(`^:([^'\\s:]{1,${maxLength}}?):`);
  return (_, value) => {
    const matches = value.match(REG_ICON1) || value.match(REG_ICON2);
    if (matches) return token(matches[0], 'icon', void 0, { emoji: matches[1] });
  };
};

// Link destination: either `<...>` or a run of characters without
// whitespace or control characters, allowing one level of parentheses.
// biome-ignore lint: allow control characters in regexp
const REG_URL = /\s*(<(?:\\[<>]?|[^\s<>\\])*>|(?:\\[()]?|\([^\s\x00-\x1f()\\]*\)|[^\s\x00-\x1f()\\])*?)/;

// Inline link/image: `[text](url "title")` with an optional leading `!`.
// `r1`, `r2` and `title` in the pattern are placeholders filled by `replace`.
const REG_LINK = replace(/^!?\[(r1)\]\(r2(?:\s+(title))?\s*\)/, { r1: label, r2: REG_URL, title });

/**
 * Parses an inline link or image. Images get `url`, `alt` and `title`
 * overrides; links get recursively parsed children plus `url` and `title`.
 */
const link = (parser, value) => {
  const matches = value.match(REG_LINK);
  if (!matches) return;
  const isImage = matches[0][0] === '!';
  let linkTitle = matches[3];
  // Drops the first and last character of the title - presumably the
  // surrounding quotes; verify against the `title` pattern in ../regex.
  if (linkTitle) linkTitle = linkTitle.slice(1, -1);
  if (isImage)
    return token(matches[0], 'image', void 0, {
      url: matches[2],
      alt: matches[1],
      title: linkTitle,
    });
  return token(matches[0], 'link', parser.parse(matches[1]), {
    url: matches[2],
    title: linkTitle,
  });
};

/**
 * Applies typographic substitutions to a piece of text.
 *
 * The nested calls are evaluated innermost first, so the opening-quote rules
 * run before the generic quote rules and `---` is handled before `--`.
 */
const smarttext = (text) =>
  // biome-ignore format: keep functional formatting
  repAll('...', '…',
  repAll('(P)', '§',
  repAll('+-', '±',
  repAll('--', '–',
  repAll('---', '—',
  repAll("'", '’',
  repAll('"', '”',
  rep(/\(c\)/gi, '©',
  rep(/\(r\)/gi, '®',
  rep(/\(tm\)/gi, '™',
  rep(/^'(?=\S)/, '\u2018', // opening singles
  rep(/^"(?=\S)/, '\u201c', // opening doubles
    text))))))))))));

// A newline together with up to two whitespace characters in front of it.
const REG_NEWLINE = /\s{0,2}\r?\n/g;

// Two leading spaces keep the line break; any other newline becomes a space.
const newlineReplacer = (newline) => (newline[0] === ' ' && newline[1] === ' ' ? '\n' : ' ');

// Plain text: lazily consumes at least one character, stopping before a
// character that may start another inline construct, a hard break, an inline
// URL, a literal `\n`, an escaped backtick, or the end of the input.
const REG_TEXT = new RegExp('^[\\s\\S]+?(?=[\\<>!\\[_*`:~\\|#@\\$\\^=\\+]| {2,}\\n|(' + urlInline.source + ')|\\\\n|\\\\`|$)');

/**
 * Creates the plain-text parser.
 *
 * @param {Function} [dhe] Optional post-processor applied to the text value
 *   after newline normalization and `smarttext`.
 * @returns {Function} Parser producing 'text' tokens; the raw match length is
 *   passed to `token` as its final argument.
 */
const text = (dhe) => (eat, src) => {
  const matches = src.match(REG_TEXT);
  if (!matches) return;
  const match = matches[0];
  let value = match.replace(REG_NEWLINE, newlineReplacer);
  value = smarttext(value);
  if (dhe) value = dhe(value);
  return token(match, 'text', void 0, { value }, match.length);
};

// Backslash escape: a backslash followed by one ASCII punctuation character.
const REG_ESCAPE = /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/;

/** Parses a backslash escape into a 'text' token holding the escaped character. */
const inlineEscape = (_, value) => {
  const matches = value.match(REG_ESCAPE);
  if (matches) return token(matches[0], 'text', void 0, { value: matches[1] });
};

/** Delegates inline HTML to the HTML parser's `el` method. */
const html = (_, src) => htmlParser.el(src);

/**
 * Builds the list of inline parsers.
 *
 * @param {Function} [dhe] Optional post-processor forwarded to the text parser.
 * @returns {Function[]} The inline parsers, in the order listed below
 *   (presumably the order in which the consumer tries them - verify against
 *   the caller).
 */
export const parsers = (dhe) => [
  inlineEscape,
  inlineCode,
  strong,
  emphasis,
  spoiler,
  deletedText,
  inlineMath,
  footnoteReference,
  link,
  reference,
  inlineLink,
  sup,
  sub,
  mark,
  handle,
  underline,
  inlineBreak,
  icon(),
  html,
  text(dhe),
];