very-small-parser
Version:
A very small Markdown, HTML, and CSS parser.
191 lines (190 loc) • 7.3 kB
JavaScript
import { regexParser, rep, repAll, token } from '../../util';
import { replace, label, urlInline, title } from '../regex';
import { html as htmlParser } from '../../html';
// Opening run of backticks (group 1), the code body (group 2, must end on a
// non-backtick), then the very same run of backticks not followed by another one.
const REG_INLINE_CODE = /^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)/;

/**
 * Parses an inline code span at the start of `value`.
 *
 * @param {*} _ Parser instance (unused).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for an `inlineCode` node carrying the code
 *   body as `value` and the backtick fence as `wrap`, or `undefined` on no match.
 */
const inlineCode = (_, value) => {
  const found = value.match(REG_INLINE_CODE);
  if (!found) {
    return;
  }
  const [raw, fence, code] = found;
  return token(raw, 'inlineCode', void 0, { value: code, wrap: fence });
};
// Four alternatives, one capture group each:
//   1: double underscore around two or more characters
//   2: double asterisk around two or more characters
//   3: double underscore around a single character
//   4: double asterisk around a single character
// Content may neither start nor end with whitespace.
const REG_STRONG = /^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)|^__([^\s])__(?!_)|^\*\*([^\s])\*\*(?!\*)/;

/**
 * Parses strong (bold) text at the start of `value`.
 *
 * @param {{parse: Function}} parser Parser used to parse the inner text.
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `strong` node whose children come
 *   from `parser.parse(inner)`, or `undefined` on no match.
 */
const strong = (parser, value) => {
  const found = value.match(REG_STRONG);
  if (!found) {
    return;
  }
  const [raw, wideUnderscore, wideAsterisk, singleUnderscore, singleAsterisk] = found;
  // Only one alternative can match, so at most one of these groups is set.
  const inner = singleAsterisk || singleUnderscore || wideAsterisk || wideUnderscore;
  return token(raw, 'strong', parser.parse(inner));
};
// Six alternatives, one capture group each: underscore- and asterisk-delimited
// text of two or more characters (two variants per delimiter, differing in which
// end is additionally forbidden from being the delimiter itself), followed by the
// single-character forms. Content may neither start nor end with whitespace.
const REG_EMPHASIS = /^_([^\s][\s\S]*?[^\s_])_(?!_)|^_([^\s_][\s\S]*?[^\s])_(?!_)|^\*([^\s][\s\S]*?[^\s*])\*(?!\*)|^\*([^\s*][\s\S]*?[^\s])\*(?!\*)|^_([^\s_])_(?!_)|^\*([^\s*])\*(?!\*)/;

/**
 * Parses emphasized (italic) text at the start of `value`.
 *
 * @param {{parse: Function}} parser Parser used to parse the inner text.
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for an `emphasis` node whose children come
 *   from `parser.parse(inner)`, or `undefined` on no match.
 */
const emphasis = (parser, value) => {
  const found = value.match(REG_EMPHASIS);
  if (!found) {
    return;
  }
  const [raw, g1, g2, g3, g4, g5, g6] = found;
  // Only one alternative can match, so at most one of these groups is set.
  const inner = g6 || g5 || g4 || g3 || g2 || g1;
  return token(raw, 'emphasis', parser.parse(inner));
};
// `~~text~~`: content must start and end with a non-whitespace character and is
// matched lazily, so the first closing `~~` ends the span.
const REG_DELETE = /^~~(?=\S)([\s\S]*?\S)~~/;

/**
 * Parses strike-through text at the start of `value`.
 *
 * @param {{parse: Function}} parser Parser used to parse the inner text.
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `delete` node, or `undefined` on no match.
 */
const deletedText = (parser, value) => {
  const found = value.match(REG_DELETE);
  if (!found) {
    return;
  }
  const [raw, inner] = found;
  return token(raw, 'delete', parser.parse(inner));
};
// Spoiler text in either `||text||` or `>!text!<` form.
//   group 1: content of the `||...||` form
//   group 2: content of the `>!...!<` form
// The content must start with a non-whitespace character and is matched lazily
// and non-empty, so the FIRST closing delimiter ends the spoiler. A greedy match
// would merge `||a|| and ||b||` into one spoiler, and an empty match would hand
// `undefined` to `parser.parse`.
const REG_SPOILER = /^(?:\|\|(?=\S)([\s\S]+?)\|\||>!(?=\S)([\s\S]+?)!<)/;

/**
 * Parses spoiler text at the start of `value`.
 *
 * @param {{parse: Function}} parser Parser used to parse the inner text.
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `spoiler` node whose children come
 *   from `parser.parse(content)`, or `undefined` on no match.
 */
const spoiler = (parser, value) => {
  const matches = value.match(REG_SPOILER);
  if (!matches) {
    return;
  }
  // Exactly one of the two groups is set, and it is never empty.
  const content = matches[1] || matches[2];
  return token(matches[0], 'spoiler', parser.parse(content));
};
// One or two `$`, content that starts and ends with a non-whitespace character
// (matched lazily), then one or two `$` again.
const REG_INLINE_MATH = /^\${1,2}(?=\S)([\s\S]*?\S)\${1,2}/;

/**
 * Parses inline math at the start of `value`.
 *
 * @param {*} parser Parser instance (unused; the content is kept as raw text).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for an `inlineMath` node carrying the raw
 *   content as `value`, or `undefined` on no match.
 */
const inlineMath = (parser, value) => {
  const found = value.match(REG_INLINE_MATH);
  if (!found) {
    return;
  }
  const [raw, formula] = found;
  return token(raw, 'inlineMath', void 0, { value: formula });
};
// `[^label]` where the label is 1-64 characters of letters, digits, `-` or `_`.
const REG_FOOTNOTE_REFERENCE = /^\[\^([a-zA-Z0-9\-_]{1,64})\]/;

/**
 * Parses a footnote reference at the start of `value`.
 *
 * @param {*} parser Parser instance (unused).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `footnoteReference` node carrying the
 *   label as written and its lower-cased form as `identifier`, or `undefined`
 *   on no match.
 */
const footnoteReference = (parser, value) => {
  const found = value.match(REG_FOOTNOTE_REFERENCE);
  if (!found) {
    return;
  }
  const [raw, rawLabel] = found;
  return token(raw, 'footnoteReference', void 0, {
    label: rawLabel,
    identifier: rawLabel.toLowerCase(),
  });
};
// Optional `!`, a bracketed label (the `label` placeholder is substituted by
// `replace`), optional whitespace, then an optional second bracket pair whose
// content is the explicit identifier.
const REG_REFERENCE = replace(/^!?\[(label)\]\s*(\[([^\]]*)\])?/, { label });

/**
 * Parses a link or image reference (`[text][id]`, `[text][]`, `[text]`, and the
 * `!`-prefixed image variants) at the start of `value`.
 *
 * @param {{parse: Function}} parser Parser used to parse the link text.
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for an `imageReference` node (with `alt`)
 *   or a `linkReference` node (with parsed children), or `undefined` on no match.
 */
const reference = (parser, value) => {
  const found = value.match(REG_REFERENCE);
  if (!found) {
    return;
  }
  const [subvalue, labelText, secondBrackets, explicitId] = found;

  // A non-empty second bracket pair gives a "full" reference; an empty pair is
  // "collapsed"; no pair at all is "shortcut". The latter two reuse the label
  // text as the identifier.
  const identifier = explicitId || labelText;
  let referenceType = 'full';
  if (!explicitId) {
    referenceType = secondBrackets ? 'collapsed' : 'shortcut';
  }

  if (subvalue[0] === '!') {
    return token(subvalue, 'imageReference', void 0, {
      identifier,
      referenceType,
      alt: labelText || null,
    });
  }
  return token(subvalue, 'linkReference', parser.parse(labelText), { identifier, referenceType });
};
// The shared `urlInline` pattern, anchored to the start of the input.
const REG_INLINE_LINK = new RegExp(`^${urlInline.source}`);

/**
 * Parses a bare URL at the start of `value`.
 *
 * @param {*} _ Parser instance (unused).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for an `inlineLink` node whose `value` is
 *   the matched text, or `undefined` on no match.
 */
const inlineLink = (_, value) => {
  const found = value.match(REG_INLINE_LINK);
  if (!found) {
    return;
  }
  const [url] = found;
  return token(url, 'inlineLink', void 0, { value: url });
};
// Superscript: `^text^`. The lookahead requires the content to start with a
// non-whitespace character; the lazy capture group (1) must also end on one.
const REG_SUP = /^\^(?=\S)([\s\S]*?\S)\^/;
// NOTE(review): `regexParser` is imported from ../../util and not visible here;
// presumably it builds a parser that emits a 'sup' node from capture group 1 — confirm.
const sup = regexParser('sup', REG_SUP, 1);
// Subscript: `~text~`. The lookahead requires the content to start with a
// non-whitespace character; the lazy capture group (1) must also end on one.
const REG_SUB = /^~(?=\S)([\s\S]*?\S)~/;
// NOTE(review): `regexParser` is imported from ../../util and not visible here;
// presumably it builds a parser that emits a 'sub' node from capture group 1 — confirm.
const sub = regexParser('sub', REG_SUB, 1);
// Highlight: `==text==`. The lookahead requires the content to start with a
// non-whitespace character; the lazy capture group (1) must also end on one.
const REG_MARK = /^==(?=\S)([\s\S]*?\S)==/;
// NOTE(review): `regexParser` is imported from ../../util and not visible here;
// presumably it builds a parser that emits a 'mark' node from capture group 1 — confirm.
const mark = regexParser('mark', REG_MARK, 1);
// A single `#`, `~` or `@` prefix (group 1) that is not followed by another
// prefix character, then either a bare handle of 1-64 characters (group 2/3) or a
// `{...}` handle of 1-64 characters that may also contain `=` and spaces
// (group 4 with braces, group 5 without).
const REG_HANDLE = /^([#~@])(?![#~@])(([\w\-_\.\/#]{1,64})|(\{([\w\-_\.\/#=\/ ]{1,64})\}))/;

/**
 * Parses a handle such as `@user`, `#tag` or `#{two words}` at the start of `value`.
 *
 * @param {*} _ Parser instance (unused).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `handle` node carrying the handle
 *   text (braces stripped) as `value` and the prefix character as `prefix`, or
 *   `undefined` on no match.
 */
const handle = (_, value) => {
  const found = value.match(REG_HANDLE);
  if (!found) {
    return;
  }
  const [raw, prefix, body, , , braced] = found;
  // Prefer the brace-less inner text when the `{...}` form matched.
  return token(raw, 'handle', void 0, { value: braced || body, prefix });
};
// Underline: `++text++`. The lookahead requires the content to start with a
// non-whitespace character; the lazy capture group (1) must also end on one.
const REG_UNDERLINE = /^\+\+(?=\S)([\s\S]*?\S)\+\+/;
// NOTE(review): `regexParser` is imported from ../../util and not visible here;
// presumably it builds a parser that emits an 'underline' node from capture group 1 — confirm.
const underline = regexParser('underline', REG_UNDERLINE, 1);
// Two or more whitespace characters followed by a line ending, as long as
// something other than whitespace still follows.
const REG_BREAK1 = /^\s{2,}\r?\n(?!\s*$)/;
// Optional whitespace followed by the two literal characters backslash + "n".
const REG_BREAK2 = /^\s*\\n/;

/**
 * Parses a hard line break at the start of `value`.
 *
 * @param {*} _ Parser instance (unused).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `break` node, or `undefined` on no match.
 */
const inlineBreak = (_, value) => {
  // Patterns are tried in order; the first one that matches wins.
  for (const pattern of [REG_BREAK1, REG_BREAK2]) {
    const found = value.match(pattern);
    if (found) {
      return token(found[0], 'break');
    }
  }
};
/**
 * Builds a parser for `:name:` and `::name::` icon short-codes.
 *
 * @param {number} [maxLength=32] Maximum number of characters allowed in the name.
 * @returns {(_: *, value: string) => *} Parser that returns the result of
 *   `token(...)` for an `icon` node carrying the name as `emoji`, or `undefined`
 *   on no match.
 */
const icon = (maxLength = 32) => {
  // The name may not contain a single quote, whitespace or a colon.
  const doubleColon = new RegExp(`^::([^'\\s:]{1,${maxLength}}?)::`);
  const singleColon = new RegExp(`^:([^'\\s:]{1,${maxLength}}?):`);

  return (_, value) => {
    // The double-colon form is tried first.
    const found = value.match(doubleColon) || value.match(singleColon);
    if (!found) {
      return;
    }
    const [raw, name] = found;
    return token(raw, 'icon', void 0, { emoji: name });
  };
};
// biome-ignore lint: allow control characters in regexp
const REG_URL = /\s*(<(?:\\[<>]?|[^\s<>\\])*>|(?:\\[()]?|\([^\s\x00-\x1f()\\]*\)|[^\s\x00-\x1f()\\])*?)/;
// Optional `!`, `[label]`, then `(url "title")` with the title being optional.
// The `r1`, `r2` and `title` placeholders are substituted by `replace`.
const REG_LINK = replace(/^!?\[(r1)\]\(r2(?:\s+(title))?\s*\)/, { r1: label, r2: REG_URL, title });

/**
 * Parses an inline link `[text](url "title")` or image `![alt](url "title")`
 * at the start of `value`.
 *
 * @param {{parse: Function}} parser Parser used to parse the link text.
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for an `image` node (with `url`, `alt`,
 *   `title`) or a `link` node (parsed children plus `url`, `title`), or
 *   `undefined` on no match.
 */
const link = (parser, value) => {
  const found = value.match(REG_LINK);
  if (!found) {
    return;
  }
  const [raw, labelText, url, rawTitle] = found;
  // Drop the first and last character of the title (presumably its delimiters —
  // confirm against the imported `title` pattern).
  const linkTitle = rawTitle ? rawTitle.slice(1, -1) : rawTitle;

  if (raw[0] === '!') {
    return token(raw, 'image', void 0, {
      url,
      alt: labelText,
      title: linkTitle,
    });
  }
  return token(raw, 'link', parser.parse(labelText), {
    url,
    title: linkTitle,
  });
};
/**
 * Applies typographic replacements to `text`, one step at a time. The order
 * matters: opening quotes are handled before the generic quote replacement, and
 * `---` is replaced before `--`.
 *
 * @param {string} text Input text.
 * @returns The text after all `rep` / `repAll` steps have been applied.
 */
const smarttext = (text) => {
  let out = rep(/^"(?=\S)/, '\u201c', text); // opening doubles
  out = rep(/^'(?=\S)/, '\u2018', out); // opening singles
  out = rep(/\(tm\)/gi, '™', out);
  out = rep(/\(r\)/gi, '®', out);
  out = rep(/\(c\)/gi, '©', out);
  out = repAll('"', '”', out);
  out = repAll("'", '’', out);
  out = repAll('---', '—', out);
  out = repAll('--', '–', out);
  out = repAll('+-', '±', out);
  out = repAll('(P)', '§', out);
  return repAll('...', '…', out);
};
// Up to two whitespace characters followed by a line ending (global).
const REG_NEWLINE = /\s{0,2}\r?\n/g;

/**
 * Replacement callback for `REG_NEWLINE`.
 *
 * @param {string} newline The matched newline sequence.
 * @returns {string} `'\n'` when the match begins with two spaces, otherwise a
 *   single space.
 */
const newlineReplacer = (newline) => {
  if (newline.startsWith('  ')) {
    return '\n';
  }
  return ' ';
};
// Lazily consumes at least one character, stopping before the next character
// that may start another inline construct, before two or more spaces followed by
// a newline, before an inline URL, before a literal backslash-n or escaped
// backtick, or at the end of input.
const REG_TEXT = new RegExp('^[\\s\\S]+?(?=[\\<>!\\[_*`:~\\|#@\\$\\^=\\+]| {2,}\\n|(' + urlInline.source + ')|\\\\n|\\\\`|$)');

/**
 * Builds the plain-text parser.
 *
 * @param {Function} [dhe] Optional post-processing function applied to the text
 *   value (NOTE(review): the name suggests HTML-entity decoding — confirm with callers).
 * @returns {(eat: *, src: string) => *} Parser that returns the result of
 *   `token(...)` for a `text` node, or `undefined` on no match. The node value
 *   has newlines normalised and typographic replacements applied; the raw match
 *   and its length are passed to `token` unchanged.
 */
const text = (dhe) => (eat, src) => {
  const found = src.match(REG_TEXT);
  if (!found) {
    return;
  }
  const [raw] = found;
  const smart = smarttext(raw.replace(REG_NEWLINE, newlineReplacer));
  const value = dhe ? dhe(smart) : smart;
  return token(raw, 'text', void 0, { value }, raw.length);
};
// A backslash followed by one ASCII punctuation character (group 1).
const REG_ESCAPE = /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/;

/**
 * Parses a backslash escape at the start of `value`.
 *
 * @param {*} _ Parser instance (unused).
 * @param {string} value Remaining source text.
 * @returns The result of `token(...)` for a `text` node whose `value` is the
 *   escaped character without the backslash, or `undefined` on no match.
 */
const inlineEscape = (_, value) => {
  const found = value.match(REG_ESCAPE);
  if (!found) {
    return;
  }
  const [raw, escaped] = found;
  return token(raw, 'text', void 0, { value: escaped });
};
/**
 * Inline HTML parser: hands the remaining source to the HTML parser's `el` method.
 *
 * @param {*} _ Parser instance (unused).
 * @param {string} src Remaining source text.
 * @returns Whatever `htmlParser.el(src)` returns.
 */
const html = (_, src) => {
  return htmlParser.el(src);
};
/**
 * Builds the list of inline parsers.
 *
 * The order of the list is significant and must be kept: escapes and code come
 * first and the catch-all text parser comes last (NOTE(review): presumably the
 * consumer tries parsers in list order — confirm). A fresh array, a fresh icon
 * parser and a fresh text parser are created on every call.
 *
 * @param {Function} [dhe] Optional function forwarded to `text(dhe)`.
 * @returns {Function[]} The inline parsers, in order.
 */
export const parsers = (dhe) => {
  const ordered = [
    inlineEscape,
    inlineCode,
    strong,
    emphasis,
    spoiler,
    deletedText,
    inlineMath,
    footnoteReference,
    link,
    reference,
    inlineLink,
    sup,
    sub,
    mark,
    handle,
    underline,
    inlineBreak,
  ];
  ordered.push(icon(), html, text(dhe));
  return ordered;
};