UNPKG

very-small-parser

Version:

A very small Markdown, HTML, and CSS parser.

77 lines (76 loc) 2.47 kB
import { token } from '../util';
import * as reg from '../markdown/regex';

// An HTML comment anchored at the start of the input. The negative lookahead
// rejects bodies that begin with `>` or `->` (i.e. `<!-->` and `<!--->`).
const REG_COMMENT = /^<!--(?!-?>)[\s\S]*?-->/;

/**
 * Tokenizes an HTML comment at the start of `src`.
 *
 * @param {*} _ - Parser instance (unused).
 * @param {string} src - Remaining source text.
 * @returns {object|undefined} A `comment` token whose `value` is the text
 *   between `<!--` and `-->`, or `undefined` when `src` does not start with
 *   a comment.
 */
const comment = (_, src) => {
  const matches = src.match(REG_COMMENT);
  if (!matches) return;
  const match = matches[0];
  // Strip the 4-character opener `<!--` and the 3-character closer `-->`.
  const value = match.slice(4, -3);
  return token(match, 'comment', void 0, { value });
};

// One or more characters up to (not including) the next `<`.
const REG_TEXT = /^[^<]+/;

/**
 * Creates a tokenizer for plain text runs.
 *
 * @param {(text: string) => string} [dhe] - Optional transform applied to the
 *   matched text before it is stored as the token value (presumably an
 *   HTML-entity decoder; confirm against callers).
 * @returns {(parser: *, src: string) => object|undefined} Tokenizer that
 *   returns a `text` token, or `undefined` when `src` starts with `<` or is
 *   empty.
 */
const text = (dhe) => (_, src) => {
  const matches = src.match(REG_TEXT);
  if (!matches) return;
  const raw = matches[0];
  const value = dhe ? dhe(raw) : raw;
  // Report the length of the *source* text that was consumed, not the length
  // of the transformed value: `el` below uses token lengths as offsets into
  // the source, so a transform that shortens or lengthens the text must not
  // shift where parsing resumes.
  // NOTE(review): assumes the fifth argument of `token` is the consumed
  // length, as the original call (`value.length`) suggests; confirm in ../util.
  return token(value, 'text', void 0, { value }, raw.length);
};

// Named character references recognised inside attribute values.
const NAMED_ENTITIES = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };

// Either one of the named references above or a decimal numeric reference.
const REG_ENTITY = /&(?:(lt|gt|quot|apos|amp)|#(\d+));/g;

// Largest valid Unicode code point; `String.fromCodePoint` throws above it.
const MAX_CODE_POINT = 0x10ffff;

/**
 * Decodes the supported character references in an attribute value.
 *
 * Decoding happens in a single pass so that the output of one replacement is
 * never decoded again (e.g. `&amp;#60;` yields `&#60;`, not `<`).
 *
 * @param {string} str - Raw attribute value.
 * @returns {string} Value with `&lt; &gt; &quot; &apos; &amp;` and decimal
 *   `&#N;` references decoded. Out-of-range numeric references are left
 *   untouched.
 */
const unescapeAttr = (str) =>
  str.replace(REG_ENTITY, (entity, name, code) => {
    if (name) return NAMED_ENTITIES[name];
    const codePoint = Number(code);
    // `fromCodePoint` (unlike `fromCharCode`) handles astral code points
    // correctly, but throws on invalid ones, so guard the range.
    return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : entity;
  });

// A single attribute inside an opening tag: a name optionally followed by a
// double-quoted, single-quoted or unquoted value.
const REG_ATTR = / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/;

// Opening tag: captures the tag name and an optional trailing `/`.
const REG_OPEN_TAG = reg.replace(/^<([a-z][\w-]*)(?:attr)*? *(\/?)>/, { attr: REG_ATTR });

// Extracts `key=value` pairs from the attribute portion of an opening tag.
// NOTE(review): this pattern requires `=`, so value-less attributes accepted
// by REG_ATTR are not recorded; confirm whether that is intended.
const REG_ATTRS = /([\w|data-]+)=["']?((?:.(?!["']?\s+(?:\S+)=|\s*\/?[>"']))*.)["']?/gm;

// Closing tag anchored at the start of the input.
const REG_CLOSE_TAG = /^<\/([a-z][\w-]*)>/;

/**
 * Tokenizes an HTML element at the start of `src`.
 *
 * @param {{ parsef: (src: string) => { len: number, children: Array } }} parser
 *   Parser whose `parsef` method parses a fragment and reports the consumed
 *   length and the resulting child nodes.
 * @param {string} src - Remaining source text.
 * @returns {object|undefined} An `element` node with `tagName`, `properties`,
 *   `children` and `len` (source characters consumed), or `undefined` when
 *   `src` does not start with an opening tag. When no closing tag follows the
 *   parsed content, only the opening tag is consumed and `children` is empty.
 */
export const el = (parser, src) => {
  const matchOpen = src.match(REG_OPEN_TAG);
  if (!matchOpen) return;
  const [match, tagName, selfClosing] = matchOpen;
  const matchLength = match.length;

  // Everything between `<tagName` and the closing `>` (or `/>`).
  const attrSrc = match.slice(tagName.length + 1, -1 - selfClosing.length);
  const properties = {};
  if (attrSrc) {
    for (const [, key, value] of attrSrc.matchAll(REG_ATTRS)) {
      properties[key] = unescapeAttr(value);
    }
  }

  const node = {
    type: 'element',
    tagName,
    properties,
    children: [],
    len: matchLength,
  };
  if (selfClosing) return node;

  const substr = src.slice(matchLength);
  const fragment = parser.parsef(substr);
  const fragmentLen = fragment.len;
  const matchClose = substr.slice(fragmentLen).match(REG_CLOSE_TAG);
  // No closing tag: consume only the opening tag and leave the content to be
  // tokenized as siblings by the caller.
  if (!matchClose) return node;

  node.len += fragmentLen + matchClose[0].length;
  node.children = fragment.children;
  return node;
};

/**
 * Builds the ordered list of HTML tokenizers.
 *
 * @param {(text: string) => string} [dhe] - Optional transform forwarded to
 *   the text tokenizer.
 * @returns {Array<Function>} Tokenizers in the order text, comment, element.
 */
export const parsers = (dhe) => [text(dhe), comment, el];