UNPKG

very-small-parser

Version:

A very small Markdown, HTML, and CSS parser.

250 lines (249 loc) 8.91 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.parsers = void 0; const tslib_1 = require("tslib"); const util_1 = require("../../util"); const reg = tslib_1.__importStar(require("../regex")); const html_1 = require("../../html"); const REG_NEWLINE = /^[\n\r]+/; const newline = (_, src) => { const matches = src.match(REG_NEWLINE); if (matches) return (0, util_1.token)(matches[0], ''); }; const REG_CODE = /^(\s{4}[^\n]+)+/; const code = (_, src) => { const matches = src.match(REG_CODE); if (!matches) return; const subvalue = matches[0]; const overrides = { value: (0, util_1.rep)(/\n+$/, '', (0, util_1.rep)(/^ {4}/gm, '', subvalue)), lang: null, }; return (0, util_1.token)(subvalue, 'code', void 0, overrides, subvalue.length); }; const REG_FENCES = /^ *(`{3,}|~{3,})([^\s]*) *([^\n]*)\n([\s\S]*?)\s*\1 *(?:\n+|$)/; const fences = (_, src) => { const matches = src.match(REG_FENCES); if (!matches) return; const subvalue = matches[0]; const overrides = { value: matches[4] || matches[3], lang: matches[2] || '', meta: matches.length > 4 ? matches[3] : null, }; return (0, util_1.token)(subvalue, 'code', void 0, overrides); }; const REG_MATH = /^ *\$\$[ \.]*(\S+)? *\n([\s\S]*?)\s*\$\$ *(?:\n+|$)/; const math = (_, src) => { const matches = src.match(REG_MATH); if (matches) return (0, util_1.token)(matches[0], 'math', void 0, { value: matches[2] }); }; const thematicBreak = (_, src) => { const matches = src.match(reg.hr); if (matches) return (0, util_1.token)(matches[0], 'thematicBreak', void 0, { value: matches[1] }); }; const REG_HEADING1 = /^ *(#{1,6}) +([^\n]+?) *(?:#+ *)?(?:\n+|$)/; const REG_HEADING2 = /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/; const heading = (parser, src) => { let matches = src.match(REG_HEADING1); if (matches) { const subvalue = matches[2]; return (0, util_1.token)(matches[0], 'heading', parser.parsei(subvalue), { depth: matches[1].length }); } matches = src.match(REG_HEADING2); if (matches) { const subvalue = matches[1]; return (0, util_1.token)(matches[0], 'heading', parser.parsei(subvalue), { depth: matches[2] === '-' ? 2 : 1, }); } }; const REG_BLOCKQUOTE = /^( *>[^\n]+(\n(?!^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +["(]([^\n]+)[")])? *(?:\n+|$))[^\n]+)*)+/; const REG_BLOCKQUOTE_OUTDENT = /^ *> ?/gm; const REG_BLOCK_SPOILER_CHECK = /^ *!/; const REG_BLOCK_SPOILER_OUTDENT = /^ *! ?/gm; const blockquote = (parser, src) => { const matches = src.match(REG_BLOCKQUOTE); if (!matches) return; const subvalue = matches[0]; let content = (0, util_1.rep)(REG_BLOCKQUOTE_OUTDENT, '', subvalue); let spoiler = false; const isPotentiallySpoiler = content.match(REG_BLOCK_SPOILER_CHECK); if (isPotentiallySpoiler) { spoiler = content.split('\n').every((line) => line.match(REG_BLOCK_SPOILER_CHECK)); if (spoiler) content = content.replace(REG_BLOCK_SPOILER_OUTDENT, ''); } const children = parser.parse(content); const node = (0, util_1.token)(subvalue, 'blockquote', children); if (spoiler) node.spoiler = spoiler; return node; }; const REG_BULLET = /^\s{0,3}([*+-]|\d{1,3}\.)\s{1,42}/; const bull = /(?:[*+-]|\d{1,3}\.)/; const REG_LOOSE = /\n\s*\n(?!\s*$)/; const REG_ITEM = reg.replace(/^( {0,333})(bull) [^\n]*(?:\n(?!\1bull )[^\n]*)*/gm, { bull }); const REG_LIST = reg.replace(/^ {0,3}(?:bull) (?:(?!\r?\n\r?\n)[\s\S])+/, { bull, hr: reg.hr, def: reg.def, }); const getParts = (subvalue) => subvalue.match(REG_ITEM); const list = (parser, value) => { const matches = value.match(REG_LIST); if (!matches) return; const subvalue = matches[0]; const parts = getParts(subvalue); if (!parts) return; const length = parts.length; const children = []; let start = null; for (let i = 0; i < length; i++) { const part = parts[i]; // if (part[part.length - 1] === '\n') part = part.trimEnd(); const bulletMatch = part.match(REG_BULLET); if (!bulletMatch) return; const [bulletWithWhitespace, bullet] = bulletMatch; let content = part.slice(bulletWithWhitespace.length); if (i === 0 && bullet.length > 1) start = Number(bullet); let checked = null; if (content[0] === '[' && content[2] === ']' && content[3] === ' ') { checked = content[1] !== ' '; content = content.slice(4); } const newLinePos = content.indexOf('\n'); if (newLinePos > 0) { let outdentSize = 0; for (let pos = newLinePos + 1; pos < newLinePos + 4; pos++) if (content[pos] === ' ') outdentSize++; else break; if (outdentSize) content = (0, util_1.rep)(new RegExp('^ {1,' + outdentSize + '}', 'gm'), '', content); } children.push({ type: 'listItem', spread: REG_LOOSE.test(content), checked, children: parser.parse(content), }); } return (0, util_1.token)(subvalue, 'list', children, { ordered: start !== null, start }); }; const splitCells = (tableRow, count) => { const cells = (0, util_1.rep)(/([^\\])\|/g, '$1 |', tableRow).split(/ +\| */); if (count !== void 0) { if (cells.length > count) cells.splice(count); else while (cells.length < count) cells.push(''); } for (let i = 0; i < cells.length; i++) cells[i] = (0, util_1.rep)(/\\\|/g, '|', cells[i]); return cells; }; const REG_TABLE = /^ *\|?(.+)\n *\|?( *[-:]+[-| :]*)(?:\n((?: *[^>\n ].*(?:\n|$))*)\n*|$)/; const table = (parser, value) => { const matches = value.match(REG_TABLE); if (!matches) return; const subvalue = matches[0]; const header = matches[1]; const align = (0, util_1.rep)(/^ *|\| *$/g, '', matches[2]) .split(/ *\| */) .map((spec) => { spec = spec.trim(); return spec[0] === ':' ? spec[spec.length - 1] === ':' ? 'center' : 'left' : spec[spec.length - 1] === ':' ? 'right' : null; }); const rows = matches[3] ? (0, util_1.rep)(/(?: *\| *)?\n$/, '', matches[3]).split('\n') : []; const children = []; const headers = splitCells((0, util_1.rep)(/^ *| *\| *$/g, '', header)).map((headerText) => ({ type: 'tableCell', children: parser.parsei(headerText), })); children.push({ type: 'tableRow', children: headers, }); if (rows && rows.length) { for (let i = 0; i < rows.length; i++) { const row = rows[i]; const cells = splitCells((0, util_1.rep)(/^ *\| *| *\| *$/g, '', row), headers.length); children.push({ type: 'tableRow', children: cells.map((cellRawValue) => ({ type: 'tableCell', children: parser.parsei(cellRawValue), })), }); } } return (0, util_1.token)(subvalue, 'table', children, { align }); }; const REG_FOOTNOTE_DEF = /^\[\^([a-zA-Z0-9\-_]+)\]: *([^\n]*(\n?(( {2}([^\n]*)\n?)|\n(?!\n))*)?)/; const footnoteDefinition = (parser, value) => { const matches = value.match(REG_FOOTNOTE_DEF); if (!matches) return; const subvalue = matches[0]; const label = matches[1]; const identifier = label.toLowerCase(); const outdented = (0, util_1.rep)(/^ {1,4}/gm, '', matches[2]); const children = parser.parse(outdented); return (0, util_1.token)(subvalue, 'footnoteDefinition', children, { label, identifier }); }; const definition = (_, value) => { const matches = value.match(reg.def); if (!matches) return; const subvalue = matches[0]; const label = matches[1]; const title = matches[3]; return (0, util_1.token)(subvalue, 'definition', void 0, { label: label, identifier: label.toLowerCase(), title: title ? title.slice(1, -1) : null, url: matches[2], }); }; const html = (_, src) => html_1.html.el(src); const REG_PARAGRAPH = reg.replace(/^((?:[^\n]+(\n(?!\s{0,3}bull))?)+)\n*/, { bull }); const paragraph = (parser, value) => { const matches = value.match(REG_PARAGRAPH); if (matches) return (0, util_1.token)(matches[0], 'paragraph', parser.parsei(matches[1].trim())); }; exports.parsers = [ newline, code, fences, math, thematicBreak, heading, blockquote, list, table, footnoteDefinition, definition, html, paragraph, ];