UNPKG

ecmarkup

Version:

Custom element definitions and core utilities for markup that specifies ECMAScript and related technologies.

526 lines (525 loc) 22.8 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.walk = exports.parse = void 0;
const header_parser_1 = require("./header-parser");
/**
 * Matches the first structural token inside a text fragment. Exactly one named
 * group participates in a match, and its name is used as the token type:
 * list open/close (`«`/`»` or their HTML entities), record braces, parentheses,
 * "and " (optionally preceded by ", "), " is ", a comma, a period followed by a
 * space or end of input, "<word> of ", and " with argument(s) ".
 */
const tokMatcher = /(?<olist>&laquo;|«)|(?<clist>&raquo;|»)|(?<orec>\{)|(?<crec>\})|(?<oparen>\()|(?<cparen>\))|(?<and>(?:, )?and )|(?<is> is )|(?<comma>,)|(?<period>\.(?= |$))|(?<x_of>\b\w+ of )|(?<with_args> with arguments? )/u;
/**
 * Error raised by the parser for malformed input. `parse` catches it and turns
 * it into a `{ type: 'failure', message, offset }` result.
 */
class ParseFailure extends Error {
    /**
     * @param {string} message - description of the problem
     * @param {number} offset - source offset at which the problem was detected
     */
    constructor(message, offset) {
        super(message);
        this.offset = offset;
    }
}
/**
 * Renders a list of expected closing token types as human-readable text.
 *
 * @param {string[]} close - token type names; unknown names pass through unchanged
 * @returns the result of `formatEnglishList(mapped, 'or')` from ./header-parser
 *   (presumably an English "a, b, or c" style list -- confirm against that module)
 */
function formatClose(close) {
    const mapped = close.map(c => {
        switch (c) {
            case 'clist':
                return 'list close';
            case 'crec':
                return 'record close';
            case 'cparen':
                return 'close parenthesis';
            case 'eof':
                return 'end of line';
            case 'with_args':
                return '"with argument(s)"';
            case 'comma':
                return 'comma';
            case 'period':
                return 'period';
            case 'and':
                return '"and"';
            case 'is':
                return '"is"';
            default:
                return c;
        }
    });
    return (0, header_parser_1.formatEnglishList)(mapped, 'or');
}
/**
 * Appends `token` to `items` as prose, merging it into a trailing prose item
 * when there is one.
 *
 * @param {Array} items - sequence items collected so far; mutated in place
 * @param {object} token - either a prose token (`{ type: 'prose', parts }`) or
 *   any other token, which is converted to a single text part built from its
 *   `source` and `offset`
 */
function addProse(items, token) {
    // sometimes we determine after seeing a token that it should not have been treated as a token
    // in that case we want to join it with the preceding prose, if any
    if (token.type !== 'prose') {
        addProse(items, {
            type: 'prose',
            parts: [
                {
                    name: 'text',
                    contents: token.source,
                    location: { start: { offset: token.offset } },
                },
            ],
        });
        return;
    }
    const prev = items[items.length - 1];
    if (prev == null || prev.type !== 'prose') {
        items.push(token);
        return;
    }
    const lastPartOfPrev = prev.parts[prev.parts.length - 1];
    const firstPartOfThis = token.parts[0];
    const bothText = lastPartOfPrev != null &&
        lastPartOfPrev.name === 'text' &&
        firstPartOfThis != null &&
        firstPartOfThis.name === 'text';
    if (bothText) {
        // adjacent text parts are fused into one so later text matching sees them as a unit
        items[items.length - 1] = {
            type: 'prose',
            parts: [
                ...prev.parts.slice(0, -1),
                {
                    name: 'text',
                    contents: lastPartOfPrev.contents + firstPartOfThis.contents,
                    location: { start: { offset: lastPartOfPrev.location.start.offset } },
                },
                ...token.parts.slice(1),
            ],
        };
    }
    else {
        items[items.length - 1] = {
            type: 'prose',
            parts: [...prev.parts, ...token.parts],
        };
    }
}
/** True when every part of the prose item is a text part containing only whitespace. */
function isWhitespace(x) {
    return x.parts.every(p => p.name === 'text' && /^\s*$/.test(p.contents));
}
/** True when the sequence has no items other than whitespace-only prose (including no items at all). */
function isEmpty(s) {
    return s.items.every(i => i.type === 'prose' && isWhitespace(i));
}
/** True when any part of any item in the (empty) sequence contains a newline. */
function emptyThingHasNewline(s) {
    // only call this function on things which pass isEmpty
    return s.items.some(i => i.parts.some(p => p.contents.includes('\n')));
}
/**
 * Tokenizer plus recursive-descent parser over a sequence of fragment nodes.
 * Each node is read through `name`, `contents` and `location` properties.
 */
class ExprParser {
    /**
     * @param {Array} src - fragment nodes to parse
     * @param opNames - collection queried with `.has(name)` to decide whether
     *   "<name> of" introduces an sdo-call
     */
    constructor(src, opNames) {
        this.srcIndex = 0;
        this.textTokOffset = null; // offset into current text node; only meaningful if srcOffset points to a text node
        this.next = [];
        this.src = src;
        this.opNames = opNames;
    }
    /** Returns the next token without consuming it, tokenizing more input when the queue is empty. */
    peek() {
        if (this.next.length === 0) {
            this.advance();
        }
        return this.next[0];
    }
    // this method is complicated because the underlying data is a sequence of ecmarkdown fragments, not a string
    /**
     * Scans forward to the next structural token and appends to `this.next`
     * an optional prose token (everything skipped over) followed by exactly one
     * non-prose token: the match, or `eof` once the source is exhausted.
     */
    advance() {
        const currentProse = [];
        while (this.srcIndex < this.src.length) {
            const node = this.src[this.srcIndex];
            // when part of a text node was already consumed, work on a view of the remainder
            const tok = this.textTokOffset == null
                ? node
                : {
                    name: 'text',
                    contents: node.contents.slice(this.textTokOffset),
                    location: {
                        start: {
                            offset: node.location.start.offset + this.textTokOffset,
                        },
                    },
                };
            const match = tok.name === 'text' ? tok.contents.match(tokMatcher) : null;
            if (tok.name !== 'text' || match == null) {
                // no token here: the whole node is prose (empty text remainders are dropped)
                if (!(tok.name === 'text' && tok.contents.length === 0)) {
                    currentProse.push(tok);
                }
                ++this.srcIndex;
                this.textTokOffset = null;
                continue;
            }
            const { groups } = match;
            const before = tok.contents.slice(0, match.index);
            if (before.length > 0) {
                currentProse.push({ name: 'text', contents: before, location: tok.location });
            }
            const matchKind = Object.keys(groups).find(x => groups[x] != null);
            if (currentProse.length > 0) {
                this.next.push({ type: 'prose', parts: currentProse });
            }
            const alreadyConsumed = this.textTokOffset == null ? 0 : this.textTokOffset;
            this.textTokOffset = alreadyConsumed + match.index + match[0].length;
            this.next.push({
                type: matchKind,
                offset: tok.location.start.offset + match.index,
                source: groups[matchKind],
            });
            return;
        }
        if (currentProse.length > 0) {
            this.next.push({ type: 'prose', parts: currentProse });
        }
        this.next.push({
            type: 'eof',
            offset: this.src.length === 0 ? 0 : this.src[this.src.length - 1].location.end.offset,
            source: '',
        });
    }
    // guarantees the next token is an element of close
    /**
     * Parses items until one of the token types in `close` is reached. The
     * closing token is left in the queue for the caller to consume.
     *
     * @param {string[]} close - token types that terminate this sequence
     * @returns {{ type: 'seq', items: Array }}
     * @throws {ParseFailure} on unbalanced or unexpected tokens
     */
    parseSeq(close) {
        const items = [];
        while (true) {
            const next = this.peek();
            switch (next.type) {
                case 'and':
                case 'is':
                case 'period':
                case 'with_args':
                case 'comma': {
                    if (!close.includes(next.type)) {
                        // not a terminator in this context, so it is just text
                        addProse(items, next);
                        this.next.shift();
                        break;
                    }
                    if (items.length === 0) {
                        throw new ParseFailure(`unexpected ${next.type} (expected some content for element/argument)`, next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'eof': {
                    if (items.length === 0 || !close.includes('eof')) {
                        throw new ParseFailure(`unexpected eof (expected ${formatClose(close)})`, next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'prose': {
                    addProse(items, next);
                    this.next.shift();
                    break;
                }
                case 'olist': {
                    this.next.shift();
                    const elements = [];
                    if (this.peek().type !== 'clist') {
                        while (true) {
                            elements.push(this.parseSeq(['clist', 'comma']));
                            if (this.peek().type === 'clist') {
                                break;
                            }
                            this.next.shift();
                        }
                    }
                    if (elements.length > 0 && isEmpty(elements[elements.length - 1])) {
                        if (elements.length === 1 || emptyThingHasNewline(elements[elements.length - 1])) {
                            // allow trailing commas when followed by whitespace
                            elements.pop();
                        }
                        else {
                            throw new ParseFailure(`unexpected list close (expected some content for element)`, this.peek().offset);
                        }
                    }
                    items.push({ type: 'list', elements });
                    this.next.shift(); // eat the clist
                    break;
                }
                case 'clist': {
                    if (!close.includes('clist')) {
                        throw new ParseFailure('unexpected list close without corresponding list open', next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'oparen': {
                    const lastPart = items[items.length - 1];
                    if (lastPart != null && lastPart.type === 'prose') {
                        // walk backwards through the preceding prose to peel off the callee:
                        // everything after the last space, stopping at a tag
                        const callee = [];
                        for (let i = lastPart.parts.length - 1; i >= 0; --i) {
                            const ppart = lastPart.parts[i];
                            if (ppart.name === 'text') {
                                const spaceIndex = ppart.contents.lastIndexOf(' ');
                                if (spaceIndex !== -1) {
                                    if (spaceIndex < ppart.contents.length - 1) {
                                        const calleePart = ppart.contents.slice(spaceIndex + 1);
                                        if (!/\p{Letter}/u.test(calleePart)) {
                                            // e.g. -(x + 1)
                                            break;
                                        }
                                        lastPart.parts[i] = {
                                            name: 'text',
                                            contents: ppart.contents.slice(0, spaceIndex + 1),
                                            location: ppart.location,
                                        };
                                        callee.unshift({
                                            name: 'text',
                                            contents: calleePart,
                                            location: {
                                                start: { offset: ppart.location.start.offset + spaceIndex + 1 },
                                            },
                                        });
                                    }
                                    break;
                                }
                            }
                            else if (ppart.name === 'tag') {
                                break;
                            }
                            callee.unshift(ppart);
                            lastPart.parts.pop();
                        }
                        if (callee.length > 0) {
                            this.next.shift();
                            const args = [];
                            if (this.peek().type !== 'cparen') {
                                while (true) {
                                    args.push(this.parseSeq(['cparen', 'comma']));
                                    if (this.peek().type === 'cparen') {
                                        break;
                                    }
                                    this.next.shift();
                                }
                            }
                            if (args.length > 0 && isEmpty(args[args.length - 1])) {
                                if (args.length === 1 || emptyThingHasNewline(args[args.length - 1])) {
                                    // allow trailing commas when followed by a newline
                                    args.pop();
                                }
                                else {
                                    throw new ParseFailure(`unexpected close parenthesis (expected some content for argument)`, this.peek().offset);
                                }
                            }
                            items.push({
                                type: 'call',
                                callee: { type: 'prose', parts: callee },
                                arguments: args,
                            });
                            this.next.shift(); // eat the cparen
                            break;
                        }
                    }
                    // no callee found: a plain parenthesized group
                    this.next.shift();
                    items.push({ type: 'paren', items: this.parseSeq(['cparen']).items });
                    this.next.shift(); // eat the cparen
                    break;
                }
                case 'cparen': {
                    if (!close.includes('cparen')) {
                        throw new ParseFailure('unexpected close parenthesis without corresponding open parenthesis', next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'orec': {
                    this.next.shift();
                    // 'record' when fields carry values, 'record-spec' when they are bare names;
                    // fixed by the first field and enforced for the rest
                    let type = null;
                    const members = [];
                    while (true) {
                        const nextTok = this.peek();
                        if (nextTok.type !== 'prose') {
                            throw new ParseFailure('expected to find record field name', nextTok.offset);
                        }
                        if (nextTok.parts[0].name !== 'text') {
                            throw new ParseFailure('expected to find record field name', nextTok.parts[0].location.start.offset);
                        }
                        const { contents } = nextTok.parts[0];
                        const nameMatch = contents.match(/^\s*\[\[(?<name>\w+)\]\]\s*(?<colon>:?)/);
                        if (nameMatch == null) {
                            if (members.length > 0 && /^\s*$/.test(contents) && contents.includes('\n')) {
                                // allow trailing commas when followed by a newline
                                this.next.shift(); // eat the whitespace
                                if (this.peek().type === 'crec') {
                                    // leave the crec queued: the shift after the loop consumes it,
                                    // exactly as on the ordinary exit path
                                    break;
                                }
                            }
                            throw new ParseFailure('expected to find record field', nextTok.parts[0].location.start.offset + contents.match(/^\s*/)[0].length);
                        }
                        const { name, colon } = nameMatch.groups;
                        if (members.some(x => x.name === name)) {
                            throw new ParseFailure(`duplicate record field name ${name}`, nextTok.parts[0].location.start.offset + contents.match(/^\s*\[\[/)[0].length);
                        }
                        // strip the consumed "[[name]]:" prefix from the queued prose token
                        const shortenedText = nextTok.parts[0].contents.slice(nameMatch[0].length);
                        const offset = nextTok.parts[0].location.start.offset + nameMatch[0].length;
                        if (shortenedText.length === 0 && nextTok.parts.length === 1) {
                            this.next.shift();
                        }
                        else if (shortenedText.length === 0) {
                            this.next[0] = {
                                type: 'prose',
                                parts: nextTok.parts.slice(1),
                            };
                        }
                        else {
                            const shortened = {
                                name: 'text',
                                contents: shortenedText,
                                location: {
                                    start: { offset },
                                },
                            };
                            this.next[0] = {
                                type: 'prose',
                                parts: [shortened, ...nextTok.parts.slice(1)],
                            };
                        }
                        if (colon) {
                            if (type == null) {
                                type = 'record';
                            }
                            else if (type === 'record-spec') {
                                throw new ParseFailure('record field has value but preceding field does not', offset - 1);
                            }
                            const value = this.parseSeq(['crec', 'comma']);
                            if (value.items.length === 0) {
                                throw new ParseFailure('expected record field to have value', offset);
                            }
                            members.push({ name, value });
                        }
                        else {
                            if (type == null) {
                                type = 'record-spec';
                            }
                            else if (type === 'record') {
                                throw new ParseFailure('expected record field to have value', offset - 1);
                            }
                            members.push({ name });
                            if (!['crec', 'comma'].includes(this.peek().type)) {
                                throw new ParseFailure(`expected ${formatClose(['crec', 'comma'])}`, offset);
                            }
                        }
                        if (this.peek().type === 'crec') {
                            break;
                        }
                        this.next.shift(); // eat the comma
                    }
                    // @ts-ignore typing this correctly is annoying
                    items.push({ type, members });
                    this.next.shift(); // eat the crec
                    break;
                }
                case 'crec': {
                    if (!close.includes('crec')) {
                        throw new ParseFailure('unexpected end of record without corresponding start of record', next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'x_of': {
                    this.next.shift();
                    const callee = next.source.split(' ')[0];
                    if (!this.opNames.has(callee)) {
                        // "<word> of" where <word> is not a known operation is ordinary prose
                        addProse(items, next);
                        break;
                    }
                    const parseNode = this.parseSeq([
                        'eof',
                        'period',
                        'comma',
                        'cparen',
                        'clist',
                        'crec',
                        'with_args',
                    ]);
                    const args = [];
                    if (this.peek().type === 'with_args') {
                        this.next.shift();
                        while (true) {
                            args.push(this.parseSeq([
                                'eof',
                                'period',
                                'and',
                                'is',
                                'comma',
                                'cparen',
                                'clist',
                                'crec',
                                'with_args',
                            ]));
                            if (!['and', 'comma'].includes(this.peek().type)) {
                                break;
                            }
                            this.next.shift();
                        }
                    }
                    items.push({
                        type: 'sdo-call',
                        callee: {
                            type: 'prose',
                            parts: [
                                { name: 'text', contents: callee, location: { start: { offset: next.offset } } },
                            ],
                        },
                        parseNode,
                        arguments: args,
                    });
                    break;
                }
                default: {
                    // @ts-ignore
                    throw new Error(`unreachable: unknown token type ${next.type}`);
                }
            }
        }
    }
}
/**
 * Parses a sequence of fragment nodes into an expression tree.
 *
 * @param {Array} src - fragment nodes to parse
 * @param opNames - collection queried with `.has(name)` for known operation names
 * @returns a `{ type: 'seq', items }` node on success, or
 *   `{ type: 'failure', message, offset }` when the input is malformed.
 *   Errors other than ParseFailure are rethrown.
 */
function parse(src, opNames) {
    const parser = new ExprParser(src, opNames);
    try {
        return parser.parseSeq(['eof']);
    }
    catch (e) {
        if (e instanceof ParseFailure) {
            return { type: 'failure', message: e.message, offset: e.offset };
        }
        throw e;
    }
}
exports.parse = parse;
/**
 * Visits `current` and its descendants in pre-order, calling `f(node, path)`.
 *
 * `path` is a single array that is pushed to and popped from during the walk,
 * so `f` receives the same array instance on every call and must copy it if it
 * needs to keep it. Each entry is `{ parent, index }`, where `index` is the
 * position within the parent's elements/members/arguments/items, or the string
 * 'callee' for the callee of a call. For 'sdo-call' nodes only the arguments
 * are visited; for 'record' nodes the member values are visited.
 *
 * @param {Function} f - visitor invoked as `f(node, path)`
 * @param {object} current - node to start from
 * @param {Array} [path] - ancestors of `current`; defaults to an empty array
 * @throws {Error} if a node has an unrecognized `type`
 */
function walk(f, current, path = []) {
    f(current, path);
    switch (current.type) {
        case 'prose': {
            break;
        }
        case 'list': {
            for (let i = 0; i < current.elements.length; ++i) {
                path.push({ parent: current, index: i });
                walk(f, current.elements[i], path);
                path.pop();
            }
            break;
        }
        case 'record': {
            for (let i = 0; i < current.members.length; ++i) {
                path.push({ parent: current, index: i });
                walk(f, current.members[i].value, path);
                path.pop();
            }
            break;
        }
        case 'record-spec': {
            break;
        }
        case 'sdo-call': {
            for (let i = 0; i < current.arguments.length; ++i) {
                path.push({ parent: current, index: i });
                walk(f, current.arguments[i], path);
                path.pop();
            }
            break;
        }
        case 'call': {
            path.push({ parent: current, index: 'callee' });
            walk(f, current.callee, path);
            path.pop();
            for (let i = 0; i < current.arguments.length; ++i) {
                path.push({ parent: current, index: i });
                walk(f, current.arguments[i], path);
                path.pop();
            }
            break;
        }
        case 'paren':
        case 'seq': {
            for (let i = 0; i < current.items.length; ++i) {
                path.push({ parent: current, index: i });
                walk(f, current.items[i], path);
                path.pop();
            }
            break;
        }
        default: {
            // @ts-ignore
            throw new Error(`unreachable: unknown expression node type ${current.type}`);
        }
    }
}
exports.walk = walk;