ecmarkup
Version:
Custom element definitions and core utilities for markup that specifies ECMAScript and related technologies.
526 lines (525 loc) • 22.8 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.walk = exports.parse = void 0;
const header_parser_1 = require("./header-parser");
// Tokenizer pattern: matches the next structural token inside a text fragment.
// Exactly one named group is set per match, and the group name is used as the token type.
// List brackets are accepted both as the literal characters « » and as the HTML
// entities &laquo; / &raquo;, since the text being scanned may still contain entities.
const tokMatcher = /(?<olist>&laquo;|«)|(?<clist>&raquo;|»)|(?<orec>\{)|(?<crec>\})|(?<oparen>\()|(?<cparen>\))|(?<and>(?:, )?and )|(?<is> is )|(?<comma>,)|(?<period>\.(?= |$))|(?<x_of>\b\w+ of )|(?<with_args> with arguments? )/u;
/**
 * Error thrown by the parser when the input is malformed.
 * Carries the position of the problem so callers can report it.
 */
class ParseFailure extends Error {
    /**
     * @param message description of what went wrong
     * @param offset position associated with the failure (callers pass token/fragment offsets)
     */
    constructor(message, offset) {
        super(message);
        // Without an explicit name, logs and stack traces label this as a generic "Error".
        this.name = 'ParseFailure';
        this.offset = offset;
    }
}
// Human-readable labels for the token kinds that may terminate a sequence.
// Kinds without an entry are reported under their raw name.
const closeTokenLabels = new Map([
    ['clist', 'list close'],
    ['crec', 'record close'],
    ['cparen', 'close parenthesis'],
    ['eof', 'end of line'],
    ['with_args', '"with argument(s)"'],
    ['comma', 'comma'],
    ['period', 'period'],
    ['and', '"and"'],
    ['is', '"is"'],
]);
/**
 * Renders a list of closing-token kinds as an English "a, b, or c" style phrase
 * for use in error messages.
 */
function formatClose(close) {
    const { formatEnglishList } = header_parser_1;
    const labels = close.map(kind => (closeTokenLabels.has(kind) ? closeTokenLabels.get(kind) : kind));
    return formatEnglishList(labels, 'or');
}
/**
 * Appends `token` to `items` as prose, merging it into a trailing prose item when there is one.
 *
 * A non-prose token (one that turned out not to be structural after all) is first
 * converted into a single text part built from its `source` and `offset`.
 * When the last part of the existing prose and the first part of the new prose are
 * both text, they are fused into one text part that starts where the earlier one did.
 */
function addProse(items, token) {
    const incoming = token.type === 'prose'
        ? token
        : {
            type: 'prose',
            parts: [
                {
                    name: 'text',
                    contents: token.source,
                    location: { start: { offset: token.offset } },
                },
            ],
        };
    const lastIndex = items.length - 1;
    const previous = items[lastIndex];
    // Nothing to merge with: the prose simply becomes a new item.
    if (previous == null || previous.type !== 'prose') {
        items.push(incoming);
        return;
    }
    const tail = previous.parts[previous.parts.length - 1];
    const head = incoming.parts[0];
    const canFuseText = tail != null && tail.name === 'text' && head != null && head.name === 'text';
    let parts;
    if (canFuseText) {
        const fused = {
            name: 'text',
            contents: tail.contents + head.contents,
            location: { start: { offset: tail.location.start.offset } },
        };
        parts = [...previous.parts.slice(0, -1), fused, ...incoming.parts.slice(1)];
    }
    else {
        parts = [...previous.parts, ...incoming.parts];
    }
    items[lastIndex] = { type: 'prose', parts };
}
/**
 * Reports whether every part of the prose node `x` is a text part
 * consisting solely of whitespace (or nothing at all).
 */
function isWhitespace(x) {
    const isBlankText = part => part.name === 'text' && /^\s*$/.test(part.contents);
    return x.parts.every(isBlankText);
}
/**
 * Reports whether the sequence `s` holds nothing but whitespace-only prose items
 * (an item-less sequence counts as empty).
 */
function isEmpty(s) {
    const hasContent = item => item.type !== 'prose' || !isWhitespace(item);
    return !s.items.some(hasContent);
}
/**
 * Reports whether any part of any item in `s` contains a newline character.
 * Only call this on sequences that pass isEmpty: it reads `parts` and `contents`
 * on every item without checking the item or part kind.
 */
function emptyThingHasNewline(s) {
    const partHasNewline = part => part.contents.includes('\n');
    return s.items.some(item => item.parts.some(partHasNewline));
}
/**
 * Parser that turns a sequence of fragments into a nested expression tree made of
 * `seq`, `prose`, `list`, `record`, `record-spec`, `call`, `sdo-call` and `paren` nodes.
 *
 * Text fragments are scanned with `tokMatcher` to find structural tokens; everything
 * else is passed through as prose. Malformed input is reported by throwing `ParseFailure`.
 */
class ExprParser {
    /**
     * @param src array of fragments. Each has a `name`; fragments named 'text' have string
     *   `contents`. Fragments carry `location.start.offset`, and the last one is also read
     *   for `location.end.offset` when the end-of-input token is produced.
     * @param opNames collection with a `has(name)` method, consulted to decide whether
     *   "<word> of " introduces an `sdo-call`
     */
    constructor(src, opNames) {
        this.srcIndex = 0;
        this.textTokOffset = null; // offset into current text node; only meaningful if srcIndex points to a text node
        this.next = []; // queue of tokens that have been scanned but not yet consumed
        this.src = src;
        this.opNames = opNames;
    }
    /**
     * Returns the next unconsumed token without consuming it, scanning more input if the
     * queue is empty. Callers consume a token with `this.next.shift()`.
     */
    peek() {
        if (this.next.length === 0) {
            this.advance();
        }
        return this.next[0];
    }
    /**
     * Scans forward and queues either (optional prose, one structural token) or
     * (optional prose, eof).
     */
    // this method is complicated because the underlying data is a sequence of ecmarkdown fragments, not a string
    advance() {
        const currentProse = [];
        while (this.srcIndex < this.src.length) {
            // When part of the current text fragment was already consumed, work on a
            // synthetic fragment covering only the remainder, with its start offset adjusted.
            const tok = this.textTokOffset == null
                ? this.src[this.srcIndex]
                : {
                    name: 'text',
                    contents: this.src[this.srcIndex].contents.slice(this.textTokOffset),
                    location: {
                        start: {
                            offset: this.src[this.srcIndex].location.start.offset + this.textTokOffset,
                        },
                    },
                };
            const match = tok.name === 'text' ? tok.contents.match(tokMatcher) : null;
            if (tok.name !== 'text' || match == null) {
                // No structural token here: the whole fragment is prose (empty text is dropped).
                if (!(tok.name === 'text' && tok.contents.length === 0)) {
                    currentProse.push(tok);
                }
                ++this.srcIndex;
                this.textTokOffset = null;
                continue;
            }
            const { groups } = match;
            const before = tok.contents.slice(0, match.index);
            if (before.length > 0) {
                currentProse.push({ name: 'text', contents: before, location: tok.location });
            }
            // Exactly one named group participates in a match; its name is the token type.
            const matchKind = Object.keys(groups).find(x => groups[x] != null);
            if (currentProse.length > 0) {
                this.next.push({ type: 'prose', parts: currentProse });
            }
            const alreadyConsumed = this.textTokOffset == null ? 0 : this.textTokOffset;
            this.textTokOffset = alreadyConsumed + match.index + match[0].length;
            this.next.push({
                type: matchKind,
                offset: tok.location.start.offset + match.index,
                source: groups[matchKind],
            });
            return;
        }
        if (currentProse.length > 0) {
            this.next.push({ type: 'prose', parts: currentProse });
        }
        this.next.push({
            type: 'eof',
            offset: this.src.length === 0 ? 0 : this.src[this.src.length - 1].location.end.offset,
            source: '',
        });
    }
    /**
     * Parses items until a token whose type is listed in `close` is reached.
     * The closing token is left in the queue for the caller to consume.
     *
     * @param close array of token types that end this sequence
     * @returns `{ type: 'seq', items }`
     * @throws ParseFailure on malformed input
     */
    // guarantees the next token is an element of close
    parseSeq(close) {
        const items = [];
        while (true) {
            const next = this.peek();
            switch (next.type) {
                case 'and':
                case 'is':
                case 'period':
                case 'with_args':
                case 'comma': {
                    // These are only structural when the caller asked for them; otherwise they are ordinary text.
                    if (!close.includes(next.type)) {
                        addProse(items, next);
                        this.next.shift();
                        break;
                    }
                    if (items.length === 0) {
                        throw new ParseFailure(`unexpected ${next.type} (expected some content for element/argument)`, next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'eof': {
                    if (items.length === 0 || !close.includes('eof')) {
                        throw new ParseFailure(`unexpected eof (expected ${formatClose(close)})`, next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'prose': {
                    addProse(items, next);
                    this.next.shift();
                    break;
                }
                case 'olist': {
                    this.next.shift();
                    const elements = [];
                    if (this.peek().type !== 'clist') {
                        while (true) {
                            elements.push(this.parseSeq(['clist', 'comma']));
                            if (this.peek().type === 'clist') {
                                break;
                            }
                            this.next.shift(); // eat the comma
                        }
                    }
                    if (elements.length > 0 && isEmpty(elements[elements.length - 1])) {
                        if (elements.length === 1 || emptyThingHasNewline(elements[elements.length - 1])) {
                            // a lone blank element is an empty list; otherwise
                            // allow trailing commas when followed by a newline
                            elements.pop();
                        }
                        else {
                            throw new ParseFailure(`unexpected list close (expected some content for element)`, this.peek().offset);
                        }
                    }
                    items.push({ type: 'list', elements });
                    this.next.shift(); // eat the clist
                    break;
                }
                case 'clist': {
                    if (!close.includes('clist')) {
                        throw new ParseFailure('unexpected list close without corresponding list open', next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'oparen': {
                    // An open paren directly after prose may be a call: walk backwards through the
                    // preceding prose, moving parts into `callee` until a space or a tag is hit.
                    const lastPart = items[items.length - 1];
                    if (lastPart != null && lastPart.type === 'prose') {
                        const callee = [];
                        for (let i = lastPart.parts.length - 1; i >= 0; --i) {
                            const ppart = lastPart.parts[i];
                            if (ppart.name === 'text') {
                                const spaceIndex = ppart.contents.lastIndexOf(' ');
                                if (spaceIndex !== -1) {
                                    if (spaceIndex < ppart.contents.length - 1) {
                                        const calleePart = ppart.contents.slice(spaceIndex + 1);
                                        if (!/\p{Letter}/u.test(calleePart)) {
                                            // e.g. -(x + 1)
                                            break;
                                        }
                                        // Split the text: everything up to the space stays prose,
                                        // the remainder becomes the first part of the callee.
                                        lastPart.parts[i] = {
                                            name: 'text',
                                            contents: ppart.contents.slice(0, spaceIndex + 1),
                                            location: ppart.location,
                                        };
                                        callee.unshift({
                                            name: 'text',
                                            contents: calleePart,
                                            location: {
                                                start: { offset: ppart.location.start.offset + spaceIndex + 1 },
                                            },
                                        });
                                    }
                                    break;
                                }
                            }
                            else if (ppart.name === 'tag') {
                                break;
                            }
                            callee.unshift(ppart);
                            lastPart.parts.pop();
                        }
                        if (callee.length > 0) {
                            this.next.shift();
                            const args = [];
                            if (this.peek().type !== 'cparen') {
                                while (true) {
                                    args.push(this.parseSeq(['cparen', 'comma']));
                                    if (this.peek().type === 'cparen') {
                                        break;
                                    }
                                    this.next.shift(); // eat the comma
                                }
                            }
                            if (args.length > 0 && isEmpty(args[args.length - 1])) {
                                if (args.length === 1 || emptyThingHasNewline(args[args.length - 1])) {
                                    // allow trailing commas when followed by a newline
                                    args.pop();
                                }
                                else {
                                    throw new ParseFailure(`unexpected close parenthesis (expected some content for argument)`, this.peek().offset);
                                }
                            }
                            items.push({
                                type: 'call',
                                callee: { type: 'prose', parts: callee },
                                arguments: args,
                            });
                            this.next.shift(); // eat the cparen
                            break;
                        }
                    }
                    // Not a call: a plain parenthesised group.
                    this.next.shift();
                    items.push({ type: 'paren', items: this.parseSeq(['cparen']).items });
                    this.next.shift(); // eat the cparen
                    break;
                }
                case 'cparen': {
                    if (!close.includes('cparen')) {
                        throw new ParseFailure('unexpected close parenthesis without corresponding open parenthesis', next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'orec': {
                    this.next.shift();
                    // 'record' once a field with a value is seen, 'record-spec' once a bare field is seen;
                    // mixing the two is an error.
                    let type = null;
                    const members = [];
                    while (true) {
                        const nextTok = this.peek();
                        if (nextTok.type !== 'prose') {
                            throw new ParseFailure('expected to find record field name', nextTok.offset);
                        }
                        if (nextTok.parts[0].name !== 'text') {
                            throw new ParseFailure('expected to find record field name', nextTok.parts[0].location.start.offset);
                        }
                        const { contents } = nextTok.parts[0];
                        const nameMatch = contents.match(/^\s*\[\[(?<name>\w+)\]\]\s*(?<colon>:?)/);
                        if (nameMatch == null) {
                            // The whole prose token must be blank, not merely its first part:
                            // the token is discarded below, so anything else in it would be silently lost.
                            const isTrailingBlank = members.length > 0 && contents.includes('\n') && isWhitespace(nextTok);
                            if (isTrailingBlank) {
                                // allow trailing commas when followed by a newline
                                this.next.shift(); // eat the whitespace
                                if (this.peek().type === 'crec') {
                                    // the crec itself is eaten after the loop
                                    break;
                                }
                            }
                            throw new ParseFailure('expected to find record field', nextTok.parts[0].location.start.offset + contents.match(/^\s*/)[0].length);
                        }
                        const { name, colon } = nameMatch.groups;
                        if (members.find(x => x.name === name)) {
                            throw new ParseFailure(`duplicate record field name ${name}`, nextTok.parts[0].location.start.offset + contents.match(/^\s*\[\[/)[0].length);
                        }
                        // Strip the field name (and colon) off the front of the queued prose token
                        // so that what remains can be parsed as the field's value.
                        const shortenedText = nextTok.parts[0].contents.slice(nameMatch[0].length);
                        const offset = nextTok.parts[0].location.start.offset + nameMatch[0].length;
                        if (shortenedText.length === 0 && nextTok.parts.length === 1) {
                            this.next.shift();
                        }
                        else if (shortenedText.length === 0) {
                            this.next[0] = {
                                type: 'prose',
                                parts: nextTok.parts.slice(1),
                            };
                        }
                        else {
                            const shortened = {
                                name: 'text',
                                contents: shortenedText,
                                location: {
                                    start: { offset },
                                },
                            };
                            this.next[0] = {
                                type: 'prose',
                                parts: [shortened, ...nextTok.parts.slice(1)],
                            };
                        }
                        if (colon) {
                            if (type == null) {
                                type = 'record';
                            }
                            else if (type === 'record-spec') {
                                throw new ParseFailure('record field has value but preceding field does not', offset - 1);
                            }
                            const value = this.parseSeq(['crec', 'comma']);
                            if (value.items.length === 0) {
                                throw new ParseFailure('expected record field to have value', offset);
                            }
                            members.push({ name, value });
                        }
                        else {
                            if (type == null) {
                                type = 'record-spec';
                            }
                            else if (type === 'record') {
                                throw new ParseFailure('expected record field to have value', offset - 1);
                            }
                            members.push({ name });
                            if (!['crec', 'comma'].includes(this.peek().type)) {
                                throw new ParseFailure(`expected ${formatClose(['crec', 'comma'])}`, offset);
                            }
                        }
                        if (this.peek().type === 'crec') {
                            break;
                        }
                        this.next.shift(); // eat the comma
                    }
                    // @ts-ignore typing this correctly is annoying
                    items.push({ type, members });
                    this.next.shift(); // eat the crec
                    break;
                }
                case 'crec': {
                    if (!close.includes('crec')) {
                        throw new ParseFailure('unexpected end of record without corresponding start of record', next.offset);
                    }
                    return { type: 'seq', items };
                }
                case 'x_of': {
                    this.next.shift();
                    // "<word> of " is only a call when <word> is a known operation name; otherwise it is text.
                    const callee = next.source.split(' ')[0];
                    if (!this.opNames.has(callee)) {
                        addProse(items, next);
                        break;
                    }
                    const parseNode = this.parseSeq([
                        'eof',
                        'period',
                        'comma',
                        'cparen',
                        'clist',
                        'crec',
                        'with_args',
                    ]);
                    const args = [];
                    if (this.peek().type === 'with_args') {
                        this.next.shift();
                        while (true) {
                            args.push(this.parseSeq([
                                'eof',
                                'period',
                                'and',
                                'is',
                                'comma',
                                'cparen',
                                'clist',
                                'crec',
                                'with_args',
                            ]));
                            // arguments are separated by "and" or a comma; anything else ends the argument list
                            if (!['and', 'comma'].includes(this.peek().type)) {
                                break;
                            }
                            this.next.shift();
                        }
                    }
                    items.push({
                        type: 'sdo-call',
                        callee: {
                            type: 'prose',
                            parts: [
                                { name: 'text', contents: callee, location: { start: { offset: next.offset } } },
                            ],
                        },
                        parseNode,
                        arguments: args,
                    });
                    break;
                }
                default: {
                    // @ts-ignore
                    throw new Error(`unreachable: unknown token type ${next.type}`);
                }
            }
        }
    }
}
/**
 * Parses `src` as a complete expression sequence.
 *
 * @returns the parsed `seq` node, or `{ type: 'failure', message, offset }` when the
 *   parser reports a ParseFailure. Any other error propagates unchanged.
 */
function parse(src, opNames) {
    let result;
    try {
        result = new ExprParser(src, opNames).parseSeq(['eof']);
    }
    catch (err) {
        // Only expected parse errors are converted into a failure result.
        if (!(err instanceof ParseFailure)) {
            throw err;
        }
        const { message, offset } = err;
        result = { type: 'failure', message, offset };
    }
    return result;
}
exports.parse = parse;
/**
 * Visits `current` and then its descendants in pre-order, calling `f(node, path)` for each.
 *
 * `path` is a single array mutated during the traversal: while a child is being visited it
 * ends with `{ parent, index }` for that child (`index` is 'callee' for a call's callee).
 * Callbacks that want to keep a path must copy it.
 *
 * Children visited per node type:
 *   - list: elements
 *   - record: each member's value
 *   - call: callee, then arguments
 *   - sdo-call: arguments only
 *   - paren / seq: items
 *   - prose / record-spec: none
 */
function walk(f, current, path = []) {
    f(current, path);
    const descend = (child, index) => {
        path.push({ parent: current, index });
        walk(f, child, path);
        path.pop();
    };
    switch (current.type) {
        case 'prose':
        case 'record-spec':
            return;
        case 'list':
            for (const [index, element] of current.elements.entries()) {
                descend(element, index);
            }
            return;
        case 'record':
            for (const [index, member] of current.members.entries()) {
                descend(member.value, index);
            }
            return;
        case 'call':
            descend(current.callee, 'callee');
            for (const [index, argument] of current.arguments.entries()) {
                descend(argument, index);
            }
            return;
        case 'sdo-call':
            for (const [index, argument] of current.arguments.entries()) {
                descend(argument, index);
            }
            return;
        case 'paren':
        case 'seq':
            for (const [index, item] of current.items.entries()) {
                descend(item, index);
            }
            return;
        default:
            // @ts-ignore
            throw new Error(`unreachable: unknown expression node type ${current.type}`);
    }
}
exports.walk = walk;