fruitsconfits
Version:
FruitsConfits - A well typed and sugared parser combinator framework for TypeScript/JavaScript.
672 lines (584 loc) • 21.4 kB
text/typescript
// Copyright (c) 2019 Shellyl_N and Authors
// license: ISC
// https://github.com/shellyln
// tslint:disable: no-implicit-dependencies
import { ParserInputWithCtx,
parserInput,
ParserFnWithCtx } from '../../lib/types';
import { formatErrorMessage } from '../../lib/parser';
import { getStringParsers } from '../../lib/string-parser';
import { getObjectParsers } from '../../lib/object-parser';
import * as liyad from 'liyad';
/** Context type passed to every parser in this file; no context is carried. */
type Ctx = undefined;

/** Marker token for an operator that has been read from the source text. */
interface SxOp {
    op: string;
}

/** A nested AST element: a liyad token child, an operator marker, or `undefined`. */
type AstChild = liyad.SxTokenChild | SxOp | undefined;

/** Any token the parsers in this file may produce. */
type Ast = liyad.SxToken | AstChild | SxOp | undefined;
/**
 * Combinator set obtained from `getStringParsers`, producing `Ast` tokens
 * with a `Ctx` context.
 */
const $s = getStringParsers<Ctx, Ast>({
    // The raw token is used as the token unchanged.
    rawToToken: raw => raw,
    // Fold a token run into at most one token by left-to-right string
    // concatenation; an empty run stays empty.
    concatTokens: parts => {
        if (!parts.length) {
            return [];
        }
        // eslint-disable-next-line @typescript-eslint/restrict-plus-operands
        return [parts.reduce((acc, part) => String(acc) + part)];
    },
});
/**
 * Combinator set obtained from `getObjectParsers`, working on an `Ast[]`
 * input; used by the expression reduction rules below.
 */
const $o = getObjectParsers<Ast[], Ctx, Ast>({
    // The raw token is used as the token unchanged.
    rawToToken: raw => raw,
    // Fold a token run into at most one token by left-to-right string
    // concatenation; an empty run stays empty.
    concatTokens: parts => {
        if (!parts.length) {
            return [];
        }
        // eslint-disable-next-line @typescript-eslint/restrict-plus-operands
        return [parts.reduce((acc, part) => String(acc) + part)];
    },
    // Tokens are compared by strict equality.
    comparator: (left, right) => left === right,
});
// Pull the string-level combinators into local scope so the grammar below
// can use them without the `$s.` prefix.
const {
    seq, cls, notCls, clsFn,
    classes, numbers, cat,
    once, repeat, qty,
    zeroWidth, err, beginning, end,
    first, or, combine, erase,
    trans, ahead, rules,
    makeProgram,
} = $s;
/**
 * `//` comment running to the end of the line.
 *
 * The terminator is either a newline (consumed) or the end of the input
 * (looked at only), so a comment on the last line of a source that has no
 * trailing newline is still accepted.
 */
const lineComment =
    combine(
        seq('//'),
        repeat(notCls('\r\n', '\n', '\r')),
        first(classes.newline, ahead(end())),
    );
/**
 * `#` comment running to the end of the line.
 *
 * The terminator is either a newline (consumed) or the end of the input
 * (looked at only), so a comment on the last line of a source that has no
 * trailing newline is still accepted.
 */
const hashLineComment =
    combine(
        seq('#'),
        repeat(notCls('\r\n', '\n', '\r')),
        first(classes.newline, ahead(end())),
    );
/** Block comment: the opening marker, anything that is not the closing marker, then the closing marker. */
const blockComment = (() => {
    const opening = seq('/*');
    const body = repeat(notCls('*/'));
    const closing = seq('*/');
    return combine(opening, body, closing);
})();
/** One unit of insignificant text: a space-class character or any of the three comment forms. */
const commentOrSpace =
    first(
        classes.space,
        lineComment,
        hashLineComment,
        blockComment,
    );
// Keyword literals: each keyword's text is replaced by the matching
// JavaScript value.

/** `true` -> `true` */
const trueValue =
    trans(() => [true])(seq('true'));

/** `false` -> `false` */
const falseValue =
    trans(() => [false])(seq('false'));

/** `null` -> `null` */
const nullValue =
    trans(() => [null])(seq('null'));

/** `undefined` -> `undefined` */
const undefinedValue =
    trans(() => [void 0])(seq('undefined'));
// Non-finite number literals.

/** `Infinity`, optionally preceded by `+`. */
const positiveInfinityValue =
    trans(() => [Number.POSITIVE_INFINITY])(
        qty(0, 1)(seq('+')),
        seq('Infinity'),
    );

/** `-Infinity` */
const negativeInfinityValue =
    trans(() => [Number.NEGATIVE_INFINITY])(seq('-Infinity'));

/** `NaN` */
const nanValue =
    trans(() => [Number.NaN])(seq('NaN'));
// Finite number literals. In every case the first token is taken as the
// digit text, `_` characters are removed from it, and the rest is converted
// with `Number.parseInt` / `Number.parseFloat`.

/** Binary integer introduced by `0b`. */
const binaryIntegerValue =
    trans(matched => {
        const digits = (matched as string[])[0].replace(/_/g, '');
        return [Number.parseInt(digits, 2)];
    })(numbers.bin(seq('0b')));

/** Octal integer introduced by `0o` or by `0`. */
const octalIntegerValue =
    trans(matched => {
        const digits = (matched as string[])[0].replace(/_/g, '');
        return [Number.parseInt(digits, 8)];
    })(numbers.oct(seq('0o'), seq('0')));

/** Hexadecimal integer introduced by `0x` or `0X`. */
const hexIntegerValue =
    trans(matched => {
        const digits = (matched as string[])[0].replace(/_/g, '');
        return [Number.parseInt(digits, 16)];
    })(numbers.hex(seq('0x'), seq('0X')));

/** Decimal integer. */
const decimalIntegerValue =
    trans(matched => {
        const digits = (matched as string[])[0].replace(/_/g, '');
        return [Number.parseInt(digits, 10)];
    })(numbers.int);

/** Floating point number. */
const floatingPointNumberValue =
    trans(matched => {
        const digits = (matched as string[])[0].replace(/_/g, '');
        return [Number.parseFloat(digits)];
    })(numbers.float);
/**
 * Any number literal. The alternatives are listed with the prefixed radix
 * forms first, then float, decimal integer and the non-finite literals.
 */
const numberValue =
    first(
        octalIntegerValue,
        hexIntegerValue,
        binaryIntegerValue,
        floatingPointNumberValue,
        decimalIntegerValue,
        positiveInfinityValue,
        negativeInfinityValue,
        nanValue,
    );
/**
 * One escape sequence inside a string literal, replaced by the text it
 * stands for.
 *
 * The fixed escapes are tried in table order, followed by the numeric
 * escapes: `\uXXXX`, `\u{X...}` (1 to 6 hex digits), `\xXX` and a backslash
 * followed by three octal digits.
 */
const stringEscapeSeq = (() => {
    // [source text, replacement]. A backslash followed by a line break
    // contributes an empty string.
    const fixedEscapes: Array<[string, string]> = [
        ['\\\'', '\''],
        ['\\"', '"'],
        ['\\`', '`'],
        ['\\\\', '\\'],
        ['\\\r\n', ''],
        ['\\\r', ''],
        ['\\\n', ''],
        ['\\n', '\n'],
        ['\\r', '\r'],
        ['\\v', '\v'],
        ['\\t', '\t'],
        ['\\b', '\b'],
        ['\\f', '\f'],
    ];

    // Turns the digit text in the first token into the character with that code point.
    const codePoint = (radix: number) =>
        trans(digits => [String.fromCodePoint(Number.parseInt((digits as string[])[0], radix))]);

    return first(
        ...fixedEscapes.map(([source, replacement]) =>
            trans(() => [replacement])(seq(source))),
        codePoint(16)(
            cat(erase(seq('\\u')),
                qty(4, 4)(classes.hex))),
        codePoint(16)(
            cat(erase(seq('\\u{')),
                qty(1, 6)(classes.hex),
                erase(seq('}')))),
        codePoint(16)(
            cat(erase(seq('\\x')),
                qty(2, 2)(classes.hex))),
        codePoint(8)(
            cat(erase(seq('\\')),
                qty(3, 3)(classes.oct))),
    );
})();
/**
 * Single-quoted string literal.
 *
 * Escape sequences are decoded and a raw CR or LF inside the quotes is
 * reported as an error. `concatTokens` yields no token at all for an empty
 * body, so the empty literal is mapped to `''` explicitly rather than to
 * `undefined`.
 */
const signleQuotStringValue =
    trans(tokens => [tokens.length > 0 ? tokens[0] : ''])(
        erase(seq("'")),
        cat(repeat(first(
            stringEscapeSeq,
            combine(cls('\r', '\n'), err('Line breaks within strings are not allowed.')),
            notCls("'"),
        ))),
        erase(seq("'")),
    );
/**
 * Double-quoted string literal.
 *
 * Escape sequences are decoded and a raw CR or LF inside the quotes is
 * reported as an error. `concatTokens` yields no token at all for an empty
 * body, so the empty literal is mapped to `''` explicitly rather than to
 * `undefined`.
 */
const doubleQuotStringValue =
    trans(tokens => [tokens.length > 0 ? tokens[0] : ''])(
        erase(seq('"')),
        cat(repeat(first(
            stringEscapeSeq,
            combine(cls('\r', '\n'), err('Line breaks within strings are not allowed.')),
            notCls('"'),
        ))),
        erase(seq('"')),
    );
/**
 * Back-quoted string literal.
 *
 * Escape sequences are decoded; unlike the other two quote styles, raw line
 * breaks are accepted. `concatTokens` yields no token at all for an empty
 * body, so the empty literal is mapped to `''` explicitly rather than to
 * `undefined`.
 */
const backQuotStringValue =
    trans(tokens => [tokens.length > 0 ? tokens[0] : ''])(
        erase(seq('`')),
        cat(repeat(first(
            stringEscapeSeq,
            notCls('`'),
        ))),
        erase(seq('`')),
    );
/** A string literal in any of the three quote styles. */
const stringValue =
    first(
        signleQuotStringValue,
        doubleQuotStringValue,
        backQuotStringValue,
    );

/** A scalar literal: keyword literal, number or string. */
const atomValue =
    first(
        trueValue,
        falseValue,
        nullValue,
        undefinedValue,
        numberValue,
        stringValue,
    );
/**
 * Identifier: one `classes.alpha` character followed by any number of
 * `classes.alnum` characters, emitted as a `{symbol: name}` token.
 */
const symbolName =
    trans(matched => {
        const name = (matched as string[])[0];
        return [{symbol: name}];
    })(
        cat(combine(
            classes.alpha,
            repeat(classes.alnum),
        )),
    );
/** Key of an object literal entry: a string literal or a bare identifier. */
const objKey =
    first(
        stringValue,
        symbolName,
    );
/**
 * List literal.
 *
 * - `[]` (comments/whitespace allowed inside) yields an empty array token.
 * - `[a, b, ...]` yields `[{symbol: '$list'}, a, b, ...]`. Elements are
 *   nested lists, objects or expressions; one trailing comma is allowed.
 *
 * `listValue`, `objectValue` and `expr` are referenced through lambdas
 * because the definitions are mutually recursive.
 */
const listValue = first(
    trans(() => [[]])(erase(
        seq('['),
        repeat(commentOrSpace),
        seq(']'),
    )),
    trans(items => {
        const ast: Ast = [{symbol: '$list'}, ...(items as any[])];
        return [ast];
    })(
        erase(seq('[')),
        // first element
        once(combine(
            erase(repeat(commentOrSpace)),
            first(
                input => listValue(input),
                input => objectValue(input),
                input => expr(first(seq(','), seq(']')), false)(input),
            ),
            erase(repeat(commentOrSpace)),
        )),
        // remaining elements, each introduced by a comma
        repeat(combine(
            erase(
                repeat(commentOrSpace),
                seq(','),
                repeat(commentOrSpace),
            ),
            first(
                input => listValue(input),
                input => objectValue(input),
                input => expr(first(seq(','), seq(']')), false)(input),
            ),
            erase(repeat(commentOrSpace)),
        )),
        // optional trailing comma
        qty(0, 1)(erase(
            seq(','),
            repeat(commentOrSpace),
        )),
        // anything other than the closing bracket here is reported as an error
        first(ahead(seq(']')), err('Unexpected token has appeared.')),
        erase(seq(']'))
    )
);
/**
 * One `key: value` entry of an object literal. It produces the key token
 * followed by the value token.
 *
 * A missing `:` or a missing value is reported through `err`. The value
 * parsers are wrapped in lambdas because the definitions are mutually
 * recursive.
 */
const objectKeyValuePair =
    combine(
        objKey,
        erase(
            repeat(commentOrSpace),
            first(seq(':'), err('":" is needed.')),
            repeat(commentOrSpace),
        ),
        first(
            input => listValue(input),
            input => objectValue(input),
            input => expr(first(seq(','), seq('}')), false)(input),
            err('object value is needed.'),
        ),
    );
/**
 * Object literal.
 *
 * - `{}` (comments/whitespace allowed inside) yields `[{symbol: '#'}]`.
 * - `{k: v, ...}` yields `[{symbol: '#'}, [k, v], ...]`; one trailing comma
 *   is allowed.
 */
const objectValue = first(
    trans(() => [[{symbol: '#'}]])(erase(
        seq('{'),
        repeat(commentOrSpace),
        seq('}'),
    )),
    trans(flat => {
        // `flat` alternates key, value, key, value, ...
        const ast: Ast = [{symbol: '#'}];
        let index = 0;
        while (index < flat.length) {
            const key = flat[index];
            const value = flat[index + 1];
            index += 2;
            // NOTE: prevent prototype pollution attacks
            if (key !== '__proto__') {
                ast.push([key, value]);
            }
        }
        return [ast];
    })(
        erase(seq('{')),
        // first entry
        once(combine(
            erase(repeat(commentOrSpace)),
            objectKeyValuePair,
            erase(repeat(commentOrSpace)),
        )),
        // remaining entries, each introduced by a comma
        repeat(combine(
            erase(
                seq(','),
                repeat(commentOrSpace),
            ),
            objectKeyValuePair,
            erase(repeat(commentOrSpace)),
        )),
        // optional trailing comma
        qty(0, 1)(erase(
            seq(','),
            repeat(commentOrSpace),
        )),
        // anything other than the closing brace here is reported as an error
        first(ahead(seq('}')), err('Unexpected token has appeared.')),
        erase(seq('}')),
    )
);
/**
 * Builds the S-expression for a unary operation.
 *
 * @param op  operator name, used as the head symbol
 * @param op1 the operand
 * @returns `[{symbol: op}, op1]`
 */
const unaryOp = (op: string, op1: any) => {
    const head = {symbol: op};
    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
    return [head, op1];
};
/**
 * Builds the S-expression for a binary operation.
 *
 * The comma operator is special: it becomes a `$last` form, and an operand
 * that is already a `$last` form is spliced in so comma chains stay flat.
 * Every other operator becomes `[{symbol: op}, op1, op2]`.
 *
 * @param op  operator name
 * @param op1 left operand
 * @param op2 right operand
 */
const binaryOp = (op: string, op1: any, op2: any) => {
    if (op !== ',') {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-return
        return [{symbol: op}, op1, op2];
    }
    const operands: Ast[] = [];
    for (const operand of [op1, op2]) {
        if (Array.isArray(operand) && liyad.isSymbol(operand[0], '$last')) {
            // Already a `$last` form: take its operands, drop its head.
            operands.push(...operand.slice(1));
        } else {
            operands.push(operand);
        }
    }
    return [{symbol: '$last'}, ...operands];
};
/**
 * Builds the S-expression for a three-operand operation.
 *
 * @param op  operator name, used as the head symbol
 * @param op1 first operand
 * @param op2 second operand
 * @param op3 third operand
 * @returns `[{symbol: op}, op1, op2, op3]`
 */
const ternaryOp = (op: string, op1: any, op2: any, op3: any) => {
    const head = {symbol: op};
    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
    return [head, op1, op2, op3];
};
/**
 * Tests whether a token is the operator marker `{op: <op>}`.
 *
 * @param v  any token
 * @param op operator text to compare against
 * @returns `true` only when `v` is a non-null object whose `op` property
 *          strictly equals `op`
 */
const isOperator = (v: any, op: string) => {
    // `typeof null` is 'object', and `null` is a legitimate token (the `null`
    // literal), so it must be excluded before `.op` is read.
    // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
    return v !== null && typeof v === 'object' && v.op === op;
};
/**
 * Tests whether a token counts as a value (operand) for the expression rules.
 *
 * Values are `null`, every non-object primitive except JS symbols,
 * functions, arrays, and objects that have an own `#` property. Any other
 * object, such as an operator marker, is not a value.
 */
const isValue = (v: any) => {
    if (v === null) {
        return true;
    }
    const kind = typeof v;
    if (kind === 'symbol') {
        return false;
    }
    if (kind !== 'object') {
        // number, boolean, string, undefined, bigint, function
        return true;
    }
    if (Object.prototype.hasOwnProperty.call(v, '#')) {
        return true;
    }
    if (Array.isArray(v)) {
        return true;
    }
    return false;
};
// Operator lexemes recognised inside an expression ('**' is listed before '*').
const exprOpsTokens = ['**', '*', '/', '%', '+', '-', '?', ':'];

// Operators that `beginningOrEdgeOp` accepts as the token just before an
// operand: every expression operator plus ',' and '('.
const edgeOpsTokens = [...exprOpsTokens, ',', '('];

// Character-level parser for any one of the expression operators.
const exprOps = cls(...exprOpsTokens);
/**
 * Wraps an operator parser so that its first token (the operator text) is
 * emitted as an `{op: text}` marker.
 */
const transformOp = (op: ParserFnWithCtx<string, Ctx, Ast>) =>
    trans(matched => {
        const marker = {op: matched[0] as string};
        return [marker];
    })(op);
/**
 * Left-context check shared by the prefix rules: either `$o.beginning`, or
 * `$o.behind(1, ...)` applied to a token whose `op` is one of
 * `edgeOpsTokens`. Both are given a factory returning `{op: '$noop'}`.
 */
const beginningOrEdgeOp =
    $o.first(
        $o.beginning(() => ({op: '$noop'})),
        $o.behind(1, () => ({op: '$noop'}))(
            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
            $o.clsFn(tok => Boolean(tok) && edgeOpsTokens.includes((tok as any).op)),
        ),
    );
// Production rule (parenthesised group):
//   beginning S -> beginning "(" E ")"
//   op        S -> op        "(" E ")"
// The result is the token at index 2 of the match, i.e. the grouped value;
// index 0 is the left-context entry and index 1 the "(" marker.
const exprRule20 = $o.trans(matched => [matched[2]])(
    beginningOrEdgeOp,
    $o.clsFn(tok => isOperator(tok, '(')),
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, ')')),
);
// Production rule (call):
//   S -> S<<symbol>> "(" S ")"
//   S -> S<<value>>  "(" S ")"
//   S -> S<<symbol>> "(" ")"
//   S -> S<<value>>  "(" ")"
// The parentheses are erased. A `$last` form between them (a comma list) is
// spread into individual arguments; a single value becomes the only
// argument; nothing between them yields a call with no arguments.
const exprRule18 = $o.trans(matched => {
    const callee = matched[0];
    const arg = matched[1];
    if (Array.isArray(arg) && liyad.isSymbol((arg as Ast[])[0], '$last')) {
        return [[callee, ...(arg as Ast[]).slice(1)]];
    }
    const args = matched.length > 1 ? [arg] : [];
    return [[callee, ...args]];
})(
    $o.first(
        $o.clsFn(tok => Boolean(liyad.isSymbol(tok))),
        $o.clsFn(tok => isValue(tok)),
    ),
    $o.erase($o.clsFn(tok => isOperator(tok, '('))),
    $o.qty(0, 1)($o.first(
        $o.clsFn(tok => Boolean(Array.isArray(tok) && liyad.isSymbol(tok[0], '$last'))),
        $o.clsFn(tok => isValue(tok)),
    )),
    $o.erase($o.clsFn(tok => isOperator(tok, ')'))),
);
// Production rule (unary sign):
//   beginning S -> beginning "+" S
//   op        S -> op        "+" S
//   beginning S -> beginning "-" S
//   op        S -> op        "-" S
// Index 1 of the match is the sign marker and index 2 the operand.
const exprRule16 = $o.trans(matched => {
    const sign = (matched[1] as SxOp).op;
    return [unaryOp(sign, matched[2])];
})(
    beginningOrEdgeOp,
    $o.clsFn(tok => isOperator(tok, '+') || isOperator(tok, '-')),
    $o.clsFn(tok => isValue(tok)),
);
// Production rule (exponentiation):
//   S -> S "**" S
const exprRule15 = $o.trans(matched => {
    const operator = (matched[1] as SxOp).op;
    return [binaryOp(operator, matched[0], matched[2])];
})(
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, '**')),
    $o.clsFn(tok => isValue(tok)),
);
// Production rules (multiplicative):
//   S -> S "*" S
//   S -> S "/" S
//   S -> S "%" S
const exprRule14 = $o.trans(matched => {
    const operator = (matched[1] as SxOp).op;
    return [binaryOp(operator, matched[0], matched[2])];
})(
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, '*') || isOperator(tok, '/') || isOperator(tok, '%')),
    $o.clsFn(tok => isValue(tok)),
);
// Production rules (additive):
//   S -> S "+" S
//   S -> S "-" S
const exprRule13 = $o.trans(matched => {
    const operator = (matched[1] as SxOp).op;
    return [binaryOp(operator, matched[0], matched[2])];
})(
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, '+') || isOperator(tok, '-')),
    $o.clsFn(tok => isValue(tok)),
);
// Production rule (conditional):
//   beginning S -> beginning S "?" S ":" S
//   ","       S -> ","       S "?" S ":" S
// Indices 1, 3 and 5 of the match are the condition and the two branches;
// they become the operands of an `$if` form.
const exprRule4 = $o.trans(matched => {
    const condition = matched[1];
    const whenTrue = matched[3];
    const whenFalse = matched[5];
    return [ternaryOp('$if', condition, whenTrue, whenFalse)];
})(
    $o.first(
        $o.beginning(() => ({op: '$noop'})),
        $o.behind(1, () => ({op: '$noop'}))($o.clsFn(tok => isOperator(tok, ','))),
    ),
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, '?')),
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, ':')),
    $o.clsFn(tok => isValue(tok)),
);
// Production rule (comma):
//   beginning S -> beginning S "," S
//   "("       S -> "("       S "," S
// Index 2 of the match is the comma marker; indices 1 and 3 are its operands.
const exprRule1 = $o.trans(matched => {
    const operator = (matched[2] as SxOp).op;
    return [binaryOp(operator, matched[1], matched[3])];
})(
    $o.first(
        $o.beginning(() => ({op: '$noop'})),
        $o.behind(1, () => ({op: '$noop'}))($o.clsFn(tok => isOperator(tok, '('))),
    ),
    $o.clsFn(tok => isValue(tok)),
    $o.clsFn(tok => isOperator(tok, ',')),
    $o.clsFn(tok => isValue(tok)),
);
/**
 * Token scanner for the inside of a parenthesised group: `exprInner` with
 * `)` as the edge and `nested` set. It is a lambda because `exprInner` is
 * defined after it.
 */
const exprNested =
    (input: ParserInputWithCtx<string, Ctx>) => {
        const closingParen = cls(')');
        return exprInner(closingParen, true)(input);
    };
/**
 * Scans one expression into a flat token sequence of values, symbols and
 * `{op}` markers, which the rules in `expr` then reduce.
 *
 * @param edge   parser that must match (after optional comments/whitespace)
 *               right after the expression; it is only looked ahead at
 * @param nested when true, `,` is accepted as an operator as well
 */
const exprInner: (edge: ParserFnWithCtx<string, undefined, Ast>, nested: boolean) =>
        ParserFnWithCtx<string, undefined, Ast> = (edge, nested) => {
    const operator = nested ? first(exprOps, cls(',')) : exprOps;
    return combine(
        qty(1)(first(
            erase(commentOrSpace),
            // a sign immediately followed by a `classes.num` character
            transformOp(combine(cls('+', '-'), ahead(classes.num))),
            listValue,
            objectValue,
            // TODO: lambdaFnValue
            atomValue,
            symbolName,
            transformOp(operator),
            // parenthesised group; its content is optional
            combine(
                transformOp(cls('(')),
                qty(0, 1)(exprNested),
                transformOp(cls(')')),
            ),
        )),
        ahead(repeat(commentOrSpace), edge),
    );
};
/**
 * Full expression parser: the tokens produced by `exprInner(edge, nested)`
 * are passed to `rules` together with the production rules below. The rules
 * are listed from `exprRule20` down to `exprRule1`; the unary-sign and
 * conditional rules are flagged `rtol`.
 *
 * @param edge   parser marking where the expression ends
 * @param nested whether `,` is accepted as an operator
 */
const expr = (edge: ParserFnWithCtx<string, Ctx, Ast>, nested: boolean) => {
    const reduce = rules({
        rules: [
            exprRule20,
            exprRule18,
            { parser: exprRule16, rtol: true },
            exprRule15,
            exprRule14,
            exprRule13,
            { parser: exprRule4, rtol: true },
            exprRule1,
        ],
        check: $o.combine($o.classes.any, $o.end()),
    });
    return reduce(exprInner(edge, nested));
};
/** Expression statement: an expression whose edge is the end of input or `;`. */
const exprStatement =
    expr(
        first(end(), seq(';')),
        true,
    );
/**
 * One `name = expression` binding of a `let` statement. The `=` and the
 * surrounding comments/whitespace are erased.
 */
const letStatementInner =
    combine(
        symbolName,
        erase(repeat(commentOrSpace)),
        erase(seq('=')),
        erase(repeat(commentOrSpace)),
        expr(
            first(end(), seq(';')),
            true,
        ),
    );
/**
 * `let` statement: the keyword, at least one comment/whitespace unit, then
 * one or more comma-separated bindings. Anything else after the keyword is
 * reported as an error.
 */
const letStatement =
    combine(
        erase(seq('let')),
        erase(qty(1)(commentOrSpace)),
        first(
            combine(
                letStatementInner,
                repeat(combine(
                    erase(
                        repeat(commentOrSpace),
                        seq(','),
                        repeat(commentOrSpace),
                    ),
                    letStatementInner,
                )),
            ),
            err('Unexpected token has appeared.'),
        ),
    );
/** A simple statement: an expression statement or a `let` statement. */
const singleStatement =
    first(
        exprStatement,
        letStatement,
    );
/**
 * A simple statement together with its terminator.
 *
 * After the statement and optional comments/whitespace, one of these must
 * hold: the input ends, a `{` or `}` follows, one of the statement keywords
 * follows (itself followed by comment/whitespace or `{`), or a `;` is
 * consumed. Otherwise an error is reported. All but the `;` case are
 * look-ahead only.
 */
const singleStatementSC =
    combine(
        singleStatement,
        erase(repeat(commentOrSpace)),
        first(
            ahead(end()),
            ahead(cls('{', '}')),
            ...['let', 'for', 'while', 'do', 'if'].map(keyword =>
                ahead(seq(keyword), first(commentOrSpace, cls('{')))),
            erase(seq(';')),
            err('Unexpected token has appeared.'),
        ),
    );
/**
 * Block statement: `{`, statements, `}`. `statements` is wrapped in a lambda
 * because it is defined later and refers back to this parser.
 */
const blockStatement =
    combine(
        erase(seq('{')),
        input => statements(input),
        erase(seq('}')),
    );
// TODO: for, while, do, if(if-elseif-else), switch, return, break statements
/**
 * `if` statement: `if ( condition )` followed by a block or a simple
 * statement. No `else` branch is parsed here.
 */
const ifStatement =
    combine(
        // keyword and opening parenthesis
        erase(seq('if')),
        erase(repeat(commentOrSpace)),
        erase(seq('(')),
        erase(repeat(commentOrSpace)),
        // condition, ending at `)`
        expr(first(seq(')')), true),
        erase(repeat(commentOrSpace)),
        erase(seq(')')),
        erase(repeat(commentOrSpace)),
        // body
        first(
            blockStatement,
            singleStatementSC,
        ),
    );
/**
 * `switch` statement: `switch ( subject ) { ... }` where the braces hold any
 * number of `case expression:` and `default:` clauses, each followed by any
 * number of block or simple statements.
 */
const switchStatement =
    combine(
        // keyword and subject
        erase(seq('switch')),
        erase(repeat(commentOrSpace)),
        erase(seq('(')),
        erase(repeat(commentOrSpace)),
        expr(first(seq(')')), true),
        erase(repeat(commentOrSpace)),
        erase(seq(')')),
        erase(repeat(commentOrSpace)),
        // clause list
        erase(seq('{')),
        erase(repeat(commentOrSpace)),
        repeat(first(
            // case <expression>: statements
            combine(
                erase(seq('case')),
                erase(repeat(commentOrSpace)),
                expr(first(seq(':')), true),
                erase(seq(':')),
                erase(repeat(commentOrSpace)),
                repeat(first(
                    blockStatement,
                    singleStatementSC,
                )),
                erase(repeat(commentOrSpace)),
            ),
            // default: statements
            combine(
                erase(seq('default')),
                erase(repeat(commentOrSpace)),
                erase(seq(':')),
                erase(repeat(commentOrSpace)),
                repeat(first(
                    blockStatement,
                    singleStatementSC,
                )),
                erase(repeat(commentOrSpace)),
            ),
        )),
        erase(seq('}')),
    );
/**
 * `for` statement: `for ( init ; condition ; update )` followed by a block
 * or a simple statement. All three header expressions are required by this
 * grammar.
 *
 * The keyword matched here is `for`; the header shape with two `;` is what
 * distinguishes this parser from `whileStatement`.
 */
const forStatement =
    combine(
        // keyword and opening parenthesis
        erase(seq('for')),
        erase(repeat(commentOrSpace)),
        erase(seq('(')),
        erase(repeat(commentOrSpace)),
        // init
        expr(first(seq(';')), true),
        erase(seq(';')),
        erase(repeat(commentOrSpace)),
        // condition
        expr(first(seq(';')), true),
        erase(seq(';')),
        erase(repeat(commentOrSpace)),
        // update
        expr(first(seq(')')), true),
        erase(seq(')')),
        erase(repeat(commentOrSpace)),
        // body
        first(blockStatement, singleStatementSC),
    );
/** `while` statement: `while ( condition )` followed by a block or a simple statement. */
const whileStatement =
    combine(
        // keyword and opening parenthesis
        erase(seq('while')),
        erase(repeat(commentOrSpace)),
        erase(seq('(')),
        erase(repeat(commentOrSpace)),
        // condition, ending at `)`
        expr(first(seq(')')), true),
        erase(seq(')')),
        erase(repeat(commentOrSpace)),
        // body
        first(
            blockStatement,
            singleStatementSC,
        ),
    );
/**
 * `do ... while` statement: `do`, a block or a simple statement, then
 * `while ( condition )`.
 *
 * Comments/whitespace are skipped between `while` and `(`, matching what
 * `whileStatement` accepts, so `do {...} while (x)` parses as well as
 * `do {...} while(x)`.
 */
const doWhileStatement =
    combine(
        // keyword and body
        erase(seq('do')),
        erase(repeat(commentOrSpace)),
        first(blockStatement, singleStatementSC),
        erase(repeat(commentOrSpace)),
        // trailing condition
        erase(seq('while')),
        erase(repeat(commentOrSpace)),
        erase(seq('(')),
        erase(repeat(commentOrSpace)),
        expr(first(seq(')')), true),
        erase(seq(')')),
    );
/**
 * One or more statements. The structured statement forms are listed before
 * the simple-statement fallback.
 */
const statements =
    qty(1)(
        first(
            blockStatement,
            ifStatement,
            switchStatement,
            forStatement,
            whileStatement,
            doWhileStatement,
            singleStatementSC,
        ),
    );
/**
 * Top-level parser: a single expression spanning the whole input, with
 * comments/whitespace allowed on both sides. The expression tokens are
 * passed through unchanged.
 */
const program = makeProgram(
    trans(result => result)(
        erase(repeat(commentOrSpace)),
        expr(end(), true),
        erase(repeat(commentOrSpace)),
        end(),
    ),
);
/**
 * Parses an expression source string.
 *
 * @param s source text
 * @returns the first token of the parse result
 * @throws Error carrying the formatted parser message when parsing fails
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export function parse(s: string) {
    const result = program(parserInput(s));
    if (result.succeeded) {
        return result.tokens[0];
    }
    throw new Error(formatErrorMessage(result));
}
/**
 * Parses an expression and evaluates it with `liyad.lisp`.
 *
 * On every call the sample globals `max`, `twice` and `one` are passed to
 * `liyad.lisp.setGlobals` before evaluation.
 *
 * @param s source text
 * @returns whatever `liyad.lisp.evaluateAST` returns for the parsed AST
 * @throws Error when `parse` fails
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export function evaluate(s: string) {
    const ast = parse(s);
    const sampleGlobals = {
        max: Math.max,
        twice: (x: number) => x * 2,
        one: () => 1,
    };
    liyad.lisp.setGlobals(sampleGlobals);
    return liyad.lisp.evaluateAST([ast] as any);
}