fruitsconfits
Version:
FruitsConfits - A well typed and sugared parser combinator framework for TypeScript/JavaScript.
478 lines (407 loc) • 16.4 kB
text/typescript
// Copyright (c) 2019 Shellyl_N and Authors
// license: ISC
// https://github.com/shellyln
// tslint:disable: no-implicit-dependencies
// tslint:disable: interface-over-type-literal
// tslint:disable: align
import { ParserInputWithCtx,
parserInput,
ParserFnWithCtx } from '../../lib/types';
import { formatErrorMessage } from '../../lib/parser';
import { getStringParsers } from '../../lib/string-parser';
import { getObjectParsers } from '../../lib/object-parser';
/** Every JavaScript value a parsed literal, list, or object node can carry. */
// NOTE: `bigint` (the primitive) is used instead of `BigInt` (the wrapper interface).
type AstValuesT = number | string | boolean | bigint | null | Record<string, unknown> | any[] | undefined;
/** Parser context type; fixed to `undefined` in this file. */
type Ctx = undefined;
/**
 * Token / AST node passed between the parsers.
 * - `token`: text of the node (matched source text or a synthetic marker)
 * - `type`:  node category; this file produces 'value', 'op', 'list' and 'object'
 * - `value`: evaluated value carried by the node
 */
type Ast = {token: string, type?: string, value?: AstValuesT};
// Parser toolkit operating on the raw input string and producing `Ast` tokens.
const $s = getStringParsers<Ctx, Ast>({
    // Wrap matched source text in a bare token node.
    rawToToken: text => ({token: text}),
    // Merge a run of tokens into one token holding the concatenated text;
    // an empty run yields an empty result.
    concatTokens: parts => {
        if (parts.length === 0) {
            return [];
        }
        let merged = parts[0];
        for (let i = 1; i < parts.length; i++) {
            merged = {token: merged.token + parts[i].token};
        }
        return [merged];
    },
});
// Parser toolkit operating on an already tokenized `Ast[]` sequence
// (used by the constant-expression reduction rules).
const $o = getObjectParsers<Ast[], Ctx, Ast>({
    // Input items are already `Ast` nodes, so they are passed through unchanged.
    rawToToken: node => node,
    // Merge a run of tokens into one token holding the concatenated text;
    // an empty run yields an empty result.
    concatTokens: parts => {
        if (parts.length === 0) {
            return [];
        }
        let merged = parts[0];
        for (let i = 1; i < parts.length; i++) {
            merged = {token: merged.token + parts[i].token};
        }
        return [merged];
    },
    // Two nodes are considered equal when both `type` and `value` are strictly equal.
    comparator: (lhs, rhs) => {
        if (lhs.type !== rhs.type) {
            return false;
        }
        return lhs.value === rhs.value;
    },
});
// Local aliases for the string-level combinators exposed by `$s`.
const {
    seq, cls, notCls, clsFn,
    classes, numbers,
    cat, once, repeat, qty,
    zeroWidth, err, beginning, end,
    first, or, combine, erase, trans, ahead,
    rules, makeProgram,
} = $s;
// `// ...` comment running to the end of the line.
// The line terminator is optional so that a comment on the very last line of the
// input (no trailing newline) is still accepted. After the `notCls` run the next
// character can only be a line break or the end of input, so `repeat` consumes the
// terminator when present; any further blank lines it consumes would be skipped as
// whitespace anyway, since this parser is only used via `commentOrSpace`.
const lineComment =
    combine(
        seq('//'),
        repeat(notCls('\r\n', '\n', '\r')),
        repeat(classes.newline),
    );
// `# ...` comment running to the end of the line.
// The line terminator is optional so that a comment on the very last line of the
// input (no trailing newline) is still accepted. After the `notCls` run the next
// character can only be a line break or the end of input, so `repeat` consumes the
// terminator when present; any further blank lines it consumes would be skipped as
// whitespace anyway, since this parser is only used via `commentOrSpace`.
const hashLineComment =
    combine(
        seq('#'),
        repeat(notCls('\r\n', '\n', '\r')),
        repeat(classes.newline),
    );
// `/* ... */` comment: opening marker, everything up to the closing marker, closing marker.
const blockComment = combine(seq('/*'), repeat(notCls('*/')), seq('*/'));
// One unit of ignorable input: a whitespace character or any of the three comment forms.
const commentOrSpace = first(
    classes.space,
    lineComment,
    hashLineComment,
    blockComment,
);
// Keyword `true` -> value node carrying boolean true.
const trueValue =
    trans(([matched]) => [{token: matched.token, type: 'value', value: true}])(
        seq('true'));
// Keyword `false` -> value node carrying boolean false.
const falseValue =
    trans(([matched]) => [{token: matched.token, type: 'value', value: false}])(
        seq('false'));
// Keyword `null` -> value node carrying null.
const nullValue =
    trans(([matched]) => [{token: matched.token, type: 'value', value: null}])(
        seq('null'));
// Keyword `undefined` -> value node carrying undefined.
const undefinedValue =
    trans(([matched]) => [{token: matched.token, type: 'value', value: undefined}])(
        seq('undefined'));
// `Infinity` or `+Infinity` -> value node carrying Number.POSITIVE_INFINITY.
// The optional sign and the keyword arrive as separate tokens, so the node's token
// is built from all of them. Using only tokens[0] would label `+Infinity` with the
// token '+', which is also the text of the plus operator.
const positiveInfinityValue =
    trans(tokens => [{token: tokens.map(t => t.token).join(''), type: 'value',
                      value: Number.POSITIVE_INFINITY}])(
        qty(0, 1)(seq('+')), seq('Infinity'));
// `-Infinity` -> value node carrying Number.NEGATIVE_INFINITY.
const negativeInfinityValue =
    trans(([matched]) => [
        {token: matched.token, type: 'value', value: Number.NEGATIVE_INFINITY},
    ])(seq('-Infinity'));
// `NaN` -> value node carrying Number.NaN.
const nanValue =
    trans(([matched]) => [
        {token: matched.token, type: 'value', value: Number.NaN},
    ])(seq('NaN'));
// Binary integer literal introduced by `0b`; `_` characters are stripped before
// the text is converted with radix 2.
const binaryIntegerValue =
    trans(tokens => {
        const text = tokens[0].token;
        const parsed = Number.parseInt(text.replace(/_/g, ''), 2);
        return [{token: text, type: 'value', value: parsed}];
    })(numbers.bin(seq('0b')));
// Octal integer literal introduced by `0o` or a leading `0`; `_` characters are
// stripped before the text is converted with radix 8.
const octalIntegerValue =
    trans(tokens => {
        const text = tokens[0].token;
        const parsed = Number.parseInt(text.replace(/_/g, ''), 8);
        return [{token: text, type: 'value', value: parsed}];
    })(numbers.oct(seq('0o'), seq('0')));
// Hexadecimal integer literal introduced by `0x` or `0X`; `_` characters are
// stripped before the text is converted with radix 16.
const hexIntegerValue =
    trans(tokens => {
        const text = tokens[0].token;
        const parsed = Number.parseInt(text.replace(/_/g, ''), 16);
        return [{token: text, type: 'value', value: parsed}];
    })(numbers.hex(seq('0x'), seq('0X')));
// Decimal integer literal; `_` characters are stripped before the text is
// converted with radix 10.
const decimalIntegerValue =
    trans(tokens => {
        const text = tokens[0].token;
        const parsed = Number.parseInt(text.replace(/_/g, ''), 10);
        return [{token: text, type: 'value', value: parsed}];
    })(numbers.int);
// Big integer literal (as recognised by `numbers.bigint`); `_` characters are
// stripped and the remaining text is converted with `BigInt`.
const bigDecimalIntegerValue =
    trans(tokens => {
        const text = tokens[0].token;
        const parsed = BigInt(text.replace(/_/g, ''));
        return [{token: text, type: 'value', value: parsed}];
    })(numbers.bigint);
// Floating point literal (as recognised by `numbers.float`); `_` characters are
// stripped before the text is converted with `Number.parseFloat`.
const floatingPointNumberValue =
    trans(tokens => {
        const text = tokens[0].token;
        const parsed = Number.parseFloat(text.replace(/_/g, ''));
        return [{token: text, type: 'value', value: parsed}];
    })(numbers.float);
// Any numeric literal. Alternatives are tried in the listed order and the first
// one that matches wins, so the order is significant.
const numberValue = first(
    octalIntegerValue,
    hexIntegerValue,
    binaryIntegerValue,
    floatingPointNumberValue,
    bigDecimalIntegerValue,
    decimalIntegerValue,
    positiveInfinityValue,
    negativeInfinityValue,
    nanValue,
);
// Backslash escape sequences allowed inside string literals.
// Each alternative replaces the matched escape with the character(s) it denotes.
const stringEscapeSeq = first(
    // Quote characters and the backslash itself.
    trans(() => [{token: '\''}])(seq('\\\'')),
    trans(() => [{token: '"'}])(seq('\\"')),
    trans(() => [{token: '`'}])(seq('\\`')),
    trans(() => [{token: '\\'}])(seq('\\\\')),
    // Backslash followed by a line break: line continuation, contributes nothing.
    trans(() => [{token: ''}])(seq('\\\r\n')),
    trans(() => [{token: ''}])(seq('\\\r')),
    trans(() => [{token: ''}])(seq('\\\n')),
    // Single-letter control character escapes.
    trans(() => [{token: '\n'}])(seq('\\n')),
    trans(() => [{token: '\r'}])(seq('\\r')),
    trans(() => [{token: '\v'}])(seq('\\v')),
    trans(() => [{token: '\t'}])(seq('\\t')),
    trans(() => [{token: '\b'}])(seq('\\b')),
    trans(() => [{token: '\f'}])(seq('\\f')),
    // \uXXXX : exactly four hex digits.
    trans(digits => [{token: String.fromCodePoint(Number.parseInt(digits[0].token, 16))}])(
        cat(erase(seq('\\u')),
            qty(4, 4)(classes.hex))),
    // \u{X...} : one to six hex digits inside braces.
    trans(digits => [{token: String.fromCodePoint(Number.parseInt(digits[0].token, 16))}])(
        cat(erase(seq('\\u{')),
            qty(1, 6)(classes.hex),
            erase(seq('}')))),
    // \xXX : exactly two hex digits.
    trans(digits => [{token: String.fromCodePoint(Number.parseInt(digits[0].token, 16))}])(
        cat(erase(seq('\\x')),
            qty(2, 2)(classes.hex))),
    // \OOO : exactly three octal digits.
    trans(digits => [{token: String.fromCodePoint(Number.parseInt(digits[0].token, 8))}])(
        cat(erase(seq('\\')),
            qty(3, 3)(classes.oct))),
);
// Single-quoted string literal -> value node whose token and value are the decoded text.
// Raw line breaks inside the literal are reported as an error.
const signleQuotStringValue =
    trans(tokens => {
        // An empty literal ('') produces no body token at all (`concatTokens`
        // returns [] for an empty run), so fall back to the empty string instead
        // of dereferencing an undefined tokens[0].
        const text = tokens.length > 0 ? tokens[0].token : '';
        return [{token: text, type: 'value', value: text}];
    })(
        erase(seq("'")),
            cat(repeat(first(
                stringEscapeSeq,
                combine(cls('\r', '\n'), err('Line breaks within strings are not allowed.')),
                notCls("'"),
            ))),
        erase(seq("'")), );
// Double-quoted string literal -> value node whose token and value are the decoded text.
// Raw line breaks inside the literal are reported as an error.
const doubleQuotStringValue =
    trans(tokens => {
        // An empty literal ("") produces no body token at all (`concatTokens`
        // returns [] for an empty run), so fall back to the empty string instead
        // of dereferencing an undefined tokens[0].
        const text = tokens.length > 0 ? tokens[0].token : '';
        return [{token: text, type: 'value', value: text}];
    })(
        erase(seq('"')),
            cat(repeat(first(
                stringEscapeSeq,
                combine(cls('\r', '\n'), err('Line breaks within strings are not allowed.')),
                notCls('"'),
            ))),
        erase(seq('"')), );
// Back-quoted string literal -> value node whose token and value are the decoded text.
// Unlike the other two quote styles, raw line breaks are allowed here.
const backQuotStringValue =
    trans(tokens => {
        // An empty literal (``) produces no body token at all (`concatTokens`
        // returns [] for an empty run), so fall back to the empty string instead
        // of dereferencing an undefined tokens[0].
        const text = tokens.length > 0 ? tokens[0].token : '';
        return [{token: text, type: 'value', value: text}];
    })(
        erase(seq('`')),
            cat(repeat(first(
                stringEscapeSeq,
                notCls('`'),
            ))),
        erase(seq('`')), );
// Any string literal: single-, double- or back-quoted, tried in that order.
const stringValue = first(
    signleQuotStringValue,
    doubleQuotStringValue,
    backQuotStringValue,
);
// Any scalar literal: keyword constants first, then numbers, then strings.
const atomValue = first(
    trueValue,
    falseValue,
    nullValue,
    undefinedValue,
    numberValue,
    stringValue,
);
// Bare identifier: one `classes.alpha` character followed by any number of
// `classes.alnum` characters, concatenated into a single token.
const symbolName = cat(
    combine(
        classes.alpha,
        repeat(classes.alnum),
    ),
);
// Object key: a quoted string or a bare identifier.
const objKey = first(
    stringValue,
    symbolName,
);
// List literal `[ ... ]` -> node of type 'list' whose value is the array of
// element values. Elements may be nested lists, objects or constant expressions.
const listValue = first(
    // Empty list: brackets with only whitespace/comments between them.
    trans(() => [{token: '[]', type: 'list', value: []}])(
        erase(
            seq('['),
            repeat(commentOrSpace),
            seq(']'),
        ),
    ),
    // Non-empty list: each surviving token is one element; collect their values.
    trans(elements => {
        const values: AstValuesT[] = elements.map(element => element.value);
        const node: Ast = {token: '[]', type: 'list', value: values};
        return [node];
    })(
        erase(seq('[')),
        // First element.
        once(combine(
            erase(repeat(commentOrSpace)),
            first(src => listValue(src),       // NOTE: recursive definitions
                  src => objectValue(src),     //       must be wrapped in lambdas.
                  src => constExpr(first(seq(','), seq(']')))(src)),
            erase(repeat(commentOrSpace)),
        )),
        // Following elements, each preceded by a comma.
        repeat(combine(
            erase(repeat(commentOrSpace),
                  seq(','),
                  repeat(commentOrSpace)),
            first(src => listValue(src),       // NOTE: recursive definitions
                  src => objectValue(src),     //       must be wrapped in lambdas.
                  src => constExpr(first(seq(','), seq(']')))(src)),
            erase(repeat(commentOrSpace)),
        )),
        // Optional trailing comma.
        qty(0, 1)(erase(
            seq(','),
            repeat(commentOrSpace),
        )),
        // Anything other than the closing bracket at this point is an error.
        first(ahead(seq(']')), err('Unexpected token has appeared.')),
        erase(seq(']')),
    ),
);
// One `key : value` entry of an object literal. Yields the key token followed by
// the value token; the colon and surrounding whitespace/comments are discarded.
const objectKeyValuePair = combine(
    objKey,
    erase(
        repeat(commentOrSpace),
        first(seq(':'), err('":" is needed.')),
        repeat(commentOrSpace),
    ),
    first(
        src => listValue(src),       // NOTE: recursive definitions
        src => objectValue(src),     //       must be wrapped in lambdas.
        src => constExpr(first(seq(','), seq('}')))(src),
        err('object value is needed.'),
    ),
);
// Object literal `{ ... }` -> node of type 'object' whose value is a plain
// object built from the key/value entries.
const objectValue = first(
    // Empty object: braces with only whitespace/comments between them.
    trans(() => [{token: '{}', type: 'object', value: {}}])(
        erase(
            seq('{'),
            repeat(commentOrSpace),
            seq('}'),
        ),
    ),
    // Non-empty object: tokens arrive as alternating key, value, key, value, ...
    trans(entries => {
        const node: Ast = {token: '{}', type: 'object', value: {}};
        const target = node.value as Record<string, unknown>;
        let idx = 0;
        while (idx < entries.length) {
            const key = entries[idx].token;
            // NOTE: `__proto__` keys are dropped to prevent prototype pollution attacks.
            if (key !== '__proto__') {
                target[key] = entries[idx + 1].value;
            }
            idx += 2;
        }
        return [node];
    })(
        erase(seq('{')),
        // First entry.
        once(combine(
            erase(repeat(commentOrSpace)),
            objectKeyValuePair,
            erase(repeat(commentOrSpace)),
        )),
        // Following entries, each preceded by a comma.
        repeat(combine(
            erase(seq(','),
                  repeat(commentOrSpace)),
            objectKeyValuePair,
            erase(repeat(commentOrSpace)),
        )),
        // Optional trailing comma.
        qty(0, 1)(erase(
            seq(','),
            repeat(commentOrSpace),
        )),
        // Anything other than the closing brace at this point is an error.
        first(ahead(seq('}')), err('Unexpected token has appeared.')),
        erase(seq('}')),
    ),
);
// Operator spellings usable in constant expressions. '**' is listed before '*'
// so the two-character operator is tried first.
const exprOpsTokens = ['**', '*', '/', '%', '+', '-'];
// Operators after which a unary sign may appear: all of the above plus the comma.
const edgeOpsTokens = [...exprOpsTokens, ','];
// Parser matching any one of the expression operators.
const constExprOps = cls(...exprOpsTokens);
// Wrap an operator parser so its first token becomes an 'op' node whose value
// is the operator text itself.
const transformOp = (op: ParserFnWithCtx<string, Ctx, Ast>) =>
    trans(([matched]) => {
        const text = matched.token;
        return [{token: text, type: 'op', value: text}];
    })(op);
// Marks a position where a following '+' / '-' is a unary sign: either the start
// of the token sequence, or directly after an operator listed in `edgeOpsTokens`.
// In both cases a synthetic '$noop' operator node is produced.
//
// The look-behind predicate inspects the node's `type` and `token`. It used to
// read a non-existent `op` property through an `any` cast, so it never matched
// and signs after an operator could not be reduced.
// NOTE(review): an opening parenthesis is not in `edgeOpsTokens`, so a sign right
// after '(' is still not covered by this parser — confirm whether that is intended.
const beginningOrEdgeOp =
    $o.first(
        $o.beginning(() => ({token: '$noop', type: 'op', value: '$noop'})),
        $o.behind(1, () => ({token: '$noop', type: 'op', value: '$noop'}))(
            $o.clsFn(t => Boolean(t) && t.type === 'op' && edgeOpsTokens.includes(t.token))),
    );
/**
 * Apply a unary sign operator to an operand.
 *
 * @param op   operator text: '+' or '-'
 * @param op1  operand; returned untouched for '+', arithmetically negated for '-'
 * @returns the resulting value
 * @throws Error when `op` is neither '+' nor '-'
 */
const unaryOp = (op: string, op1: any) => {
    if (op === '+') {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-return
        return op1;
    }
    if (op === '-') {
        return -op1;
    }
    throw new Error(`Unknown operator has appeared.${op}`);
};
/**
 * Apply a binary operator to two operands.
 *
 * Supported operators: '**', '*', '/', '%', '+', '-' (the JavaScript operators
 * of the same spelling) and ',' (yields the right operand).
 *
 * @param op   operator text
 * @param op1  left operand
 * @param op2  right operand
 * @returns the resulting value
 * @throws Error when `op` is not one of the supported operators
 */
const binaryOp = (() => {
    // A Map (not an object literal) so inherited keys such as 'toString' never match.
    const evaluators = new Map<string, (lhs: any, rhs: any) => any>([
        ['**', (lhs, rhs) => lhs ** rhs],
        ['*', (lhs, rhs) => lhs * rhs],
        ['/', (lhs, rhs) => lhs / rhs],
        ['%', (lhs, rhs) => lhs % rhs],
        // eslint-disable-next-line @typescript-eslint/no-unsafe-return, @typescript-eslint/restrict-plus-operands
        ['+', (lhs, rhs) => lhs + rhs],
        ['-', (lhs, rhs) => lhs - rhs],
        // eslint-disable-next-line @typescript-eslint/no-unsafe-return
        [',', (_lhs, rhs) => rhs],
    ]);
    return (op: string, op1: any, op2: any) => {
        const evaluate = evaluators.get(op);
        if (evaluate === undefined) {
            throw new Error(`Unknown operator has appeared.${op}`);
        }
        // eslint-disable-next-line @typescript-eslint/no-unsafe-return
        return evaluate(op1, op2);
    };
})();
// NOTE: Use the following function to return AST (abstract syntax tree).
// const binaryOp = (op: string, op1: any, op2: any) => {
// return ({
// operator: op,
// operands: [op1, op2],
// });
// };
// const ternaryOp = (op: string, op1: any, op2: any, op3: any) => {
// };
// production rule:
//   S -> "(" E ")"
// A parenthesised value reduces to the value itself.
// The parentheses must be 'op' nodes: matching on `token` alone would also accept
// a value whose token text happens to be "(" or ")" (e.g. the string literal '(').
const constExprRule20 = $o.trans(tokens => [tokens[1]])(
    $o.clsFn(t => t.type === 'op' && t.token === '('),
    $o.clsFn(t => t.type === 'value'),
    $o.clsFn(t => t.type === 'op' && t.token === ')'),
);
// production rules:
//   beginning S -> beginning "+" S
//   op S -> op "+" S
//   beginning S -> beginning "-" S
//   op S -> op "-" S
// Unary sign: tokens[0] is the marker from `beginningOrEdgeOp`, tokens[1] the sign
// and tokens[2] the operand. The three are replaced by a single value node.
// The sign must be an 'op' node: matching on `token` alone would also accept a
// value whose token text is "+" or "-" (e.g. the string literal '+').
const constExprRule16 = $o.trans(tokens => [{ token: tokens[2].token, type: 'value',
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    value: unaryOp(tokens[1].token, tokens[2].value)}])(
    beginningOrEdgeOp,
    $o.clsFn(t => t.type === 'op' && (t.token === '+' || t.token === '-')),
    $o.clsFn(t => t.type === 'value'),
);
// production rule:
//   S -> S "**" S
// Exponentiation: value, operator, value are replaced by one value node.
// The operator must be an 'op' node. Reduced nodes keep the operator text as their
// `token` while being of type 'value', so matching on `token` alone would let a
// previously reduced result (or a string literal '**') act as an operator.
const constExprRule15 = $o.trans(tokens => [{token: tokens[1].token, type: 'value',
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    value: binaryOp(tokens[1].token, tokens[0].value, tokens[2].value)}])(
    $o.clsFn(t => t.type === 'value'),
    $o.clsFn(t => t.type === 'op' && t.token === '**'),
    $o.clsFn(t => t.type === 'value'),
);
// production rules:
//   S -> S "*" S
//   S -> S "/" S
//   S -> S "%" S
// Multiplicative operators: value, operator, value are replaced by one value node.
// The operator must be an 'op' node. Reduced nodes keep the operator text as their
// `token` while being of type 'value', so matching on `token` alone would let a
// previously reduced result (or a string literal such as '*') act as an operator.
const constExprRule14 = $o.trans(tokens => [{token: tokens[1].token, type: 'value',
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    value: binaryOp(tokens[1].token, tokens[0].value, tokens[2].value)}])(
    $o.clsFn(t => t.type === 'value'),
    $o.clsFn(t => t.type === 'op' && (t.token === '*' || t.token === '/' || t.token === '%')),
    $o.clsFn(t => t.type === 'value'),
);
// production rules:
//   S -> S "+" S
//   S -> S "-" S
// Additive operators: value, operator, value are replaced by one value node.
// The operator must be an 'op' node. Reduced nodes keep the operator text as their
// `token` while being of type 'value', so matching on `token` alone would let a
// previously reduced result (or a string literal such as '+') act as an operator.
const constExprRule13 = $o.trans(tokens => [{token: tokens[1].token, type: 'value',
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    value: binaryOp(tokens[1].token, tokens[0].value, tokens[2].value)}])(
    $o.clsFn(t => t.type === 'value'),
    $o.clsFn(t => t.type === 'op' && (t.token === '+' || t.token === '-')),
    $o.clsFn(t => t.type === 'value'),
);
// production rule:
//   S -> S "," S
// Comma operator: value, ",", value are replaced by one value node carrying the
// right-hand value.
// The comma must be an 'op' node. Reduced nodes keep the operator text as their
// `token` while being of type 'value', so matching on `token` alone would let a
// previously reduced result (or a string literal ',') act as an operator.
const constExprRule1 = $o.trans(tokens => [{token: tokens[1].token, type: 'value',
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    value: binaryOp(tokens[1].token, tokens[0].value, tokens[2].value)}])(
    $o.clsFn(t => t.type === 'value'),
    $o.clsFn(t => t.type === 'op' && t.token === ','),
    $o.clsFn(t => t.type === 'value'),
);
// Tokenizer for the inside of a parenthesised sub-expression: terminated by ')'
// and with `nested` set. Written as a function so the reference to
// `constExprInner` is only resolved when a nested expression is actually parsed.
const constExprNested = (input: ParserInputWithCtx<string, Ctx>) => {
    const nestedParser = constExprInner(cls(')'), true);
    return nestedParser(input);
};
// Tokenizer for a constant expression: turns the source text into a flat run of
// 'value' and 'op' nodes.
//   edge   - parser that must match (after optional whitespace/comments) right
//            behind the expression; it is only looked ahead at, not consumed.
//   nested - when true, ',' is additionally accepted as an operator.
const constExprInner: (edge: ParserFnWithCtx<string, undefined, Ast>, nested: boolean) =>
        ParserFnWithCtx<string, undefined, Ast> = (edge, nested) => {
    // Whitespace and comments between items are discarded.
    const skipped = erase(commentOrSpace);
    // A sign directly followed by a digit is emitted as its own operator node
    // before `atomValue` gets a chance to look at the input.
    const signBeforeNumber = transformOp(combine(cls('+', '-'), ahead(classes.num)));
    // Ordinary operators; the comma is only allowed in nested expressions.
    const operator = transformOp(nested ? first(constExprOps, cls(',')) : constExprOps);
    // Parenthesised sub-expression; both parentheses are kept as operator nodes.
    const parenthesized = combine(
        transformOp(cls('(')),
        constExprNested,
        transformOp(cls(')')),
    );
    return combine(
        qty(1)(first(
            skipped,
            signBeforeNumber,
            atomValue,
            operator,
            parenthesized,
        )),
        ahead(repeat(commentOrSpace), edge),
    );
};
// Constant expression parser: tokenizes with `constExprInner(edge, false)` and
// reduces the tokens with the production rules below, listed in the order they
// are given to `rules`. The unary-sign rule is flagged `rtol`.
// `check` requires the outcome to be exactly one item followed by the end.
const constExpr = (edge: ParserFnWithCtx<string, Ctx, Ast>) => {
    const reduce = rules({
        rules: [
            constExprRule20,
            { parser: constExprRule16, rtol: true },
            constExprRule15,
            constExprRule14,
            constExprRule13,
            constExprRule1,
        ],
        check: $o.combine($o.classes.any, $o.end()),
    });
    return reduce(constExprInner(edge, false));
};
// Top-level parser: optional whitespace/comments, then exactly one list, object
// or constant expression, then optional whitespace/comments and the end of input.
const program = makeProgram(
    trans(parsed => parsed)(
        erase(repeat(commentOrSpace)),
        first(
            listValue,
            objectValue,
            constExpr(end()),
        ),
        erase(repeat(commentOrSpace)),
        end(),
    ),
);
/**
 * Parse a source text and return the value of the resulting top-level node.
 *
 * @param s  source text to parse
 * @returns the `value` of the first token produced by `program`
 * @throws Error carrying the formatted parser message when parsing does not succeed
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export function parse(s: string) {
    const result = program(parserInput(s));
    if (result.succeeded) {
        return result.tokens[0].value;
    }
    throw new Error(formatErrorMessage(result));
}