fruitsconfits
Version:
FruitsConfits - A well typed and sugared parser combinator framework for TypeScript/JavaScript.
478 lines (432 loc) • 16.5 kB
text/typescript
// Copyright (c) 2019 Shellyl_N and Authors
// license: ISC
// https://github.com/shellyln
import { StringParserFnWithCtx } from './types';
import { zeroWidth,
zeroWidthError,
beginning,
end,
quantify,
first,
or,
transform,
lookAhead,
lookBehind,
ApplyProductionRulesArg,
applyProductionRules,
makeProgram } from './parser';
/**
 * Create a factory of parsers that match one fixed string.
 *
 * @param helper Converts the matched raw string into a token of type `R`.
 * @returns A function that takes the `needle` to look for and returns a parser.
 *          The parser succeeds when the unread range `src[start, end)` begins
 *          with `needle`; it then advances `start` by `needle.length` and emits
 *          `helper(needle)` as its only token. Otherwise it reports a soft
 *          failure (`error: false`) at the current position.
 */
export function charSequence<C, R>(
        helper: (token: string) => R
        ): (needle: string) => StringParserFnWithCtx<C, R> {

    return needle => input => {
        // Only the part of the source between start and end is visible to the parser.
        const rest = input.src.slice(input.start, input.end);

        if (!rest.startsWith(needle)) {
            return {
                succeeded: false,
                error: false,
                src: input.src,
                pos: input.start,
                message: `operator "charSequence(${needle})"`,
            };
        }

        return {
            succeeded: true,
            next: {
                src: input.src,
                start: input.start + needle.length,
                end: input.end,
                context: input.context,
                templateArgs: input.templateArgs,
                templateArgsPos: input.templateArgsPos,
            },
            tokens: [helper(needle)],
        };
    };
}
/**
 * Create a factory of parsers that match any one of several alternatives.
 *
 * @param helper Converts the matched raw string into a token of type `R`.
 * @returns A function that takes the candidate `needles` and returns a parser.
 *          Candidates are tried in the order given and the first one that the
 *          unread range `src[start, end)` begins with wins; the parser then
 *          advances by that candidate's length and emits `helper(candidate)`.
 *          When no candidate matches, a soft failure (`error: false`) is returned.
 */
export function charClass<C, R>(
        helper: (token: string) => R
        ): (...needles: string[]) => StringParserFnWithCtx<C, R> {
    // NOTE: each needle should be one character; a surrogate pair or a ligature
    //       (i.e. a multi-code-unit string) is accepted as well.

    return (...needles) => input => {
        const rest = input.src.slice(input.start, input.end);

        for (const candidate of needles) {
            if (rest.startsWith(candidate)) {
                return {
                    succeeded: true,
                    next: {
                        src: input.src,
                        start: input.start + candidate.length,
                        end: input.end,
                        context: input.context,
                        templateArgs: input.templateArgs,
                        templateArgsPos: input.templateArgsPos,
                    },
                    tokens: [helper(candidate)],
                };
            }
        }

        return {
            succeeded: false,
            error: false,
            src: input.src,
            pos: input.start,
            message: `operator "charClass(${needles.join(',')})"`,
        };
    };
}
/**
 * Create a factory of parsers that match one code point which is NOT the
 * start of any of the given alternatives.
 *
 * @param helper Converts the consumed character into a token of type `R`.
 * @returns A function that takes the excluded `needles` and returns a parser.
 *          The parser fails softly (`error: false`) when the unread range
 *          `src[start, end)` is empty or begins with one of the needles.
 *          Otherwise it consumes exactly one code point (one or two UTF-16
 *          code units) and emits `helper(character)`.
 */
export function charClassNot<C, R>(
        helper: (token: string) => R
        ): (...needles: string[]) => StringParserFnWithCtx<C, R> {
    // NOTE: each needle should be one character; a surrogate pair or a ligature
    //       (i.e. a multi-code-unit string) is accepted as well.

    return (...needles) => input => {
        const rest = input.src.slice(input.start, input.end);

        // Read the code point from the bounded slice, not from the whole source:
        // the parser must neither succeed at the end of the range nor consume
        // code units located at or after `input.end`.
        const codePoint = rest.codePointAt(0);

        if (codePoint === void 0 || needles.some(needle => rest.startsWith(needle))) {
            return {
                succeeded: false,
                error: false,
                src: input.src,
                pos: input.start,
                message: `operator "charClassNot(${needles.join(',')})"`,
            };
        }

        const ch = String.fromCodePoint(codePoint);
        return {
            succeeded: true,
            next: {
                src: input.src,
                start: input.start + ch.length,
                end: input.end,
                context: input.context,
                templateArgs: input.templateArgs,
                templateArgsPos: input.templateArgsPos,
            },
            tokens: [helper(ch)],
        };
    };
}
/**
 * Create a factory of parsers whose matching rule is supplied as a function.
 *
 * @param helper Converts the matched raw string into a token of type `R`.
 * @returns A function that takes a `needle` callback and returns a parser.
 *          The callback receives the unread range `src[start, end)` and
 *          returns the number of UTF-16 code units to consume, or a negative
 *          number for "no match". A return value of `0` is a successful
 *          zero-length match. On success the parser advances by that length and
 *          emits `helper()` of the consumed prefix; otherwise it returns a
 *          soft failure (`error: false`).
 */
export function charClassByNeedleFn<C, R>(
        helper: (token: string) => R
        ): (needle: (src: string) => number) => StringParserFnWithCtx<C, R> {

    return needle => input => {
        const rest = input.src.slice(input.start, input.end);
        const matchedLength = needle(rest);

        if (!(matchedLength >= 0)) {
            return {
                succeeded: false,
                error: false,
                src: input.src,
                pos: input.start,
                message: `operator "charClassByNeedleFn"`,
            };
        }

        return {
            succeeded: true,
            next: {
                src: input.src,
                start: input.start + matchedLength,
                end: input.end,
                context: input.context,
                templateArgs: input.templateArgs,
                templateArgsPos: input.templateArgsPos,
            },
            tokens: [helper(rest.substring(0, matchedLength))],
        };
    };
}
/**
 * Create a parser that consumes one template-string argument placeholder.
 *
 * A placeholder is a `'\x00'` character at the current position whose index
 * is listed in `input.templateArgsPos`; the argument value is read from
 * `input.templateArgs` at the same array index.
 *
 * @param criteria Predicate deciding whether the argument value is accepted.
 * @param conv     Optional conversion applied to an accepted value before it
 *                 is emitted as the token; the raw value is emitted when omitted.
 * @returns A parser that, on success, advances by one code unit and emits the
 *          (converted) argument as its only token. It fails softly
 *          (`error: false`) when the current position is outside the parse
 *          range, is not a registered placeholder, the template argument
 *          arrays are missing, or `criteria` rejects the value.
 */
export function templateStringsParam<C, R>(
        criteria: (o: any) => boolean, conv?: (o: any) => any): StringParserFnWithCtx<C, R> {

    return input => {
        const { templateArgs, templateArgsPos } = input;

        // Respect the parse range: a placeholder at or after `end` is not visible.
        const atPlaceholder =
            input.start < input.end &&
            input.src.charAt(input.start) === '\x00';

        // Both arrays are required; without `templateArgs` there is nothing to emit.
        if (atPlaceholder && templateArgs && templateArgsPos) {
            const argIdx = templateArgsPos.indexOf(input.start);
            if (0 <= argIdx) {
                // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
                const o = templateArgs[argIdx];
                if (criteria(o)) {
                    return {
                        succeeded: true,
                        next: {
                            src: input.src,
                            start: input.start + 1,
                            end: input.end,
                            context: input.context,
                            templateArgs: input.templateArgs,
                            templateArgsPos: input.templateArgsPos,
                        },
                        // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
                        tokens: [(conv ? conv(o) : o)],
                    };
                }
            }
        }

        // NOTE: the operator name in this message is kept verbatim for
        //       backward compatibility with existing consumers of the text.
        return {
            succeeded: false,
            error: false,
            src: input.src,
            pos: input.start,
            message: `operator "stringTemplatesParam()"`,
        };
    };
}
/**
 * Build the set of string parsers and combinators specialised for a
 * context type `C` and a token type `R`.
 *
 * @param params.rawToToken   Converts a matched raw string into a token; it is
 *                            passed to every primitive matcher created here.
 * @param params.concatTokens Token-list transformation used by `cat`.
 * @returns An object exposing:
 *          - the primitive matcher factories `seq`, `cls`, `notCls`, `clsFn`;
 *          - `classes`: ready-made single-character parsers (ASCII letters and
 *            digits, whitespace, control characters, newline, any character);
 *          - `numbers`: parsers for prefixed bin/oct/hex integers, decimal
 *            integers, `n`-suffixed big integers and floating point numbers;
 *          - `isParam`: the template-string argument parser factory;
 *          - combinators from `./parser` with the `string`, `C` and `R` type
 *            arguments already applied.
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export function getStringParsers<C, R>(
        params: {
            rawToToken: (rawToken: string) => R,
            concatTokens: (tokens: R[]) => R[],
        }) {

    const { rawToToken, concatTokens } = params;

    // ---- primitive matchers -------------------------------------------------
    const seq = charSequence<C, R>(rawToToken);
    const cls = charClass<C, R>(rawToToken);
    const notCls = charClassNot<C, R>(rawToToken);
    const clsFn = charClassByNeedleFn<C, R>(rawToToken);

    // ---- combinators with the type arguments applied ------------------------
    const cat = transform<string, C, R>(concatTokens);
    const once = quantify<string, C, R>(1, 1);
    const repeat = quantify<string, C, R>();
    // TODO: reduce unnecessary calls that exist only for adding types.
    const qty = (min?: number, max?: number) => quantify<string, C, R>(min, max);
    const combine = transform<string, C, R>();
    const erase = transform<string, C, R>(_tokens => []);

    // ---- character tables and predicates ------------------------------------
    const unicodeSpaces =
        '\u00a0\u1680\u180e' +
        '\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a' +
        '\u2028\u2029\u202f\u205f\u3000\ufeff';
    const allSpaces = ' \f\n\r\t\v' + unicodeSpaces;
    // Same table without '\n' and '\r'.
    const inlineSpaces = ' \f\t\v' + unicodeSpaces;

    const between = (ch: string, lo: string, hi: string) => lo <= ch && ch <= hi;
    const isControlCodePoint = (cp: number) =>
        (0x0000 <= cp && cp <= 0x001f) ||
        (0x007f <= cp && cp <= 0x009f);

    // Turn a predicate over the leading code point into a parser that consumes
    // that one code point (one or two UTF-16 code units) when accepted.
    const codePointClass = (accepts: (ch: string, cp: number) => boolean) =>
        clsFn(src => {
            const cp = src.codePointAt(0);
            if (cp === void 0) {
                return -1;
            }
            const ch = String.fromCodePoint(cp);
            return accepts(ch, cp) ? ch.length : -1;
        });

    // ---- character classes --------------------------------------------------
    const isAlpha = codePointClass(ch => between(ch, 'A', 'Z') || between(ch, 'a', 'z'));
    const isUpper = codePointClass(ch => between(ch, 'A', 'Z'));
    const isLower = codePointClass(ch => between(ch, 'a', 'z'));
    const isNumber = codePointClass(ch => between(ch, '0', '9'));
    const isNonZeroNumber = codePointClass(ch => between(ch, '1', '9'));
    const isBinNum = codePointClass(ch => between(ch, '0', '1'));
    const isOctNum = codePointClass(ch => between(ch, '0', '7'));
    const isHexNum = codePointClass(ch =>
        between(ch, 'A', 'F') || between(ch, 'a', 'f') || between(ch, '0', '9'));
    const isAlNum = codePointClass(ch =>
        between(ch, 'A', 'Z') || between(ch, 'a', 'z') || between(ch, '0', '9'));
    const isSpace = codePointClass(ch => allSpaces.includes(ch));
    const isSpaceWithinSingleLine = codePointClass(ch => inlineSpaces.includes(ch));
    const isControl = codePointClass((_ch, cp) => isControlCodePoint(cp));
    // A "word" character is anything that is neither whitespace nor control.
    const isWord = codePointClass((ch, cp) =>
        !(allSpaces.includes(ch) || isControlCodePoint(cp)));
    const isNewline = cls('\r\n', '\n', '\r');
    const isAny = codePointClass(() => true);

    // ---- prefixed integers (bin / oct / hex) --------------------------------
    const binSep = first(isBinNum, cls('_'));
    const octSep = first(isOctNum, cls('_'));
    const hexSep = first(isHexNum, cls('_'));

    // <erased prefix> <one digit> <digits or '_'>*
    const prefixedInteger = (digit: typeof isBinNum, digitOrSep: typeof binSep) =>
        (...prefixes: Array<StringParserFnWithCtx<C, R>>) =>
            combine(erase(first(...prefixes)),
                    cat(once(digit), repeat(digitOrSep)));

    const binaryIntegerNumber = prefixedInteger(isBinNum, binSep);
    const octalIntegerNumber = prefixedInteger(isOctNum, octSep);
    const hexIntegerValue = prefixedInteger(isHexNum, hexSep);

    // ---- decimal numbers ----------------------------------------------------
    // These fragments are factories, not shared instances, so every grammar
    // below is assembled from its own combinator objects.
    const optionalSign = () => qty(0, 1)(cls('+', '-'));
    const digitOrUnderscore = () => first(isNumber, cls('_'));
    // Either a non-zero digit followed by digits/underscores, or a single '0'.
    const unsignedInteger = () =>
        first(combine(once(isNonZeroNumber), repeat(digitOrUnderscore())),
              seq('0'));

    const decimalIntegerNumber =
        combine(cat(optionalSign(), unsignedInteger()));

    // Decimal integer followed by an 'n' suffix that is passed through `erase`.
    const bigDecimalIntegerNumber =
        combine(cat(decimalIntegerNumber, erase(seq('n'))));

    // Optional '.' followed by at least one digit or underscore.
    const fractionPart =
        qty(0, 1)(combine(seq('.'), qty(1)(digitOrUnderscore())));
    // Optional 'E'/'e', optional sign, then an integer without underscores.
    const exponentPart =
        qty(0, 1)(combine(
            cls('E', 'e'),
            optionalSign(),
            first(combine(once(isNonZeroNumber), repeat(isNumber)), seq('0'))));

    const floatingPointNumber =
        combine(cat(optionalSign(), unsignedInteger(), fractionPart, exponentPart));

    // TODO: reduce unnecessary calls that exist only for adding types
    //       (applies to the thin arrow wrappers in the object below).
    return {
        seq,
        cls,
        notCls,
        clsFn,
        classes: {
            alpha: isAlpha,
            upper: isUpper,
            lower: isLower,
            num: isNumber,
            nonzero: isNonZeroNumber,
            bin: isBinNum,
            oct: isOctNum,
            hex: isHexNum,
            alnum: isAlNum,
            space: isSpace,
            spaceWithinSingleLine: isSpaceWithinSingleLine,
            ctrl: isControl,
            newline: isNewline,
            word: isWord,
            any: isAny,
        },
        numbers: {
            bin: binaryIntegerNumber,
            oct: octalIntegerNumber,
            hex: hexIntegerValue,
            int: decimalIntegerNumber,
            bigint: bigDecimalIntegerNumber,
            float: floatingPointNumber,
        },
        isParam: templateStringsParam,
        cat,
        once,
        repeat,
        qty,
        zeroWidth: (helper?: () => R) => zeroWidth<string, C, R>(helper),
        err: (message: string) => zeroWidthError<string, C, R>(message),
        beginning: (helper?: () => R) => beginning<string, C, R>(helper),
        end: (helper?: () => R) => end<string, C, R>(helper),
        first: (...parsers: Array<StringParserFnWithCtx<C, R>>) => first<string, C, R>(...parsers),
        or: (...parsers: Array<StringParserFnWithCtx<C, R>>) => or<string, C, R>(...parsers),
        combine,
        erase,
        trans: (fn: (tokens: R[]) => R[]) => transform<string, C, R>(fn),
        ahead: (...parsers: Array<StringParserFnWithCtx<C, R>>) => lookAhead<string, C, R>(...parsers),
        behind: (n: number, helper?: () => R) => lookBehind<string, C, R>(n, helper),
        rules: (args: ApplyProductionRulesArg<string, C, R>) => applyProductionRules<string, C, R>(args),
        makeProgram,
    };
}