functionalscript
Version:
FunctionalScript is a purely functional subset of JavaScript
521 lines (520 loc) • 22.9 kB
JavaScript
import * as operator from "../../types/function/operator/module.f.js";
import * as range_map from "../../types/range_map/module.f.js";
const { merge, fromRange, get } = range_map;
import * as list from "../../types/list/module.f.js";
import * as map from "../../types/ordered_map/module.f.js";
const { at } = map;
import * as _range from "../../types/range/module.f.js";
const { one } = _range;
const { empty, stateScan, flat, toArray, reduce: listReduce, scan, map: listMap } = list;
const { fromCharCode } = String;
import * as ascii from "../../text/ascii/module.f.js";
const { range } = ascii;
const {
//
backspace, ht, lf, ff, cr,
//
exclamationMark, percentSign, ampersand, asterisk, lessThanSign, equalsSign, greaterThanSign, questionMark, circumflexAccent, verticalLine, tilde,
//
space, quotationMark, leftParenthesis, rightParenthesis, plusSign, comma, hyphenMinus, fullStop, solidus,
//
digitRange, digit0, colon,
//
latinCapitalLetterRange, latinCapitalLetterA, latinCapitalLetterE,
//
leftSquareBracket, reverseSolidus, rightSquareBracket, lowLine,
//
latinSmallLetterRange, latinSmallLetterA, latinSmallLetterB, latinSmallLetterE, latinSmallLetterF, latinSmallLetterN, latinSmallLetterR, latinSmallLetterT, latinSmallLetterU,
//
leftCurlyBracket, rightCurlyBracket, dollarSign } = ascii;
// Range built from the string '19' (presumably the characters '1' through '9').
const rangeOneNine = range('19');
// Single-character ranges for the line terminators LF and CR.
const rangeSetNewLine = [lf, cr].map(code => one(code));
// Single-character ranges for horizontal whitespace: TAB and SPACE.
const rangeSetWhiteSpace = [ht, space].map(code => one(code));
// Characters routed to `terminalToToken` while a number is being read:
// whitespace, line terminators and the punctuation listed below.
// '+', '-' and '.' are absent because the number state machine handles them separately.
const rangeSetTerminalForNumber = [
    ...rangeSetWhiteSpace,
    ...rangeSetNewLine,
    ...[
        exclamationMark,
        percentSign,
        ampersand,
        leftParenthesis,
        rightParenthesis,
        asterisk,
        comma,
        solidus,
        colon,
        lessThanSign,
        equalsSign,
        greaterThanSign,
        questionMark,
        circumflexAccent,
        leftSquareBracket,
        rightSquareBracket,
        leftCurlyBracket,
        verticalLine,
        rightCurlyBracket,
        tilde,
    ].map(code => one(code)),
];
// Letter ranges used for hexadecimal digits inside \uXXXX escapes.
const rangeSmallAF = range('af');
const rangeCapitalAF = range('AF');
// Characters that may begin an identifier: Latin letters, '_' and '$'.
const rangeIdStart = [
    latinSmallLetterRange,
    latinCapitalLetterRange,
    ...[lowLine, dollarSign].map(code => one(code)),
];
// Characters that switch the initial state into the operator ('op') state.
const rangeOpStart = [
    exclamationMark,
    percentSign,
    ampersand,
    leftParenthesis,
    rightParenthesis,
    asterisk,
    plusSign,
    comma,
    hyphenMinus,
    fullStop,
    solidus,
    colon,
    lessThanSign,
    equalsSign,
    greaterThanSign,
    questionMark,
    circumflexAccent,
    leftSquareBracket,
    rightSquareBracket,
    leftCurlyBracket,
    verticalLine,
    rightCurlyBracket,
    tilde,
].map(code => one(code));
// Characters that may continue an identifier: digits plus every identifier-start character.
const rangeId = [digitRange].concat(rangeIdStart);
/**
 * Appends the character with code `input` to the string `old`.
 * Curried: `appendChar(old)(input)`.
 */
const appendChar = old => input => {
    const ch = fromCharCode(input);
    return `${old}${ch}`;
};
/**
 * Combines two range-map values where `def` means "no value".
 *
 * - returns `b` when `a` is the default or both values are identical;
 * - returns `a` when only `b` is the default;
 * - otherwise throws the conflicting pair `[a, b]`.
 */
const union = def => a => b => {
    const takeB = a === def || a === b;
    if (takeB) {
        return b;
    }
    if (b !== def) {
        // Two different non-default values claim the same range.
        throw [a, b];
    }
    return a;
};
/**
 * Builds the range-map merge operation for maps whose default value is `def`.
 * Values are combined with `union(def)` and compared with strict equality.
 */
const rangeMapMerge = def => {
    const properties = {
        union: union(def),
        equal: operator.strictEqual,
        def,
    };
    return merge(properties);
};
/**
 * Associates handler `f` with the single range `r`.
 * The result is a function that takes the default value `def`
 * and returns `fromRange(def)(r)(f)`.
 */
const rangeFunc = r => f => def => {
    const forRange = fromRange(def)(r);
    return forRange(f);
};
// Scan operator: applies each "def => rangeMap" function to `def` and keeps scanning with the same operator.
const scanRangeOp = def => {
    const op = f => [f(def), op];
    return op;
};
/**
 * Turns a list `a` of "def => rangeMap" functions into one merged range map (as an array).
 * Every function is applied to `def` and the results are merged with `rangeMapMerge(def)`.
 */
const reduceRangeMap = def => a => {
    const rangeMaps = scan(scanRangeOp(def))(a);
    const merged = listReduce(rangeMapMerge(def))(empty)(rangeMaps);
    return toArray(merged);
};
// Scan operator: maps each range `r` to `fromRange(def)(r)(f)` and keeps scanning with the same operator.
const scanRangeSetOp = def => f => {
    const op = r => [fromRange(def)(r)(f), op];
    return op;
};
/**
 * Associates handler `f` with every range in the set `rs`.
 * The result is a function that takes the default value `def`
 * and returns the merged range map as an array.
 */
const rangeSetFunc = rs => f => def => {
    const rangeMaps = scan(scanRangeSetOp(def)(f))(rs);
    const merged = listReduce(rangeMapMerge(def))(empty)(rangeMaps);
    return toArray(merged);
};
/**
 * Builds a state operation from a default handler `def` and a list `a` of range handlers
 * (as produced by `rangeFunc` / `rangeSetFunc`).
 *
 * The returned function is `state => charCode => result`: it looks up the handler
 * for the character code in the merged range map and calls it with the state and the code.
 */
const create = (def) => (a) => {
    const table = reduceRangeMap(def)(a);
    const lookup = get(def);
    return state => charCode => {
        const handler = lookup(charCode)(table);
        return handler(state)(charCode);
    };
};
// Converts a digit character code to its numeric value as a BigInt.
const digitToBigInt = d => {
    const value = d - digit0;
    return BigInt(value);
};
/**
 * Creates the number buffer for a literal that starts with `digit`:
 * sign `s`, mantissa `m`, fractional exponent `f`, exponent sign `es`, exponent `e`.
 */
const startNumber = digit => {
    const m = digitToBigInt(digit);
    return { s: 1n, m, f: 0, es: 1, e: 0 };
};
/*
const startNegativeNumber
: ParseNumberBuffer
= { s: -1n, m: 0n, f: 0, es: 1, e: 0 }
*/
// Appends an integer digit to the mantissa.
const addIntDigit = digit => b => {
    const m = b.m * 10n + digitToBigInt(digit);
    return { ...b, m };
};
// Appends a fractional digit: the mantissa grows and the fractional exponent `f` drops by one.
const addFracDigit = digit => b => {
    const m = b.m * 10n + digitToBigInt(digit);
    const f = b.f - 1;
    return { ...b, m, f };
};
// Appends a digit to the (unsigned) exponent `e`.
const addExpDigit = digit => b => {
    const e = b.e * 10 + digit - digit0;
    return { ...b, e };
};
/**
 * Converts a finished number state into a token.
 *
 * - 'bigint' literals become `{ kind: 'bigint', value }` with the signed mantissa;
 * - everything else becomes `{ kind: 'number', value, bf }`, where `value` is the source text
 *   and `bf` is the pair [signed mantissa, decimal exponent].
 */
const bufferToNumberToken = ({ numberKind, value, b }) => {
    const mantissa = b.s * b.m;
    return numberKind === 'bigint'
        ? { kind: 'bigint', value: mantissa }
        : { kind: 'number', value, bf: [mantissa, b.f + b.es * b.e] };
};
/**
 * Keyword table: one `[name, { kind: name }]` entry per reserved word.
 *
 * @link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#keywords
 */
const keywordEntries = [
    'arguments',
    'await',
    'break',
    'case',
    'catch',
    'class',
    'const',
    'continue',
    'debugger',
    'default',
    'delete',
    'do',
    'else',
    'enum',
    'eval',
    'export',
    'extends',
    'false',
    'finally',
    'for',
    'function',
    'if',
    'implements',
    'import',
    'in',
    'instanceof',
    'interface',
    'let',
    'new',
    'null',
    'package',
    'private',
    'protected',
    'public',
    'return',
    'static',
    'super',
    'switch',
    'this',
    'throw',
    'true',
    'try',
    'typeof',
    'undefined',
    'var',
    'void',
    'while',
    'with',
    'yield',
].map(kind => [kind, { kind }]);
// Ordered map from keyword text to its token.
const keywordMap = map.fromEntries(keywordEntries);
/**
 * Tells whether `token.kind` is one of the keywords in `keywordMap`.
 */
export const isKeywordToken = ({ kind }) => {
    const found = at(kind)(keywordMap);
    return found !== null;
};
/**
 * Operator and punctuator table: one `[text, { kind: text }]` entry per operator.
 *
 * @link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators
 */
const operatorEntries = [
    '!', '!=', '!==',
    '%', '%=',
    '&', '&&', '&&=', '&=',
    '(', ')',
    '*', '**', '**=', '*=',
    '+', '++', '+=',
    ',',
    '-', '--', '-=',
    '.',
    '/', '/=',
    ':',
    '<', '<<', '<<=', '<=',
    '=', '==', '===', '=>',
    '>', '>=', '>>', '>>=', '>>>', '>>>=',
    '?', '?.', '??', '??=',
    '^', '^=',
    '[', ']',
    '{',
    '|', '|=', '||', '||=',
    '}',
    '~',
].map(kind => [kind, { kind }]);
// Ordered map from operator text to its token.
const operatorMap = map.fromEntries(operatorEntries);
// Returns the token for `op`, or an 'invalid token' error token when `op` is not a known operator.
const getOperatorToken = op => {
    const found = at(op)(operatorMap);
    return found ?? { kind: 'error', message: 'invalid token' };
};
// Tells whether `op` is a known operator.
const hasOperatorToken = op => {
    const found = at(op)(operatorMap);
    return found !== null;
};
// Handler factory: starts a 'number' state of the given `numberKind` from the first digit.
const startNumberState = numberKind => () => input => [
    empty,
    { kind: 'number', value: fromCharCode(input), b: startNumber(input), numberKind },
];
/**
 * State operation for the 'initial' state: picks the next state from the first character
 * of a token. Characters without a handler produce an 'unexpected character' error
 * and leave the state unchanged.
 */
const initialStateOp = create(
    state => () => [[{ kind: 'error', message: 'unexpected character' }], state]
)([
    rangeFunc(rangeOneNine)(startNumberState('int')),
    rangeSetFunc(rangeIdStart)(() => charCode => [empty, { kind: 'id', value: fromCharCode(charCode) }]),
    rangeSetFunc(rangeSetWhiteSpace)(() => () => [empty, { kind: 'ws' }]),
    rangeSetFunc(rangeSetNewLine)(() => () => [empty, { kind: 'nl' }]),
    rangeFunc(one(quotationMark))(() => () => [empty, { kind: 'string', value: '' }]),
    rangeFunc(one(digit0))(startNumberState('0')),
    rangeSetFunc(rangeOpStart)(() => charCode => [empty, { kind: 'op', value: fromCharCode(charCode) }]),
]);
/**
 * Default handler of the number state: emits an 'invalid number' error and then
 * re-processes `input` from the 'initial' state, putting the error in front of its tokens.
 */
const invalidNumberToToken = () => input => {
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    const first = { kind: 'error', message: 'invalid number' };
    return [{ first, tail }, nextState];
};
/**
 * Handles '.' inside a number. After an integer part ('0' or 'int') it moves to the '.' kind;
 * in any other position `input` is passed to the 'invalidNumber' state.
 */
const fullStopToToken = state => input => {
    const { numberKind } = state;
    if (numberKind === '0' || numberKind === 'int') {
        return [empty, { kind: 'number', value: appendChar(state.value)(input), b: state.b, numberKind: '.' }];
    }
    return tokenizeOp({ kind: 'invalidNumber' })(input);
};
/**
 * Handles the digit '0' inside a number.
 *
 * - after a leading '0' or after the bigint suffix 'n' the literal is invalid,
 *   so `input` is passed to the 'invalidNumber' state;
 * - after '.' or within the fraction it is a fractional digit;
 * - after 'e', 'e+', 'e-' or within the exponent it is an exponent digit;
 * - otherwise it extends the integer part, keeping the current number kind.
 */
const digit0ToToken = state => input => {
    switch (state.numberKind) {
        case '0':
        // A digit after the bigint suffix (e.g. `1n0`) must not be folded into the mantissa.
        case 'bigint': return tokenizeOp({ kind: 'invalidNumber' })(input);
        case '.':
        case 'fractional': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addFracDigit(input)(state.b), numberKind: 'fractional' }];
        case 'e':
        case 'e+':
        case 'e-':
        case 'expDigits': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addExpDigit(input)(state.b), numberKind: 'expDigits' }];
        default: return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addIntDigit(input)(state.b), numberKind: state.numberKind }];
    }
};
/**
 * Handles the digits '1'..'9' inside a number.
 *
 * - after a leading '0' or after the bigint suffix 'n' the literal is invalid,
 *   so `input` is passed to the 'invalidNumber' state;
 * - after '.' or within the fraction it is a fractional digit;
 * - after 'e', 'e+', 'e-' or within the exponent it is an exponent digit;
 * - otherwise it extends the integer part.
 */
const digit19ToToken = state => input => {
    switch (state.numberKind) {
        case '0':
        // A digit after the bigint suffix (e.g. `1n2`) must not turn the literal back into an integer.
        case 'bigint': return tokenizeOp({ kind: 'invalidNumber' })(input);
        case '.':
        case 'fractional': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addFracDigit(input)(state.b), numberKind: 'fractional' }];
        case 'e':
        case 'e+':
        case 'e-':
        case 'expDigits': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addExpDigit(input)(state.b), numberKind: 'expDigits' }];
        default: return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addIntDigit(input)(state.b), numberKind: 'int' }];
    }
};
/**
 * Handles 'e' / 'E' inside a number. After an integer or fractional part it starts the exponent
 * (kind 'e'); in any other position `input` is passed to the 'invalidNumber' state.
 */
const expToToken = state => input => {
    const canStartExponent = ['0', 'int', 'fractional'].includes(state.numberKind);
    if (!canStartExponent) {
        return tokenizeOp({ kind: 'invalidNumber' })(input);
    }
    return [empty, { kind: 'number', value: appendChar(state.value)(input), b: state.b, numberKind: 'e' }];
};
/**
 * Handles '-' inside a number. Right after 'e' it marks a negative exponent (kind 'e-');
 * anywhere else it is treated as a terminal character.
 */
const hyphenMinusToToken = state => input => {
    if (state.numberKind !== 'e') {
        return terminalToToken(state)(input);
    }
    const b = { ...state.b, es: -1 };
    return [empty, { kind: 'number', value: appendChar(state.value)(input), b, numberKind: 'e-' }];
};
/**
 * Handles '+' inside a number. Right after 'e' it marks an explicit positive exponent (kind 'e+').
 *
 * Anywhere else '+' terminates the number, exactly like '-' in `hyphenMinusToToken`:
 * the number (or an 'invalid number' error for an incomplete literal) is emitted and
 * '+' is re-processed from the 'initial' state, so `1+2` yields number, operator, number.
 */
const plusSignToToken = state => input => {
    switch (state.numberKind) {
        case 'e': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: state.b, numberKind: 'e+' }];
        default: return terminalToToken(state)(input);
    }
};
/**
 * Ends the current number at a terminal character.
 *
 * `input` is re-processed from the 'initial' state. Its tokens are preceded by the number token,
 * or by an 'invalid number' error when the literal stops right after '.', 'e', 'e+' or 'e-'.
 */
const terminalToToken = state => input => {
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    const isIncomplete = ['.', 'e', 'e+', 'e-'].includes(state.numberKind);
    const first = isIncomplete
        ? { kind: 'error', message: 'invalid number' }
        : bufferToNumberToken(state);
    return [{ first, tail }, nextState];
};
/**
 * Handles the bigint suffix 'n'. After an integer part ('0' or 'int') the number becomes
 * kind 'bigint' (the suffix is not appended to `value`). In any other position an
 * 'invalid number' error is emitted and `input` is re-processed from the 'initial' state.
 */
const bigintToToken = state => input => {
    const { numberKind } = state;
    if (numberKind === '0' || numberKind === 'int') {
        return [empty, { kind: 'number', value: state.value, b: state.b, numberKind: 'bigint' }];
    }
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    const first = { kind: 'error', message: 'invalid number' };
    return [{ first, tail }, nextState];
};
// Character handlers of the 'number' state, in the order they are merged.
const numberCharHandlers = [
    rangeFunc(one(fullStop))(fullStopToToken),
    rangeFunc(one(digit0))(digit0ToToken),
    rangeFunc(rangeOneNine)(digit19ToToken),
    rangeSetFunc([latinSmallLetterE, latinCapitalLetterE].map(code => one(code)))(expToToken),
    rangeFunc(one(hyphenMinus))(hyphenMinusToToken),
    rangeFunc(one(plusSign))(plusSignToToken),
    rangeSetFunc(rangeSetTerminalForNumber)(terminalToToken),
    rangeFunc(one(latinSmallLetterN))(bigintToToken),
];
/**
 * State operation for the 'number' state. Characters without a handler
 * go to `invalidNumberToToken`.
 */
const parseNumberStateOp = create(invalidNumberToToken)(numberCharHandlers);
/**
 * State operation for the 'invalidNumber' state: characters are skipped until a terminal
 * character arrives. The terminal emits an 'invalid number' error and is then
 * re-processed from the 'initial' state (which is what `invalidNumberToToken` does).
 */
const invalidNumberStateOp = create(
    () => () => [empty, { kind: 'invalidNumber' }]
)([
    rangeSetFunc(rangeSetTerminalForNumber)(invalidNumberToToken),
]);
// Default handler of the 'string' state: the character is appended to the string value.
const appendToString = ({ value }) => charCode => [empty, { kind: 'string', value: appendChar(value)(charCode) }];
/**
 * State operation for the 'string' state:
 * - '"' closes the literal and emits the string token;
 * - '\' switches to the 'escapeChar' state;
 * - a line terminator emits an 'unterminated string literal' error and moves to the 'nl' state.
 */
const parseStringStateOp = create(appendToString)([
    rangeFunc(one(quotationMark))(({ value }) => () => [[{ kind: 'string', value }], { kind: 'initial' }]),
    rangeFunc(one(reverseSolidus))(({ value }) => () => [empty, { kind: 'escapeChar', value }]),
    rangeSetFunc(rangeSetNewLine)(() => () => [[{ kind: 'error', message: 'unterminated string literal' }], { kind: 'nl' }]),
]);
/**
 * Default handler of the 'escapeChar' state (unknown escape): emits an 'unescaped character'
 * error and re-processes `input` as an ordinary character of the string.
 */
const parseEscapeDefault = ({ value }) => input => {
    const [tail, nextState] = tokenizeOp({ kind: 'string', value })(input);
    const first = { kind: 'error', message: 'unescaped character' };
    return [{ first, tail }, nextState];
};
// Handler factory: the escape sequence appends the fixed character `code` and returns to the 'string' state.
const escapeToCharCode = code => ({ value }) => () => [empty, { kind: 'string', value: appendChar(value)(code) }];
/**
 * State operation for the 'escapeChar' state (the character after '\'):
 * - '"', '\' and '/' stand for themselves;
 * - b, f, n, r, t map to backspace, form feed, line feed, carriage return and tab;
 * - u starts a four-digit 'unicodeChar' escape;
 * - anything else goes to `parseEscapeDefault`.
 */
const parseEscapeCharStateOp = create(parseEscapeDefault)([
    rangeSetFunc([quotationMark, reverseSolidus, solidus].map(code => one(code)))(
        ({ value }) => charCode => [empty, { kind: 'string', value: appendChar(value)(charCode) }]),
    rangeFunc(one(latinSmallLetterB))(escapeToCharCode(backspace)),
    rangeFunc(one(latinSmallLetterF))(escapeToCharCode(ff)),
    rangeFunc(one(latinSmallLetterN))(escapeToCharCode(lf)),
    rangeFunc(one(latinSmallLetterR))(escapeToCharCode(cr)),
    rangeFunc(one(latinSmallLetterT))(escapeToCharCode(ht)),
    rangeFunc(one(latinSmallLetterU))(({ value }) => () => [empty, { kind: 'unicodeChar', value, unicode: 0, hexIndex: 0 }]),
]);
/**
 * Default handler of the 'unicodeChar' state (non-hex character): emits an 'invalid hex value'
 * error and re-processes `input` as an ordinary character of the string.
 */
const parseUnicodeCharDefault = ({ value }) => input => {
    const [tail, nextState] = tokenizeOp({ kind: 'string', value })(input);
    const first = { kind: 'error', message: 'invalid hex value' };
    return [{ first, tail }, nextState];
};
/**
 * Handler factory for one hex digit of a \uXXXX escape.
 *
 * `offset` is subtracted from the character code to get the digit value. The digit is placed
 * into the 4-bit slot selected by `state.hexIndex` (index 0 is the most significant).
 * After the fourth digit the assembled code unit is appended to the string.
 */
const parseUnicodeCharHex = offset => state => input => {
    const { value, unicode, hexIndex } = state;
    const shift = (3 - hexIndex) * 4;
    const newUnicode = unicode | ((input - offset) << shift);
    if (hexIndex === 3) {
        return [empty, { kind: 'string', value: appendChar(value)(newUnicode) }];
    }
    return [empty, { kind: 'unicodeChar', value, unicode: newUnicode, hexIndex: hexIndex + 1 }];
};
// Hex-digit handlers: each offset maps the character code to its value 0..15.
const hexDigitHandlers = [
    rangeFunc(digitRange)(parseUnicodeCharHex(digit0)),
    rangeFunc(rangeSmallAF)(parseUnicodeCharHex(latinSmallLetterA - 10)),
    rangeFunc(rangeCapitalAF)(parseUnicodeCharHex(latinCapitalLetterA - 10)),
];
/**
 * State operation for the 'unicodeChar' state (digits of a \uXXXX escape).
 * Non-hex characters go to `parseUnicodeCharDefault`.
 */
const parseUnicodeCharStateOp = create(parseUnicodeCharDefault)(hexDigitHandlers);
// Returns the keyword token for `s`, or an 'id' token when `s` is not a keyword.
const idToToken = s => {
    const keyword = at(s)(keywordMap);
    return keyword ?? { kind: 'id', value: s };
};
/**
 * Default handler of the 'id' state: the identifier ends here. Its token is emitted and
 * `input` is re-processed from the 'initial' state.
 */
const parseIdDefault = ({ value }) => input => {
    const first = idToToken(value);
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    return [{ first, tail }, nextState];
};
// Handler for identifier characters: the character is appended to the identifier text.
const appendToId = ({ value }) => charCode => [empty, { kind: 'id', value: appendChar(value)(charCode) }];
/**
 * State operation for the 'id' state. Any character outside `rangeId`
 * ends the identifier via `parseIdDefault`.
 */
const parseIdStateOp = create(parseIdDefault)([
    rangeSetFunc(rangeId)(appendToId),
]);
/**
 * State operation for the 'op' state.
 *
 * The operator text extended by `input` is examined:
 * - '//' and '/*' start a comment state;
 * - a known operator keeps accumulating;
 * - otherwise the operator read so far is emitted and `input` is re-processed
 *   from the 'initial' state.
 */
const parseOperatorStateOp = state => input => {
    const candidate = appendChar(state.value)(input);
    if (candidate === '//' || candidate === '/*') {
        return [empty, { kind: candidate, value: '', newLine: false }];
    }
    if (hasOperatorToken(candidate)) {
        return [empty, { kind: 'op', value: candidate }];
    }
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: getOperatorToken(state.value), tail }, nextState];
};
// Default handler of the '//' state: the character is appended to the comment text.
const appendToLineComment = state => charCode => {
    const value = appendChar(state.value)(charCode);
    return [empty, { ...state, value }];
};
/**
 * State operation for the '//' state. A line terminator ends the comment:
 * the comment token is emitted and the state becomes 'nl'.
 */
const parseSinglelineCommentStateOp = create(appendToLineComment)([
    rangeSetFunc(rangeSetNewLine)(({ value }) => () => [[{ kind: '//', value }], { kind: 'nl' }]),
]);
// Default handler of the '/*' state: the character is appended to the comment text.
const appendToBlockComment = state => charCode => {
    const value = appendChar(state.value)(charCode);
    return [empty, { ...state, value }];
};
/**
 * State operation for the '/*' state:
 * - '*' switches to the '/**' state (a possible end of the comment);
 * - a line terminator is appended and recorded in the `newLine` flag.
 */
const parseMultilineCommentStateOp = create(appendToBlockComment)([
    rangeFunc(one(asterisk))(state => () => [empty, { ...state, kind: '/**' }]),
    rangeSetFunc(rangeSetNewLine)(state => charCode => {
        const value = appendChar(state.value)(charCode);
        return [empty, { ...state, value, newLine: true }];
    }),
]);
// Appends the pending '*' followed by `charCode` to the comment text.
const appendAsteriskAnd = value => charCode => appendChar(appendChar(value)(asterisk))(charCode);
/**
 * State operation for the '/**' state (inside a block comment, right after a '*'):
 * - '/' closes the comment and emits its token, followed by an 'nl' token when the
 *   comment contained a line terminator;
 * - another '*' stores the previous one in the text and stays in this state;
 * - a line terminator, or any other character, stores the pending '*' plus the character
 *   and returns to the '/*' state (the line terminator also sets `newLine`).
 */
const parseMultilineCommentAsteriskStateOp = create(
    state => charCode => [empty, { ...state, kind: '/*', value: appendAsteriskAnd(state.value)(charCode) }]
)([
    rangeFunc(one(asterisk))(state => () => [empty, { ...state, value: appendChar(state.value)(asterisk) }]),
    rangeSetFunc(rangeSetNewLine)(state => charCode => [empty, { kind: '/*', value: appendAsteriskAnd(state.value)(charCode), newLine: true }]),
    rangeFunc(one(solidus))(state => () => {
        const comment = { kind: '/*', value: state.value };
        const tokens = state.newLine ? [comment, { kind: 'nl' }] : [comment];
        return [tokens, { kind: 'initial' }];
    }),
]);
/**
 * Default handler of the 'ws' state: the whitespace run ends. A 'ws' token is emitted and
 * `input` is re-processed from the 'initial' state.
 */
const parseWhitespaceDefault = () => input => {
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: { kind: 'ws' }, tail }, nextState];
};
/**
 * State operation for the 'ws' state: further whitespace is absorbed, and a line terminator
 * turns the whole run into the 'nl' state without emitting a token.
 */
const parseWhitespaceStateOp = create(parseWhitespaceDefault)([
    rangeSetFunc(rangeSetWhiteSpace)(current => () => [empty, current]),
    rangeSetFunc(rangeSetNewLine)(() => () => [empty, { kind: 'nl' }]),
]);
/**
 * Default handler of the 'nl' state: the run ends. An 'nl' token is emitted and
 * `input` is re-processed from the 'initial' state.
 */
const parseNewLineDefault = _ => input => {
    const [tail, nextState] = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: { kind: 'nl' }, tail }, nextState];
};
/**
 * State operation for the 'nl' state: following whitespace and line terminators are
 * absorbed into the same run.
 */
const parseNewLineStateOp = create(parseNewLineDefault)([
    rangeSetFunc(rangeSetWhiteSpace)(current => () => [empty, current]),
    rangeSetFunc(rangeSetNewLine)(current => () => [empty, current]),
]);
// State operation for the 'eof' state: any further character yields an 'eof' error and keeps the state.
const eofStateOp = create(
    current => () => [[{ kind: 'error', message: 'eof' }], current]
)([]);
// Selects the state operation for a state kind; `undefined` for an unknown kind.
const stateOpForKind = kind => {
    switch (kind) {
        case 'initial': return initialStateOp;
        case 'id': return parseIdStateOp;
        case 'string': return parseStringStateOp;
        case 'escapeChar': return parseEscapeCharStateOp;
        case 'unicodeChar': return parseUnicodeCharStateOp;
        case 'invalidNumber': return invalidNumberStateOp;
        case 'number': return parseNumberStateOp;
        case 'op': return parseOperatorStateOp;
        case '//': return parseSinglelineCommentStateOp;
        case '/*': return parseMultilineCommentStateOp;
        case '/**': return parseMultilineCommentAsteriskStateOp;
        case 'ws': return parseWhitespaceStateOp;
        case 'nl': return parseNewLineStateOp;
        case 'eof': return eofStateOp;
        default: return undefined;
    }
};
/**
 * Dispatches on `state.kind` and returns that state's character handler
 * (`charCode => [tokens, nextState]`), or `undefined` for an unknown kind.
 */
const tokenizeCharCodeOp = state => stateOpForKind(state.kind)?.(state);
/**
 * Handles the end of input for the given state and returns `[tokens, nextState]`.
 *
 * Complete tokens are flushed, unfinished strings, numbers and block comments become errors,
 * and an 'eof' token is added at the end. Reaching the end of input again while already in
 * the 'eof' state yields an 'eof' error and keeps the state object.
 */
const tokenizeEofOp = state => {
    const finishWith = token => [[token, { kind: 'eof' }], { kind: 'eof' }];
    const failWith = message => finishWith({ kind: 'error', message });
    switch (state.kind) {
        case 'initial':
            return [[{ kind: 'eof' }], { kind: 'eof' }];
        case 'id':
            return finishWith(idToToken(state.value));
        case 'string':
        case 'escapeChar':
        case 'unicodeChar':
            return failWith('" are missing');
        case 'invalidNumber':
            return failWith('invalid number');
        case 'number': {
            // A literal that stops right after '.', 'e', 'e+' or 'e-' is incomplete.
            const isIncomplete = ['.', 'e', 'e+', 'e-'].includes(state.numberKind);
            return isIncomplete ? failWith('invalid number') : finishWith(bufferToNumberToken(state));
        }
        case 'op':
            return finishWith(getOperatorToken(state.value));
        case '//':
            return finishWith({ kind: '//', value: state.value });
        case '/*':
        case '/**':
            return failWith('*/ expected');
        case 'ws':
            return finishWith({ kind: 'ws' });
        case 'nl':
            return finishWith({ kind: 'nl' });
        case 'eof':
            return [[{ kind: 'error', message: 'eof' }, { kind: 'eof' }], state];
    }
};
/**
 * One tokenizer step: `null` input means end of input, anything else is a character code.
 * Returns `[tokens, nextState]`.
 */
const tokenizeOp = state => input => {
    if (input === null) {
        return tokenizeEofOp(state);
    }
    return tokenizeCharCodeOp(state)(input);
};
// Pairs a token with the position metadata it is reported at.
const mapTokenWithMetadata = metadata => token => ({ token, metadata });
/**
 * One tokenizer step that also tracks the source position.
 *
 * @param {{ state: object, metadata: { path: unknown, line: number, column: number } }} acc
 *   current tokenizer state and the position of the character about to be read
 * @returns function taking a character code (or `null`/`undefined` for end of input) and
 *   returning `[tokensWithMetadata, nextAcc]`
 *
 * Emitted tokens are tagged with the metadata held before `input` is consumed. A line feed
 * moves the position to column 1 of the next line; any other character advances the column.
 */
const tokenizeWithPositionOp = ({ state, metadata }) => input => {
    const tag = listMap(mapTokenWithMetadata(metadata));
    if (input == null) {
        const [tokens, nextState] = tokenizeEofOp(state);
        return [tag(tokens), { state: nextState, metadata }];
    }
    const [tokens, nextState] = tokenizeCharCodeOp(state)(input);
    // Strict comparison: only the numeric LF code starts a new line.
    const isNewLine = input === lf;
    const newMetadata = {
        path: metadata.path,
        line: isNewLine ? metadata.line + 1 : metadata.line,
        column: isNewLine ? 1 : metadata.column + 1,
    };
    return [tag(tokens), { state: nextState, metadata: newMetadata }];
};
// Stateful scan that threads `tokenizeWithPositionOp` through a list of inputs.
const scanTokenize = stateScan(tokenizeWithPositionOp);
/**
 * Tokenizes `input`, a list of character codes, read from `path`.
 *
 * A `null` end-of-input marker is appended to the input, scanning starts in the 'initial'
 * state at line 1, column 1, and the per-character token lists are flattened into one list
 * of `{ token, metadata }` items.
 */
export const tokenize = input => path => {
    const start = { state: { kind: 'initial' }, metadata: { path, line: 1, column: 1 } };
    const inputWithEof = flat([input, [null]]);
    const tokenLists = scanTokenize(start)(inputWithEof);
    return flat(tokenLists);
};