UNPKG

functionalscript

Version:

FunctionalScript is a purely functional subset of JavaScript

521 lines (520 loc) 22.9 kB
import * as operator from "../../types/function/operator/module.f.js";
import * as range_map from "../../types/range_map/module.f.js";
const { merge, fromRange, get } = range_map;
import * as list from "../../types/list/module.f.js";
import * as map from "../../types/ordered_map/module.f.js";
const { at } = map;
import * as _range from "../../types/range/module.f.js";
const { one } = _range;
const { empty, stateScan, flat, toArray, reduce: listReduce, scan, map: listMap } = list;
const { fromCharCode } = String;
import * as ascii from "../../text/ascii/module.f.js";
const { range } = ascii;
// NOTE: each group below must stay on its own line. If a `//` separator is
// joined with the following names, those names (and everything after them on
// that line) become part of the comment and the module no longer parses.
const {
    // control characters
    backspace, ht, lf, ff, cr,
    // punctuation used by operators
    exclamationMark, percentSign, ampersand, asterisk, lessThanSign, equalsSign, greaterThanSign, questionMark, circumflexAccent, verticalLine, tilde,
    // space and common delimiters
    space, quotationMark, leftParenthesis, rightParenthesis, plusSign, comma, hyphenMinus, fullStop, solidus,
    // digits
    digitRange, digit0, colon,
    // capital letters
    latinCapitalLetterRange, latinCapitalLetterA, latinCapitalLetterE,
    // brackets and underscore
    leftSquareBracket, reverseSolidus, rightSquareBracket, lowLine,
    // small letters
    latinSmallLetterRange, latinSmallLetterA, latinSmallLetterB, latinSmallLetterE, latinSmallLetterF, latinSmallLetterN, latinSmallLetterR, latinSmallLetterT, latinSmallLetterU,
    // curly brackets and dollar
    leftCurlyBracket, rightCurlyBracket, dollarSign
} = ascii;

// ---------------------------------------------------------------------------
// Character classes (expressed as ranges / lists of ranges of char codes)
// ---------------------------------------------------------------------------

/** Digits '1'..'9' (a non-zero leading digit of a number). */
const rangeOneNine = range('19');

/** Line terminators recognised by the tokenizer: LF and CR. */
const rangeSetNewLine = [
    one(lf),
    one(cr)
];

/** In-line white space: horizontal tab and space. */
const rangeSetWhiteSpace = [
    one(ht),
    one(space)
];

/**
 * Characters that end a number literal and are then re-tokenized from the
 * initial state. `+`, `-` and `.` are deliberately absent: they have their
 * own handlers in `parseNumberStateOp` because they can also be part of a
 * number.
 */
const rangeSetTerminalForNumber = [
    ...rangeSetWhiteSpace,
    ...rangeSetNewLine,
    one(exclamationMark),
    one(percentSign),
    one(ampersand),
    one(leftParenthesis),
    one(rightParenthesis),
    one(asterisk),
    one(comma),
    one(solidus),
    one(colon),
    one(lessThanSign),
    one(equalsSign),
    one(greaterThanSign),
    one(questionMark),
    one(circumflexAccent),
    one(leftSquareBracket),
    one(rightSquareBracket),
    one(leftCurlyBracket),
    one(verticalLine),
    one(rightCurlyBracket),
    one(tilde),
];

/** Hex digits 'a'..'f'. */
const rangeSmallAF = range('af');

/** Hex digits 'A'..'F'. */
const rangeCapitalAF = range('AF');

/** Characters that may start an identifier: letters, `_` and `$`. */
const rangeIdStart = [
    latinSmallLetterRange,
    latinCapitalLetterRange,
    one(lowLine),
    one(dollarSign)
];

/** Characters that may start an operator / punctuator. */
const rangeOpStart = [
    one(exclamationMark),
    one(percentSign),
    one(ampersand),
    one(leftParenthesis),
    one(rightParenthesis),
    one(asterisk),
    one(plusSign),
    one(comma),
    one(hyphenMinus),
    one(fullStop),
    one(solidus),
    one(colon),
    one(lessThanSign),
    one(equalsSign),
    one(greaterThanSign),
    one(questionMark),
    one(circumflexAccent),
    one(leftSquareBracket),
    one(rightSquareBracket),
    one(leftCurlyBracket),
    one(verticalLine),
    one(rightCurlyBracket),
    one(tilde)
];

/** Characters that may continue an identifier: digits plus `rangeIdStart`. */
const rangeId = [digitRange, ...rangeIdStart];

// ---------------------------------------------------------------------------
// Range-map plumbing
// ---------------------------------------------------------------------------

/** Appends the character with char code `input` to the string `old`. */
const appendChar = old => input => `${old}${fromCharCode(input)}`;

/**
 * Combines two handlers that cover the same range: the default handler `def`
 * yields to the other one, identical handlers collapse, and two different
 * non-default handlers are a conflict, reported by throwing the pair.
 */
const union = def => a => b => {
    if (a === def || a === b) {
        return b;
    }
    if (b === def) {
        return a;
    }
    throw [a, b];
};

/** `range_map.merge` configured with `union(def)` and strict equality. */
const rangeMapMerge = def => merge({
    union: union(def),
    equal: operator.strictEqual,
    def,
});

/** Builds (lazily, once `def` is known) a range map sending range `r` to handler `f`. */
const rangeFunc = r => f => def => fromRange(def)(r)(f);

/** Scan step that applies each deferred range-map builder to `def`. */
const scanRangeOp = def => f => [f(def), scanRangeOp(def)];

/** Instantiates every builder in `a` with `def` and merges the results into one array. */
const reduceRangeMap = def => a => {
    const rm = scan(scanRangeOp(def))(a);
    return toArray(listReduce(rangeMapMerge(def))(empty)(rm));
};

/** Scan step that turns each range `r` into a range map sending `r` to handler `f`. */
const scanRangeSetOp = def => f => r => [fromRange(def)(r)(f), scanRangeSetOp(def)(f)];

/** Like `rangeFunc`, but for a list of ranges `rs` that all share the handler `f`. */
const rangeSetFunc = rs => f => def => {
    const rm = scan(scanRangeSetOp(def)(f))(rs);
    return toArray(listReduce(rangeMapMerge(def))(empty)(rm));
};

/**
 * Builds a state handler from a default handler `def` and a list `a` of
 * deferred range maps. The returned function takes the state `v` and the
 * char code `c`, picks the handler registered for `c` (via `get(def)`; `def`
 * is presumably what is returned for unmapped characters) and calls it as
 * `handler(v)(c)`.
 */
const create = (def) => (a) => {
    const i = reduceRangeMap(def)(a);
    const x = get(def);
    return v => c => x(c)(i)(v)(c);
};

// ---------------------------------------------------------------------------
// Number buffer helpers
// ---------------------------------------------------------------------------

/** Numeric value of a decimal digit char code, as a bigint. */
const digitToBigInt = d => BigInt(d - digit0);

/**
 * Initial number buffer for a literal starting with `digit`:
 * `s` sign, `m` mantissa, `f` fraction exponent (<= 0), `es` exponent sign,
 * `e` exponent magnitude.
 */
const startNumber = digit => ({ s: 1n, m: digitToBigInt(digit), f: 0, es: 1, e: 0 });

/*
const startNegativeNumber
    : ParseNumberBuffer
    = { s: -1n, m: 0n, f: 0, es: 1, e: 0 }
*/

/** Appends an integer-part digit to the mantissa. */
const addIntDigit = digit => b => ({ ...b, m: b.m * 10n + digitToBigInt(digit) });

/** Appends a fractional digit: mantissa grows, fraction exponent decreases by one. */
const addFracDigit = digit => b => ({ ...b, m: b.m * 10n + digitToBigInt(digit), f: b.f - 1 });

/** Appends a digit to the exponent magnitude. */
const addExpDigit = digit => b => ({ ...b, e: b.e * 10 + digit - digit0 });

/**
 * Converts a finished number state into a token: a `bigint` token carrying
 * the signed mantissa, or a `number` token carrying the source text plus
 * `bf = [signed mantissa, decimal exponent]`.
 */
const bufferToNumberToken = ({ numberKind, value, b }) => {
    if (numberKind === 'bigint')
        return { kind: 'bigint', value: b.s * b.m };
    return { kind: 'number', value: value, bf: [b.s * b.m, b.f + b.es * b.e] };
};

// ---------------------------------------------------------------------------
// Keywords and operators
// ---------------------------------------------------------------------------

/**
 * @link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#keywords
 */
const keywordEntries = [
    ['arguments', { kind: 'arguments' }],
    ['await', { kind: 'await' }],
    ['break', { kind: 'break' }],
    ['case', { kind: 'case' }],
    ['catch', { kind: 'catch' }],
    ['class', { kind: 'class' }],
    ['const', { kind: 'const' }],
    ['continue', { kind: 'continue' }],
    ['debugger', { kind: 'debugger' }],
    ['default', { kind: 'default' }],
    ['delete', { kind: 'delete' }],
    ['do', { kind: 'do' }],
    ['else', { kind: 'else' }],
    ['enum', { kind: 'enum' }],
    ['eval', { kind: 'eval' }],
    ['export', { kind: 'export' }],
    ['extends', { kind: 'extends' }],
    ['false', { kind: 'false' }],
    ['finally', { kind: 'finally' }],
    ['for', { kind: 'for' }],
    ['function', { kind: 'function' }],
    ['if', { kind: 'if' }],
    ['implements', { kind: 'implements' }],
    ['import', { kind: 'import' }],
    ['in', { kind: 'in' }],
    ['instanceof', { kind: 'instanceof' }],
    ['interface', { kind: 'interface' }],
    ['let', { kind: 'let' }],
    ['new', { kind: 'new' }],
    ['null', { kind: 'null' }],
    ['package', { kind: 'package' }],
    ['private', { kind: 'private' }],
    ['protected', { kind: 'protected' }],
    ['public', { kind: 'public' }],
    ['return', { kind: 'return' }],
    ['static', { kind: 'static' }],
    ['super', { kind: 'super' }],
    ['switch', { kind: 'switch' }],
    ['this', { kind: 'this' }],
    ['throw', { kind: 'throw' }],
    ['true', { kind: 'true' }],
    ['try', { kind: 'try' }],
    ['typeof', { kind: 'typeof' }],
    ['undefined', { kind: 'undefined' }],
    ['var', { kind: 'var' }],
    ['void', { kind: 'void' }],
    ['while', { kind: 'while' }],
    ['with', { kind: 'with' }],
    ['yield', { kind: 'yield' }],
];

const keywordMap = map.fromEntries(keywordEntries);

/** True when the token's `kind` is one of the keywords in `keywordEntries`. */
export const isKeywordToken = token => at(token.kind)(keywordMap) !== null;

/**
 * @link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators
 */
const operatorEntries = [
    ['!', { kind: '!' }],
    ['!=', { kind: '!=' }],
    ['!==', { kind: '!==' }],
    ['%', { kind: '%' }],
    ['%=', { kind: '%=' }],
    ['&', { kind: '&' }],
    ['&&', { kind: '&&' }],
    ['&&=', { kind: '&&=' }],
    ['&=', { kind: '&=' }],
    ['(', { kind: '(' }],
    [')', { kind: ')' }],
    ['*', { kind: '*' }],
    ['**', { kind: '**' }],
    ['**=', { kind: '**=' }],
    ['*=', { kind: '*=' }],
    ['+', { kind: '+' }],
    ['++', { kind: '++' }],
    ['+=', { kind: '+=' }],
    [',', { kind: ',' }],
    ['-', { kind: '-' }],
    ['--', { kind: '--' }],
    ['-=', { kind: '-=' }],
    ['.', { kind: '.' }],
    ['/', { kind: '/' }],
    ['/=', { kind: '/=' }],
    [':', { kind: ':' }],
    ['<', { kind: '<' }],
    ['<<', { kind: '<<' }],
    ['<<=', { kind: '<<=' }],
    ['<=', { kind: '<=' }],
    ['=', { kind: '=' }],
    ['==', { kind: '==' }],
    ['===', { kind: '===' }],
    ['=>', { kind: '=>' }],
    ['>', { kind: '>' }],
    ['>=', { kind: '>=' }],
    ['>>', { kind: '>>' }],
    ['>>=', { kind: '>>=' }],
    ['>>>', { kind: '>>>' }],
    ['>>>=', { kind: '>>>=' }],
    ['?', { kind: '?' }],
    ['?.', { kind: '?.' }],
    ['??', { kind: '??' }],
    ['??=', { kind: '??=' }],
    ['^', { kind: '^' }],
    ['^=', { kind: '^=' }],
    ['[', { kind: '[' }],
    [']', { kind: ']' }],
    ['{', { kind: '{' }],
    ['|', { kind: '|' }],
    ['|=', { kind: '|=' }],
    ['||', { kind: '||' }],
    ['||=', { kind: '||=' }],
    ['}', { kind: '}' }],
    ['~', { kind: '~' }]
];

const operatorMap = map.fromEntries(operatorEntries);

/** Token for the operator text `op`, or an `invalid token` error if it is unknown. */
const getOperatorToken = op => at(op)(operatorMap) ?? { kind: 'error', message: 'invalid token' };

/** True when `op` is a known operator. */
const hasOperatorToken = op => at(op)(operatorMap) !== null;

// ---------------------------------------------------------------------------
// State handlers. Every handler has the shape
//     state => input => [tokens emitted by this character, next state]
// ---------------------------------------------------------------------------

/** Initial state: the first character decides which kind of token starts. */
const initialStateOp = create((state) => () => [[{ kind: 'error', message: 'unexpected character' }], state])([
    rangeFunc(rangeOneNine)(() => input => [empty, { kind: 'number', value: fromCharCode(input), b: startNumber(input), numberKind: 'int' }]),
    rangeSetFunc(rangeIdStart)(() => input => [empty, { kind: 'id', value: fromCharCode(input) }]),
    rangeSetFunc(rangeSetWhiteSpace)(() => () => [empty, { kind: 'ws' }]),
    rangeSetFunc(rangeSetNewLine)(() => () => [empty, { kind: 'nl' }]),
    rangeFunc(one(quotationMark))(() => () => [empty, { kind: 'string', value: '' }]),
    rangeFunc(one(digit0))(() => input => [empty, { kind: 'number', value: fromCharCode(input), b: startNumber(input), numberKind: '0' }]),
    rangeSetFunc(rangeOpStart)(() => input => [empty, { kind: 'op', value: fromCharCode(input) }])
]);

/** Default for the number state: report an invalid number, then re-tokenize `input` from scratch. */
const invalidNumberToToken = () => input => {
    const next = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: { kind: 'error', message: 'invalid number' }, tail: next[0] }, next[1]];
};

/** `.` inside a number: allowed only directly after the integer part. */
const fullStopToToken = state => input => {
    switch (state.numberKind) {
        case '0':
        case 'int': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: state.b, numberKind: '.' }];
        default: return tokenizeOp({ kind: 'invalidNumber' })(input);
    }
};

/** `0` inside a number. */
const digit0ToToken = state => input => {
    switch (state.numberKind) {
        // A leading zero cannot be followed by another digit, and nothing
        // numeric may follow the `n` suffix of a bigint literal.
        case '0':
        case 'bigint': return tokenizeOp({ kind: 'invalidNumber' })(input);
        case '.':
        case 'fractional': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addFracDigit(input)(state.b), numberKind: 'fractional' }];
        case 'e':
        case 'e+':
        case 'e-':
        case 'expDigits': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addExpDigit(input)(state.b), numberKind: 'expDigits' }];
        default: return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addIntDigit(input)(state.b), numberKind: state.numberKind }];
    }
};

/** `1`..`9` inside a number. */
const digit19ToToken = state => input => {
    switch (state.numberKind) {
        // Same restrictions as in `digit0ToToken`.
        case '0':
        case 'bigint': return tokenizeOp({ kind: 'invalidNumber' })(input);
        case '.':
        case 'fractional': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addFracDigit(input)(state.b), numberKind: 'fractional' }];
        case 'e':
        case 'e+':
        case 'e-':
        case 'expDigits': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addExpDigit(input)(state.b), numberKind: 'expDigits' }];
        default: return [empty, { kind: 'number', value: appendChar(state.value)(input), b: addIntDigit(input)(state.b), numberKind: 'int' }];
    }
};

/** `e` / `E` inside a number: starts the exponent after an integer or fractional part. */
const expToToken = state => input => {
    switch (state.numberKind) {
        case '0':
        case 'int':
        case 'fractional': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: state.b, numberKind: 'e' }];
        default: return tokenizeOp({ kind: 'invalidNumber' })(input);
    }
};

/** `-` inside a number: exponent sign right after `e`, otherwise it terminates the number. */
const hyphenMinusToToken = state => input => {
    switch (state.numberKind) {
        case 'e': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: { ...state.b, es: -1 }, numberKind: 'e-' }];
        default: return terminalToToken(state)(input);
    }
};

/**
 * `+` inside a number: exponent sign right after `e`, otherwise it terminates
 * the number exactly like `-` does, so that `1+2` yields number, `+`, number
 * instead of an invalid-number error.
 */
const plusSignToToken = state => input => {
    switch (state.numberKind) {
        case 'e': return [empty, { kind: 'number', value: appendChar(state.value)(input), b: state.b, numberKind: 'e+' }];
        default: return terminalToToken(state)(input);
    }
};

/**
 * A terminating character after a number: emit the number (or an error when
 * the literal is incomplete, e.g. `1.` or `1e`), then re-tokenize `input`
 * from the initial state.
 */
const terminalToToken = state => input => {
    switch (state.numberKind) {
        case '.':
        case 'e':
        case 'e+':
        case 'e-':
            {
                const next = tokenizeOp({ kind: 'initial' })(input);
                return [{ first: { kind: 'error', message: 'invalid number' }, tail: next[0] }, next[1]];
            }
        default:
            {
                const next = tokenizeOp({ kind: 'initial' })(input);
                return [{ first: bufferToNumberToken(state), tail: next[0] }, next[1]];
            }
    }
};

/** `n` inside a number: bigint suffix, valid only after an integer part. */
const bigintToToken = state => input => {
    switch (state.numberKind) {
        case '0':
        case 'int':
            {
                return [empty, { kind: 'number', value: state.value, b: state.b, numberKind: 'bigint' }];
            }
        default:
            {
                const next = tokenizeOp({ kind: 'initial' })(input);
                return [{ first: { kind: 'error', message: 'invalid number' }, tail: next[0] }, next[1]];
            }
    }
};

/** Number state: dispatches on the characters that are meaningful inside a literal. */
const parseNumberStateOp = create(invalidNumberToToken)([
    rangeFunc(one(fullStop))(fullStopToToken),
    rangeFunc(one(digit0))(digit0ToToken),
    rangeFunc(rangeOneNine)(digit19ToToken),
    rangeSetFunc([one(latinSmallLetterE), one(latinCapitalLetterE)])(expToToken),
    rangeFunc(one(hyphenMinus))(hyphenMinusToToken),
    rangeFunc(one(plusSign))(plusSignToToken),
    rangeSetFunc(rangeSetTerminalForNumber)(terminalToToken),
    rangeFunc(one(latinSmallLetterN))(bigintToToken),
]);

/** Invalid-number state: swallow characters until a terminal, then emit one error. */
const invalidNumberStateOp = create(() => () => [empty, { kind: 'invalidNumber' }])([
    rangeSetFunc(rangeSetTerminalForNumber)(() => input => {
        const next = tokenizeOp({ kind: 'initial' })(input);
        return [{ first: { kind: 'error', message: 'invalid number' }, tail: next[0] }, next[1]];
    })
]);

/** String state: accumulate characters until the closing quote. */
const parseStringStateOp = create((state) => input => [empty, { kind: 'string', value: appendChar(state.value)(input) }])([
    rangeFunc(one(quotationMark))(state => () => [[{ kind: 'string', value: state.value }], { kind: 'initial' }]),
    rangeFunc(one(reverseSolidus))(state => () => [empty, { kind: 'escapeChar', value: state.value }]),
    rangeSetFunc(rangeSetNewLine)(() => () => [[{ kind: 'error', message: 'unterminated string literal' }], { kind: 'nl' }])
]);

/** Unknown escape: report it and handle `input` as an ordinary string character. */
const parseEscapeDefault = state => input => {
    const next = tokenizeOp({ kind: 'string', value: state.value })(input);
    return [{ first: { kind: 'error', message: 'unescaped character' }, tail: next[0] }, next[1]];
};

/** Escape state: the character right after a backslash inside a string. */
const parseEscapeCharStateOp = create(parseEscapeDefault)([
    rangeSetFunc([one(quotationMark), one(reverseSolidus), one(solidus)])(state => input => [empty, { kind: 'string', value: appendChar(state.value)(input) }]),
    rangeFunc(one(latinSmallLetterB))(state => () => [empty, { kind: 'string', value: appendChar(state.value)(backspace) }]),
    rangeFunc(one(latinSmallLetterF))(state => () => [empty, { kind: 'string', value: appendChar(state.value)(ff) }]),
    rangeFunc(one(latinSmallLetterN))(state => () => [empty, { kind: 'string', value: appendChar(state.value)(lf) }]),
    rangeFunc(one(latinSmallLetterR))(state => () => [empty, { kind: 'string', value: appendChar(state.value)(cr) }]),
    rangeFunc(one(latinSmallLetterT))(state => () => [empty, { kind: 'string', value: appendChar(state.value)(ht) }]),
    rangeFunc(one(latinSmallLetterU))(state => () => [empty, { kind: 'unicodeChar', value: state.value, unicode: 0, hexIndex: 0 }]),
]);

/** Non-hex character inside `\uXXXX`: report it and continue the string with `input`. */
const parseUnicodeCharDefault = state => input => {
    const next = tokenizeOp({ kind: 'string', value: state.value })(input);
    return [{ first: { kind: 'error', message: 'invalid hex value' }, tail: next[0] }, next[1]];
};

/**
 * One hex digit of `\uXXXX`. `offset` is the char code that maps to value 0
 * for the digit's range; digits are placed most-significant first, and the
 * fourth digit completes the escape and appends the resulting code unit.
 */
const parseUnicodeCharHex = offset => state => input => {
    const hexValue = input - offset;
    const newUnicode = state.unicode | (hexValue << (3 - state.hexIndex) * 4);
    return [empty, state.hexIndex === 3 ?
            { kind: 'string', value: appendChar(state.value)(newUnicode) } :
            { kind: 'unicodeChar', value: state.value, unicode: newUnicode, hexIndex: state.hexIndex + 1 }];
};

/** Unicode-escape state: accepts 0-9, a-f and A-F. */
const parseUnicodeCharStateOp = create(parseUnicodeCharDefault)([
    rangeFunc(digitRange)(parseUnicodeCharHex(digit0)),
    rangeFunc(rangeSmallAF)(parseUnicodeCharHex(latinSmallLetterA - 10)),
    rangeFunc(rangeCapitalAF)(parseUnicodeCharHex(latinCapitalLetterA - 10))
]);

/** Keyword token for `s` if it is a keyword, otherwise an `id` token. */
const idToToken = s => at(s)(keywordMap) ?? { kind: 'id', value: s };

/** A non-identifier character ends the identifier; `input` is re-tokenized from scratch. */
const parseIdDefault = state => input => {
    const keyWordToken = idToToken(state.value);
    const next = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: keyWordToken, tail: next[0] }, next[1]];
};

/** Identifier state: keep appending identifier characters. */
const parseIdStateOp = create(parseIdDefault)([
    rangeSetFunc(rangeId)(state => input => [empty, { kind: 'id', value: appendChar(state.value)(input) }])
]);

/**
 * Operator state: extend the operator greedily while the longer text is
 * still a known operator; `//` and `/*` switch to the comment states.
 */
const parseOperatorStateOp = state => input => {
    const nextStateValue = appendChar(state.value)(input);
    switch (nextStateValue) {
        case '//': return [empty, { kind: '//', value: '', newLine: false }];
        case '/*': return [empty, { kind: '/*', value: '', newLine: false }];
        default: {
            if (hasOperatorToken(nextStateValue))
                return [empty, { kind: 'op', value: nextStateValue }];
            const next = tokenizeOp({ kind: 'initial' })(input);
            return [{ first: getOperatorToken(state.value), tail: next[0] }, next[1]];
        }
    }
};

/** Single-line comment state: ends at a line terminator. */
const parseSinglelineCommentStateOp = create((state) => input => [empty, { ...state, value: appendChar(state.value)(input) }])([
    rangeSetFunc(rangeSetNewLine)(state => () => [[{ kind: '//', value: state.value }], { kind: 'nl' }])
]);

/** Multi-line comment state; `newLine` records whether a line terminator was seen. */
const parseMultilineCommentStateOp = create((state) => input => [empty, { ...state, value: appendChar(state.value)(input) }])([
    rangeFunc(one(asterisk))(state => () => [empty, { ...state, kind: '/**' }]),
    rangeSetFunc(rangeSetNewLine)(state => input => [empty, { ...state, value: appendChar(state.value)(input), newLine: true }]),
]);

/**
 * Multi-line comment state right after a `*`: a `/` closes the comment, any
 * other character puts the pending `*` back into the comment text.
 */
const parseMultilineCommentAsteriskStateOp = create((state) => input => [empty, { ...state, kind: '/*', value: appendChar(appendChar(state.value)(asterisk))(input) }])([
    rangeFunc(one(asterisk))(state => () => [empty, { ...state, value: appendChar(state.value)(asterisk) }]),
    rangeSetFunc(rangeSetNewLine)(state => input => [empty, { kind: '/*', value: appendChar(appendChar(state.value)(asterisk))(input), newLine: true }]),
    rangeFunc(one(solidus))(state => () => {
        // A comment that spanned lines is followed by a synthetic `nl` token.
        const tokens = state.newLine ?
            [{ kind: '/*', value: state.value }, { kind: 'nl' }] :
            [{ kind: '/*', value: state.value }];
        return [tokens, { kind: 'initial' }];
    })
]);

/** End of a white-space run: emit one `ws` token and re-tokenize `input`. */
const parseWhitespaceDefault = () => input => {
    const next = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: { kind: 'ws' }, tail: next[0] }, next[1]];
};

/** White-space state: a line terminator upgrades the whole run to `nl`. */
const parseWhitespaceStateOp = create(parseWhitespaceDefault)([
    rangeSetFunc(rangeSetWhiteSpace)(state => () => [empty, state]),
    rangeSetFunc(rangeSetNewLine)(() => () => [empty, { kind: 'nl' }])
]);

/** End of a new-line run: emit one `nl` token and re-tokenize `input`. */
const parseNewLineDefault = _ => input => {
    const next = tokenizeOp({ kind: 'initial' })(input);
    return [{ first: { kind: 'nl' }, tail: next[0] }, next[1]];
};

/** New-line state: absorbs any further white space and line terminators. */
const parseNewLineStateOp = create(parseNewLineDefault)([
    rangeSetFunc(rangeSetWhiteSpace)(state => () => [empty, state]),
    rangeSetFunc(rangeSetNewLine)(state => () => [empty, state])
]);

/** After EOF every further character is an error. */
const eofStateOp = create((state) => () => [[{ kind: 'error', message: 'eof' }], state])([]);

// ---------------------------------------------------------------------------
// Drivers
// ---------------------------------------------------------------------------

/** Selects the handler for the current state kind. */
const tokenizeCharCodeOp = state => {
    switch (state.kind) {
        case 'initial': return initialStateOp(state);
        case 'id': return parseIdStateOp(state);
        case 'string': return parseStringStateOp(state);
        case 'escapeChar': return parseEscapeCharStateOp(state);
        case 'unicodeChar': return parseUnicodeCharStateOp(state);
        case 'invalidNumber': return invalidNumberStateOp(state);
        case 'number': return parseNumberStateOp(state);
        case 'op': return parseOperatorStateOp(state);
        case '//': return parseSinglelineCommentStateOp(state);
        case '/*': return parseMultilineCommentStateOp(state);
        case '/**': return parseMultilineCommentAsteriskStateOp(state);
        case 'ws': return parseWhitespaceStateOp(state);
        case 'nl': return parseNewLineStateOp(state);
        case 'eof': return eofStateOp(state);
    }
};

/** Flushes the pending state at end of input and appends the `eof` token. */
const tokenizeEofOp = state => {
    switch (state.kind) {
        case 'initial': return [[{ kind: 'eof' }], { kind: 'eof' }];
        case 'id': return [[idToToken(state.value), { kind: 'eof' }], { kind: 'eof' }];
        case 'string':
        case 'escapeChar':
        case 'unicodeChar': return [[{ kind: 'error', message: '" are missing' }, { kind: 'eof' }], { kind: 'eof' }];
        case 'invalidNumber': return [[{ kind: 'error', message: 'invalid number' }, { kind: 'eof' }], { kind: 'eof' }];
        case 'number':
            // A literal that stops right after `.`, `e`, `e+` or `e-` is incomplete.
            switch (state.numberKind) {
                case '.':
                case 'e':
                case 'e+':
                case 'e-': return [[{ kind: 'error', message: 'invalid number' }, { kind: 'eof' }], { kind: 'eof', }];
            }
            return [[bufferToNumberToken(state), { kind: 'eof' }], { kind: 'eof' }];
        case 'op': return [[getOperatorToken(state.value), { kind: 'eof' }], { kind: 'eof' }];
        case '//': return [[{ kind: '//', value: state.value }, { kind: 'eof' }], { kind: 'eof' }];
        case '/*':
        case '/**': return [[{ kind: 'error', message: '*/ expected' }, { kind: 'eof' }], { kind: 'eof', }];
        case 'ws': return [[{ kind: 'ws' }, { kind: 'eof' }], { kind: 'eof' }];
        case 'nl': return [[{ kind: 'nl' }, { kind: 'eof' }], { kind: 'eof' }];
        case 'eof': return [[{ kind: 'error', message: 'eof' }, { kind: 'eof' }], state];
    }
};

/** One tokenizer step; `null` input marks the end of the stream. */
const tokenizeOp = state => input => input === null ? tokenizeEofOp(state) : tokenizeCharCodeOp(state)(input);

/** Pairs a token with position metadata. */
const mapTokenWithMetadata = metadata => token => { return { token, metadata }; };

/**
 * One tokenizer step that also tracks `{ path, line, column }`. Tokens
 * emitted by a step are tagged with the position of the character that
 * triggered them (the metadata before it is advanced).
 */
const tokenizeWithPositionOp = ({ state, metadata }) => input => {
    if (input == null) {
        const newState = tokenizeEofOp(state);
        return [listMap(mapTokenWithMetadata(metadata))(newState[0]), { state: newState[1], metadata }];
    }
    const newState = tokenizeCharCodeOp(state)(input);
    // Only LF advances the line counter; CR just moves the column.
    const isNewLine = input === lf;
    const newMetadata = {
        path: metadata.path,
        line: isNewLine ? metadata.line + 1 : metadata.line,
        column: isNewLine ? 1 : metadata.column + 1
    };
    return [listMap(mapTokenWithMetadata(metadata))(newState[0]), { state: newState[1], metadata: newMetadata }];
};

const scanTokenize = stateScan(tokenizeWithPositionOp);

/**
 * Tokenizes a list of char codes.
 *
 * @param input list of char codes of the source text
 * @param path  source path copied into every token's metadata
 * @returns flat list of `{ token, metadata }`, ending with the `eof` token;
 *          positions start at line 1, column 1
 */
export const tokenize = input => path => {
    const scanner = scanTokenize({ state: { kind: 'initial' }, metadata: { path, line: 1, column: 1 } });
    // A trailing `null` is appended so the last pending token gets flushed.
    return flat(scanner(flat([input, [null]])));
};