UNPKG

@fink/larix

Version:

A parser for generating fink's AST.

695 lines (587 loc) 14.4 kB
import { slice } from "@fink/std-lib/str.js";
import { match_all, rx, split } from "@fink/std-lib/regex.js";
import { unfold_ac, length } from "@fink/std-lib/iter.js";

/**
 * Computes the location reached after consuming `text` from `start`.
 *
 * @param {{pos: number, line: number, column: number}} start location of the
 *   first character of `text`.
 * @param {string} text the consumed text.
 * @returns {{pos: number, line: number, column: number}} `pos` advanced by the
 *   length of `text`, `line` advanced by the number of `\n` separated pieces
 *   minus one, and `column` either advanced by the text length (single piece)
 *   or set to the length of the last piece.
 */
export const get_loc = (start, text) => {
  const text_len = length(text);
  const lines = split(text, rx`\n`);
  const lines_len = length(lines);
  const line = start.line - 1 + lines_len;

  let column;
  if (lines_len === 1) {
    // No line break consumed: keep moving along the current line.
    column = start.column + text_len;
  } else {
    // The column restarts and equals the length of the last piece.
    const [...pieces] = lines;
    const [last] = pieces.slice(-1);
    column = length(last);
  }

  return { pos: start.pos + text_len, line, column };
};

// NOTE(review): the patterns below contain literal spaces and a line break
// between alternatives, so `rx` presumably ignores pattern whitespace -
// confirm against @fink/std-lib. The template contents are kept exactly as
// they were, including the line break in front of the `end` alternative.

/** Lexer pattern used for regular fink code. */
export const fink_lex = rx`^( (?<block>( fn|pipe|match|else| rec|list| import| await )(?=\s|:)) |(?<kwop>(in|or|and|not)(?=\s)) |(?<value>(true|false)\b) |(?<ignorable>\s+) |(?<comment_sl>#.*?(?=\n)) |(?<comment_ml>---[\s\S]*?---) |(?<str_start>\'|") |(?<jsx_elem_close>/>) |(?<jsx_frag_open><>) |(?<jsx_elem_start>(<(?=\w))) |(?<compare>((!=)|(==)|(<=)|(>=)|<)) |(?<assign>=) |(?<overloaded>[>{}]) |(?<arithm>[-+*/%^](?=\s)) |(?<prefix_neg>-(?=\S)) |(?<empty>\b_\b) |(?<number>[0-9][\._a-fA-Fxobe+0-9-]*) |(?<ident>[_$\p{L}][-_$\p{L}\p{N}]*) |(?<spread>\.\.\.) |(?<member>\.) |(?<operator>[?|]) |(?<grouping>[()[\]]) |(?<terminator>[,:]) |(?<error>.) 
|(?<end>$) )`;

/** Lexer pattern used inside single-quoted strings. */
export const str_sq_lex = rx`^( (?<str_end>') |(?<str_expr_start>\$\{) |(?<str_text>(?:[^'\\]|\\.)+?(?='|$|(\$\{))) |(?<end>$) ) `;

/** Lexer pattern used inside double-quoted strings. */
export const str_dq_lex = rx`^( (?<str_end>") |(?<str_expr_start>\$\{) |(?<str_text>(?:[^"\\]|\\.)+?(?="|$|(\$\{))) |(?<end>$) ) `;

/** Lexer pattern used for JSX children (between element tags). */
export const jsx_lex = rx`^( (?<jsx_frag_open><>) |(?<jsx_frag_close></>) |(?<jsx_elem_start>(<(?=\w))) |(?<jsx_elem_close>(</.+?>)) |(?<jsx_expr_start>\{) |(?<jsx_text>[^<{]+) |(?<end>$) ) `;

/** Lexer pattern used inside a JSX opening tag (name and attributes). */
export const jsx_elem_lex = rx`^( (?<ignorable>\s+) |(?<comment_sl>#.*?(?=\n)) |(?<comment_ml>---[\s\S]*?---) |(?<value>(true|false)\b) |(?<str_start>\'|") |(?<jsx_elem_close>/>) |(?<jsx_frag_open><>) |(?<jsx_elem_start>(<(?=\w))) |(?<compare>((!=)|(==)|(<=)|(>=)|<)) |(?<assign>=) |(?<overloaded>[>{}]) |(?<arithm>[-+*/%^](?=\s)) |(?<prefix_neg>-(?=\S)) |(?<empty>\b_\b) |(?<number>[0-9][\._a-fA-Fxobe+0-9-]*) |(?<ident>[_$\p{L}][-_$\p{L}\p{N}]*) |(?<spread>\.\.\.) |(?<member>\.) |(?<operator>[?|]) |(?<grouping>[()[\]]) |(?<terminator>[,:]) |(?<error>.) 
|(?<end>$) )`;

// Named capture groups that map directly onto a token type, in the order in
// which they are tested. Note that `jsx_frag_close` is reported with the same
// type as `jsx_elem_close`.
const group_token_types = [
  [`str_start`, `str-start`],
  [`str_text`, `str-text`],
  [`str_end`, `str-end`],
  [`str_expr_start`, `str-expr-start`],
  [`comment_sl`, `comment-sl`],
  [`comment_ml`, `comment-ml`],
  [`ignorable`, `ignorable`],
  [`prefix_neg`, `prefix_neg`],
  [`ident`, `ident`],
  [`number`, `number`],
  [`error`, `error`],
  [`end`, `end`],
  [`jsx_elem_start`, `jsx-elem-start`],
  [`jsx_elem_close`, `jsx-elem-close`],
  [`jsx_frag_open`, `jsx-frag-open`],
  [`jsx_frag_close`, `jsx-elem-close`],
  [`jsx_expr_start`, `jsx-expr-start`],
  [`jsx_text`, `jsx-text`]
];

/**
 * Determines the token type for a regex match.
 *
 * @param {{groups?: object}} matched match result carrying the named groups.
 * @param {string} value the matched text.
 * @param {Array} matchers stack of `[lexer, curly_count]` entries; the first
 *   entry is the active lexer, the second its parent.
 * @returns {string} the token type; tokens without a dedicated type use their
 *   own text (`value`) as type.
 */
export const get_token_type = (matched, value, matchers) => {
  const groups = matched.groups;

  if (groups == null) {
    return value;
  }

  for (const [group_name, token_type] of group_token_types) {
    if (groups[group_name] != null) {
      return token_type;
    }
  }

  if (groups.overloaded != null) {
    // `{`, `}` and `>` mean different things depending on the active lexer
    // and on the lexer that was active before it.
    const [[curr_matcher, cc], parent] = matchers;

    if (value === `{`) {
      return curr_matcher === jsx_elem_lex ? `jsx-expr-start` : value;
    }

    if (value === `}`) {
      const [parent_lex] = parent;

      // Only a `}` that balances the `{` which opened the embedded expression
      // (curly count back at 0) terminates that expression.
      if (cc === 0) {
        if (parent_lex === str_dq_lex || parent_lex === str_sq_lex) {
          return `str-expr-end`;
        }
        if (parent_lex === jsx_lex || parent_lex === jsx_elem_lex) {
          return `jsx-expr-end`;
        }
      }
      return value;
    }

    /* istanbul ignore else TODO: cov should be done by loxia */
    if (value === `>`) {
      return curr_matcher === jsx_elem_lex ? `jsx-elem-end` : value;
    }
  }

  return value;
};

/**
 * Reads the next token from `code` at `start` using the active lexer.
 *
 * @param {Array} matchers stack of `[lexer, curly_count]` entries.
 * @param {string} code the complete source text.
 * @param {{pos: number, line: number, column: number}} start where to read.
 * @returns {Array} `[token, end]` where `token` is
 *   `{type, value, loc: {start, end}}` and `end` is the location after it.
 */
export const get_next_token = (matchers, code, start) => {
  const [[lex]] = matchers;
  const code_slice = slice(code, start.pos);
  const [matched] = match_all(code_slice, lex);
  const [value] = matched;
  const type = get_token_type(matched, value, matchers);
  const end = get_loc(start, value);
  const token = { type, value, loc: { start, end } };
  return [token, end];
};

/**
 * Computes the lexer stack to use after `token` has been read.
 *
 * @param {{type: string, value: string}} token the token just read.
 * @param {Array} matchers the current stack of `[lexer, curly_count]` entries.
 * @returns {Array} the stack with a lexer pushed, popped, swapped or unchanged.
 */
export const get_next_matchers = (token, matchers) => {
  const [curr_matcher, ...parent_matchers] = matchers;

  if (token != null) {
    const { type, value } = token;

    switch (type) {
      // Leaving an element, an embedded expression or a string: back to the
      // lexer that was active before.
      // {type: 'jsx-elem-close', value: '/>'}: parent_matchers
      case `jsx-elem-close`:
      case `jsx-expr-end`:
      case `str-end`:
      case `str-expr-end`:
        return parent_matchers;

      case `jsx-frag-open`:
        return [[jsx_lex, 0], ...matchers];

      case `jsx-elem-start`:
        return [[jsx_elem_lex, 0], ...matchers];

      // The opening tag is complete: its lexer is replaced by the one for
      // the element's children.
      case `jsx-elem-end`:
        return [[jsx_lex, 0], ...parent_matchers];

      case `jsx-expr-start`:
      case `str-expr-start`:
        return [[fink_lex, 0], ...matchers];

      case `str-start`:
        if (value === `"`) {
          return [[str_dq_lex, 0], ...matchers];
        }
        if (value === `'`) {
          return [[str_sq_lex, 0], ...matchers];
        }
        break;

      default:
        break;
    }
  }

  return [curr_matcher, ...parent_matchers];
};

/**
 * Updates the curly-brace counter of the active lexer for `token`.
 *
 * @param {{value: string}} token the token just read.
 * @param {Array} matchers stack of `[lexer, curly_count]` entries.
 * @returns {Array} a new stack whose first entry has its count incremented for
 *   `{`, decremented for `}` and unchanged otherwise.
 */
export const update_matcher_state = (token, matchers) => {
  const [[curr_m, curr_cc], ...rest] = matchers;

  let next_cc = curr_cc;
  if (token != null) {
    if (token.value === `{`) {
      next_cc = curr_cc + 1;
    } else if (token.value === `}`) {
      next_cc = curr_cc - 1;
    }
  }

  return [[curr_m, next_cc], ...rest];
};

/**
 * Tokenizes `code`.
 *
 * @param {string} code the source text.
 * @param {{pos: number, line: number, column: number}} [start] location of the
 *   first character; defaults to position 0, line 1, column 0.
 * @returns the result of `unfold_ac(step)(undefined)`; each step yields one
 *   token together with the context for the next step.
 */
export const tokenize = (code, start = { pos: 0, line: 1, column: 0 }) => {
  // The trailing empty entry lets `get_token_type` destructure a parent lexer
  // even when only the root lexer is active.
  const initial_ctx = { code, start, matchers: [[fink_lex, 0], []] };

  const step = (_prev, { code, start, matchers } = initial_ctx) => {
    const [token, next_start] = get_next_token(matchers, code, start);
    const curr_matchers = update_matcher_state(token, matchers);
    const next_matchers = get_next_matchers(token, curr_matchers);
    return [token, { code, start: next_start, matchers: next_matchers }];
  };

  // Keep the call on the same line as `return`: a line break directly after
  // `return` triggers automatic semicolon insertion and yields `undefined`.
  return unfold_ac(step)(undefined);
};

// TODO: should prattler allow registering expr builders for token-type + value
// if so, we could simply test on token type being 'keyword' rather than
// testing type in keywords or just use 'ident' for keywords
export const keywords = [
  `import`, `fn`, `pipe`, `match`, `else`, `list`, `rec`, `await`,
  `and`, `or`, `not`, `in`, `true`, `false`
];