/*
 * @fink/larix
 * A parser for generating fink's AST.
 * Listing metadata: 695 lines (587 loc), 14.4 kB, JavaScript.
 */
import { slice } from "@fink/std-lib/str.js";
import { match_all, rx, split } from "@fink/std-lib/regex.js";
import { unfold_ac, length } from "@fink/std-lib/iter.js";
/**
 * Computes the location reached after consuming `text` from `start`.
 *
 * @param {{pos: number, line: number, column: number}} start
 *   location at which `text` begins
 * @param text the consumed text (presumably a string; it is measured with
 *   `length` and split on line breaks)
 * @returns {{pos: number, line: number, column: number}}
 *   the location immediately after `text`
 */
export const get_loc = (start, text) => {
  const consumed = length(text);
  const rows = split(text, rx`\n`);
  const row_count = length(rows);
  const line = start.line - 1 + row_count;

  let column;
  if (row_count === 1) {
    // No line break in the text: stay on the same line and advance the column.
    column = start.column + consumed;
  } else {
    // The text spans several lines: the column restarts on its final line.
    const [...all_rows] = rows;
    const [final_row] = all_rows.slice(-1);
    column = length(final_row);
  }

  return {
    pos: start.pos + consumed,
    line,
    column
  };
};
// Lexer pattern for regular fink code, built with the `rx` template tag.
// The pattern is anchored with `^` and consists of one alternation of named
// groups. `get_token_type` looks at which named group matched: some groups
// map to a dedicated token type, `overloaded` (`>`, `{`, `}`) is resolved from
// the matcher stack, and every other group (e.g. `block`, `kwop`, `compare`,
// `grouping`) yields the matched text itself as the token type.
// `error` matches any single character the earlier alternatives did not, and
// `end` matches the end of the input.
// NOTE(review): the multi-line layout assumes `rx` ignores the line breaks
// inside the template - confirm against @fink/std-lib/regex.js.
// NOTE(review): `comment_sl` requires a following `\n`, so a `#` comment on the
// very last line without a trailing newline does not match it - confirm
// whether that is intended.
export const fink_lex = rx`^(
(?<block>(
fn|pipe|match|else|
rec|list|
import|
await
)(?=\s|:))
|(?<kwop>(in|or|and|not)(?=\s))
|(?<value>(true|false)\b)
|(?<ignorable>\s+)
|(?<comment_sl>#.*?(?=\n))
|(?<comment_ml>---[\s\S]*?---)
|(?<str_start>\'|")
|(?<jsx_elem_close>/>)
|(?<jsx_frag_open><>)
|(?<jsx_elem_start>(<(?=\w)))
|(?<compare>((!=)|(==)|(<=)|(>=)|<))
|(?<assign>=)
|(?<overloaded>[>{}])
|(?<arithm>[-+*/%^](?=\s))
|(?<prefix_neg>-(?=\S))
|(?<empty>\b_\b)
|(?<number>[0-9][\._a-fA-Fxobe+0-9-]*)
|(?<ident>[_$\p{L}][-_$\p{L}\p{N}]*)
|(?<spread>\.\.\.)
|(?<member>\.)
|(?<operator>[?|])
|(?<grouping>[()[\]])
|(?<terminator>[,:])
|(?<error>.)
|(?<end>$)
)`;
// Lexer pattern used inside a single-quoted string; `get_next_matchers` pushes
// it when a `str-start` token with value `'` is seen.
// Named groups: `str_end` (the closing `'`), `str_expr_start` (`${`),
// `str_text` (a run of non-quote characters or backslash escapes, stopping
// before `'`, `${` or the end of input) and `end` (end of input).
export const str_sq_lex = rx`^(
(?<str_end>')
|(?<str_expr_start>\$\{)
|(?<str_text>(?:[^'\\]|\\.)+?(?='|$|(\$\{)))
|(?<end>$)
)
`;
// Lexer pattern used inside a double-quoted string; `get_next_matchers` pushes
// it when a `str-start` token with value `"` is seen.
// Named groups: `str_end` (the closing `"`), `str_expr_start` (`${`),
// `str_text` (a run of non-quote characters or backslash escapes, stopping
// before `"`, `${` or the end of input) and `end` (end of input).
export const str_dq_lex = rx`^(
(?<str_end>")
|(?<str_expr_start>\$\{)
|(?<str_text>(?:[^"\\]|\\.)+?(?="|$|(\$\{)))
|(?<end>$)
)
`;
// Lexer pattern for JSX children; `get_next_matchers` switches to it after a
// `jsx-frag-open` or `jsx-elem-end` token.
// Named groups: `jsx_frag_open` (`<>`), `jsx_frag_close` (`</>`),
// `jsx_elem_start` (`<` followed by a word character), `jsx_elem_close`
// (`</...>`), `jsx_expr_start` (`{`), `jsx_text` (anything up to the next `<`
// or `{`) and `end` (end of input).
// Note that `get_token_type` reports both `jsx_frag_close` and
// `jsx_elem_close` as the token type `jsx-elem-close`.
export const jsx_lex = rx`^(
(?<jsx_frag_open><>)
|(?<jsx_frag_close></>)
|(?<jsx_elem_start>(<(?=\w)))
|(?<jsx_elem_close>(</.+?>))
|(?<jsx_expr_start>\{)
|(?<jsx_text>[^<{]+)
|(?<end>$)
)
`;
// Lexer pattern used inside a JSX opening tag; `get_next_matchers` pushes it
// on a `jsx-elem-start` token.
// It contains the same alternatives as `fink_lex` except for the `block` and
// `kwop` keyword groups, and lists `ignorable` and the comment groups first.
// While this lexer is active, `get_token_type` reports `{` as
// `jsx-expr-start` and `>` as `jsx-elem-end`.
export const jsx_elem_lex = rx`^(
(?<ignorable>\s+)
|(?<comment_sl>#.*?(?=\n))
|(?<comment_ml>---[\s\S]*?---)
|(?<value>(true|false)\b)
|(?<str_start>\'|")
|(?<jsx_elem_close>/>)
|(?<jsx_frag_open><>)
|(?<jsx_elem_start>(<(?=\w)))
|(?<compare>((!=)|(==)|(<=)|(>=)|<))
|(?<assign>=)
|(?<overloaded>[>{}])
|(?<arithm>[-+*/%^](?=\s))
|(?<prefix_neg>-(?=\S))
|(?<empty>\b_\b)
|(?<number>[0-9][\._a-fA-Fxobe+0-9-]*)
|(?<ident>[_$\p{L}][-_$\p{L}\p{N}]*)
|(?<spread>\.\.\.)
|(?<member>\.)
|(?<operator>[?|])
|(?<grouping>[()[\]])
|(?<terminator>[,:])
|(?<error>.)
|(?<end>$)
)`;
// Named lexer groups that map straight to a token type, in the order in which
// they are checked. `jsx_frag_close` deliberately shares `jsx-elem-close`, and
// `prefix_neg` keeps its underscore, because other code matches on these
// exact strings.
const direct_token_types = [
  [`str_start`, `str-start`],
  [`str_text`, `str-text`],
  [`str_end`, `str-end`],
  [`str_expr_start`, `str-expr-start`],
  [`comment_sl`, `comment-sl`],
  [`comment_ml`, `comment-ml`],
  [`ignorable`, `ignorable`],
  [`prefix_neg`, `prefix_neg`],
  [`ident`, `ident`],
  [`number`, `number`],
  [`error`, `error`],
  [`end`, `end`],
  [`jsx_elem_start`, `jsx-elem-start`],
  [`jsx_elem_close`, `jsx-elem-close`],
  [`jsx_frag_open`, `jsx-frag-open`],
  [`jsx_frag_close`, `jsx-elem-close`],
  [`jsx_expr_start`, `jsx-expr-start`],
  [`jsx_text`, `jsx-text`]
];

/**
 * Determines the token type for a lexer match.
 *
 * @param matched the match result; its `groups` property is inspected
 * @param value the matched text
 * @param matchers the matcher stack as `[lexer, curly_count]` pairs; only read
 *   when the `overloaded` group matched
 * @returns {string} a dedicated token type, or `value` itself when no
 *   dedicated type applies
 */
export const get_token_type = (matched, value, matchers) => {
  const groups = matched.groups;
  if (groups == null) {
    return value;
  }

  // `!= null` rather than truthiness: the `end` group matches the empty string.
  for (const [group_name, token_type] of direct_token_types) {
    if (groups[group_name] != null) {
      return token_type;
    }
  }

  if (groups.overloaded == null) {
    return value;
  }

  // `{`, `}` and `>` mean different things depending on the active lexer and
  // on the lexer that is waiting underneath it.
  const [[active_lex, curly_count], below] = matchers;

  switch (value) {
    case `{`:
      return active_lex === jsx_elem_lex ? `jsx-expr-start` : value;

    case `}`: {
      const [lex_below] = below;
      if (curly_count !== 0) {
        // Closes a `{` opened within the current lexer: an ordinary brace.
        return value;
      }
      if (lex_below === str_dq_lex || lex_below === str_sq_lex) {
        return `str-expr-end`;
      }
      if (lex_below === jsx_lex || lex_below === jsx_elem_lex) {
        return `jsx-expr-end`;
      }
      return value;
    }

    case `>`:
      return active_lex === jsx_elem_lex ? `jsx-elem-end` : value;

    default:
      return value;
  }
};
/**
 * Lexes one token from `code` at `start`, using the lexer on top of the
 * matcher stack.
 *
 * @param matchers matcher stack; the first entry's first element is the lexer
 * @param code the complete source text
 * @param {{pos: number, line: number, column: number}} start
 *   location to lex from
 * @returns a pair `[token, end]`, where `token` is
 *   `{type, value, loc: {start, end}}` and `end` is the location after it
 */
export const get_next_token = (matchers, code, start) => {
  const [[active_lex]] = matchers;

  // Only the first match of the lexer against the remaining text is used.
  const remaining = slice(code, start.pos);
  const [first_match] = match_all(remaining, active_lex);
  const [value] = first_match;

  const type = get_token_type(first_match, value, matchers);
  const end = get_loc(start, value);

  return [{ type, value, loc: { start, end } }, end];
};
/**
 * Computes the matcher stack to use after `token` has been lexed.
 *
 * Tokens that open a string, a string expression, a JSX element/fragment or a
 * JSX expression push a new `[lexer, 0]` entry; tokens that close one drop the
 * top entry; `jsx-elem-end` replaces the top entry with the JSX children
 * lexer. Any other token yields a shallow copy of the stack.
 *
 * @param token the token just lexed (`type` and `value` are inspected)
 * @param matchers current matcher stack of `[lexer, curly_count]` entries
 * @returns the next matcher stack
 */
export const get_next_matchers = (token, matchers) => {
  const [top, ...below] = matchers;
  const pushed = (lex) => [[lex, 0], ...matchers];

  if (token != null) {
    const { type, value } = token;

    switch (type) {
      // e.g. {type: 'jsx-elem-close', value: '/>'}
      case `jsx-elem-close`:
      case `jsx-expr-end`:
      case `str-end`:
      case `str-expr-end`:
        return below;

      case `jsx-frag-open`:
        return pushed(jsx_lex);

      case `jsx-elem-start`:
        return pushed(jsx_elem_lex);

      case `jsx-elem-end`:
        return [[jsx_lex, 0], ...below];

      case `jsx-expr-start`:
      case `str-expr-start`:
        return pushed(fink_lex);

      case `str-start`:
        if (value === `"`) {
          return pushed(str_dq_lex);
        }
        if (value === `'`) {
          return pushed(str_sq_lex);
        }
        break;

      default:
        break;
    }
  }

  return [top, ...below];
};
/**
 * Updates the curly-brace counter of the top matcher for `token`.
 *
 * A token whose value is `{` increments the counter, `}` decrements it, and
 * anything else leaves it untouched.
 *
 * @param token the token just lexed (only `value` is inspected)
 * @param matchers matcher stack of `[lexer, curly_count]` entries
 * @returns a new stack whose top entry carries the updated counter
 */
export const update_matcher_state = (token, matchers) => {
  const [[top_lex, curly_count], ...below] = matchers;
  const text = token != null ? token.value : undefined;

  let next_count;
  if (text === `{`) {
    next_count = curly_count + 1;
  } else if (text === `}`) {
    next_count = curly_count - 1;
  } else {
    next_count = curly_count;
  }

  return [[top_lex, next_count], ...below];
};
/**
 * Tokenizes `code`, beginning at `start`.
 *
 * Each step lexes one token with the current matcher stack, updates the
 * curly-brace counter, and derives the matcher stack for the next step.
 *
 * @param code the source text to tokenize
 * @param {{pos: number, line: number, column: number}} [start]
 *   location to start from; defaults to position 0, line 1, column 0
 * @returns the result of `unfold_ac(step)(undefined)` - presumably a sequence
 *   of tokens; confirm against @fink/std-lib/iter.js
 */
export const tokenize = (code, start = {
  pos: 0,
  line: 1,
  column: 0
}) => {
  const first_ctx = {
    code,
    start,
    matchers: [[fink_lex, 0], []]
  };

  // The first argument is ignored; the second is the context returned by the
  // previous step and falls back to `first_ctx` when it is undefined.
  const step = (unused_prev, ctx = first_ctx) => {
    const { code: src, start: from, matchers: stack } = ctx;
    const [token, next_start] = get_next_token(stack, src, from);
    const counted_stack = update_matcher_state(token, stack);
    const next_stack = get_next_matchers(token, counted_stack);

    return [token, {
      code: src,
      start: next_start,
      matchers: next_stack
    }];
  };

  return unfold_ac(step)(undefined);
};
// TODO: should prattler allow registering expr builders for token-type + value?
// If so, we could simply test for the token type being 'keyword' rather than
// testing whether the type is in `keywords`, or just use 'ident' for keywords.

// Reserved words of the language.
export const keywords = [
  // same words as the `block` group of `fink_lex`
  "import",
  "fn",
  "pipe",
  "match",
  "else",
  "list",
  "rec",
  "await",
  // same words as the `kwop` group
  "and",
  "or",
  "not",
  "in",
  // same words as the `value` group
  "true",
  "false"
];