UNPKG

tolkfmt-test-dev

Version:

Code formatter for the Tolk programming language

697 lines (620 loc) 22.1 kB
/** * @file Tolk grammar for tree-sitter * @author TON Blockchain * @license MIT */ /// <reference types="tree-sitter-cli/dsl" /> // @ts-check function commaSep(rule) { return optional(commaSep1(rule)) } function pipeSep1(rule) { return seq(rule, repeat1(seq("|", rule))) } function commaSep1(rule) { return seq(rule, repeat(seq(",", rule))) } function commaSep2(rule) { return seq(rule, repeat1(seq(",", rule))) } const TOLK_GRAMMAR = { source_file: $ => repeat($._top_level_declaration), // ---------------------------------------------------------- // top-level declarations _top_level_declaration: $ => choice( $.tolk_required_version, $.import_directive, $.global_var_declaration, $.constant_declaration, $.type_alias_declaration, $.struct_declaration, $.enum_declaration, $.function_declaration, $.method_declaration, $.get_method_declaration, $.empty_statement, ), tolk_required_version: $ => seq("tolk", field("value", $.version_value)), version_value: $ => /(\d+)(.\d+)?(.\d+)?/, import_directive: $ => seq("import", field("path", $.string_literal)), global_var_declaration: $ => prec.right( seq( optional(field("annotations", $.annotation_list)), "global", field("name", $.identifier), ":", field("type", $._type_hint), optional(";"), ), ), constant_declaration: $ => prec.right( seq( optional(field("annotations", $.annotation_list)), "const", field("name", $.identifier), optional(seq(":", field("type", $._type_hint))), "=", field("value", $._expression), optional(";"), ), ), type_alias_declaration: $ => prec.right( seq( optional(field("annotations", $.annotation_list)), "type", field("name", $.identifier), optional(field("type_parameters", $.type_parameters)), "=", optional("|"), field("underlying_type", choice($._type_hint, $.builtin_specifier)), optional(";"), ), ), struct_declaration: $ => seq( optional(field("annotations", $.annotation_list)), "struct", optional(seq("(", field("pack_prefix", $.number_literal), ")")), field("name", $.identifier), optional(field("type_parameters", $.type_parameters)), optional(field("body", $.struct_body)), ), struct_body: $ => seq( "{", optional( seq( $.struct_field_declaration, repeat(seq(optional(choice(",", ";")), $.struct_field_declaration)), ), ), optional(choice(",", ";")), "}", ), struct_field_declaration: $ => seq( field("modifiers", optional($.struct_field_modifiers)), field("name", $.identifier), ":", field("type", $._type_hint), optional(seq("=", field("default", $._expression))), ), struct_field_modifiers: $ => repeat1(choice("readonly", "private")), enum_declaration: $ => seq( optional(field("annotations", $.annotation_list)), "enum", field("name", $.identifier), optional(seq(":", field("backed_type", $._type_hint))), optional(field("body", $.enum_body)), ), enum_body: $ => seq( "{", optional( seq( $.enum_member_declaration, repeat(seq(optional(","), $.enum_member_declaration)), ), ), optional(","), "}", ), enum_member_declaration: $ => seq(field("name", $.identifier), optional(seq("=", field("default", $._expression)))), // ---------------------------------------------------------- // functions and their body _function_body: $ => choice( field("body", $.block_statement), field("asm_body", $.asm_body), field("builtin_specifier", $.builtin_specifier), ), function_declaration: $ => seq( optional(field("annotations", $.annotation_list)), "fun", field("name", $.identifier), optional(field("type_parameters", $.type_parameters)), optional(field("parameters", $.parameter_list)), optional(seq(":", field("return_type", optional($._type_hint)))), optional($._function_body), ), method_receiver: $ => seq(field("receiver_type", $._type_hint), "."), method_declaration: $ => seq( optional(field("annotations", $.annotation_list)), "fun", field("receiver", $.method_receiver), field("name", $.identifier), optional(field("type_parameters", $.type_parameters)), optional(field("parameters", $.parameter_list)), optional(seq(":", field("return_type", optional($._type_hint)))), optional($._function_body), ), get_method_declaration: $ => seq( optional(field("annotations", $.annotation_list)), "get", optional("fun"), field("name", $.identifier), optional(field("parameters", $.parameter_list)), optional(seq(":", field("return_type", optional($._type_hint)))), optional(field("body", $._function_body)), ), annotation_list: $ => repeat1($.annotation), annotation: $ => seq( "@", optional(field("name", $.identifier)), optional(field("arguments", $.annotation_arguments)), ), annotation_arguments: $ => seq("(", commaSep($._expression), optional(","), ")"), type_parameters: $ => seq("<", commaSep($.type_parameter), optional(","), ">"), type_parameter: $ => seq(field("name", $.identifier), optional(seq("=", field("default", $._type_hint)))), parameter_list: $ => seq("(", commaSep($.parameter_declaration), optional(","), ")"), parameter_declaration: $ => seq( field("mutate", optional("mutate")), field("name", $.identifier), optional(seq(":", field("type", $._type_hint))), optional(seq("=", field("default", $._expression))), ), asm_body: $ => prec.right( seq("asm", optional($.asm_body_rearrange), repeat1($.string_literal), optional(";")), ), asm_body_rearrange: $ => seq( "(", optional(field("params", $.asm_body_rearrange_params)), optional(field("return", $.asm_body_rearrange_return)), ")", ), asm_body_rearrange_params: $ => repeat1($.identifier), asm_body_rearrange_return: $ => seq("->", repeat($.number_literal)), builtin_specifier: $ => "builtin", // ---------------------------------------------------------- // statements _statement_ending_with_brace: $ => choice( $.block_statement, $.if_statement, $.while_statement, $.repeat_statement, $.try_catch_statement, $.empty_statement, $.match_statement, ), _statement_require_semicolon_unless_last: $ => choice( $.local_vars_declaration, $.return_statement, $.do_while_statement, $.break_statement, $.continue_statement, $.throw_statement, $.assert_statement, $.expression_statement, ), _statement: $ => choice( $._statement_ending_with_brace, prec.right(seq($._statement_require_semicolon_unless_last, optional(";"))), ), local_vars_declaration: $ => seq( field("kind", choice("var", "val")), field("lhs", $._var_declaration_lhs), optional(seq("=", field("assigned_val", $._expression))), ), tuple_vars_declaration: $ => seq("[", field("vars", commaSep1($._var_declaration_lhs)), optional(","), "]"), tensor_vars_declaration: $ => seq("(", field("vars", commaSep1($._var_declaration_lhs)), optional(","), ")"), var_declaration: $ => seq( field("name", $.identifier), optional(choice(seq(":", field("type", $._type_hint)), field("redef", "redef"))), ), _var_declaration_lhs: $ => choice($.tuple_vars_declaration, $.tensor_vars_declaration, $.var_declaration), block_statement: $ => prec.dynamic( 100, seq( "{", repeat($._statement), optional($._statement_require_semicolon_unless_last), "}", ), ), return_statement: $ => prec.right(seq("return", optional(field("body", $._expression)))), repeat_statement: $ => seq("repeat", "(", field("count", $._expression), ")", field("body", $.block_statement)), if_statement: $ => prec.right( seq( "if", "(", field("condition", $._expression), ")", optional(field("body", $.block_statement)), optional( choice( seq("else", field("alternative", $.if_statement)), seq("else", field("alternative", $.block_statement)), ), ), ), ), do_while_statement: $ => seq( "do", field("body", $.block_statement), "while", "(", field("condition", $._expression), ")", ), while_statement: $ => seq("while", "(", field("condition", $._expression), ")", field("body", $.block_statement)), break_statement: $ => "break", continue_statement: $ => "continue", throw_statement: $ => seq( "throw", field("excNo", $._expression), // excNo, (excNo), (excNo, arg); but (1,2,3) will be also acceptable ), assert_statement: $ => seq( "assert", choice( seq( "(", field("condition", $._expression), ")", "throw", field("excNo", $._expression), ), seq( "(", field("condition", $._expression), ",", field("excNo", $._expression), ")", ), ), ), catch_clause: $ => seq( optional( seq( "(", field("catch_var1", $.identifier), optional(seq(",", field("catch_var2", $.identifier))), ")", ), ), field("catch_body", $.block_statement), ), try_catch_statement: $ => seq("try", field("try_body", $.block_statement), "catch", field("catch", $.catch_clause)), empty_statement: $ => ";", expression_statement: $ => $._expression, // ---------------------------------------------------------- // expressions _expression: $ => choice( $.assignment, $.set_assignment, $.ternary_operator, $.binary_operator, $.unary_operator, $.lazy_expression, $.cast_as_operator, $.is_type_operator, $.not_null_operator, $.dot_access, $.function_call, $.generic_instantiation, $.parenthesized_expression, $.match_expression, $.object_literal, $.tensor_expression, $.typed_tuple, $.lambda_expression, $.number_literal, $.string_literal, $.boolean_literal, $.null_literal, $.underscore, $.identifier, ), assignment: $ => prec.right(10, seq(field("left", $._expression), "=", field("right", $._expression))), set_assignment: $ => prec.right( 10, seq( field("left", $._expression), field( "operator_name", choice("+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^="), ), field("right", $._expression), ), ), ternary_operator: $ => prec.right( 10, seq( field("condition", $._expression), "?", field("consequence", $._expression), ":", field("alternative", $._expression), ), ), _brackets_lt_gt: _ => choice("<", ">"), // extracted specially to resolve conflicts between `<` and `f<int>` _comparison_lt_gt: $ => prec.left(15, seq($._expression, field("operator_name", $._brackets_lt_gt), $._expression)), binary_operator: $ => choice( prec.left( 13, seq($._expression, field("operator_name", choice("&&", "||")), $._expression), ), prec.left( 14, seq($._expression, field("operator_name", choice("&", "|", "^")), $._expression), ), prec.left( 15, seq( $._expression, field("operator_name", choice("==", "!=", "<=", ">=", "<=>")), $._expression, ), ), $._comparison_lt_gt, prec.left( 17, seq( $._expression, field("operator_name", choice("<<", ">>", "~>>", "^>>")), $._expression, ), ), prec.left( 20, seq($._expression, field("operator_name", choice("-", "+")), $._expression), ), prec.left( 30, seq( $._expression, field("operator_name", choice("*", "/", "%", "~/", "^/")), $._expression, ), ), ), unary_operator: $ => choice( prec.left( 75, seq( field("operator_name", choice("!", "~", "-", "+")), field("argument", $._expression), ), ), ), lazy_expression: $ => choice(prec.right(5, seq("lazy", field("argument", $._expression)))), cast_as_operator: $ => prec(40, seq(field("expr", $._expression), "as", field("casted_to", $._type_hint))), is_type_operator: $ => prec( 40, seq( field("expr", $._expression), field("operator", choice("is", "!is")), field("rhs_type", $._type_hint), ), ), dot_access: $ => prec( 80, seq( field("obj", $._expression), ".", field("field", choice($.identifier, $.numeric_index)), // for method call, dot_access is wrapped into function_call, "field" actually means method name ), ), not_null_operator: $ => prec(90, seq(field("inner", $._expression), "!")), function_call: $ => prec.left( 90, seq( field("callee", $._expression), // callee can be generic_instantiation or dot_access field("arguments", $.argument_list), ), ), argument_list: $ => seq("(", commaSep($.call_argument), optional(","), ")"), call_argument: $ => seq(optional("mutate"), field("expr", $._expression)), generic_instantiation: $ => prec( 10, seq(field("expr", $._expression), field("instantiationTs", $.instantiationT_list)), ), instantiationT_list: $ => prec.dynamic( 1, seq( // prec.dynamic is important "<", field("types", commaSep1($._type_hint)), ">", ), ), match_statement: $ => prec(99, $.match_expression), match_expression: $ => prec.right( seq( "match", "(", field("expr", choice($._expression, $.local_vars_declaration)), ")", optional(field("body", $.match_body)), ), ), match_body: $ => seq("{", optional(repeat1($.match_arm)), "}"), match_arm: $ => seq( choice( field("pattern_type", $._type_hint), field("pattern_expr", $._expression), field("pattern_else", "else"), ), "=>", field( "body", choice( field("block", $.block_statement), field("return", $.return_statement), field("throw", $.throw_statement), field("expr", $._expression), ), ), optional(","), // todo now `match (...) { 1 => 1 2 => 2 }` is ok, but actually comma is required ), object_literal: $ => prec.dynamic( 99, seq(optional(field("type", $._type_hint)), field("arguments", $.object_literal_body)), ), object_literal_body: $ => seq("{", commaSep($.instance_argument), optional(","), "}"), instance_argument: $ => seq( field("name", $.identifier), optional(seq(":", optional(field("value", $._expression)))), ), parenthesized_expression: $ => seq("(", field("inner", $._expression), optional(","), ")"), tensor_expression: $ => choice(seq("(", ")"), seq("(", commaSep2($._expression), optional(","), ")")), typed_tuple: $ => seq("[", commaSep($._expression), optional(","), "]"), lambda_expression: $ => prec.right( seq( "fun", field("parameters", $.parameter_list), optional(seq(":", field("return_type", optional($._type_hint)))), optional(field("body", $._function_body)), ), ), // ---------------------------------------------------------- // type system _type_hint: $ => prec.dynamic( 100, choice( alias($.identifier, $.type_identifier), $.type_instantiatedTs, $.tensor_type, $.tuple_type, $.parenthesized_type, $.fun_callable_type, $.nullable_type, $.union_type, $.null_literal, ), ), type_instantiatedTs: $ => prec.dynamic( 104, seq( field("name", alias($.identifier, $.type_identifier)), field("arguments", $.instantiationT_list), ), ), tensor_type: $ => prec.dynamic(103, choice(seq("(", ")"), seq("(", commaSep2($._type_hint), ")"))), tuple_type: $ => prec(103, seq("[", commaSep($._type_hint), "]")), parenthesized_type: $ => prec(103, seq("(", field("inner", $._type_hint), ")")), fun_callable_type: $ => prec.right( 101, seq(field("param_types", $._type_hint), "->", field("return_type", $._type_hint)), ), nullable_type: $ => prec.right(110, seq(field("inner", $._type_hint), "?")), union_type: $ => prec.right( 102, seq( optional("|"), field("lhs", $._type_hint), "|", field("rhs", $._type_hint), optional("|"), ), ), // ---------------------------------------------------------- // common constructions number_literal: $ => token(choice(seq("0x", /[0-9a-fA-F]+/), seq("0b", /[01]+/), /[0-9]+/)), string_literal: $ => token( choice( seq('"""', repeat(choice(/[^"]/, /"[^"]/, /""[^"]/)), '"""'), /"(?:[^"\\\n]|\\.)*"/, // sing quote ), ), boolean_literal: $ => choice("true", "false"), null_literal: $ => "null", underscore: $ => "_", identifier: $ => /`[^`]+`|[a-zA-Z$_][a-zA-Z0-9$_]*/, numeric_index: $ => /[0-9]+/, // http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890 comment: $ => token(choice(seq("//", /[^\r\n]*/), seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"))), } module.exports = grammar({ name: "tolk", conflicts: $ => [ [$.instantiationT_list, $._brackets_lt_gt], [$._comparison_lt_gt, $.binary_operator, $.generic_instantiation], [$._expression, $.instance_argument], [$._expression, $._type_hint], [$.type_parameter, $._type_hint], [$.type_instantiatedTs, $._type_hint], [$._expression, $.type_instantiatedTs], [$.tensor_type, $.tensor_expression], [$.union_type], [$.block_statement, $.object_literal_body], ], extras: $ => [/\s/, $.comment], word: $ => $.identifier, rules: TOLK_GRAMMAR, })