tree-sitter-mips
Version:
A syntax parser for the MIPS Instruction Set Architecture.
295 lines (266 loc) • 9.05 kB
JavaScript
/**
* @file A syntax parser for the MIPS Instruction Set Architecture.
* @author Oskar Meyenburg <oskar.meyenburg@gmail.com>
* @license MIT
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
module.exports = grammar({
name: 'mips',
externals: $ => [
$._operand_separator,
$._operator_separator,
$._line_separator,
$._data_separator,
],
extras: $ => [
/[ \t\r]/,
$.comment,
],
conflicts: $ => [
[$.macro_parameters],
[$.control_operands],
[$.integer_operands],
[$.float_operands],
[$._operand, $._expression_argument],
],
rules: {
program: $ => seq(
repeat($._statement),
optional(choice(
$.directive,
$.instruction,
$._label,
)),
optional($.comment),
),
_statement: $ => prec(1, choice(
';',
'\r',
'\n',
choice(
seq($.directive, choice(';', seq(optional($.comment), $._line_separator))),
seq($.instruction, choice(';', seq(optional($.comment), optional('\r'), '\n'))),
),
$._label,
seq($.comment, optional('\r'), '\n'),
)),
comment: $ => /#.*/,
_whitespace: $ => /[ \t]+/,
// TODO: Why is this not just _operand_separator?
_directive_operand_separator: $ => choice(/[ \t]+/, /[ \t]*,[ \t]*/),
directive: $ => seq(choice(
$._macro_directive,
$._integer_directive,
$._float_directive,
$._string_directive,
$._control_directive,
)),
_macro_directive: $ => seq(
field('mnemonic', $.macro_mnemonic),
$._whitespace,
field('name', $.symbol),
optional(choice(
seq(optional($._whitespace), '(', optional(field('parameters', $.macro_parameters)), optional(choice(' ', '\t')), ')'),
seq($._whitespace, field('parameters', $.macro_parameters)),
)),
),
macro_mnemonic: $ => '.macro',
macro_parameters: $ => seq($._macro_parameter, repeat(seq(choice(' ', '\t', seq(optional(choice(' ', '\t')), ',')), $._macro_parameter))),
_macro_parameter: $ => choice(
$.macro_variable,
$.symbol,
),
// Operands can be on multiple lines
_integer_directive: $ => seq(
field('mnemonic', $.integer_mnemonic),
$._whitespace,
field('operands', $.integer_operands),
optional(repeat(choice('\r', '\n', ' ', '\t'))),
),
integer_mnemonic: $ => choice('.word', '.half', '.hword', '.byte', '.dword'),
integer_operands: $ => seq(
$._expression,
repeat(seq(
choice(
' ',
'\t',
/[ \t]*,[ \t]*/,
seq(optional(choice(' ', '\t')), optional($.comment), $._data_separator),
),
$._expression,
)),
optional(repeat($._data_separator)),
),
_float_directive: $ => seq(
field('mnemonic', $.float_mnemonic),
$._whitespace,
field('operands', $.float_operands),
optional(repeat(choice('\r', '\n', ' ', '\t'))),
),
float_mnemonic: $ => choice('.float', '.double'),
float_operands: $ => seq(
$.float,
repeat(seq(
choice(
' ',
'\t',
/[ \t]*,[ \t]*/,
seq(optional(choice(' ', '\t')), optional($.comment), $._data_separator),
),
$.float,
)),
optional(repeat($._data_separator)),
),
_float_operand: $ => choice($.float, $.macro_variable),
_string_directive: $ => seq(
field('mnemonic', $.string_mnemonic),
$._whitespace,
field('string', $._string_operand),
/[ \t]*/,
),
string_mnemonic: $ => choice(
'.ascii',
'.asciiz',
'.string',
),
_string_operand: $ => choice($.string, $.macro_variable),
// Catch-all directive
_control_directive: $ => seq(
field('mnemonic', $.control_mnemonic),
optional(choice(seq(
$._whitespace,
field('operands', $.control_operands),
optional($._directive_operand_separator),
), /[ \t]+/)),
),
control_mnemonic: $ => prec(-1, /\.[a-z_]+/),
control_operands: $ => seq(
$._control_operand,
repeat(seq(
$._directive_operand_separator,
$._control_operand,
)),
),
_control_operand: $ => choice(
$._expression,
$.string,
),
// NOTE: Mars does also allow this: %macro()
instruction: $ => seq(
field('opcode', $.opcode),
optional(choice(
$._call_expression,
seq(
/[ \t]+/,
optional(field('operands', choice(
$.operands,
$._call_expression,
))),
),
)),
),
opcode: $ => token(prec(1, /[a-zA-Z_][a-zA-Z0-9_.]*/)),
operands: $ => seq(
$._operand,
repeat(seq(
choice(',', $._operand_separator),
$._operand,
)),
optional($._operand_separator),
),
_operand: $ => choice(
$.address,
$._expression,
$.float,
$.string,
$.modulo,
),
// Support macro-style calling.
// Examples: `exit(0)`, `for($t0, 0, 3)`
_call_expression: $ => seq('(', optional(field('arguments', $.operands)), ')'),
// Standalone fallback, because it gets in trouble with macro_variable.
// Used as operand in instruction.
// Example: `2 % 5` but not `2%5` or `2% 5`
modulo: $ => token(prec(-1, '%')),
// Matches primitives, registers, macro variables and compound expressions.
// Does not match floats, floats are not accepted in expressions, but only
// as standalone operands or in directives.
// Examples: `1`, `%var + 3`, `(label + 7)`
// TODO: Why does this match registers?
_expression: $ => choice(
$.binary_expression,
$.unary_expression,
$.parenthesized_expression,
$.macro_variable,
$.register,
$.symbol,
$.char,
$.octal,
$.decimal,
$.hexadecimal,
),
binary_expression: $ => choice(
prec.left(1, seq($._left_expression, '||', $._right_expression)),
prec.left(2, seq($._left_expression, '&&', $._right_expression)),
prec.left(3, seq($._left_expression, '|', $._right_expression)),
prec.left(4, seq($._left_expression, '^', $._right_expression)),
prec.left(5, seq($._left_expression, '&', $._right_expression)),
prec.left(6, seq($._left_expression, '==', $._right_expression)),
prec.left(6, seq($._left_expression, '!=', $._right_expression)),
prec.left(7, seq($._left_expression, '<', $._right_expression)),
prec.left(7, seq($._left_expression, '>', $._right_expression)),
prec.left(7, seq($._left_expression, '<=', $._right_expression)),
prec.left(7, seq($._left_expression, '>=', $._right_expression)),
prec.left(8, seq($._left_expression, '<<', $._right_expression)),
prec.left(8, seq($._left_expression, '>>', $._right_expression)),
prec.left(9, seq($._left_expression, '+', $._right_expression)),
prec.left(9, seq($._left_expression, '-', $._right_expression)),
prec.left(10, seq($._left_expression, '*', $._right_expression)),
prec.left(10, seq($._left_expression, '/', $._right_expression)),
prec.left(10, seq($._left_expression, '%', $._right_expression)),
prec.left(20, seq($._left_expression, '=', $._right_expression)),
),
_left_expression: $ => prec(1, seq(field('left', $._expression), optional($._operator_separator))),
_right_expression: $ => field('right', $._expression),
parenthesized_expression: $ => seq('(', $._expression_argument, ')'),
unary_expression: $ => choice(
prec.right(11, seq('-', $._expression_argument)),
prec.right(11, seq('~', $._expression_argument)),
prec.right(11, seq('!', $._expression_argument)),
),
_expression_argument: $ => field('argument', $._expression),
// Primitive data types.
char: $ => /'(?:\\.|[^'\\])'/,
string: $ => /"(?:\\.|[^"\\])*"/,
octal: $ => /-?0[0-7]*/,
decimal: $ => /-?\d+/,
hexadecimal: $ => /-?0[xX][0-9a-fA-F]+/,
float: $ => token(choice(
seq(
choice(/-?\d+\.?\d*/, /-?\d*\.\d+/),
optional(/[eE][+-]?\d+/),
),
/-?\d+[eE][+-]?\d+/,
)),
register: $ => token(seq('$', choice(
'zero', 'at', 'gp', 'sp', 'fp', 'ra',
/[vk][01]/, /[ac][0-3]/, /t[0-9]/, /s[0-8]/,
/f?([12]?[0-9]|3[0-1])/,
))),
// Macro variables can start with percent, dollar and backslash.
// Lower precedence than registers, because they overlap.
macro_variable: $ => /[%$\\][0-9a-zA-Z_:$%\\]+/,
// Bare identifier without colon.
symbol: $ => /[a-zA-Z_][a-zA-Z0-9_]*/,
// identifier with colon.
_label: $ => seq($.label, /[ \t]*/),
label: $ => token(prec(2, /[a-zA-Z_][a-zA-Z0-9_]*:/)),
// Examples: `main($s4)`, `value+4($s1)`, `($v1)`, `-0x10($a0)`
// Cannot match expression-like addresses: main, main+2
address: $ => prec(1, seq(
optional(field('offset', $._expression)),
'(', field('base', choice($.register, $.macro_variable)), ')',
)),
},
});