parsing
Version:
JSON grammar-based parser
963 lines (906 loc) • 39.8 kB
JavaScript
/*
* Parsing - JSON grammar-based parser
* Copyright (c) Dan Phillimore (asmblah)
* http://asmblah.github.com/parsing/
*
* Released under the MIT license
* https://github.com/asmblah/parsing/raw/master/MIT-LICENSE.txt
*/
;
var _ = require('microdash'),
expect = require('chai').expect,
nowdoc = require('nowdoc'),
Parser = require('../../src/Parser');
describe('Parser', function () {
describe('parse()', function () {
/**
 * Registers a Mocha test asserting that a parser built from `grammarSpec`
 * turns `text` into `expectedAST`.
 *
 * The grammar spec is serialised into the test title. RegExp values are
 * rendered with their own toString() so that the patterns are visible in
 * the title instead of being serialised as empty objects.
 *
 * @param {Object} grammarSpec Grammar spec handed to the Parser constructor
 * @param {string} text Input passed to parser.parse()
 * @param {*} expectedAST Value the parse result must deep-equal
 */
function checkGrammarAndTextGeneratesAST(grammarSpec, text, expectedAST) {
    function renderRegExps(key, value) {
        return value instanceof RegExp ? value.toString() : value;
    }

    var title = 'should return the correct AST when the grammar spec is ' +
        JSON.stringify(grammarSpec, renderRegExps);

    it(title, function () {
        var actualAST = new Parser(grammarSpec).parse(text);

        expect(actualAST).to.deep.equal(expectedAST);
    });
}
/**
 * Scenario-object front end for checkGrammarAndTextGeneratesAST().
 *
 * @param {{grammarSpec: Object, text: string, expectedAST: *}} scenario
 */
function check(scenario) {
    var grammarSpec = scenario.grammarSpec,
        text = scenario.text,
        expectedAST = scenario.expectedAST;

    checkGrammarAndTextGeneratesAST(grammarSpec, text, expectedAST);
}
/**
 * Parses the scenario's text immediately (no test is registered) and asserts
 * that the result deep-equals the scenario's expected AST. Intended to be
 * called from inside an existing `it(...)` body.
 *
 * @param {{grammarSpec: Object, text: string, expectedAST: *}} scenario
 */
function parseAndCheck(scenario) {
    var actualAST = new Parser(scenario.grammarSpec).parse(scenario.text);

    expect(actualAST).to.deep.equal(scenario.expectedAST);
}
describe('when given a single token and grammar contains only a matching rule', function () {
    // Smallest possible grammar: one rule, which is also the start rule,
    // capturing its regex match under the name "value".
    var numberRule = {name: 'value', what: /\d+/},
        grammarSpec = {
            rules: {
                'number': numberRule
            },
            start: 'number'
        };

    check({
        grammarSpec: grammarSpec,
        text: '128',
        expectedAST: {
            name: 'number',
            value: '128'
        }
    });
});
describe('when reused for parsing a different string', function () {
    it('should return the correct AST', function () {
        var grammarSpec = {
                rules: {
                    'number': {name: 'value', what: /\d+/}
                },
                start: 'number'
            },
            parser = new Parser(grammarSpec),
            secondAST;

        // The first result is deliberately discarded: only the outcome of
        // reusing the same parser instance for a second input is asserted.
        parser.parse('128');
        secondAST = parser.parse('321');

        expect(secondAST).to.deep.equal({
            name: 'number',
            value: '321'
        });
    });
});
describe('"allOf" qualifier', function () {
    // The "expression" rule is given as a plain array of components; every
    // one of them must match in sequence for the text '128+67'.
    var expressionComponents = [
            {name: 'left', what: 'number'},
            {name: 'operator', what: 'add'},
            {name: 'right', what: 'number'}
        ],
        scenario = {
            grammarSpec: {
                rules: {
                    'add': /\+/,
                    'expression': expressionComponents,
                    'number': /\d+/
                },
                start: 'expression'
            },
            text: '128+67',
            expectedAST: {
                name: 'expression',
                left: '128',
                operator: '+',
                right: '67'
            }
        };

    check(scenario);
});
describe('"oneOf" qualifier', function () {
    // Alternatives offered to the "value" component. The input 'hello'
    // contains no digits, so only the second pattern can match it.
    var alternatives = [/\d+/, /\w+/],
        scenario = {
            grammarSpec: {
                rules: {
                    'thing': [{name: 'value', oneOf: alternatives}, /;/]
                },
                start: 'thing'
            },
            text: 'hello;',
            expectedAST: {
                name: 'thing',
                value: 'hello'
            }
        };

    check(scenario);
});
// Exercises the grammar-level `ignore` option (naming the rule whose matches are skipped)
// together with the `ignoreWhitespace` arg that can be set on a rule or on a component.
describe('whitespace delimiter: "ignore" option', function () {
// The input has spaces around "+", yet the expected AST holds only the three captures.
describe('should be able to skip whitespace', function () {
check({
grammarSpec: {
ignore: 'whitespace',
rules: {
'add': /\+/,
'expression': [{name: 'left', what: 'number'}, {name: 'operator', what: 'add'}, {name: 'right', what: 'number'}],
'number': /\d+/,
'whitespace': /\s+/
},
start: 'expression'
},
text: '321 + 89',
expectedAST: {
name: 'expression',
left: '321',
operator: '+',
right: '89'
}
});
});
// Same grammar as above, but with an `ErrorHandler` class supplied in the grammar spec
// and input that begins with whitespace.
it('should be able to skip whitespace at the start of the string when there is an error handler', function () {
function ErrorHandler() {}
// Rethrow so that any parse error surfaces as a test failure
ErrorHandler.prototype.handle = function (parseError) {
throw parseError;
};
parseAndCheck({
grammarSpec: {
ignore: 'whitespace',
ErrorHandler: ErrorHandler,
rules: {
'add': /\+/,
'expression': [{name: 'left', what: 'number'}, {
name: 'operator',
what: 'add'
}, {name: 'right', what: 'number'}],
'number': /\d+/,
'whitespace': /\s+/
},
start: 'expression'
},
text: ' 321 + 89',
expectedAST: {
name: 'expression',
left: '321',
operator: '+',
right: '89'
}
});
});
// `ignoreWhitespace: false` is set on the "string" rule itself, whose regex also matches
// whitespace, so the spaces on both sides of "+" end up inside the captures.
describe('should be overridden when "ignoreWhitespace" arg is set', function () {
check({
grammarSpec: {
ignore: 'whitespace',
rules: {
'add': /\+/,
'expression': [
{name: 'left', what: 'string'},
{name: 'operator', what: 'add'},
{name: 'right', what: 'string'}
],
'string': {what: /[\d\s]+/, ignoreWhitespace: false},
'whitespace': /\s+/
},
start: 'expression'
},
text: '321 + 89',
expectedAST: {
name: 'expression',
left: '321 ',
operator: '+',
// Check that space before number is captured too
right: ' 89'
}
});
});
// Here the flag is set on the "right" component only; the "string" rule it references
// carries no flag of its own and is expected to inherit it.
describe('should be inherited by sub-components when "ignoreWhitespace" arg is false', function () {
check({
grammarSpec: {
ignore: 'whitespace',
rules: {
'add': /\+/,
'expression': [
{name: 'left', what: 'string'},
{name: 'operator', what: 'add'},
{name: 'right', what: 'string', ignoreWhitespace: false}
],
'string': /[\d\s]+/,
'whitespace': /\s+/
},
start: 'expression'
},
text: '321 + 89',
expectedAST: {
name: 'expression',
// The trailing space is part of the regex match itself (/[\d\s]+/ matches whitespace)
left: '321 ',
operator: '+',
// Check that space before number is captured too
right: ' 89'
}
});
});
// The component says `ignoreWhitespace: false` but the referenced rule says `true`;
// the expected AST shows the rule-level setting winning for "right".
describe('should only be inherited by sub-components until "ignoreWhitespace" arg is true again', function () {
check({
grammarSpec: {
ignore: 'whitespace',
rules: {
'add': /\+/,
'expression': [
{name: 'left', what: 'string'},
{name: 'operator', what: 'add'},
// `ignoreWhitespace` arg here should be overridden by the one on the rule below
{name: 'right', what: 'string', ignoreWhitespace: false}
],
'string': {what: /[\d\s]+/, ignoreWhitespace: true},
'whitespace': /\s+/
},
start: 'expression'
},
text: '321 + 89',
expectedAST: {
name: 'expression',
left: '321 ',
operator: '+',
// Space before number should not be captured, as the `ignoreWhitespace` arg was overridden again
right: '89'
}
});
});
});
describe('to prevent overriding the component\'s owner rule\'s name', function () {
    // The start rule "name" is a bare reference to "string", and "string"
    // captures a component called "value". The resulting node must carry
    // the name of the rule that owns the capture.
    var stringRule = {
            components: {name: 'value', what: /\w+/}
        },
        grammarSpec = {
            rules: {
                'name': 'string',
                'string': stringRule
            },
            start: 'name'
        };

    check({
        grammarSpec: grammarSpec,
        text: 'hello',
        expectedAST: {
            // Make sure "string" is the capture name, not "name"
            name: 'string',
            value: 'hello'
        }
    });
});
// Exercises the `captureIndex` arg: the "string" rule's regex matches a double-quoted
// string and asks for capturing group 1 rather than the whole match.
describe('when the index of the capturing group to capture is specified', function () {
check({
grammarSpec: {
rules: {
'expression': [{name: 'left', what: 'string'}, (/\s*\.\s*/), {name: 'right', what: 'string'}],
'string': {
components: [{what: /"([^"]*)"/, captureIndex: 1}]
}
},
start: 'expression'
},
text: '"test" . "world"',
expectedAST: {
// Only capturing group 1 (the text between the quotes) should be captured, not the quotes themselves
name: 'expression',
left: 'test',
right: 'world'
}
});
});
// Support specifying a fallback result for a rule: when the component named by
// `ifNoMatch.component` does not match, the capture named by `ifNoMatch.capture`
// is returned as the rule's result instead of a full AST node.
describe('"ifNoMatch" option', function () {
// "value" is a named optional component; the input 'abc=' has nothing after "=",
// so the expected result is just the "name" capture.
describe('when specifying to return a particular component as the result if a component does not match', function () {
check({
grammarSpec: {
rules: {
'thing': {
components: [{name: 'name', what: (/\w+/)}, (/=/), {name: 'value', optionally: (/\w+/)}],
ifNoMatch: {component: 'value', capture: 'name'}
}
},
start: 'thing'
},
text: 'abc=',
expectedAST: 'abc'
});
});
// Variant where the name "value" sits on the component nested inside an unnamed `optionally` wrapper.
describe('when specifying to return a particular component as the result if a (possibly uncaptured) component does not match', function () {
check({
grammarSpec: {
rules: {
'thing': {
components: [{name: 'name', what: (/\w+/)}, (/=/), {optionally: {name: 'value', what: (/\w+/)}}],
ifNoMatch: {component: 'value', capture: 'name'}
}
},
start: 'thing'
},
text: 'abc=',
expectedAST: 'abc'
});
});
});
// Exercises the `optionally` qualifier and how the `wrapInArray` arg changes the shape
// of the captured value (string vs. array) for empty and non-empty matches.
describe('"optionally" qualifier', function () {
describe('when "wrapInArray" is not specified and result is empty', function () {
var scenario = {
grammarSpec: {
rules: {
'thing': {
components: [{name: 'first', what: (/ab/)}, {name: 'second', optionally: (/cd/)}]
}
},
start: 'thing'
},
text: 'ab',
expectedAST: {
name: 'thing',
first: 'ab',
// Note the string instead of empty array
second: ''
}
};
check(scenario);
// Explicit strict-equality check of the value's type, on top of the deep-equality check registered above
it('should return a string for the "second" component instead of an empty array', function () {
var parser = new Parser(scenario.grammarSpec);
expect(parser.parse(scenario.text).second).to.equal('');
});
});
describe('when "wrapInArray" is specified and result is empty', function () {
var scenario = {
grammarSpec: {
rules: {
'thing': {
components: [{name: 'first', what: (/ab/)}, {name: 'second', optionally: (/cd/), wrapInArray: true}]
}
},
start: 'thing'
},
text: 'ab',
expectedAST: {
name: 'thing',
first: 'ab',
// Note the empty array instead of string
second: []
}
};
check(scenario);
// Explicit type check of the value, on top of the deep-equality check registered above
it('should return an array for the "second" component instead of a string', function () {
var parser = new Parser(scenario.grammarSpec);
expect(parser.parse(scenario.text).second).to.be.an('array');
});
});
// With a non-empty match the single captured string is expected to be wrapped in a one-element array
describe('when "wrapInArray" is specified and result is not empty', function () {
check({
grammarSpec: {
rules: {
'thing': {
components: [{name: 'first', what: (/ab/)}, {name: 'second', optionally: (/cd/), wrapInArray: true}]
}
},
start: 'thing'
},
text: 'abcd',
expectedAST: {
name: 'thing',
first: 'ab',
second: ['cd']
}
});
});
// NOTE(review): this suite is nested inside '"optionally" qualifier' although the grammar
// below uses no `optionally` component - it looks like it belongs one level up; confirm
// before moving, as that would change the reported suite path.
// The rule-level `options` object ({fun: true}) is expected to be merged into the resulting AST node.
describe('"options" option', function () {
check({
grammarSpec: {
rules: {
'increment': {
components: [{name: 'operator', what: (/\+\+/)}, {name: 'operand', what: (/\$\w+/)}],
options: {fun: true}
}
},
start: 'increment'
},
text: '++$a',
expectedAST: {
name: 'increment',
fun: true,
operand: '$a',
operator: '++'
}
});
});
});
describe('"what" qualifier', function () {
    describe('"replace" arg', function () {
        var parser,
            scenarios = {};

        /**
         * Builds the "left operator right" scenario shared by both cases.
         * Only the `what` arg of the "right" component differs between them;
         * the `replace` arg swaps a captured "2" for "t".
         */
        function buildScenario(rightWhat) {
            return {
                grammarSpec: {
                    ignore: 'whitespace',
                    rules: {
                        'operator': /\+/,
                        'number': /\d(?:\.\d+)?/,
                        'whitespace': /\s+/,
                        'expression': {
                            components: [
                                {name: 'left', what: 'number'},
                                {name: 'operator', what: 'operator'},
                                {
                                    name: 'right',
                                    what: rightWhat,
                                    replace: [{
                                        pattern: /^2$/,
                                        replacement: 't'
                                    }]
                                }
                            ]
                        }
                    },
                    start: 'expression'
                },
                text: '1 + 2',
                expectedAST: {
                    name: 'expression',
                    left: '1',
                    operator: '+',
                    right: 't'
                }
            };
        }

        scenarios['replacing one character with another character when "what" arg is a regex'] =
            buildScenario(/\d+/);
        scenarios['replacing one character with another character when "what" arg is a rule reference'] =
            buildScenario('number');

        _.each(scenarios, function (scenario, description) {
            describe(description, function () {
                beforeEach(function () {
                    parser = new Parser(scenario.grammarSpec);
                });

                it('should return the correct AST when the text is "' + scenario.text + '"', function () {
                    var actualAST = parser.parse(scenario.text);

                    expect(actualAST).to.deep.equal(scenario.expectedAST);
                });
            });
        });
    });
});
describe('"allowMerge" arg', function () {
    var parser,
        // Rule whose single component opts out of merging
        addOperatorRule = {
            components: {what: /\+/, allowMerge: false}
        },
        scenarios = {
            'when "what" arg is a rule reference': {
                grammarSpec: {
                    ignore: 'whitespace',
                    rules: {
                        'add_operator': addOperatorRule,
                        'number': /\d(?:\.\d+)?/,
                        'whitespace': /\s+/,
                        'expression': {
                            components: [
                                {name: 'left', what: 'number'},
                                {name: 'operator', what: 'add_operator'},
                                {name: 'right', what: 'number'}
                            ]
                        }
                    },
                    start: 'expression'
                },
                text: '1 + 2',
                expectedAST: {
                    name: 'expression',
                    left: '1',
                    // A node named after the rule is expected here, not the matched '+' text
                    operator: {name: 'add_operator'},
                    right: '2'
                }
            }
        };

    Object.keys(scenarios).forEach(function (description) {
        var scenario = scenarios[description];

        describe(description, function () {
            beforeEach(function () {
                parser = new Parser(scenario.grammarSpec);
            });

            it('should return the correct AST when the text is "' + scenario.text + '"', function () {
                var actualAST = parser.parse(scenario.text);

                expect(actualAST).to.deep.equal(scenario.expectedAST);
            });
        });
    });
});
describe('"captureBoundsAs" arg', function () {
    var parser,
        scenarios = {};

    // One end of a bounds object, in the {offset, line, column} shape the expected ASTs use
    function position(offset, line, column) {
        return {offset: offset, line: line, column: column};
    }

    // Shared "left operator right" grammar; the scenarios vary only the
    // "number" pattern and the definition of the right-hand component.
    function buildGrammarSpec(numberPattern, rightComponent) {
        return {
            ignore: 'whitespace',
            rules: {
                'operator': /\+/,
                'number': numberPattern,
                'whitespace': /\s+/,
                'expression': {
                    components: [
                        {name: 'left', what: 'number'},
                        {name: 'operator', what: 'operator'},
                        rightComponent
                    ]
                }
            },
            start: 'expression'
        };
    }

    // Right operand is captured as "right" and its bounds are captured alongside it
    scenarios['when "what" arg is a rule reference and there are two earlier blank lines'] = {
        grammarSpec: buildGrammarSpec(
            /\d(?:\.\d+)?/,
            {name: 'right', what: 'number', captureBoundsAs: 'capturedRightBounds'}
        ),
        text: '\n\n1 + 2',
        expectedAST: {
            name: 'expression',
            left: '1',
            operator: '+',
            right: '2',
            capturedRightBounds: {
                start: position(6, 3, 5),
                end: position(7, 3, 6)
            }
        }
    };

    // The right component has no `name`, so only its bounds appear in the AST
    scenarios['when "what" arg text is not captured, only its bounds'] = {
        grammarSpec: buildGrammarSpec(
            /\d(?:\.\d+)?/,
            {what: 'number', captureBoundsAs: 'capturedRightBounds'}
        ),
        text: '\n\n1 + 2',
        expectedAST: {
            name: 'expression',
            left: '1',
            operator: '+',
            capturedRightBounds: {
                start: position(6, 3, 5),
                end: position(7, 3, 6)
            }
        }
    };

    // Multi-digit operands: the bounds must span the whole five-character match
    scenarios['with longer match'] = {
        grammarSpec: buildGrammarSpec(
            /\d+(?:\.\d+)?/,
            {what: 'number', captureBoundsAs: 'capturedRightBounds'}
        ),
        text: '\n\n126 + 24123',
        expectedAST: {
            name: 'expression',
            left: '126',
            operator: '+',
            capturedRightBounds: {
                start: position(8, 3, 7),
                end: position(13, 3, 12)
            }
        }
    };

    _.each(scenarios, function (scenario, description) {
        describe(description, function () {
            beforeEach(function () {
                parser = new Parser(scenario.grammarSpec);
            });

            it('should return the correct AST when the text is "' + scenario.text + '"', function () {
                var actualAST = parser.parse(scenario.text);

                expect(actualAST).to.deep.equal(scenario.expectedAST);
            });
        });
    });
});
// End-to-end check of a small assignment/arithmetic-expression grammar with operator
// precedence, built afresh (together with its parser) before every test.
describe('when using grammar spec #1', function () {
var grammarSpec,
parser;
beforeEach(function () {
/*
* Based on this EBNF grammar
* - from http://stackoverflow.com/questions/6805172/how-do-you-abstract-some-expression-to-bnf#answer-6805185
*
* AEXP => AS+
* AS => id ':=' EX1 ';'
* EX1 => EX2 (('+' | '-') EX2)*
* EX2 => EX3 (('*' | '/') EX3)*
* EX3 => EX4 ('^' EX3)*
* EX4 => ('+'|'-')? EX5
* EX5 => id | number | '(' EX1 ')'
*/
grammarSpec = {
ignore: 'whitespace',
rules: {
// Terminal rules
'assign': /:=/,
'character': /[;*\/^+-]/,
'id': /[\w$][\w\d$]*/,
'number': /\d(?:\.\d+)?/,
'whitespace': /\s+/,
// Non-terminal rules, mirroring the EBNF above.
// NOTE(review): components of the form {'character': ';'} presumably match the
// "character" rule only when its text equals the given literal - confirm against Parser.
'AEXP': {
components: {name: 'assignment', oneOrMoreOf: 'AS'}
},
'AS': {
components: [{name: 'target', what: 'id'}, 'assign', {name: 'expression', what: 'EX1'}, {'character': ';'}]
},
// EX1-EX4 all set `captureAs: 'EX'`, which is the node name the expected ASTs below use.
// Their `ifNoMatch` options collapse a level to its inner capture when the optional/repeated
// part is absent, which is why e.g. 'waldo:=1;' yields the plain string '1' as the expression.
'EX1': {
captureAs: 'EX',
components: [{name: 'left', what: 'EX2'}, {name: 'right', zeroOrMoreOf: [{name: 'operator', oneOf: [{'character': '+'}, {'character': '-'}]}, {name: 'operand', what: 'EX2'}]}],
ifNoMatch: {component: 'right', capture: 'left'}
},
'EX2': {
captureAs: 'EX',
components: [{name: 'left', what: 'EX3'}, {name: 'right', zeroOrMoreOf: [{name: 'operator', oneOf: [{'character': '*'}, {'character': '/'}]}, {name: 'operand', what: 'EX3'}]}],
ifNoMatch: {component: 'right', capture: 'left'}
},
// Refers to itself for the operand of '^'
'EX3': {
captureAs: 'EX',
components: [{name: 'left', what: 'EX4'}, {name: 'right', zeroOrMoreOf: [{name: 'operator', what: {'character': '^'}}, {name: 'operand', rule: 'EX3'}]}],
ifNoMatch: {component: 'right', capture: 'left'}
},
// Optional unary sign
'EX4': {
captureAs: 'EX',
components: [{name: 'operator', optionally: {oneOf: [{'character': '+'}, {'character': '-'}]}}, {name: 'operand', what: 'EX5'}],
ifNoMatch: {component: 'operator', capture: 'operand'}
},
// Primary: identifier, number or parenthesised EX1
'EX5': {
components: [{oneOf: ['id', 'number', [{'character': '('}, 'EX1', {'character': ')'}]]}]
}
},
start: 'AEXP'
};
parser = new Parser(grammarSpec);
});
// Each scenario pairs an input text with the exact AST expected from it
_.each([
{
// Simplest case: every expression level collapses, leaving a bare string
text: 'waldo:=1;',
expectedAST: {
name: 'AEXP',
assignment: [{
name: 'AS',
target: 'waldo',
expression: '1'
}]
}
},
{
// A single additive operator: only the EX1 level is kept as an 'EX' node
text: 'waldo:=2+3;',
expectedAST: {
name: 'AEXP',
assignment: [{
name: 'AS',
target: 'waldo',
expression: {
name: 'EX',
left: '2',
right: [{
operator: '+',
operand: '3'
}]
}
}]
}
},
{
// Precedence is equivalent to "waldo := (fern + (alpha / ((-beta) ^ gamma)));"
text: 'waldo := fern + alpha / -beta ^ gamma;',
expectedAST: {
name: 'AEXP',
assignment: [{
name: 'AS',
target: 'waldo',
expression: {
name: 'EX',
left: 'fern',
right: [{
operator: '+',
operand: {
name: 'EX',
left: 'alpha',
right: [{
operator: '/',
operand: {
name: 'EX',
left: {
name: 'EX',
operator: '-',
operand: 'beta'
},
right: [{
operator: '^',
operand: 'gamma'
}]
}
}]
}
}]
}
}]
}
}
], function (scenario) {
it('should return the correct AST when the text is "' + scenario.text + '"', function () {
expect(parser.parse(scenario.text)).to.deep.equal(scenario.expectedAST);
});
});
});
describe('custom start rule', function () {
    it('should support specifying a different rule to start parsing from', function () {
        var grammarSpec,
            parser,
            resultAST;

        // Rule consisting of one unmergeable keyword pattern
        function keywordStatement(pattern) {
            return {
                components: [{what: pattern, allowMerge: false}]
            };
        }

        grammarSpec = {
            ignore: 'whitespace',
            rules: {
                'go_statement': keywordStatement(/go/),
                'end_statement': keywordStatement(/end/),
                'whitespace': /\s+/,
                'single_statement': {
                    components: {oneOf: ['go_statement', 'end_statement']}
                },
                'statement': {
                    components: ['single_statement', /;/]
                },
                // Not referenced by "program" or any other rule in this grammar
                'detached_statement': keywordStatement(/detached/),
                'program': {
                    components: {name: 'statements', zeroOrMoreOf: 'statement'}
                }
            },
            start: 'program'
        };
        parser = new Parser(grammarSpec);

        // The third argument names the rule to start from instead of the grammar's `start`
        resultAST = parser.parse('detached', {}, 'detached_statement');

        expect(resultAST).to.deep.equal({
            name: 'detached_statement'
        });
    });
});
describe('capture all bounds option', function () {
    it('should support capturing bounds for every AST node with a complex grammar', function () {
        var grammarSpec = {
                ignore: 'whitespace',
                rules: {
                    'go_statement': {
                        components: [{what: /go/, allowMerge: false}]
                    },
                    'do_something_statement': {
                        components: [
                            {what: /do_something_custom/, allowMerge: false},
                            {name: 'thing', rule: 'do_thing_arg'}
                        ]
                    },
                    'some_identifier': {
                        components: [(/\w/), (/\w+/)] // Multiple components just for testing concatenation
                    },
                    'do_thing_arg': {
                        components: {name: 'fallen_back_identifier', rule: 'with_fallback'}
                    },
                    'with_fallback': {
                        components: [{name: 'something_that_wont_match', optionally: /AAAA/}, {name: 'identifier', rule: 'some_identifier'}],
                        ifNoMatch: {component: 'something_that_wont_match', capture: 'identifier'}
                    },
                    'end_statement': {
                        components: [{what: /end/, allowMerge: false}],
                        processor: function () {
                            return {
                                name: 'end_statement'
                                // Ensure `my_bounds` is added to the resulting node,
                                // even though this processor has ignored it
                            };
                        }
                    },
                    'whitespace': /\s+/,
                    'single_statement': {
                        components: {oneOf: ['go_statement', 'do_something_statement', 'end_statement']}
                    },
                    'statement': {
                        components: ['single_statement', /;/]
                    },
                    'program': {
                        components: {name: 'statements', zeroOrMoreOf: 'statement'}
                    }
                },
                start: 'program',
                // Property name under which bounds are expected on every node
                bounds: 'my_bounds'
            },
            options = {
                captureAllBounds: true
            },
            parser = new Parser(grammarSpec, null, options),
            // The expected offsets, lines and columns below depend on the exact
            // blank lines and indentation of this input, so it is spelled out
            // line by line rather than embedded in a heredoc comment, where
            // reformatting would silently change it. 56 characters, 8 lines.
            code = [
                'go;',                   // line 1, offsets 0-3
                '',
                '',
                '  do_something_custom', // line 4, keyword starts at offset 8 (column 3)
                '         open_it;',     // line 5, identifier starts at offset 37 (column 10)
                '',
                '',
                '    end;'               // line 8, keyword starts at offset 52 (column 5)
            ].join('\n');

        // Builds a bounds object from [offset, line, column] triples
        function bounds(start, end) {
            return {
                start: {offset: start[0], line: start[1], column: start[2]},
                end: {offset: end[0], line: end[1], column: end[2]}
            };
        }

        expect(parser.parse(code)).to.deep.equal({
            name: 'program',
            statements: [
                {
                    name: 'go_statement',
                    my_bounds: bounds([0, 1, 1], [3, 1, 4])
                },
                {
                    name: 'do_something_statement',
                    thing: {
                        name: 'do_thing_arg',
                        fallen_back_identifier: 'open_it',
                        my_bounds: bounds([37, 5, 10], [44, 5, 17])
                    },
                    my_bounds: bounds([8, 4, 3], [45, 5, 18])
                },
                {
                    name: 'end_statement',
                    my_bounds: bounds([52, 8, 5], [56, 8, 9])
                }
            ],
            my_bounds: bounds([0, 1, 1], [56, 8, 9])
        });
    });
});
});
});