@gram-data/gram-parse
Version:
parse text into gram data graphs
892 lines (863 loc) • 20.4 kB
JavaScript
import unified from 'unified';
import nearley from 'nearley';
import moo from 'moo';
import { seq, flatten, cons, node, empty as empty$1, pair, propertiesToRecord, property, boolean as boolean$1, string, tagged, integer as integer$1, decimal as decimal$1, hexadecimal as hexadecimal$1, octal as octal$1, measurement as measurement$1 } from '@gram-data/gram-builder';
import { tokens } from '@gram-data/gram-ast';
function _extends() {
_extends = Object.assign || function (target) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (Object.prototype.hasOwnProperty.call(source, key)) {
target[key] = source[key];
}
}
}
return target;
};
return _extends.apply(this, arguments);
}
/**
 * Postprocessor that passes through the first matched element unchanged.
 *
 * @param d array of matched results for a rule
 * @returns the element at index 0
 */
function id(d) {
  var first = d[0];
  return first;
}
// Tokenizer for gram source text, built with moo from the rule table below.
// Most value patterns (numbers, strings, symbols, identifiers) are imported
// from `tokens` in @gram-data/gram-ast; only whitespace, line comments, the
// boolean keywords and punctuation are spelled out here.
// NOTE(review): moo presumably resolves overlapping rules by declaration
// order, so the ordering of these entries is likely significant — confirm
// against the moo documentation before reordering.
var lexer = /*#__PURE__*/moo.compile({
// Runs of whitespace; may span line breaks.
whitespace: {
match: /\s+/,
lineBreaks: true
},
// `//` comments.
// NOTE(review): no grammar rule reachable from `GramSeq` consumes this
// token type (the `Comment` rule is never referenced), so a comment in the
// input presumably yields a syntax error — verify.
lineComment: {
match: /\/\/.*?\n?$/
},
// Numeric forms, patterns supplied by gram-ast.
hexadecimal: tokens.hexadecimal,
octal: tokens.octal,
measurement: tokens.measurement,
decimal: tokens.decimal,
integer: tokens.integer,
// Tagged string; the token value is kept whole and is split into tag and
// value later by separateTagFromString.
taggedString: {
match: tokens.taggedString
},
// Accepted spellings of boolean literals.
"boolean": ['true', 'TRUE', 'True', 'false', 'FALSE', 'False'],
symbol: tokens.symbol,
identifier: tokens.identifier,
// For the three quoted-string kinds the token `value` drops the first and
// last character (the surrounding delimiters); `text` keeps them.
doubleQuotedString: {
match: tokens.doubleQuotedString,
value: function value(s) {
return s.slice(1, -1);
}
},
singleQuotedString: {
match: tokens.singleQuotedString,
value: function value(s) {
return s.slice(1, -1);
}
},
tickedString: {
match: tokens.tickedString,
value: function value(s) {
return s.slice(1, -1);
}
},
// Edge shorthands and bracket pieces used by the Edge and Kind rules.
'-->': '-->',
'--': '--',
'<--': '<--',
'-[]->': '-[]->',
'-[]-': '-[]-',
'<-[]-': '<-[]-',
'<-[': '<-[',
']->': ']->',
'-[': '-[',
']-': ']-',
// Single-character punctuation.
'{': '{',
'}': '}',
'[': '[',
']': ']',
'(': '(',
')': ')',
',': ',',
':': ':',
'`': '`',
"'": "'",
// Literal accepted by the Identity rule; PathPoint treats an id equal to
// 'ø' as the empty path.
ø: 'ø'
});
var empty = function empty() {
return null;
};
/**
 * Postprocessor that yields the raw source text of the first matched token.
 *
 * @param matched array of matched results; element 0 must be a token object
 * @returns the `text` property of the first element
 */
var text = function text(matched) {
  return matched[0].text;
};
/*
# function extractPairs(pairGroups:Array<any>) {
# return pairGroups.map((pairGroup:Array<any>) => {
# return pairGroup[3];
# })
# }
# function extractArray(valueGroups:Array<any>):Array<any> {
# return valueGroups.map( (valueGroup) => valueGroup[3]);
# }
*/
/**
 * Splits a tagged string of the form tag`value` into its two parts.
 *
 * The tag is everything before the first backtick; the value is everything
 * between that backtick and the closing backtick at the very end of the
 * input. The value may be empty and may span several lines.
 *
 * @param taggedStringValue full tagged-string text, including both backticks
 * @returns an object `{ tag, value }`
 * @throws Error "Malformed tagged string: ..." when the input does not have
 *         the tag`value` shape
 */
function separateTagFromString(taggedStringValue) {
  // `[\s\S]*` rather than `.+`: accept an empty value and embedded newlines,
  // which `.+` would reject as "malformed".
  var valueParts = taggedStringValue.match(/([^`]+)`([\s\S]*)`$/);
  if (valueParts === null) throw Error("Malformed tagged string: " + taggedStringValue);
  return {
    tag: valueParts[1],
    value: valueParts[2]
  };
}
/**
 * Splits a measurement such as `12.5km` into its numeric text and its unit.
 *
 * @param measurementValue measurement text: an optionally negative run of
 *        digits/dots immediately followed by ASCII letters
 * @returns an object `{ value, unit }`, both as strings
 * @throws Error "Malformed measurement : ..." when no number+unit is found
 */
function separateNumberFromUnits(measurementValue) {
  var found = measurementValue.match(/(-?[0-9.]+)([a-zA-Z]+)/);
  if (found) {
    var numberText = found[1];
    var unitText = found[2];
    return {
      value: numberText,
      unit: unitText
    };
  }
  throw Error("Malformed measurement : " + measurementValue);
}
/**
 * Compiled grammar handed to `nearley.Grammar.fromCompiled` by `parse`.
 *
 * - `Lexer` is the moo lexer defined above.
 * - `ParserRules` is an ordered list of productions. Each entry has a `name`,
 *   the `symbols` it matches (rule names, `{ literal }` tokens or
 *   `{ type }` token classes) and an optional `postprocess` callback that
 *   receives the array of matched child results and returns the rule's value.
 * - `ParserStart` names the start rule.
 *
 * Rules whose names contain `$ebnf$N` / `$subexpression$N` are helper
 * productions for optional parts (one alternative matching the symbol, one
 * matching nothing and yielding `null`) and for repetition (left-recursive
 * alternatives that accumulate into an array).
 *
 * The `lexer.has('x') ? { type: 'x' } : x` expressions fall back to a bare
 * identifier that is not defined in this file; with the lexer above every
 * referenced token type exists, so the `{ type }` branch is always taken.
 *
 * NOTE(review): the `Comment` and `EOL` rules at the end are not referenced
 * by any other rule, so they are unreachable from `GramSeq`.
 */
var grammar = {
  Lexer: lexer,
  ParserRules: [
    // ---- GramSeq: optional leading whitespace, then one or more Paths ----
    {
      name: 'GramSeq$ebnf$1$subexpression$1',
      symbols: ['Path'],
      postprocess: function postprocess(_ref2) {
        var pp = _ref2[0];
        return pp;
      }
    },
    { name: 'GramSeq$ebnf$1', symbols: ['GramSeq$ebnf$1$subexpression$1'] },
    {
      name: 'GramSeq$ebnf$1$subexpression$2',
      symbols: ['Path'],
      postprocess: function postprocess(_ref3) {
        var pp = _ref3[0];
        return pp;
      }
    },
    {
      name: 'GramSeq$ebnf$1',
      symbols: ['GramSeq$ebnf$1', 'GramSeq$ebnf$1$subexpression$2'],
      postprocess: function postprocess(d) {
        return d[0].concat([d[1]]);
      }
    },
    {
      name: 'GramSeq',
      symbols: ['_', 'GramSeq$ebnf$1'],
      postprocess: function postprocess(_ref4) {
        var pp = _ref4[1];
        return seq(flatten(pp));
      }
    },

    // ---- Path: a node pattern, a bracketed composition, or a pair ----
    { name: 'Path', symbols: ['NodePattern'], postprocess: id },
    { name: 'Path', symbols: ['PathComposition'], postprocess: id },
    { name: 'Path', symbols: ['PathPair'], postprocess: id },

    // ---- NodePattern: Node (Edge NodePattern)? ----
    {
      name: 'NodePattern',
      symbols: ['Node', '_', 'Edge', '_', 'NodePattern'],
      postprocess: function postprocess(_ref5) {
        var n = _ref5[0],
            es = _ref5[2],
            np = _ref5[4];
        // The edge's kind and attributes become the attributes of the path
        // that joins the left node with the rest of the pattern.
        return cons([n, np], {
          kind: es.kind,
          id: es.id,
          labels: es.labels,
          record: es.record
        });
      }
    },
    { name: 'NodePattern', symbols: ['Node'], postprocess: id },

    // ---- Node: "(" Attributes? ")" ----
    { name: 'Node$ebnf$1', symbols: ['Attributes'], postprocess: id },
    { name: 'Node$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'Node',
      symbols: [{ literal: '(' }, '_', 'Node$ebnf$1', { literal: ')' }, '_'],
      postprocess: function postprocess(_ref6) {
        var attrs = _ref6[2];
        return attrs ? node(attrs.id, attrs.labels, attrs.record) : node();
      }
    },

    // ---- Edge: bracketed forms with optional attributes ----
    // Each yields the attributes object (if any) extended with a `kind`.
    { name: 'Edge$ebnf$1', symbols: ['Attributes'], postprocess: id },
    { name: 'Edge$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'Edge',
      symbols: [{ literal: '-[' }, '_', 'Edge$ebnf$1', { literal: ']->' }, '_'],
      postprocess: function postprocess(_ref7) {
        var attrs = _ref7[2];
        return _extends({ kind: 'right' }, attrs);
      }
    },
    { name: 'Edge$ebnf$2', symbols: ['Attributes'], postprocess: id },
    { name: 'Edge$ebnf$2', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'Edge',
      symbols: [{ literal: '-[' }, '_', 'Edge$ebnf$2', { literal: ']-' }, '_'],
      postprocess: function postprocess(_ref8) {
        var attrs = _ref8[2];
        return _extends({ kind: 'either' }, attrs);
      }
    },
    { name: 'Edge$ebnf$3', symbols: ['Attributes'], postprocess: id },
    { name: 'Edge$ebnf$3', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'Edge',
      symbols: [{ literal: '<-[' }, '_', 'Edge$ebnf$3', { literal: ']-' }, '_'],
      postprocess: function postprocess(_ref9) {
        var attrs = _ref9[2];
        return _extends({ kind: 'left' }, attrs);
      }
    },

    // ---- Edge: attribute-less shorthands ----
    {
      name: 'Edge',
      symbols: [{ literal: '-[]->' }, '_'],
      postprocess: function postprocess() { return { kind: 'right' }; }
    },
    {
      name: 'Edge',
      symbols: [{ literal: '-[]-' }, '_'],
      postprocess: function postprocess() { return { kind: 'either' }; }
    },
    {
      name: 'Edge',
      symbols: [{ literal: '<-[]-' }, '_'],
      postprocess: function postprocess() { return { kind: 'left' }; }
    },
    {
      name: 'Edge',
      symbols: [{ literal: '-->' }, '_'],
      postprocess: function postprocess() { return { kind: 'right' }; }
    },
    {
      name: 'Edge',
      symbols: [{ literal: '--' }, '_'],
      postprocess: function postprocess() { return { kind: 'either' }; }
    },
    {
      name: 'Edge',
      symbols: [{ literal: '<--' }, '_'],
      postprocess: function postprocess() { return { kind: 'left' }; }
    },

    // ---- PathComposition: the three square-bracket forms ----
    { name: 'PathComposition', symbols: ['PathPoint'], postprocess: id },
    { name: 'PathComposition', symbols: ['PathAnnotation'], postprocess: id },
    { name: 'PathComposition', symbols: ['PathExpression'], postprocess: id },

    // ---- PathPoint: "[" Attributes? "]" ----
    { name: 'PathPoint$ebnf$1', symbols: ['Attributes'], postprocess: id },
    { name: 'PathPoint$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'PathPoint',
      symbols: [{ literal: '[' }, '_', 'PathPoint$ebnf$1', { literal: ']' }, '_'],
      postprocess: function postprocess(_ref10) {
        var attr = _ref10[2];
        // No attributes at all, or the explicit 'ø' identity, means the
        // empty path; anything else is a plain node.
        if (attr && (attr.id || attr.labels || attr.record) && attr.id !== 'ø') {
          return node(attr.id, attr.labels, attr.record);
        } else {
          return empty$1();
        }
      }
    },

    // ---- PathAnnotation: "[" Attributes? Path "]" ----
    { name: 'PathAnnotation$ebnf$1', symbols: ['Attributes'], postprocess: id },
    { name: 'PathAnnotation$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'PathAnnotation',
      symbols: [{ literal: '[' }, '_', 'PathAnnotation$ebnf$1', 'Path', { literal: ']' }, '_'],
      postprocess: function postprocess(_ref11) {
        var attr = _ref11[2],
            lhs = _ref11[3];
        return cons([lhs], attr ? {
          id: attr.id,
          labels: attr.labels,
          record: attr.record
        } : {});
      }
    },

    // ---- PathExpression: "[" Attributes? Kind? Path Path "]" ----
    { name: 'PathExpression$ebnf$1', symbols: ['Attributes'], postprocess: id },
    { name: 'PathExpression$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    { name: 'PathExpression$ebnf$2', symbols: ['Kind'], postprocess: id },
    { name: 'PathExpression$ebnf$2', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'PathExpression',
      symbols: [{ literal: '[' }, '_', 'PathExpression$ebnf$1', 'PathExpression$ebnf$2', 'Path', 'Path', { literal: ']' }, '_'],
      postprocess: function postprocess(_ref12) {
        // Attributes are optional and arrive as `null` when omitted; default
        // to an empty object so the property reads below cannot throw.
        var attrs = _ref12[2] || {},
            kind = _ref12[3],
            lhs = _ref12[4],
            rhs = _ref12[5];
        return cons([lhs, rhs], {
          kind: kind,
          id: attrs.id,
          labels: attrs.labels,
          record: attrs.record
        });
      }
    },

    // ---- PathPair: (NodePattern | PathComposition) "," Path ----
    { name: 'PathPair$subexpression$1', symbols: ['NodePattern'] },
    { name: 'PathPair$subexpression$1', symbols: ['PathComposition'] },
    {
      name: 'PathPair',
      symbols: ['PathPair$subexpression$1', { literal: ',' }, '_', 'Path'],
      postprocess: function postprocess(_ref13) {
        var lp = _ref13[0],
            rp = _ref13[3];
        // The subexpression has no postprocessor, so `lp` is a one-element
        // array wrapping the left path.
        return pair([lp[0], rp]);
      }
    },

    // ---- Kind: relation keyword inside a PathExpression ----
    {
      name: 'Kind',
      symbols: [{ literal: ',' }, '_'],
      postprocess: function postprocess() { return 'pair'; }
    },
    {
      name: 'Kind',
      symbols: [{ literal: '-->' }, '_'],
      postprocess: function postprocess() { return 'right'; }
    },
    {
      name: 'Kind',
      symbols: [{ literal: '--' }, '_'],
      postprocess: function postprocess() { return 'either'; }
    },
    {
      name: 'Kind',
      symbols: [{ literal: '<--' }, '_'],
      postprocess: function postprocess() { return 'left'; }
    },

    // ---- Attributes: Identity? LabelList? Record? (at least one) ----
    { name: 'Attributes$ebnf$1', symbols: ['Identity'], postprocess: id },
    { name: 'Attributes$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    { name: 'Attributes$ebnf$2', symbols: ['LabelList'], postprocess: id },
    { name: 'Attributes$ebnf$2', symbols: [], postprocess: function postprocess() { return null; } },
    { name: 'Attributes$ebnf$3', symbols: ['Record'], postprocess: id },
    { name: 'Attributes$ebnf$3', symbols: [], postprocess: function postprocess() { return null; } },
    {
      name: 'Attributes',
      symbols: ['Attributes$ebnf$1', 'Attributes$ebnf$2', 'Attributes$ebnf$3'],
      postprocess: function postprocess(d, _, reject) {
        var id = d[0],
            labels = d[1],
            record = d[2];
        // All three parts are optional, but a match with none of them is
        // rejected by returning the sentinel passed as the third argument.
        if (id || labels || record) {
          return {
            id: id,
            labels: labels,
            record: record
          };
        } else return reject;
      }
    },

    // ---- LabelList: one or more ":" Symbol ----
    { name: 'LabelList$ebnf$1', symbols: ['Label'] },
    {
      name: 'LabelList$ebnf$1',
      symbols: ['LabelList$ebnf$1', 'Label'],
      postprocess: function postprocess(d) {
        return d[0].concat([d[1]]);
      }
    },
    {
      name: 'LabelList',
      symbols: ['LabelList$ebnf$1'],
      postprocess: function postprocess(_ref14) {
        var labels = _ref14[0];
        return labels;
      }
    },
    {
      name: 'Label',
      symbols: [{ literal: ':' }, 'Symbol'],
      postprocess: function postprocess(_ref15) {
        var label = _ref15[1];
        return label;
      }
    },

    // ---- Identity: token kinds usable as an element id ----
    // Yields the token's raw text; ticked strings lose their back-ticks.
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('identifier') ? { type: 'identifier' } : identifier, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [{ literal: 'ø' }, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('symbol') ? { type: 'symbol' } : symbol, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('integer') ? { type: 'integer' } : integer, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('octal') ? { type: 'octal' } : octal, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('hexadecimal') ? { type: 'hexadecimal' } : hexadecimal, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('measurement') ? { type: 'measurement' } : measurement, '_'],
      postprocess: text
    },
    {
      name: 'Identity',
      symbols: [/*#__PURE__*/lexer.has('tickedString') ? { type: 'tickedString' } : tickedString, '_'],
      postprocess: function postprocess(_ref16) {
        var t = _ref16[0];
        return t.text.slice(1, -1);
      }
    },

    // ---- Symbol: label names and record keys ----
    {
      name: 'Symbol',
      symbols: [/*#__PURE__*/lexer.has('symbol') ? { type: 'symbol' } : symbol, '_'],
      postprocess: text
    },
    {
      name: 'Symbol',
      symbols: [/*#__PURE__*/lexer.has('tickedString') ? { type: 'tickedString' } : tickedString, '_'],
      postprocess: function postprocess(_ref17) {
        var t = _ref17[0];
        return t.text.slice(1, -1);
      }
    },

    // ---- Record: "{" (Property ("," Property)*)? "}" ----
    // An empty record `{}` yields null.
    {
      name: 'Record',
      symbols: [{ literal: '{' }, '_', { literal: '}' }, '_'],
      postprocess: empty
    },
    { name: 'Record$ebnf$1', symbols: [] },
    {
      name: 'Record$ebnf$1$subexpression$1',
      symbols: [{ literal: ',' }, '_', 'Property'],
      postprocess: function postprocess(_ref18) {
        var p = _ref18[2];
        return p;
      }
    },
    {
      name: 'Record$ebnf$1',
      symbols: ['Record$ebnf$1', 'Record$ebnf$1$subexpression$1'],
      postprocess: function postprocess(d) {
        return d[0].concat([d[1]]);
      }
    },
    {
      name: 'Record',
      symbols: [{ literal: '{' }, '_', 'Property', 'Record$ebnf$1', { literal: '}' }, '_'],
      postprocess: function postprocess(_ref19) {
        var p = _ref19[2],
            ps = _ref19[3];
        return propertiesToRecord([p].concat(ps));
      }
    },
    {
      name: 'Property',
      symbols: ['Symbol', { literal: ':' }, '_', 'Value'],
      postprocess: function postprocess(_ref20) {
        var k = _ref20[0],
            v = _ref20[3];
        return property(k, v);
      }
    },

    // ---- Value: string, number, boolean, or non-empty array of values ----
    { name: 'Value', symbols: ['StringLiteral', '_'], postprocess: id },
    { name: 'Value', symbols: ['NumericLiteral', '_'], postprocess: id },
    {
      name: 'Value',
      symbols: [/*#__PURE__*/lexer.has('boolean') ? { type: 'boolean' } : boolean, '_'],
      postprocess: function postprocess(d) {
        // Lower-casing maps every accepted spelling onto JSON's true/false.
        return boolean$1(JSON.parse(d[0].value.toLowerCase()));
      }
    },
    { name: 'Value$ebnf$1', symbols: [] },
    {
      name: 'Value$ebnf$1$subexpression$1',
      symbols: [{ literal: ',' }, '_', 'Value'],
      postprocess: function postprocess(_ref21) {
        var v = _ref21[2];
        return v;
      }
    },
    {
      name: 'Value$ebnf$1',
      symbols: ['Value$ebnf$1', 'Value$ebnf$1$subexpression$1'],
      postprocess: function postprocess(d) {
        return d[0].concat([d[1]]);
      }
    },
    {
      name: 'Value',
      symbols: [{ literal: '[' }, '_', 'Value', 'Value$ebnf$1', { literal: ']' }, '_'],
      postprocess: function postprocess(_ref22) {
        var v = _ref22[2],
            vs = _ref22[3];
        return [v].concat(vs);
      }
    },

    // ---- StringLiteral ----
    {
      name: 'StringLiteral',
      symbols: [/*#__PURE__*/lexer.has('singleQuotedString') ? { type: 'singleQuotedString' } : singleQuotedString],
      postprocess: function postprocess(d) {
        return string(d[0].value);
      }
    },
    {
      name: 'StringLiteral',
      symbols: [/*#__PURE__*/lexer.has('doubleQuotedString') ? { type: 'doubleQuotedString' } : doubleQuotedString],
      postprocess: function postprocess(d) {
        return string(d[0].value);
      }
    },
    {
      name: 'StringLiteral',
      symbols: [/*#__PURE__*/lexer.has('tickedString') ? { type: 'tickedString' } : tickedString],
      postprocess: function postprocess(d) {
        return string(d[0].value);
      }
    },
    {
      name: 'StringLiteral',
      symbols: [/*#__PURE__*/lexer.has('taggedString') ? { type: 'taggedString' } : taggedString],
      postprocess: function postprocess(d) {
        var parts = separateTagFromString(d[0].value);
        return tagged(parts.tag, parts.value);
      }
    },

    // ---- NumericLiteral ----
    {
      name: 'NumericLiteral',
      symbols: [/*#__PURE__*/lexer.has('integer') ? { type: 'integer' } : integer],
      postprocess: function postprocess(d) {
        return integer$1(d[0].value);
      }
    },
    {
      name: 'NumericLiteral',
      symbols: [/*#__PURE__*/lexer.has('decimal') ? { type: 'decimal' } : decimal],
      postprocess: function postprocess(d) {
        return decimal$1(d[0].value);
      }
    },
    {
      name: 'NumericLiteral',
      symbols: [/*#__PURE__*/lexer.has('hexadecimal') ? { type: 'hexadecimal' } : hexadecimal],
      postprocess: function postprocess(d) {
        return hexadecimal$1(d[0].value);
      }
    },
    {
      name: 'NumericLiteral',
      symbols: [/*#__PURE__*/lexer.has('octal') ? { type: 'octal' } : octal],
      postprocess: function postprocess(d) {
        return octal$1(d[0].value);
      }
    },
    {
      name: 'NumericLiteral',
      symbols: [/*#__PURE__*/lexer.has('measurement') ? { type: 'measurement' } : measurement],
      postprocess: function postprocess(d) {
        var parts = separateNumberFromUnits(d[0].value);
        return measurement$1(parts.unit, parts.value);
      }
    },

    // ---- "_": optional whitespace, always yields null ----
    {
      name: '_$ebnf$1',
      symbols: [/*#__PURE__*/lexer.has('whitespace') ? { type: 'whitespace' } : whitespace],
      postprocess: id
    },
    { name: '_$ebnf$1', symbols: [], postprocess: function postprocess() { return null; } },
    { name: '_', symbols: ['_$ebnf$1'], postprocess: empty },

    // ---- Comment / EOL: defined but not referenced by any rule above ----
    {
      name: 'Comment',
      symbols: [/*#__PURE__*/lexer.has('lineComment') ? { type: 'lineComment' } : lineComment],
      postprocess: empty
    },
    {
      name: 'EOL',
      symbols: [{ literal: '\n' }],
      postprocess: empty
    }
  ],
  ParserStart: 'GramSeq'
};
// Message reported when the input ends before a complete parse is produced.
var INCOMPLETE_PARSE = 'Incomplete parse.';
// Prefix associated with syntax errors.
var SYNTAX_ERROR = 'Syntax error at';
// Prototype-less lookup object bundling the error message constants.
var gramErrors = /*#__PURE__*/Object.create(null);
gramErrors.INCOMPLETE_PARSE = INCOMPLETE_PARSE;
gramErrors.SYNTAX_ERROR = SYNTAX_ERROR;
/**
 * Converts a saved lexer state into a `{ line, column }` position.
 *
 * @param state object exposing `line` and `col`
 * @returns position with `line` copied and `col` renamed to `column`
 */
var lexerLocation = function lexerLocation(state) {
  var line = state.line;
  var column = state.col;
  return { line: line, column: column };
};
/**
 * Converts a lexer token into a `{ line, column }` position.
 *
 * @param token object exposing `line` and `col`
 * @returns position with `line` copied and `col` renamed to `column`
 */
var tokenLocation = function tokenLocation(token) {
  var line = token.line;
  var column = token.col;
  return { line: line, column: column };
};
var parse = function parse(text, file) {
var nearleyParser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
try {
var parsed = nearleyParser.feed(text);
if (parsed.results[0] === undefined && parsed.lexerState) {
var location = lexerLocation(parsed.lexerState);
file.fail(INCOMPLETE_PARSE, location);
}
if (parsed.results.length > 1) {
file.info('[WARN] parsing is ambiguous');
}
return parsed.results[0] || {
type: 'error'
};
} catch (e) {
var _location = e.token ? tokenLocation(e.token) : {
line: 0,
column: 0
};
file.fail(e.message, _location);
}
};
var gramParserPlugin = function gramParserPlugin() {
this.Parser = parse;
};
var toAST = function toAST(src) {
var processor = unified().use(gramParserPlugin).freeze();
return processor.parse(src);
};
export { gramErrors as errors, gramParserPlugin, toAST };
//# sourceMappingURL=gram-parse.esm.js.map