//bnf: BNF compiler, parser, and interpreter framework.
//Some of these more complex rules exist only because a token cannot yet generate a group rule; once that is added, they will be removed.
exports.bnfRules = {
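//Matches the empty string; always succeeds.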
BLANK( token ){
return true;
},
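//A single ASCII digit: char codes 48-57 ( "0"-"9" ).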
DIGIT( token ){
if( token.CharCodeRange( 48, 57 ) ){
token.SetChar();
return true;
}
return false;
},
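//A single ASCII letter: char codes 65-90 ( "A"-"Z" ) or 97-122 ( "a"-"z" ).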
ALPHA( token ){
if( token.CharCodeRange( 65, 90 ) || token.CharCodeRange( 97, 122 ) ){
token.SetChar();
return true;
}
return false;
},
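//A single printable ASCII symbol, excluding double quote (34), single quote (39), backslash (92), and backtick (96), which have dedicated rules below.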
SYMBOL( token ){
if( token.CharIs( 33 )
|| token.CharCodeRange( 35, 38 )
|| token.CharCodeRange( 40, 47 )
|| token.CharCodeRange( 58, 64 )
|| token.CharIs( 91 )
|| token.CharCodeRange( 93, 95 )
|| token.CharCodeRange( 123, 126 ) ){
token.SetChar();
return true;
}
return false;
},
ANYCHAR( token ){
return token.Or( [
token.Rule( "ALPHA" ),
token.Rule( "DIGIT" ),
token.Rule( "SYMBOL" ),
token.Rule( "ONEWSP" )
]);
},
//Should change to range rule @LHF
DIGITS( token ){
return token.Or( [
token.Rule( "DIGIT" ),
( t ) => t.And( [ token.Rule( "DIGIT" ), token.Rule( "DIGITS" ) ] )
]);
},
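//An integer ( "12" ), a decimal ( "1.5" ), or a bare fraction ( ".5" ).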
NUMBER( token ){
return token.Or( [
( t ) => t.And( [ t.Rule( "DIGITS" ), t.Grammar( "LITERAL", "." ), t.Rule( "DIGITS" ) ] ),
( t ) => t.And( [ t.Grammar( "LITERAL", "." ), t.Rule( "DIGITS" ) ] ),
token.Rule( "DIGITS" )
]);
},
TAB( token ){
return token.TryChar( 9 );
},
SPACE( token ){
return token.TryChar( 32 );
},
//Should change to range rule @LHF
WSP( token ){
return token.Or( [
token.Rule( "ONEWSP" ),
( t ) => t.And( [ token.Rule( "ONEWSP" ), token.Rule( "WSP" ) ] )
]);
},
//MIGHT BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
ONEWSP( token ){
return token.Or( [
token.Rule( "SPACE" ),
token.Rule( "TAB" )
]);
},
CR( token ){
return token.TryChar( 13 );
},
LF( token ){
return token.TryChar( 10 );
},
CRLF( token ){
return token.Or( [
( t ) => t.And( [ t.Rule( "CR" ), t.Rule( "LF" ) ] ),
token.Rule( "CR" ),
token.Rule( "LF" )
]);
},
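//Optional whitespace: one or more spaces/tabs, or nothing.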
OWSP( token ){
return token.Or( [
token.Rule( "WSP" ), token.Rule( "BLANK" )
]);
},
//MIGHT BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
ONEWSPCRLF( token ){
return token.Or( [
token.Rule( "ONEWSP" ), token.Rule( "CRLF" )
]);
},
//MIGHT BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
ONEORMOREWSP( token ){
return token.Or( [
token.Rule( "ONEWSPCRLF" ),
( t ) => t.And( [ token.Rule( "ONEWSPCRLF" ), token.Rule( "ONEORMOREWSP" ) ] )
]);
},
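//Any run of whitespace, including line breaks, or nothing.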
ANYWSP( token ){
return token.Or( [
token.Rule( "ONEORMOREWSP" ), token.Rule( "BLANK" )
]);
},
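//Escape and quote characters: backslash (92), double quote (34), single quote (39), backtick (96), and their backslash-escaped pairs.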
ESCAPE( token ){
return token.TryChar( 92 );
},
QUOTE( token ){
return token.TryChar( 34 );
},
SQUOTE( token ){
return token.TryChar( 39 );
},
AQUOTE( token ){
return token.TryChar( 96 );
},
ESCQUOTE( token ){
return token.TryString( Buffer.from( [ 92, 34 ] ) );
},
ESCSQUOTE( token ){
return token.TryString( Buffer.from( [ 92, 39 ] ) );
},
ESCAQUOTE( token ){
return token.TryString( Buffer.from( [ 92, 96 ] ) );
},
//TO BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
SQEANYCHAR( token ){
return token.Or( [
token.Rule( "ESCSQUOTE" ),
token.Rule( "ANYCHAR" ),
token.Rule( "QUOTE" ),
token.Rule( "AQUOTE" ),
token.Rule( "ESCAPE" )
]);
},
//TO BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
SQLITERALCHARS( token ){
return token.Or( [
token.Rule( "SQEANYCHAR" ),
( t ) => t.And( [ token.Rule( "SQEANYCHAR" ), token.Rule( "SQLITERALCHARS" ) ] )
]);
},
SQLITERAL( token ){
return token.And( [
token.Rule( "SQUOTE" ),
token.Rule( "SQLITERALCHARS" ),
token.Rule( "SQUOTE" )
]);
},
//TO BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
QEANYCHAR( token ){
return token.Or( [
token.Rule( "ESCQUOTE" ),
token.Rule( "ANYCHAR" ),
token.Rule( "SQUOTE" ),
token.Rule( "AQUOTE" ),
token.Rule( "ESCAPE" )
]);
},
//TO BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
QLITERALCHARS( token ){
return token.Or( [
token.Rule( "QEANYCHAR" ),
( t ) => t.And( [ token.Rule( "QEANYCHAR" ), token.Rule( "QLITERALCHARS" ) ] )
]);
},
QLITERAL( token ){
return token.And( [
token.Rule( "QUOTE" ),
token.Rule( "QLITERALCHARS" ),
token.Rule( "QUOTE" )
]);
},
//TO BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
AQEANYCHAR( token ){
return token.Or( [
token.Rule( "ESCAQUOTE" ),
token.Rule( "ANYCHAR" ),
token.Rule( "SQUOTE" ),
token.Rule( "QUOTE" ),
token.Rule( "CRLF" ),
token.Rule( "ESCAPE" )
]);
},
//TO BE REMOVED IN THIS MAJOR VERSION! DO NOT USE!
AQLITERALCHARS( token ){
return token.Or( [
token.Rule( "AQEANYCHAR" ),
( t ) => t.And( [ token.Rule( "AQEANYCHAR" ), token.Rule( "AQLITERALCHARS" ) ] )
] );
/*
This should work as the new way of matching AQLITERALCHARS; it can then be inlined into AQLITERAL, which is planned to remain.
return token.Grammar( "RANGE", {
rule : ( t ) =>
t.Grammar( "GROUP", ( t1 ) =>
t1.Or( [
t1.Rule( "ESCAQUOTE" ),
t1.Rule( "ANYCHAR" ),
t1.Rule( "SQUOTE" ),
t1.Rule( "QUOTE" ),
t1.Rule( "CRLF" ),
t1.Rule( "ESCAPE" )
] )
),
range : [ 1, -1 ]
} );
*/
},
AQLITERAL( token ){
return token.And( [
token.Rule( "AQUOTE" ),
token.Rule( "AQLITERALCHARS" ),
token.Rule( "AQUOTE" )
]);
},
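//LITERAL accepts single- or double-quoted strings; ANYLITERAL additionally accepts backtick strings.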
LITERAL( token ){
return token.Or( [
token.Rule( "SQLITERAL" ),
token.Rule( "QLITERAL" )
]);
},
ANYLITERAL( token ){
return token.Or( [
token.Rule( "SQLITERAL" ),
token.Rule( "QLITERAL" ),
token.Rule( "AQLITERAL" )
]);
},
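//Succeeds at the end of the input, or anywhere inside a subscript.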
EOF( token ){
if( token.point === token.eof || token.script.subscript ){
return true;
}
return false;
},
SCRIPT( token ){
return token.And( [ token.Rule( "SYNTAX" ), token.Rule( "EOF" ) ] );
}
};
exports.parserRules = {
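//Evaluates every alternative from the same start point, then commits to the successful branch whose token tree carries the most weight. On total failure, the expected-token info from every branch is merged for error reporting.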
"&OR"( token ){
let rules = token.ruleSyntax;
let resetPoint = token.point;
let tokenPoints = {};
for( let i = 0; i < rules.length; i++ ){
token.point = resetPoint;
tokenPoints[i] = {
result : rules[i]( token ),
point : token.point
};
token._tokenTrees.push( [] );
token._currentTokenTree = token._tokenTrees[token._tokenTrees.length - 1];
}
let topWeightIndex = -1;
let topWeight = -1;
for( let i = 0; i < token._tokenTrees.length - 1; i++ ){
if( tokenPoints[i].result === true ){
let weight = 0;
token._tokenTrees[i].forEach( x => weight += x.weight );
if( weight > topWeight ){
topWeight = weight;
topWeightIndex = i;
}
}
}
if( topWeightIndex !== -1 ){
token.point = tokenPoints[topWeightIndex].point;
token._tokenTrees[0] = token._tokenTrees[topWeightIndex];
return true;
}
else{
//This can be optimized @LHF
for( let i = 0; i < token._tokenTrees.length - 1; i++ ){
for( let t = 0; t < token._tokenTrees[i].length; t++ ){
for( let line in token._tokenTrees[i][t].expected ){
for( let char in token._tokenTrees[i][t].expected[line] ){
for( let x = 0; x < token._tokenTrees[i][t].expected[line][char].length; x++ ){
token.AddExpected( token._tokenTrees[i][t].expected[line][char][x], line, char );
}
}
}
}
}
token._tokenTrees[0] = [];
return false;
}
},
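//Evaluates the rules in sequence; any failure rolls the read point back and fails the whole sequence.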
"&AND"( token ){
let rules = token.ruleSyntax;
let resetPoint = token.point;
for( let i = 0; i < rules.length; i++ ){
if( !rules[i]( token ) ){
token.point = resetPoint;
return false;
}
}
return true;
},
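//Matches the exact literal string held in the rule syntax.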
"&LITERAL"( token ){
let scriptLit = token.GetLitString( token.ruleSyntax.length );
if( token.ruleSyntax === scriptLit ){
token.SetValue( token.ruleSyntax );
return true;
}
else{
token.AddExpected( token.ruleSyntax );
return false;
}
},
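//Matches the wrapped rule if possible, otherwise matches nothing.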
"&OPTIONAL"( token ){
return token.Or( [
token.ruleSyntax,
token.Rule( "BLANK" )
]);
},
"&GROUP"( token ){
return token.ruleSyntax( token );
},
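//Matches one character inside an inclusive char-code range, or one exact character when the bounds are equal.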
"&RANGE"( token ){
if( token.ruleSyntax[0] === token.ruleSyntax[1] ){
return token.TryChar( token.ruleSyntax[0] );
}
else{
return token.TryCharRange( token.ruleSyntax[0], token.ruleSyntax[1] );
}
},
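//Succeeds exactly when the wrapped rule fails.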
"&NOT"( token ){
return !token.ruleSyntax( token );
},
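//Greedily matches the wrapped rule, then checks the match count against the allowed range ( [ min, max ], where -1 means unbounded ).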
"&REPEAT"( token ){
while( token.ruleSyntax.rule( token ) ){
}
//Somehow the group definition needs to be preserved so we know what syntax was expected.
let expected = token._currentTokenTree[0].name;
token._currentTokenTree.splice( token._currentTokenTree.length - 1, 1 );
let result = token._currentTokenTree.length >= token.ruleSyntax.range[0] && ( token.ruleSyntax.range[1] === -1 || token._currentTokenTree.length <= token.ruleSyntax.range[1] );
if( !result ){
token.AddExpected( "Repeating " + expected + " " + token.ruleSyntax.range[0] + "-" + ( token.ruleSyntax.range[1] === -1 ? "any" : token.ruleSyntax.range[1] ) );
}
return result;
},
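//Parses from the current point with a sub-language's grammar and grafts its token tree onto this one; on success, the read point advances past the parsed region.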
"&SCRIPT"( token ){
let script = token.script.SubScript( token.ruleSyntax );
script.subscript = true;
script.rootToken.point = token.point;
let result = script.rootToken.Evaluate();
token._currentTokenTree.push( script.rootToken );
script.rootToken.parent = token;
if( result ){
token.point = script.rootToken.endPoint;
return true;
}
token.AddExpected( "Language " + token.ruleSyntax );
return false;
}
};