buffer-apg-js
Version:
JavaScript APG, an ABNF Parser Generator
484 lines (479 loc) • 18.6 kB
JavaScript
/* eslint-disable guard-for-in */
/* eslint-disable no-restricted-syntax */
/* eslint-disable new-cap */
/* *************************************************************************************
* copyright: Copyright (c) 2021 Lowell D. Thomas, all rights reserved
* license: BSD-2-Clause (https://opensource.org/licenses/BSD-2-Clause)
* ********************************************************************************* */
// This is the `apg-exp` object constructor.
// `apg-exp` functions similarly to the built-in JavaScript `RegExp` pattern matching engine.
// However, patterns are described with an [SABNF]()
// syntax and matching is done with an
// [`apg`](https://github.com/ldthomas/apg-js2) parser.
//
// See the user's guide at `./dist/guide/index.html` for complete usage details.
module.exports = function apgExp(input, flags, nodeHits, treeDepth) {
'use strict;';
const apglib = require('../apg-lib/node-exports');
const execFuncs = require('./exec');
const replaceFuncs = require('./replace');
const resultFuncs = require('./result');
const splitFuncs = require('./split');
const setFlags = require('./flags');
const sabnfGenerator = require('./sabnf-generator');
const thisThis = this;
const thisFileName = 'apg-exp: ';
let errorName = thisFileName;
const readonly = {
writable: false,
enumerable: false,
configurable: true,
};
/* private object data that needs to be passed around to supporting modules */
const priv = {
thisThis: this,
grammarObject: null,
ruleNames: [],
str: null,
chars: null,
parser: null,
result: null,
charsToString: null,
match(state) {
return state === apglib.ids.MATCH || state === apglib.ids.EMPTY;
},
};
// This is a custom exception object.
// Derived from Error, it is named `ApgExpError` and in addition to the error `message`
// it has two functions, `toText()` and `toHtml()` which will display the errors
// in a user-friendly ASCII text format or HTML format like the formats used by APG.
// e.g.
// ```
// try{
// ...
// }catch(e){
// if(e.name === "ApgExpError"){
// console.log(e.toText());
// }else{
// console.log(e.message);
// }
// }
// ```
// All errors from the constructor and all object functions are reported by throwing an `ApgExpError` Error object.
// Constructor for the custom `ApgExpError` exception.
//   msg - the error message (stored as `message`)
//   t   - (optional) additional detail, appended by `toText()`
//   h   - (optional) additional detail in HTML, appended by `toHtml()`
// Instances satisfy `instanceof Error` and have `name === 'ApgExpError'`.
const ApgExpError = function ApgExpError(msg, t, h) {
  this.message = msg;
  this.name = 'ApgExpError';
  // Record the stack at the point where *this* error is created. Without this,
  // `stack` would have to come from the prototype chain and could not describe
  // the actual throw site. `captureStackTrace` is engine-specific, hence the guard.
  if (typeof Error.captureStackTrace === 'function') {
    Error.captureStackTrace(this, ApgExpError);
  }
  const text = t;
  const html = h;
  // Returns the message, a line feed and, when supplied, the plain-text detail.
  this.toText = function toText() {
    let ret = '';
    ret += this.message;
    ret += '\n';
    if (text) {
      ret += text;
    }
    return ret;
  };
  // Returns the message as an <h3> heading, a line feed and, when supplied, the HTML detail.
  this.toHtml = function toHtml() {
    let ret = '';
    ret += `<h3>${apglib.utils.stringToAsciiHtml(this.message)}</h3>`;
    ret += '\n';
    if (html) {
      ret += html;
    }
    return ret;
  };
};
// Inherit from Error without constructing an Error instance as the prototype
// (which would give every ApgExpError a shared, stale `stack`), and restore
// `constructor` so that `err.constructor === ApgExpError`.
ApgExpError.prototype = Object.create(Error.prototype, {
  constructor: {
    value: ApgExpError,
    writable: true,
    enumerable: false,
    configurable: true,
  },
});
/* Verifies that every UDT in the grammar has a callback function assigned on the parser. */
/* Returns null when all are defined, otherwise an error string listing the UDT names still missing one. */
const checkParserUdts = function checkParserUdts() {
  const missingNames = priv.grammarObject.udts
    .filter((udt) => typeof priv.parser.callbacks[udt.lower] !== 'function')
    .map((udt) => priv.ruleNames[udt.lower]);
  if (missingNames.length === 0) {
    return null;
  }
  return `undefined UDT callback functions: ${missingNames}`;
};
/* the constructor */
// Validates the flags, the grammar input and the optional parser limits, then
// creates the parser and AST objects. The first problem found is recorded in
// `error` and thrown as an `ApgExpError` after the block; any unexpected
// exception is wrapped in an `ApgExpError` as well.
//   input     - SABNF grammar text, or a grammar object (an object whose
//               `grammarObject` property is the string 'grammarObject')
//   flags     - passed to `setFlags()`; a truthy return value is treated as an error message
//   nodeHits  - (optional) number, floored; must be > 0
//   treeDepth - (optional) number, floored; must be > 0
errorName = `${thisFileName}constructor: `;
let error = null;
let result = null;
try {
  const TRUE = true;
  // Single-pass loop: `break` leaves early as soon as `error` has been set.
  while (TRUE) {
    /* flags */
    error = setFlags(this, flags);
    if (error) {
      error = new ApgExpError(error);
      break;
    }
    /* grammar object for the defining SABNF grammar */
    if (typeof input === 'string') {
      this.source = input;
      result = sabnfGenerator(input);
      if (result.error) {
        error = new ApgExpError(result.error, result.text, result.html);
        break;
      }
      priv.grammarObject = result.obj;
    } else if (
      // `typeof null` is also 'object'; reject it here so that a null input is
      // reported as invalid grammar input rather than as a wrapped TypeError
      input !== null &&
      typeof input === 'object' &&
      typeof input.grammarObject === 'string' &&
      input.grammarObject === 'grammarObject'
    ) {
      priv.grammarObject = input;
      this.source = priv.grammarObject.toString();
    } else {
      error = new ApgExpError(`${thisFileName}invalid SABNF grammar input`);
      this.source = '';
      break;
    }
    Object.defineProperty(this, 'source', readonly);
    /* the parser & AST */
    priv.charsToString = apglib.utils.charsToString;
    priv.parser = new apglib.parser();
    this.ast = new apglib.ast();
    /* `debug` is not assigned in this block; presumably set by setFlags() - TODO confirm */
    this.trace = this.debug ? new apglib.trace() : null;
    /* every rule and UDT starts with no parser callback and is included in the AST */
    for (let i = 0; i < priv.grammarObject.rules.length; i += 1) {
      const rule = priv.grammarObject.rules[i];
      priv.ruleNames[rule.lower] = rule.name;
      priv.parser.callbacks[rule.lower] = false;
      this.ast.callbacks[rule.lower] = true;
    }
    for (let i = 0; i < priv.grammarObject.udts.length; i += 1) {
      const rule = priv.grammarObject.udts[i];
      priv.ruleNames[rule.lower] = rule.name;
      priv.parser.callbacks[rule.lower] = false;
      this.ast.callbacks[rule.lower] = true;
    }
    /* nodeHit and treeDepth limits */
    if (typeof nodeHits === 'number') {
      this.nodeHits = Math.floor(nodeHits);
      if (this.nodeHits > 0) {
        priv.parser.setMaxNodeHits(this.nodeHits);
      } else {
        error = new ApgExpError(`${thisFileName}nodeHits must be integer > 0: ${nodeHits}`);
        this.nodeHits = Infinity;
        break;
      }
    } else {
      this.nodeHits = Infinity;
    }
    if (typeof treeDepth === 'number') {
      this.treeDepth = Math.floor(treeDepth);
      if (this.treeDepth > 0) {
        priv.parser.setMaxTreeDepth(this.treeDepth);
      } else {
        error = new ApgExpError(`${thisFileName}treeDepth must be integer > 0: ${treeDepth}`);
        this.treeDepth = Infinity;
        break;
      }
    } else {
      this.treeDepth = Infinity;
    }
    Object.defineProperty(this, 'nodeHits', readonly);
    Object.defineProperty(this, 'treeDepth', readonly);
    /* success */
    this.lastIndex = 0;
    break;
  }
} catch (e) {
  /* unexpected exceptions are reported in the same form as validation errors */
  error = new ApgExpError(`${e.name}: ${e.message}`);
}
if (error) {
  throw error;
}
// Find the SABNF-defined pattern in the input string.
// Can be called multiple times with the `g` or `y` flags.
//   str - the input, either a string or an array (used as-is as the character array)
// Returns `null` when `str` is neither a string nor an array; otherwise returns
// the value produced by the anchored (sticky) or forward search in `./exec`.
// Throws an `ApgExpError` if any UDT still lacks a callback function.
/* public API */
this.exec = function exec(str) {
  errorName = `${thisFileName}exec(): `;
  const isText = typeof str === 'string';
  if (!isText && !Array.isArray(str)) {
    return null;
  }
  priv.str = isText ? str : null;
  priv.chars = isText ? apglib.utils.stringToChars(str) : str;
  /* exec() parses with this object's AST and trace attached */
  priv.parser.ast = this.ast;
  priv.parser.trace = this.trace;
  const udtProblem = checkParserUdts();
  if (udtProblem) {
    throw new ApgExpError(errorName + udtProblem);
  }
  return this.sticky ? execFuncs.execAnchor(priv) : execFuncs.execForward(priv);
};
// Test for a match of the SABNF-defined pattern in the input string.
// Can be called multiple times with the `g` or `y` flags.
//   str - the input, either a string or an array (used as-is as the character array)
// Returns `null` when `str` is neither a string nor an array; otherwise returns
// the value produced by the anchored (sticky) or forward test in `./exec`.
// Throws an `ApgExpError` if any UDT still lacks a callback function.
this.test = function test(str) {
  errorName = `${thisFileName}test(): `;
  const isText = typeof str === 'string';
  if (!isText && !Array.isArray(str)) {
    return null;
  }
  priv.str = isText ? str : null;
  priv.chars = isText ? apglib.utils.stringToChars(str) : str;
  /* test() parses without an AST or a trace */
  priv.parser.ast = null;
  priv.parser.trace = null;
  // NOTE(review): this also clears the object's own `ast` and `trace`. `include()` and
  // `exclude()` dereference `ast`, and `exec()` hands `ast`/`trace` to the parser, so
  // confirm that calling them after `test()` is intended to see `null` here.
  this.ast = null;
  this.trace = null;
  const udtProblem = checkParserUdts();
  if (udtProblem) {
    throw new ApgExpError(errorName + udtProblem);
  }
  return this.sticky ? execFuncs.testAnchor(priv) : execFuncs.testForward(priv);
};
// This is roughly equivalent to the JavaScript string replacement function, `str.replace(regex, replacement)`.
// (It follows closely the
// [MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) description.)
// If the global flag `g` is set, all matched phrases will be replaced,
// otherwise, only the first.
// If the sticky flag `y` is set, all matched 'consecutive' phrases will be replaced,
// otherwise, only the first.
// If the unicode flag `u` is set, an exception will be thrown. `replace()` only works on strings, not character code arrays.
// The `replacement` string may contain the following patterns.
// <pre>
// <code>
// $$ - insert the character $
// the escape sequence for the $ character
// $` - insert the prefix to the matched pattern
// $& - insert the matched pattern
// $' - insert the suffix of the matched pattern
// ${name} - insert the last match to the rule "name"
// </code>
// </pre>
// `replacement` may also be a user-written function of the form
// <pre>
// <code>
// let replacement = function(result, exp){}
// result - the result object from the pattern match
// exp - the apg-exp object
// </code>
// </pre>
// There is quite a bit of redundancy here with both the `result` object and the `apg-exp` object being passed to the
// replacement function. However, this provides the user with a great deal of flexibility in what might be the
// most convenient way to create the replacement. Also, the `apg-exp` object has the AST which is a powerful
// translation tool for really tough replacement jobs.
//   str         - the string to search; must be a string
//   replacement - a replacement string or a replacement function
// Returns the result of the string or function replacement helper in `./replace`.
// Throws an `ApgExpError` in unicode mode or when either argument has the wrong type.
this.replace = function replace(str, replacement) {
  errorName = `${thisFileName}replace(): `;
  if (this.unicode) {
    const unicodeMsg = `${errorName}cannot do string replacement in 'unicode' mode. Insure that 'u' flag is absent.`;
    throw new ApgExpError(unicodeMsg);
  }
  if (typeof str !== 'string') {
    throw new ApgExpError(`${errorName}input type error: str not a string`);
  }
  switch (typeof replacement) {
    case 'string':
      return replaceFuncs.replaceString(priv, str, replacement);
    case 'function':
      return replaceFuncs.replaceFunction(priv, str, replacement);
    default:
      throw new ApgExpError(`${errorName}input type error: replacement not a string or function`);
  }
};
// Mimics the JavaScript `String.split(regexp)` function. That is,
// `split(str[, limit])` is roughly equivalent to `str.split(regexp[, limit])`
// Returns an array of strings.
// If `str` is undefined, null or empty the returned array
// contains a single, empty string.
// Otherwise, `exp.exec(str)` is called in global mode. If one or more matched phrases are found, they are removed from the
// string
// and the substrings are returned in an array.
// If no matched phrases are found, the array contains one element consisting of the entire string, `["str"]`.
// Empty string matches will split the string and advance `lastIndex` by one character (bump-along mode).
// That means, for example, the grammar `rule=""\n` would match the empty string at every character
// and an array of all characters would be returned. It would be similar to calling the JavaScript function `str.split("")`.
// Unlike the JavaScript function, capturing parentheses (rules) are not spliced into the output string.
// An exception is thrown if the unicode flag is set. `split()` works only on strings, not integer arrays of character codes.
// If the `limit` argument is used, it must be a positive number and no more than `limit` matches will be returned.
// A `limit` that is not of type number is ignored (no limit). A numeric `limit` is floored and an
// `ApgExpError` is thrown unless the result is greater than zero (this also rejects `NaN`).
this.split = function split(str, limit) {
  errorName = `${thisFileName}split(): `;
  if (this.unicode) {
    throw new ApgExpError(`${errorName}cannot do string split in 'unicode' mode. Insure that 'u' flag is absent.`);
  }
  if (str === undefined || str === null || str === '') {
    return [''];
  }
  if (typeof str !== 'string') {
    throw new ApgExpError(`${errorName}argument must be a string: typeof(arg): ${typeof str}`);
  }
  /* work on a local copy rather than re-assigning the caller's argument */
  let maxMatches = Infinity;
  if (typeof limit === 'number') {
    maxMatches = Math.floor(limit);
    /* `!(x > 0)` rather than `x <= 0` so that NaN is rejected as well */
    if (!(maxMatches > 0)) {
      throw new ApgExpError(`${errorName}limit must be > 0: limit: ${limit}`);
    }
  }
  return splitFuncs.split(priv, str, maxMatches);
};
// Select specific rule/UDT names to include in the result object.
// `list` is an array of (case-insensitive) rule/UDT names to include.
// All other names, not in the array, are excluded.
// If `list` is undefined, null or the string "all" (any case), every name is included.
// Excluding a rule/UDT name does not affect the operation of any functions,
// it simply excludes its phrases from the results.
// Throws an `ApgExpError` for any other `list` type, for a non-string entry or for an unknown name.
this.include = function include(list) {
  errorName = `${thisFileName}include(): `;
  /* sets the AST flag of every name known to the grammar object */
  const setEvery = (flag) => {
    for (const name in priv.grammarObject.callbacks) {
      thisThis.ast.callbacks[name] = flag;
    }
  };
  const wantsAll = list == null || (typeof list === 'string' && list.toLowerCase() === 'all');
  if (wantsAll) {
    setEvery(true);
    return;
  }
  if (!Array.isArray(list)) {
    throw new ApgExpError(`${errorName}unrecognized list type`);
  }
  /* start with everything excluded, then switch on the listed names one by one */
  setEvery(false);
  for (const entry of list) {
    if (typeof entry !== 'string') {
      throw new ApgExpError(`${errorName}invalid name type in list`);
    }
    const key = entry.toLowerCase();
    if (thisThis.ast.callbacks[key] === undefined) {
      throw new ApgExpError(`${errorName}unrecognized name in list: ${entry}`);
    }
    thisThis.ast.callbacks[key] = true;
  }
};
// Select specific rule/UDT names to exclude in the result object.
// `list` is an array of (case-insensitive) rule/UDT names to exclude.
// All other names, not in the array, are included.
// If `list` is undefined, null or the string "all" (any case), every name is excluded.
// Excluding a rule/UDT name does not affect the operation of any functions,
// it simply excludes its phrases from the results.
// Throws an `ApgExpError` for any other `list` type, for a non-string entry or for an unknown name.
this.exclude = function exclude(list) {
  errorName = `${thisFileName}exclude(): `;
  /* sets the AST flag of every name known to the grammar object */
  const setEvery = (flag) => {
    for (const name in priv.grammarObject.callbacks) {
      thisThis.ast.callbacks[name] = flag;
    }
  };
  const wantsAll = list == null || (typeof list === 'string' && list.toLowerCase() === 'all');
  if (wantsAll) {
    setEvery(false);
    return;
  }
  if (!Array.isArray(list)) {
    throw new ApgExpError(`${errorName}unrecognized list type`);
  }
  /* start with everything included, then switch off the listed names one by one */
  setEvery(true);
  for (const entry of list) {
    if (typeof entry !== 'string') {
      throw new ApgExpError(`${errorName}invalid name type in list`);
    }
    const key = entry.toLowerCase();
    if (thisThis.ast.callbacks[key] === undefined) {
      throw new ApgExpError(`${errorName}unrecognized name in list: ${entry}`);
    }
    thisThis.ast.callbacks[key] = false;
  }
};
// Defines a UDT callback function. *All* UDTs appearing in the SABNF phrase syntax must be defined here.
// <pre><code>
// name - the (case-insensitive) name of the UDT
// func - the UDT callback function
// </code></pre>
// Throws an `ApgExpError` if `name` is not a string, `func` is not a function,
// or `name` does not match any UDT of the grammar.
this.defineUdt = function defineUdt(name, func) {
  errorName = `${thisFileName}defineUdt(): `;
  if (typeof name !== 'string') {
    throw new ApgExpError(`${errorName}'name' must be a string`);
  }
  if (typeof func !== 'function') {
    throw new ApgExpError(`${errorName}'func' must be a function reference`);
  }
  const lowerName = name.toLowerCase();
  const udtIndex = priv.grammarObject.udts.findIndex((udt) => udt.lower === lowerName);
  if (udtIndex < 0) {
    throw new ApgExpError(`${errorName}'name' not a UDT name: ${name}`);
  }
  /* callbacks are keyed by the lower-case UDT name */
  priv.parser.callbacks[lowerName] = func;
};
// Estimates the upper bound of the call stack depth for this JavaScript
// engine. Taken from [here](http://www.2ality.com/2014/04/call-stack-size.html)
// The function calls itself until a call throws, and every level adds 1 to the
// value returned, so the result is the number of nested calls that were made.
this.maxCallStackDepth = function maxCallStackDepth() {
try {
return 1 + this.maxCallStackDepth();
} catch (e) {
// any exception ends the recursion; this deepest frame counts as 1
return 1;
}
};
// Returns the "last match" information in the `apg-exp` object in ASCII text.
// Patterned after and similar to the JavaScript
// [`RegExp` properties](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp).
//   mode - (optional) used in unicode mode only; selects how character arrays are displayed
this.toText = function toText(mode) {
  if (this.unicode) {
    /* mode:
      - "ascii", (default) display characters arrays as ASCII text
      - "decimal", display character arrays as decimal integers
      - "hexadecimal", display character arrays as hexadecimal (\xHH) integers
      - "unicode", display character arrays as unicode (\uHH) integers
    */
    /* dispatch to the unicode formatter, as toHtml() and toHtmlPage() do */
    return resultFuncs.u.expToText(this, mode);
  }
  return resultFuncs.s.expToText(this);
};
// Returns the "last match" information in the `apg-exp` object formatted as an HTML table.
//   mode - (optional) used in unicode mode only; one of "ascii", "decimal",
//          "hexadecimal" or "unicode", selecting how character arrays are displayed
this.toHtml = function toHtml(mode) {
  return this.unicode ? resultFuncs.u.expToHtml(this, mode) : resultFuncs.s.expToHtml(this);
};
// Same as `toHtml()` except the output is a complete HTML page.
//   mode - (optional) used in unicode mode only; one of "ascii", "decimal",
//          "hexadecimal" or "unicode", selecting how character arrays are displayed
this.toHtmlPage = function toHtmlPage(mode) {
  return this.unicode ? resultFuncs.u.expToHtmlPage(this, mode) : resultFuncs.s.expToHtmlPage(this);
};
/* Returns the SABNF syntax or grammar defining the pattern in ASCII text format. */
this.sourceToText = function sourceToText() {
return resultFuncs.s.sourceToText(this);
};
/* Returns the SABNF syntax or grammar defining the pattern in HTML format. */
this.sourceToHtml = function sourceToHtml() {
return resultFuncs.s.sourceToHtml(this);
};
/* Returns the SABNF syntax or grammar defining the pattern as a complete HTML page. */
this.sourceToHtmlPage = function sourceToHtmlPage() {
return resultFuncs.s.sourceToHtmlPage(this);
};
};