// buffer-apg-js
// Version:
// JavaScript APG, an ABNF Parser Generator
// 497 lines (487 loc) • 18.7 kB
// JavaScript
/* *************************************************************************************
* copyright: Copyright (c) 2021 Lowell D. Thomas, all rights reserved
* license: BSD-2-Clause (https://opensource.org/licenses/BSD-2-Clause)
* ********************************************************************************* */
// This module is the Application Programming Interface (API) for **APG** - the ABNF Parser Generator.
//
// *Note on terminology.*
// APG is a parser generator.
// However, it really only generates a "grammar object" (see below) from the defining SABNF grammar.
// The generated parser is incomplete at this stage.
// It remains the job of the user to develop the generated parser from the grammar object and the **APG** Library (**apg-lib**).
//
// The following terminology may help clear up any confusion between the idea of a "generated parser" versus a "generated grammar object".
// - The generating parser: **APG** is an **APG** parser (yes, there is a circular dependence between **apg-api** and **apg-lib**). We'll call it the generating parser.
// - The target parser: **APG**'s goal is to generate a parser. We'll call it the target parser.
// - The target grammar: this is the (ASCII) SABNF grammar defining the target parser.
// - The target grammar object: **APG** parses the SABNF grammar and generates the JavaScript source for a target grammar object constructor function
// and/or an actual grammar object.
// - The final target parser: The user then develops the final target parser using the generated target grammar
// object and the **APG** parsing library, **apg-lib**.
// Throws exceptions on fatal errors.
//
// src: the input SABNF grammar<br>
// may be one of:
// - Buffer of bytes
// - JavaScript string
// - Array of integer character codes
module.exports = function api(src) {
const thisFileName = 'api.js: ';
const thisObject = this;
const Buffer = require('safer-buffer').Buffer;
/* PRIVATE PROPERTIES */
const apglib = require('../apg-lib/node-exports');
const converter = require('../apg-conv-api/converter');
const scanner = require('./scanner');
const parser = new (require('./parser'))();
const { attributes, showAttributes, showAttributeErrors, showRuleDependencies } = require('./attributes');
const showRules = require('./show-rules');
/* PRIVATE MEMBERS (FUNCTIONS) */
/* Convert a phrase (array of character codes) to HTML.
 * - chars : array of integer character codes
 * - beg   : index of the first character to convert; must be a number, otherwise an Error is thrown
 * - len   : number of characters to convert; if it is not a number or reaches past the end
 *           of `chars`, the conversion runs through the last character
 * Returns '' when `chars` is not a non-empty array or `beg` is at or beyond its end.
 * Printable ASCII (32-126) is emitted directly, with HTML-significant characters written as
 * entities. TAB, LF and CR are named inside a CLASS_CTRLCHAR span and every other value is
 * shown as \xHH inside a CLASS_NOMATCH span. */
const abnfToHtml = function abnfToHtml(chars, beg, len) {
  const NORMAL = 0;
  const CONTROL = 1;
  const INVALID = 2;
  const CONTROL_BEG = `<span class="${apglib.style.CLASS_CTRLCHAR}">`;
  const CONTROL_END = '</span>';
  const INVALID_BEG = `<span class="${apglib.style.CLASS_NOMATCH}">`;
  const INVALID_END = '</span>';
  /* span opening/closing text, indexed by state */
  const OPEN = ['', CONTROL_BEG, INVALID_BEG];
  const CLOSE = ['', CONTROL_END, INVALID_END];
  /* printable characters that must be written as HTML entities */
  const ENTITIES = new Map([
    [32, '&nbsp;'],
    [34, '&quot;'],
    [38, '&amp;'],
    [39, '&#039;'],
    [60, '&lt;'],
    [62, '&gt;'],
    [92, '&#92;'],
  ]);
  /* the control characters that are displayed by name */
  const CONTROL_NAMES = new Map([
    [9, 'TAB'],
    [10, 'LF'],
    [13, 'CR'],
  ]);
  if (!Array.isArray(chars) || chars.length === 0) {
    return '';
  }
  if (typeof beg !== 'number') {
    throw new Error('abnfToHtml: beg must be type number');
  }
  if (beg >= chars.length) {
    return '';
  }
  const end = typeof len !== 'number' || beg + len >= chars.length ? chars.length : beg + len;
  let html = '';
  let state = NORMAL;
  /* close the span of the current state (if any) and open the span of the next one */
  const enter = (next) => {
    if (state !== next) {
      html += CLOSE[state] + OPEN[next];
      state = next;
    }
  };
  for (let i = beg; i < end; i += 1) {
    const ch = chars[i];
    if (ch >= 32 && ch <= 126) {
      /* normal - printable ASCII characters */
      enter(NORMAL);
      html += ENTITIES.has(ch) ? ENTITIES.get(ch) : String.fromCharCode(ch);
    } else if (CONTROL_NAMES.has(ch)) {
      /* control characters */
      enter(CONTROL);
      html += CONTROL_NAMES.get(ch);
    } else {
      /* invalid characters - display character as hexadecimal value */
      enter(INVALID);
      html += `\\x${apglib.utils.charToHex(ch)}`;
    }
  }
  /* close whichever span is still open */
  html += CLOSE[state];
  return html;
};
/* Render `len` character codes of `chars`, starting at index `beg`, as ASCII text.
 * Printable ASCII (32-126) is copied through; TAB, LF and CR are written as the
 * two-character escapes \t, \n and \r; anything else is written as \unknown. */
const abnfToAscii = function abnfToAscii(chars, beg, len) {
  const escapes = new Map([
    [9, '\\t'],
    [10, '\\n'],
    [13, '\\r'],
  ]);
  const stop = beg + len;
  let text = '';
  let index = beg;
  while (index < stop) {
    const code = chars[index];
    const printable = code >= 32 && code <= 126;
    if (printable) {
      text += String.fromCharCode(code);
    } else {
      text += escapes.has(code) ? escapes.get(code) : '\\unknown';
    }
    index += 1;
  }
  return text;
};
/* Produce the annotated, plain-text listing of the grammar lines: a title line followed
 * by one line per entry giving its line number, first character index, length and the
 * ASCII rendering of its characters (taken from `thisObject.chars`). */
const linesToAscii = function linesToAscii(lines) {
  const listing = lines.reduce(
    (text, entry) =>
      `${text}\nline no: ${entry.lineNo} : char index: ${entry.beginChar} : length: ${
        entry.length
      } : abnf: ${abnfToAscii(thisObject.chars, entry.beginChar, entry.length)}`,
    'Annotated Input Grammar'
  );
  return `${listing}\n`;
};
/* Produce the annotated grammar listing as an HTML table (class CLASS_GRAMMAR):
 * a caption, a header row, then one row per line entry with its line number,
 * first character index, length and the HTML rendering of its characters. */
const linesToHtml = function linesToHtml(lines) {
  const pieces = [
    `<table class="${apglib.style.CLASS_GRAMMAR}">\n`,
    '<caption>Annotated Input Grammar</caption>\n',
    '<tr><th>line<br>no.</th><th>first<br>char</th><th><br>length</th><th><br>text</th></tr>\n',
  ];
  lines.forEach((entry) => {
    pieces.push(
      `<tr><td>${entry.lineNo}</td><td>${entry.beginChar}</td><td>${entry.length}</td><td>${abnfToHtml(
        thisObject.chars,
        entry.beginChar,
        entry.length
      )}</td></tr>\n`
    );
  });
  pieces.push('</table>\n');
  return pieces.join('');
};
/* Format the error messages to HTML, for page display.
 * - errors : array of error objects ({line, char, msg})
 * - lines  : array of line objects ({beginChar, length, ...}) indexed by `error.line`
 * - chars  : the grammar as an array of character codes
 * - title  : (optional) non-empty string used as the table caption
 * Returns a table (class CLASS_GRAMMAR) with two rows per error: the offending line with
 * a marker at the error position, then the error message.
 * An error whose line is not available (e.g. no lines were catalogued) is still reported,
 * with the marker alone as its text and zero offsets. */
const errorsToHtml = function errorsToHtml(errors, lines, chars, title) {
  /* `style` is a property of the library object, so it is destructured as an object */
  const { style } = apglib;
  const errorArrow = `<span class="${style.CLASS_NOMATCH}">&raquo;</span>`;
  let html = `<p><table class="${style.CLASS_GRAMMAR}">\n`;
  if (title && typeof title === 'string') {
    html += `<caption>${title}</caption>\n`;
  }
  html += '<tr><th>line<br>no.</th><th>line<br>offset</th><th>error<br>offset</th><th><br>text</th></tr>\n';
  errors.forEach((val) => {
    const line = Array.isArray(lines) ? lines[val.line] : undefined;
    let lineOffset = 0;
    let relchar = 0;
    let text = errorArrow;
    if (line) {
      const lineEnd = line.beginChar + line.length;
      /* text of the line before and after the error position */
      const prefix = val.char > line.beginChar ? abnfToHtml(chars, line.beginChar, val.char - line.beginChar) : '';
      const suffix = val.char < lineEnd ? abnfToHtml(chars, val.char, lineEnd - val.char) : '';
      text = prefix + errorArrow + suffix;
      lineOffset = line.beginChar;
      relchar = val.char - line.beginChar;
    }
    html += `<tr><td>${val.line}</td><td>${lineOffset}</td><td>${relchar}</td><td>${text}</td></tr>\n`;
    html += `<tr><td colspan="3"></td><td>&uarr;: ${apglib.utils.stringToAsciiHtml(val.msg)}</td></tr>\n`;
  });
  html += '</table></p>\n';
  return html;
};
/* Display an array of errors in ASCII text.
 * - errors : array of error objects ({line, char, msg})
 * - lines  : array of line objects ({lineNo, beginChar, length, ...}) indexed by `error.line`
 * - chars  : the grammar as an array of character codes
 * Each error produces two text lines, both prefixed with
 * "<line no>: <line offset>: <error offset>: " - first the source line with " >> "
 * inserted at the error position, then "error: <message>".
 * If the line of an error is not available (e.g. no lines were catalogued), only the
 * message line is produced, using the error's own line number and character offset. */
const errorsToAscii = function errorsToAscii(errors, lines, chars) {
  let str = '';
  errors.forEach((error) => {
    const line = Array.isArray(lines) ? lines[error.line] : undefined;
    if (!line) {
      /* no source text to show - report the message rather than failing on the missing line */
      str += `${error.line}: 0: ${error.char}: error: ${error.msg}\n`;
      return;
    }
    const relchar = error.char - line.beginChar;
    const prefix = `${line.lineNo}: ${line.beginChar}: ${relchar}: `;
    const before = abnfToAscii(chars, line.beginChar, relchar);
    const after = abnfToAscii(chars, error.char, line.beginChar + line.length - error.char);
    str += `${prefix}${before} >> ${after}\n`;
    str += `${prefix}error: ${error.msg}\n`;
  });
  return str;
};
// Private state shared by the public member functions below.
// The boolean flags record which stages have completed and guard the later stages.
let isScanned = false; // set to true by scan()
let isParsed = false; // set to true by parse()
let isTranslated = false; // set to true by translate() when there are no errors
let haveAttributes = false; // set to true by attributes() and by generate()
let attributeErrors = 0; // the value returned by the most recent attributes(...) computation
let lineMap; // the `lineMap` member of the object returned by parser.semantic()
/* PUBLIC PROPERTIES */
// The input SABNF grammar as a JavaScript string.
// this.sabnf;
// The input SABNF grammar as an array of character codes.
// this.chars;
// An array of line objects, defining each line of the input SABNF grammar
// - lineNo : the zero-based line number
// - beginChar : offset (into `this.chars`) of the first character in the line
// - length : the number of characters in the line
// - textLength : the number of characters of text in the line, excluding the line ending characters
// - endType : "CRLF", "LF", "CR" or "none" if the last line has no line ending characters
// - invalidChars : `true` if the line contains invalid characters, `false` otherwise
// this.lines;
// An array of rule names and data.
// - name : the rule name
// - lower : the rule name in lower case
// - index : the index of the rule (ordered by appearance in SABNF grammar)
// - isBkr : `true` if this rule has been back referenced, `false` otherwise
// - opcodes : array of opcodes for this rule
// - attrs : the rule attributes
// - ctrl : system data
// this.rules;
// An array of UDT names and data.
// this.udts;
// An array of errors, if any.
// - line : the line number containing the error
// - char : the character offset of the error
// - msg : the error message
this.errors = [];
/* CONSTRUCTOR */
/* Normalize the accepted input forms (byte Buffer, array of character codes,
 * JavaScript string) to an array of character codes; anything else is rejected. */
const toCharCodes = (input) => {
  if (Buffer.isBuffer(input)) {
    return converter.decode('BINARY', input);
  }
  if (Array.isArray(input)) {
    /* copy, so the caller's array is not shared */
    return input.slice();
  }
  if (typeof input === 'string') {
    return converter.decode('STRING', input);
  }
  throw new Error(`${thisFileName}input source is not a string, byte Buffer or character array`);
};
this.chars = toCharCodes(src);
/* the string form is always derived from the character codes */
this.sabnf = converter.encode('STRING', this.chars);
/* PUBLIC MEMBERS (FUNCTIONS) */
// Scan the input SABNF grammar for invalid characters and catalog the lines via `this.lines`.
// - strict : (optional) if `true`, all lines, including the last must end with CRLF (\r\n),
// if `false` (in any JavaScript sense) then line endings may be any mix of CRLF, LF, CR, or end-of-file.
// - trace (*) : (optional) a parser trace object, which will trace the parser that does the scan
this.scan = function scan(strict, trace) {
  /* the scanner catalogs the lines and appends any problems it finds to `this.errors` */
  const { chars, errors } = this;
  this.lines = scanner(chars, errors, strict, trace);
  isScanned = true;
};
// Parse the input SABNF grammar for correct syntax.
// - strict : (optional) if `true`, the input grammar must be strict ABNF, conforming to [RFC 5234](https://tools.ietf.org/html/rfc5234)
// and [RFC 7405](https://tools.ietf.org/html/rfc7405). No superset features allowed.
// - trace (\*) : (optional) a parser trace object, which will trace the syntax parser
//
// <i>(*)NOTE: the trace option was used primarily during development.
// Error detection and reporting is now fairly robust and tracing should be unnecessary. Use at your own peril.</i>
this.parse = function parse(strict, trace) {
  if (isScanned) {
    /* syntax errors, if any, are appended to `this.errors` */
    parser.syntax(this.chars, this.lines, this.errors, strict, trace);
    isParsed = true;
    return;
  }
  /* the syntax phase needs the line catalog produced by scan() */
  throw new Error(`${thisFileName}grammar not scanned`);
};
// Translate the SABNF grammar syntax into the opcodes that will guide the parser for this grammar.
this.translate = function translate() {
  if (!isParsed) {
    throw new Error(`${thisFileName}grammar not scanned and parsed`);
  }
  const semantic = parser.semantic(this.chars, this.lines, this.errors);
  if (this.errors.length !== 0) {
    /* with errors outstanding the result is not published and the stage stays incomplete */
    return;
  }
  this.rules = semantic.rules;
  this.udts = semantic.udts;
  lineMap = semantic.lineMap;
  isTranslated = true;
};
// Compute the attributes of each rule.
this.attributes = function attrs() {
  if (isTranslated) {
    /* remember the result - the display and output members consult it later */
    const errorCount = attributes(this.rules, this.udts, lineMap, this.errors);
    attributeErrors = errorCount;
    haveAttributes = true;
    return errorCount;
  }
  throw new Error(`${thisFileName}grammar not scanned, parsed and translated`);
};
// This function will perform the full suite of steps required to generate a parser grammar object
// from the input SABNF grammar.
this.generate = function generate(strict) {
  /* Runs scan, syntax, semantic and attribute phases in sequence, stopping (with the
   * problems recorded in `this.errors`) at the first phase that reports errors.
   * - strict : (optional) passed to both the scanner and the syntax phase
   * Each phase records its completion flag exactly as the stand-alone scan(), parse(),
   * translate() and attributes() members do, so that members guarded by those flags
   * (e.g. displayRules()) are usable after generate(). */
  this.lines = scanner(this.chars, this.errors, strict);
  isScanned = true;
  if (this.errors.length) {
    return;
  }
  parser.syntax(this.chars, this.lines, this.errors, strict);
  isParsed = true;
  if (this.errors.length) {
    return;
  }
  const ret = parser.semantic(this.chars, this.lines, this.errors);
  if (this.errors.length) {
    return;
  }
  this.rules = ret.rules;
  this.udts = ret.udts;
  lineMap = ret.lineMap;
  isTranslated = true;
  attributeErrors = attributes(this.rules, this.udts, lineMap, this.errors);
  haveAttributes = true;
};
// Display the rules.
// Must scan, parse and translate before calling this function, otherwise there are no rules to display.
// - order
// - "index" or "i", index order (default)
// - "alpha" or "a", alphabetical order
// - none of above, index order (default)
this.displayRules = function displayRules(order = 'index') {
  if (isTranslated) {
    const { rules, udts } = this;
    return showRules(rules, udts, order);
  }
  /* the rules only exist once translation has completed */
  throw new Error(`${thisFileName}grammar not scanned, parsed and translated`);
};
// Display the rule dependencies.
// Must scan, parse, translate and compute attributes before calling this function.
// Otherwise the rule dependencies are not known.
// - order
// - "index" or "i", index order (default)
// - "alpha" or "a", alphabetical order
// - "type" or "t", ordered by type (alphabetical within each type/group)
// - none of above, index order (default)
this.displayRuleDependencies = function displayRuleDependencies(order = 'index') {
  if (haveAttributes) {
    return showRuleDependencies(order);
  }
  /* refused until the attributes have been computed */
  throw new Error(`${thisFileName}no attributes - must be preceeded by call to attributes()`);
};
// Display the attributes.
// Must scan, parse, translate and compute attributes before calling this function.
// - order
// - "index" or "i", index order (default)
// - "alpha" or "a", alphabetical order
// - "type" or "t", ordered by type (alphabetical within each type/group)
// - none of above, type order (default)
this.displayAttributes = function displayAttributes(order = 'index') {
  if (haveAttributes) {
    if (attributeErrors) {
      /* NOTE(review): the value returned here is discarded, while displayAttributeErrors()
       * returns the result of the same function - confirm whether this call is intentional. */
      showAttributeErrors(order);
    }
    return showAttributes(order);
  }
  /* refused until the attributes have been computed */
  throw new Error(`${thisFileName}no attributes - must be preceeded by call to attributes()`);
};
// Display the attribute errors.
// The attributes must have been computed before calling this function.
this.displayAttributeErrors = function displayAttributeErrors() {
  if (haveAttributes) {
    return showAttributeErrors();
  }
  throw new Error(`${thisFileName}no attributes - must be preceeded by call to attributes()`);
};
// Returns a parser grammar object constructor function as a JavaScript string.
// This object can then be used to construct a parser.
this.toSource = function toSource(name) {
  /* source is only generated from attributes that were computed and are error free */
  const ready = haveAttributes;
  if (!ready) {
    throw new Error(`${thisFileName}can't generate parser source - must be preceeded by call to attributes()`);
  }
  const errorCount = attributeErrors;
  if (errorCount) {
    throw new Error(`${thisFileName}can't generate parser source - attributes have ${errorCount} errors`);
  }
  const { chars, lines, rules, udts } = this;
  return parser.generateSource(chars, lines, rules, udts, name);
};
// Returns a parser grammar object.
// This grammar object may be used by the application to construct a parser.
this.toObject = function toObject() {
  /* an object is only generated from attributes that were computed and are error free */
  const ready = haveAttributes;
  if (!ready) {
    throw new Error(`${thisFileName}can't generate parser source - must be preceeded by call to attributes()`);
  }
  const errorCount = attributeErrors;
  if (errorCount) {
    throw new Error(`${thisFileName}can't generate parser source - attributes have ${errorCount} errors`);
  }
  const { sabnf, rules, udts } = this;
  return parser.generateObject(sabnf, rules, udts);
};
// Display errors in text format, suitable for `console.log()`.
this.errorsToAscii = function errorsToAsciiFunc() {
  /* public face of the private formatter, applied to this object's own state */
  const { errors, lines, chars } = this;
  return errorsToAscii(errors, lines, chars);
};
// Display errors in HTML format, suitable for web page display.
// (`apg-lib.css` required for proper styling)
this.errorsToHtml = function errorsToHtmlFunc(title) {
  /* public face of the private formatter, applied to this object's own state */
  const { errors, lines, chars } = this;
  return errorsToHtml(errors, lines, chars, title);
};
// Generate an annotated SABNF grammar display in text format.
this.linesToAscii = function linesToAsciiFunc() {
  /* public face of the private formatter, applied to this object's line catalog */
  const { lines } = this;
  return linesToAscii(lines);
};
// Generate an annotated SABNF grammar display in HTML format.
// (`apg-lib.css` required for proper styling)
this.linesToHtml = function linesToHtmlFunc() {
  /* public face of the private formatter, applied to this object's line catalog */
  const { lines } = this;
  return linesToHtml(lines);
};
// This function was only used by apg.html which has been abandoned.
/*
this.getAttributesObject = function () {
return null;
};
*/
};