// apg-unicode
// Version:
// JavaScript APG parser of Unicode code point arrays
// 333 lines (303 loc) • 12 kB
// JavaScript
// # Trace Object Documentation
// The `Trace` object follows the parser through the parse tree and generates a line of text for each node visit.
// It is primarily used for debugging failed parses—helping identify where and why a parse failed.
// Each trace line displays:
// - Tree depth
// - Direction of the visit (going up or down the tree)
// - Result of the node visit
// - Node type
// - Phrase the parser is matching or has matched
// ---
// ## Constructor Configuration
// The `Trace` constructor accepts an optional configuration object with the following properties:
// | Property | Type | Default | Description |
// |--------------|-----------|-----------|-------------|
// | `displayType`| `string` | `'hex'` | Controls how the input string is displayed. Options: `'hex'`, `'ascii'`, `'unicode'`. See [Display Type Details](#display-type-details). |
// | `maxChars` | `number` | `50` | Maximum number of characters to display per trace line. |
// | `maxLines` | `number` | `5000` | Maximum number of trace lines to display. |
// | `lastLines` | `boolean` | `true` | If `true`, displays only the last `maxLines` of trace output. If `false`, displays the first `maxLines`. |
// Note on `lastLines`: when a parse fails, the parser usually stops at the point of failure,
// so the last few lines of the trace are usually the ones that show the problem.
// Hence, the default is to keep and display only the last `maxLines` lines.
// ---
// ## Display Type Details
// The parser processes strings as arrays of positive integers. These typically represent character codes,
// but their interpretation depends on the SABNF grammar and application needs.
// The `displayType` setting determines how these integers are rendered.
// The output also varies based on the input string array type.
// ### `hex`
// Follows the format of the Linux `hexdump` utility: hexadecimal representation followed by an ASCII or Unicode rendition.
// | Input Type | Format Description |
// |----------------|--------------------|
// | `Uint8Array` | Each character is shown as a 2-digit hex byte. Printable ASCII characters are rendered; others appear as `.`. |
// | `Uint16Array` | Each character is shown as a 4-digit hex word. Valid Unicode code points are rendered; surrogate pairs appear as `.`. |
// | `Uint32Array` | Each character is shown as an 8-digit hex double word. Valid Unicode characters are rendered. |
// | `Array` | Same as `Uint32Array`, but values > `0xFFFFFFFF` are capped at `0xFFFFFFFF`. |
// ### `ascii`
// All characters are interpreted as ASCII. Non-printing characters are displayed as `.`.
// ### `unicode`
// All characters that represent valid Unicode code points are rendered. Invalid code points are displayed as `.`.
import { utilities as utils } from '../src/utilities.js';
import { identifiers as id } from '../src/identifiers.js';
export { Trace };
// The `Trace` class records one line of text for each node visit reported by the parser
// through `down()` and `up()`. The recorded lines are returned by `displayTrace()`.
// The public members are arrow-function fields, so they stay bound to the instance
// even when they are passed around as bare function references.
class Trace {
  /* prefix for the error messages thrown by this class */
  #FILENAME = 'trace.js: ';
  /* indentation characters - every fifth depth level is drawn with `#OFFSET_SPACE` */
  #OFFSET_CHAR = '.';
  #OFFSET_SPACE = '|';
  /* node state markers */
  #STATE_ACTIVE = '|-|';
  #STATE_MATCH = '|M|';
  #STATE_NOMATCH = '|N|';
  #STATE_EMPTY = '|E|';
  /* phrase terminators: the phrase reaches the end of the input / the phrase was cut at `maxChars` */
  #END_CHAR = '\u25c6';
  #END_MORE = '\u2026';
  /* internal display type identifiers */
  #HEX = 101;
  #ASCII = 102;
  #UNICODE = 103;
  /* parser data handed over by `init()` */
  #chars;
  #charEnd;
  #charType;
  #rules;
  #udts;
  /* trace state - given initial values so that `lineCount()` is numeric even before `init()` */
  #treeDepth = 0;
  #lineCount = 0;
  #lineIndex = 0;
  #out = [];
  #config = {
    displayType: this.#HEX,
    maxChars: 50,
    maxLines: 5000,
    lastLines: true,
  };

  // The class constructor takes an optional configuration object.
  // |key|value|
  // |---|--------|
  // |conf.displayType| 'hex'(default), 'ascii' or 'unicode' (case-insensitive)|
  // |conf.maxChars|The maximum number of characters to display on a single line(default=50).|
  // |conf.maxLines|The maximum number of lines to display(default=5000).|
  // |conf.lastLines|If `true`(default) display only the last `maxLines`, if `false` display the first `maxLines`.|
  //
  // Values of the wrong type, negative numbers and unrecognized display types are ignored
  // and the default is used instead. Fractional limits are rounded down.
  constructor(conf) {
    if (conf == null) {
      /* use the entire default configuration */
      return;
    }
    if (typeof conf.displayType === 'string') {
      const lower = conf.displayType.toLowerCase();
      if (lower.startsWith('hex')) {
        this.#config.displayType = this.#HEX;
      } else if (lower === 'ascii') {
        this.#config.displayType = this.#ASCII;
      } else if (lower === 'unicode') {
        this.#config.displayType = this.#UNICODE;
      } /* else use default displayType value */
    }
    if (typeof conf.maxChars === 'number' && conf.maxChars >= 0) {
      /* a character count is a whole number; `Infinity` is left untouched by `Math.floor()` */
      this.#config.maxChars = Math.floor(conf.maxChars);
    } /* else use default maxChars value */
    if (typeof conf.maxLines === 'number' && conf.maxLines >= 0) {
      /* a fractional limit would let the line buffer grow one slot past the limit */
      this.#config.maxLines = Math.floor(conf.maxLines);
    } /* else use default maxLines value */
    if (conf.lastLines === false) {
      this.#config.lastLines = false;
    } /* else use the default true */
  }

  // Called by the application upon parser completion.
  // Returns the total number of node visits reported since the last `init()`,
  // including the visits whose lines were not kept.
  lineCount = () => this.#lineCount;

  // Called by the application upon parser completion.
  // Returns the kept trace lines, oldest first, as a single string.
  // Returns an empty string if no lines have been recorded.
  displayTrace = () => {
    const lines = this.#out;
    if (lines.length === 0) {
      /* protect against application calling this before any parsing is done */
      return '';
    }
    const next = this.#lineIndex;
    if (!this.#config.lastLines || next === lines.length) {
      /* there is no wrap around - the array is already in chronological order */
      return lines.join('');
    }
    /* The array has wrapped: slots [0, next) hold the newest lines, slots [next, end) the older ones. */
    /* The two halves are joined separately rather than merged with `push(...spread)`, */
    /* which passes every element as a call argument and can overflow the stack for a very large buffer. */
    return lines.slice(next).join('') + lines.slice(0, next).join('');
  };

  // Called by the parser to initialize the Trace object.
  // - r: the rules array (each rule's `name` is used for `RNM` nodes)
  // - u: the UDT array (each UDT's `name` is used for `UDT` nodes)
  // - c: the input characters
  // - e: the index one past the last character to display
  // - t: the character array type, compared against `id.UINT8`, `id.UINT16`, `id.UINT32` and `id.ARRAY`
  //
  // Discards any lines recorded by a previous parse.
  init = (r, u, c, e, t) => {
    this.#rules = r;
    this.#udts = u;
    this.#chars = c;
    this.#charEnd = e;
    this.#charType = t;
    this.#lineCount = 0;
    this.#lineIndex = 0;
    this.#treeDepth = 0;
    this.#out.length = 0;
  };

  // Called by the parser to generate a trace line for the
  // downward passage of the parser through a node.
  // - op: the opcode of the node being entered
  // - offset: the index of the first character the node will attempt to match
  down = (op, offset) => {
    if (!this.#validateLineCount()) {
      return;
    }
    const lead = this.#indent(this.#treeDepth);
    const phrase = this.#downPhrase(offset);
    this.#treeDepth += 1;
    /* build the line first so the slot index only advances when a line is actually stored */
    const line = `${lead}${this.#STATE_ACTIVE}[${this.#opName(op)}]${phrase}\n`;
    this.#out[this.#lineIndex] = line;
    this.#lineIndex += 1;
  };

  // Called by the parser to generate a trace line for the
  // upward passage of the parser through a node.
  // - op: the opcode of the node being left
  // - state: `id.EMPTY`, `id.MATCH` or `id.NOMATCH`
  // - offset: the index of the first character of the matched phrase
  // - phraseLength: the number of characters matched
  //
  // Throws an `Error` if `state` is not one of the three recognized states.
  up = (op, state, offset, phraseLength) => {
    if (!this.#validateLineCount()) {
      return;
    }
    const thisFunc = `${this.#FILENAME}trace.up: `;
    this.#treeDepth -= 1;
    const lead = this.#indent(this.#treeDepth);
    let marker;
    let phrase;
    switch (state) {
      case id.EMPTY:
        marker = this.#STATE_EMPTY;
        phrase = `''`;
        break;
      case id.MATCH:
        marker = this.#STATE_MATCH;
        phrase = this.#upPhrase(offset, phraseLength);
        break;
      case id.NOMATCH:
        marker = this.#STATE_NOMATCH;
        phrase = '';
        break;
      default:
        throw new Error(`${thisFunc} unrecognized state`);
    }
    const line = `${lead}${marker}[${this.#opName(op)}]${phrase}\n`;
    this.#out[this.#lineIndex] = line;
    this.#lineIndex += 1;
  };

  // Indents a line to indicate the tree depth of the node.
  // One character per level, every fifth level drawn with `#OFFSET_SPACE`
  // to make deep indentations easier to count. Depths <= 0 produce no indentation.
  #indent(depth) {
    let lead = '';
    for (let level = 1; level <= depth; level += 1) {
      lead += level % 5 === 0 ? this.#OFFSET_SPACE : this.#OFFSET_CHAR;
    }
    return lead;
  }

  // Generates a string identifying the node: the operator name from `id.idName()`
  // followed, for some operator types, by its distinguishing data in parentheses.
  #opName(op) {
    let name = id.idName(op.type);
    switch (op.type) {
      case id.REP:
        name += op.max === Infinity ? `(${op.min},inf)` : `(${op.min},${op.max})`;
        break;
      case id.RNM:
        name += `(${this.#rules[op.index].name})`;
        break;
      case id.TRG:
        name += `(${op.min},${op.max})`;
        break;
      case id.TBS:
      case id.TLS: {
        /* literal strings longer than 6 characters are abbreviated to the first 3 */
        const literal =
          op.string.length > 6
            ? `(${utils.arrayToAscii(op.string, 0, 3)}...)`
            : `(${utils.arrayToAscii(op.string, 0, 6)})`;
        name += utils.exposeCtrlChars(literal);
        break;
      }
      case id.UDT:
        name += `(${this.#udts[op.index].name})`;
        break;
      default:
        /* all other operators are identified by name only */
        break;
    }
    return name;
  }

  // Generates the phrase display. The display format is a function
  // of the `conf.displayType` and the string type (`Uint8Array`, etc.).
  // Throws an `Error` for a character type or display type that is not recognized.
  #typeToPhrase(indexStart, length) {
    /* NOTE(review): the character type is presumably validated by the parser before `init()` is called - confirm */
    const chars = this.#chars;
    switch (this.#config.displayType) {
      case this.#HEX:
        switch (this.#charType) {
          case id.UINT8:
            return utils.hexLineBytes(chars, indexStart, length);
          case id.UINT16:
            return utils.hexLineWords(chars, indexStart, length);
          case id.UINT32:
            return utils.hexLineDWords(chars, indexStart, length);
          case id.ARRAY:
            return utils.hexLineArray(chars, indexStart, length);
          default:
            /* should never get here */
            throw new Error(`${this.#FILENAME}typeToPhrase() unrecognized character type: ${this.#charType}`);
        }
      case this.#ASCII:
        return utils.arrayToAscii(chars, indexStart, length);
      case this.#UNICODE:
        return utils.arrayToUnicode(chars, indexStart, length);
      default:
        /* should never get here */
        throw new Error(`${this.#FILENAME}typeToPhrase() bad type: ${this.#config.displayType}`);
    }
  }

  // Generates the phrase to display for downward passage through a node:
  // the remaining input from `offset`, cut at `maxChars` characters.
  // The phrase ends with `#END_MORE` if it was cut, otherwise with `#END_CHAR`.
  #downPhrase(offset) {
    const remaining = this.#charEnd - offset;
    const shown = Math.min(this.#config.maxChars, remaining);
    const terminator = shown < remaining ? this.#END_MORE : this.#END_CHAR;
    return this.#typeToPhrase(offset, shown) + terminator;
  }

  // Generates the phrase to display for upward passage through a node:
  // the matched phrase, cut at `maxChars` characters.
  // The phrase ends with `#END_MORE` if it was cut, with `#END_CHAR` if it
  // reaches the end of the input, and has no terminator otherwise.
  #upPhrase(offset, phraseLength) {
    const shown = Math.min(this.#config.maxChars, phraseLength);
    const text = this.#typeToPhrase(offset, shown);
    if (shown < phraseLength) {
      return text + this.#END_MORE;
    }
    if (offset + phraseLength >= this.#charEnd) {
      return text + this.#END_CHAR;
    }
    return text;
  }

  // Counts the node visit and determines from `maxLines` and `lastLines`
  // whether its line is to be kept. Returns `true` if the caller should store
  // the line at `#lineIndex`, `false` if the line is to be dropped.
  #validateLineCount() {
    this.#lineCount += 1;
    const { maxLines, lastLines } = this.#config;
    if (maxLines === 0) {
      /* nothing may be kept - without this guard the wrap-around below would still keep one line */
      return false;
    }
    if (!lastLines) {
      /* keep the first `maxLines` lines only */
      return this.#lineCount <= maxLines;
    }
    if (this.#lineIndex >= maxLines) {
      /* the buffer is full - wrap around and overwrite the oldest line */
      this.#lineIndex = 0;
    }
    return true;
  }
}