UNPKG

apg-unicode

Version:

JavaScript APG parser of Unicode code point arrays

333 lines (303 loc) 12 kB
// # Trace Object Documentation
// The `Trace` object follows the parser through the parse tree and generates a line of text for each node visit.
// It is primarily used for debugging failed parses—helping identify where and why a parse failed.
// Each trace line displays:
// - Tree depth
// - Direction of the visit (going up or down the tree)
// - Result of the node visit
// - Node type
// - Phrase the parser is matching or has matched
//
// ---
// ## Constructor Configuration
// The `Trace` constructor accepts an optional configuration object with the following properties:
//
// | Property      | Type      | Default | Description |
// |---------------|-----------|---------|-------------|
// | `displayType` | `string`  | `'hex'` | Controls how the input string is displayed. Options: `'hex'`, `'ascii'`, `'unicode'`. See [Display Type Details](#display-type-details). |
// | `maxChars`    | `number`  | `50`    | Maximum number of characters to display per trace line. |
// | `maxLines`    | `number`  | `5000`  | Maximum number of trace lines to display. |
// | `lastLines`   | `boolean` | `true`  | If `true`, displays only the last `maxLines` of trace output. If `false`, displays the first `maxLines`. |
//
// Note on `lastLines`: When a parse fails it is often the case that the parser stops at the point of failure.
// Therefore, it is usually the last few lines of the trace that will show the problem.
// Hence, the default is to keep and show only the last `maxLines` lines of the trace.
//
// ---
// ## Display Type Details
// The parser processes strings as arrays of positive integers. These typically represent character codes,
// but their interpretation depends on the SABNF grammar and application needs.
// The `displayType` setting determines how these integers are rendered.
// The output also varies based on the input string array type.
//
// ### `hex`
// Follows the format of the Linux `hexdump` utility: hexadecimal representation followed by an ASCII or Unicode rendition.
// | Input Type     | Format Description |
// |----------------|--------------------|
// | `Uint8Array`   | Each character is shown as a 2-digit hex byte. Printable ASCII characters are rendered; others appear as `.`. |
// | `Uint16Array`  | Each character is shown as a 4-digit hex word. Valid Unicode code points are rendered; surrogate pairs appear as `.`. |
// | `Uint32Array`  | Each character is shown as an 8-digit hex double word. Valid Unicode characters are rendered. |
// | `Array`        | Same as `Uint32Array`, but values > `0xFFFFFFFF` are capped at `0xFFFFFFFF`. |
//
// ### `ascii`
// All characters are interpreted as ASCII. Non-printing characters are displayed as `.`.
//
// ### `unicode`
// All characters that represent valid Unicode code points are rendered. Invalid code points are displayed as `.`.
import { utilities as utils } from '../src/utilities.js';
import { identifiers as id } from '../src/identifiers.js';

export { Trace };

class Trace {
  /* prefix used in error messages */
  #FILENAME = 'trace.js: ';
  /* indentation glyphs: every fifth column gets the "space" marker */
  #OFFSET_CHAR = '.';
  #OFFSET_SPACE = '|';
  /* node-visit state markers */
  #STATE_ACTIVE = '|-|';
  #STATE_MATCH = '|M|';
  #STATE_NOMATCH = '|N|';
  #STATE_EMPTY = '|E|';
  /* phrase terminators: end of input reached / phrase truncated */
  #END_CHAR = '\u25c6';
  #END_MORE = '\u2026';
  /* internal codes for the configured display type */
  #HEX = 101;
  #ASCII = 102;
  #UNICODE = 103;
  /* parser-supplied state, set by `init()` */
  #chars;
  #charEnd;
  #charType;
  #rules;
  #udts;
  /* Counters start at zero so `lineCount()` is well defined even before `init()` is called. */
  #treeDepth = 0;
  #lineCount = 0;
  #lineIndex = 0;
  /* retained trace lines; used as a ring buffer when `lastLines` is `true` */
  #out = [];
  #config = {
    displayType: this.#HEX,
    maxChars: 50,
    maxLines: 5000,
    lastLines: true,
  };

  // The class constructor takes an optional configuration object.
  // |key|value|
  // |---|--------|
  // |conf.displayType| 'hex'(default), 'ascii' or 'unicode'|
  // |conf.maxChars|The maximum number of characters to display on a single line(default=50).|
  // |conf.maxLines|The maximum number of lines to display(default=5000).|
  // |conf.lastLines|If `true`(default) display only the last `maxLines`, if `false` display the first `maxLines`.|
  //
  // Any property that is missing or of the wrong type is ignored and its default is kept.
  constructor(conf) {
    if (conf === undefined || conf === null) {
      /* use the entire default configuration */
      return;
    }
    if (typeof conf.displayType === 'string') {
      const lower = conf.displayType.toLowerCase();
      if (lower.startsWith('hex')) {
        this.#config.displayType = this.#HEX;
      } else if (lower === 'ascii') {
        this.#config.displayType = this.#ASCII;
      } else if (lower === 'unicode') {
        this.#config.displayType = this.#UNICODE;
      } /* else use default displayType value */
    }
    if (typeof conf.maxChars === 'number' && conf.maxChars >= 0) {
      this.#config.maxChars = conf.maxChars;
    } /* else use default maxChars value */
    if (typeof conf.maxLines === 'number' && conf.maxLines >= 0) {
      this.#config.maxLines = conf.maxLines;
    } /* else use default maxLines value */
    if (conf.lastLines === false) {
      this.#config.lastLines = false;
    } /* else use the default true */
  }

  // Called by the application upon parser completion.
  // Returns the total number of node visits traced (including lines that were
  // not retained because of `maxLines`). Returns 0 if no parse has been traced.
  lineCount = () => this.#lineCount;

  // Called by the application upon parser completion.
  // Returns the retained trace lines, in chronological order, as a single string.
  displayTrace = () => {
    if (this.#out.length === 0) {
      /* protect against application calling this before any parsing is done */
      return '';
    }
    const hasWrapped = this.#config.lastLines && this.#lineIndex !== this.#out.length;
    if (!hasWrapped) {
      return this.#out.join('');
    }
    /* The ring buffer has wrapped: slots before `#lineIndex` hold the newest lines, */
    /* slots from `#lineIndex` on hold the oldest surviving lines. */
    /* `concat` is used rather than `push(...array)` so that a very large */
    /* `maxLines` cannot exceed the engine's argument-count limit. */
    const newest = this.#out.slice(0, this.#lineIndex);
    const oldest = this.#out.slice(this.#lineIndex);
    return oldest.concat(newest).join('');
  };

  // Called by the parser to initialize the Trace object.
  // - r: the grammar rules (indexed by `op.index` for RNM operators)
  // - u: the UDTs (indexed by `op.index` for UDT operators)
  // - c: the input character array
  // - e: the index marking the end of the input to be parsed
  // - t: the identifier of the character array type (compared to `id.UINT8`, etc.)
  init = (r, u, c, e, t) => {
    this.#rules = r;
    this.#udts = u;
    this.#chars = c;
    this.#charEnd = e;
    this.#charType = t;
    this.#lineCount = 0;
    this.#lineIndex = 0;
    this.#treeDepth = 0;
    this.#out.length = 0;
  };

  // Called by the parser to generate a trace line for the
  // downward passage of the parser through a node.
  down = (op, offset) => {
    if (!this.#validateLineCount()) {
      return;
    }
    const lead = this.#indent(this.#treeDepth);
    const phrase = this.#downPhrase(offset);
    this.#treeDepth += 1;
    this.#out[this.#lineIndex++] = `${lead}${this.#STATE_ACTIVE}[${this.#opName(op)}]${phrase}\n`;
  };

  // Called by the parser to generate a trace line for the
  // upward passage of the parser through a node.
  // Throws an `Error` if `state` is not one of `id.EMPTY`, `id.MATCH` or `id.NOMATCH`.
  up = (op, state, offset, phraseLength) => {
    if (!this.#validateLineCount()) {
      return;
    }
    this.#treeDepth -= 1;
    const lead = this.#indent(this.#treeDepth);
    let marker;
    let phrase;
    switch (state) {
      case id.EMPTY:
        marker = this.#STATE_EMPTY;
        phrase = `''`;
        break;
      case id.MATCH:
        marker = this.#STATE_MATCH;
        phrase = this.#upPhrase(offset, phraseLength);
        break;
      case id.NOMATCH:
        marker = this.#STATE_NOMATCH;
        phrase = '';
        break;
      default:
        /* the message is only built when it is actually needed */
        throw new Error(`${this.#FILENAME}trace.up:  unrecognized state`);
    }
    this.#out[this.#lineIndex++] = `${lead}${marker}[${this.#opName(op)}]${phrase}\n`;
  };

  // Indents a line to indicate the tree depth of the node.
  // One glyph is emitted per level; every fifth glyph is `#OFFSET_SPACE`
  // so that deep indentation is easier to count. A negative depth yields ''.
  #indent(depth) {
    let lead = '';
    for (let level = 1; level <= depth; level += 1) {
      lead += level % 5 === 0 ? this.#OFFSET_SPACE : this.#OFFSET_CHAR;
    }
    return lead;
  }

  // Generates a string identifying the node: the operator type name
  // followed, for some operator types, by a parenthesized detail.
  #opName(op) {
    let name = id.idName(op.type);
    switch (op.type) {
      case id.REP:
        name += op.max === Infinity ? `(${op.min},inf)` : `(${op.min},${op.max})`;
        break;
      case id.RNM:
        name += `(${this.#rules[op.index].name})`;
        break;
      case id.TRG:
        name += `(${op.min},${op.max})`;
        break;
      case id.TBS:
      case id.TLS: {
        /* literal strings longer than 6 characters are abbreviated to their first 3 */
        const shown =
          op.string.length > 6
            ? `(${utils.arrayToAscii(op.string, 0, 3)}...)`
            : `(${utils.arrayToAscii(op.string, 0, 6)})`;
        name += utils.exposeCtrlChars(shown);
        break;
      }
      case id.UDT:
        name += `(${this.#udts[op.index].name})`;
        break;
      default:
        /* all other operator types are identified by their type name alone */
        break;
    }
    return name;
  }

  // Generates the phrase display. The display format is a function
  // of the `conf.displayType` and the string type (`Uint8Array`, etc.).
  // Throws an `Error` if the display type or, for hex display, the
  // character array type is not recognized.
  #typeToPhrase(indexStart, length) {
    /* NOTE(review): `#charType` is taken as given by `init()`; presumably the parser has validated it - confirm */
    switch (this.#config.displayType) {
      case this.#HEX:
        switch (this.#charType) {
          case id.UINT8:
            return utils.hexLineBytes(this.#chars, indexStart, length);
          case id.UINT16:
            return utils.hexLineWords(this.#chars, indexStart, length);
          case id.UINT32:
            return utils.hexLineDWords(this.#chars, indexStart, length);
          case id.ARRAY:
            return utils.hexLineArray(this.#chars, indexStart, length);
          default:
            /* should never get here */
            throw new Error(`${this.#FILENAME}typeToPhrase() bad character array type: ${this.#charType}`);
        }
      case this.#ASCII:
        return utils.arrayToAscii(this.#chars, indexStart, length);
      case this.#UNICODE:
        return utils.arrayToUnicode(this.#chars, indexStart, length);
      default:
        /* should never get here */
        throw new Error(`${this.#FILENAME}typeToPhrase() bad type: ${this.#config.displayType}`);
    }
  }

  // Generates the phrase to display for downward passage through a node:
  // the remaining input from `offset`, limited to `maxChars` characters.
  // A truncated phrase ends with `#END_MORE`, otherwise with `#END_CHAR`.
  #downPhrase(offset) {
    const remaining = this.#charEnd - offset;
    const shown = Math.min(this.#config.maxChars, remaining);
    const terminator = shown < remaining ? this.#END_MORE : this.#END_CHAR;
    return this.#typeToPhrase(offset, shown) + terminator;
  }

  // Generates the phrase to display for upward passage through a node:
  // the matched phrase, limited to `maxChars` characters.
  // A truncated phrase ends with `#END_MORE`; an untruncated phrase that
  // reaches the end of the input ends with `#END_CHAR`.
  #upPhrase(offset, phraseLength) {
    const shown = Math.min(this.#config.maxChars, phraseLength);
    const text = this.#typeToPhrase(offset, shown);
    if (shown < phraseLength) {
      return text + this.#END_MORE;
    }
    if (offset + phraseLength >= this.#charEnd) {
      return text + this.#END_CHAR;
    }
    return text;
  }

  // This function examines the line count and determines from
  // `maxLines` and `lastLines` whether to display the line
  // and if displayed, which output array line slot to put it in.
  // Every call counts as one trace line, whether or not it is retained.
  #validateLineCount() {
    this.#lineCount += 1;
    const { maxLines, lastLines } = this.#config;
    if (maxLines === 0) {
      /* nothing is to be retained, regardless of `lastLines` */
      return false;
    }
    if (lastLines === false) {
      /* keep only the first `maxLines` lines */
      return this.#lineCount <= maxLines;
    }
    if (this.#lineIndex >= maxLines) {
      /* ring buffer is full: wrap around and overwrite the oldest line */
      this.#lineIndex = 0;
    }
    return true;
  }
}