UNPKG

apg-unicode

Version:

JavaScript APG parser of Unicode code point arrays

168 lines (151 loc) 6.18 kB
// ## Abstract Syntax Tree (AST) Class Overview
// The `AST` class provides the core infrastructure for constructing and
// traversing abstract syntax trees after successful parsing.
// Unlike a full parse tree, the AST retains only the named phrases explicitly requested by the application,
// streamlining traversal and post-processing.
// ### Key Features
// - **Selective Node Retention**
//   The AST includes only successfully matched nodes that were explicitly requested,
//   omitting irrelevant or intermediate parsing details.
// - **Bidirectional Phrase Access**
//   Each node in the AST retains the full matched phrase,
//   accessible during both downward and upward traversal passes.
// - **Traversal Interface**
//   After parsing, the `AST` object exposes a `translate()` member,
//   enabling users to traverse the AST nodes,
//   processing or translating matched phrases according to application-specific needs.
import { identifiers as id } from '../src/identifiers.js';

export { Ast };

class Ast {
  // Prefix prepended to every error message thrown by this class.
  #FILENAME = 'parser.js: Ast(): ';
  // Rule and UDT descriptor lists supplied by `initGrammar()`.
  // Each descriptor is read for its `lower` (lower-case name) and `index` properties.
  #rules;
  #udts;
  // The input characters supplied by `initChars()`; handed unchanged to every callback.
  #chars;
  // Callback table: slots `[0, rules.length)` are rules, slots
  // `[rules.length, rules.length + udts.length)` are UDTs. An `undefined` slot means "no callback".
  #nodeCallbacks;
  // Indexes (into `#records`) of the downward records that have not yet been closed by `up()`.
  #stack = [];
  // The flat list of downward (`SEM_PRE`) and upward (`SEM_POST`) records, in creation order.
  #records = [];

  // Builds a callback table with one empty (`undefined`) slot per rule and per UDT.
  #emptyCallbackTable() {
    return Array(this.#rules.length + this.#udts.length).fill(undefined);
  }

  // Called by the application to set callback functions for specific rule name or UDT nodes.
  // Names are matched case-insensitively against the `lower` property of each rule/UDT.
  // * @param {string | undefined} name - A valid rule or UDT name. If `undefined` all callbacks will be cleared.
  // * @param {function} callback - The callback function to process the named node's phrase.
  //   It is invoked by `translate()` as `callback(state, chars, phraseIndex, phraseLength, data)`.
  // * @throws {Error} if the grammar has not been initialized, if the argument types are wrong,
  //   or if `name` matches neither a rule nor a UDT.
  setCallback = (name, callback) => {
    if (!this.#rules) {
      throw new Error(`${this.#FILENAME}cannot set callbacks until Grammar is initialized (Parser.setAst())`);
    }
    if (name === undefined) {
      /* clear all callback functions */
      this.#nodeCallbacks = this.#emptyCallbackTable();
      return;
    }
    if (typeof name !== 'string' || typeof callback !== 'function') {
      throw new Error(`${this.#FILENAME}setCallback() argument types not "string", "function"`);
    }
    const lower = name.toLowerCase();

    /* see if this is a rule name */
    const rule = this.#rules.find((candidate) => candidate.lower === lower);
    if (rule) {
      this.#nodeCallbacks[rule.index] = callback;
      return;
    }

    /* see if this is a UDT name - UDT slots follow the rule slots in the table */
    const udt = this.#udts.find((candidate) => candidate.lower === lower);
    if (udt) {
      this.#nodeCallbacks[this.#rules.length + udt.index] = callback;
      return;
    }
    throw new Error(`${this.#FILENAME}setCallback name not a rule or UDT name: ${name}`);
  };

  // Called by the application to translate the AST after the parser has successfully
  // parsed the input string.
  // Translate means to process each phrase as it occurs during a
  // [depth-first](https://en.wikipedia.org/wiki/Depth-first_search)
  // traversal of the AST nodes. The records are visited in the order in which they were created;
  // each record with a registered callback triggers one call, with the record's state
  // (`SEM_PRE` for a downward record, `SEM_POST` for an upward record) as the first argument.
  // * @param {*} data - optional application-defined data. Unused by AST but passed through
  //   to the application callback functions for processing use.
  translate = (data) => {
    for (const record of this.#records) {
      const callback = this.#nodeCallbacks[record.callbackIndex];
      if (callback) {
        /* `state` is only ever written by `down()` (SEM_PRE) and `up()` (SEM_POST) */
        callback(record.state, this.#chars, record.phraseIndex, record.phraseLength, data);
      }
    }
  };

  // Called by the parser to initialize the AST with the rules and UDTs.
  // Clears all node callback functions by setting them to `undefined`.
  initGrammar = (rules, udts) => {
    this.#rules = rules;
    this.#udts = udts;
    this.#nodeCallbacks = this.#emptyCallbackTable();
  };

  // Called by the parser to pass the input characters to the AST.
  // Initializes (empties) the AST records and index stack.
  initChars = (chars) => {
    this.#stack.length = 0;
    this.#records.length = 0;
    this.#chars = chars;
  };

  // Called by the parser's `RNM` operator to determine if a callback function
  // is defined for the node or rule index.
  ruleDefined = (index) => !!this.#nodeCallbacks[index];

  // Called by the parser's `UDT` operator to determine if a callback function
  // is defined for the node or UDT index.
  udtDefined = (index) => !!this.#nodeCallbacks[this.#rules.length + index];

  // Called by the parser to reset the length of the records array.
  // Necessary when backtracking over previously matched nodes.
  // The open-node stack is restored to the depth saved in the last surviving record.
  // NOTE(review): intended for truncation only - `length` is presumably a value
  // previously obtained from `getLength()`; confirm against the parser.
  setLength = (length) => {
    this.#records.length = length;
    this.#stack.length = length > 0 ? this.#records[length - 1].stack : 0;
  };

  // Called by the parser to get the length of the records array.
  getLength = () => this.#records.length;

  // Called by the parser's `RNM` & `UDT` operators.
  // Builds a record for the downward traversal through the node.
  // The phrase and the index of the matching upward record are not known yet;
  // they are filled in by the corresponding `up()` call.
  // * @param {number} callbackIndex - index of the node's slot in the callback table.
  // * @param {string} name - the node name, stored in the record.
  // * @returns {number} the index of the new record.
  down = (callbackIndex, name) => {
    const thisIndex = this.#records.length;
    this.#stack.push(thisIndex);
    this.#records.push({
      name,
      thisIndex,
      thatIndex: undefined,
      state: id.SEM_PRE,
      callbackIndex,
      phraseIndex: undefined,
      phraseLength: undefined,
      /* stack depth after the push - used by `setLength()` when backtracking */
      stack: this.#stack.length,
    });
    return thisIndex;
  };

  // Called by the parser's `RNM` & `UDT` operators.
  // Builds a record for the upward traversal through the node and back-fills
  // the matching downward record with the phrase and the index of this record.
  // * @param {number} callbackIndex - index of the node's slot in the callback table.
  // * @param {string} name - the node name, stored in the record.
  // * @param {number} phraseIndex - index of the first character of the matched phrase.
  // * @param {number} phraseLength - number of characters in the matched phrase.
  // * @returns {number} the index of the new record.
  // * @throws {Error} if there is no open downward record to pair with.
  up = (callbackIndex, name, phraseIndex, phraseLength) => {
    if (this.#stack.length === 0) {
      /* fail before mutating anything rather than leaving a dangling record behind */
      throw new Error(`${this.#FILENAME}up() called without a matching down()`);
    }
    const thisIndex = this.#records.length;
    const thatIndex = this.#stack.pop();
    this.#records.push({
      name,
      thisIndex,
      thatIndex,
      state: id.SEM_POST,
      callbackIndex,
      phraseIndex,
      phraseLength,
      /* stack depth after the pop - used by `setLength()` when backtracking */
      stack: this.#stack.length,
    });
    const downRecord = this.#records[thatIndex];
    downRecord.thatIndex = thisIndex;
    downRecord.phraseIndex = phraseIndex;
    downRecord.phraseLength = phraseLength;
    return thisIndex;
  };
}