apg-unicode
Version:
JavaScript APG parser of Unicode code point arrays
168 lines (151 loc) • 6.18 kB
JavaScript
// ## Abstract Syntax Tree (AST) Class Overview
// The `AST` class provides the core infrastructure for constructing and
// traversing abstract syntax trees after successful parsing.
// Unlike a full parse tree, the AST retains only the named phrases explicitly requested by the application,
// streamlining traversal and post-processing.
// ### Key Features
// - **Selective Node Retention**
// The AST includes only successfully matched nodes that were explicitly requested,
// omitting irrelevant or intermediate parsing details.
// - **Bidirectional Phrase Access**
// Each node in the AST retains the full matched phrase,
// accessible during both downward and upward traversal passes.
// - **Traversal Interface**
// After parsing, the `AST` object exposes a `translate()` member,
// enabling users to traverse the AST nodes,
// processing or translating matched phrases according to application-specific needs.
import { identifiers as id } from '../src/identifiers.js';
export { Ast };
// The `Ast` class collects one record per downward (`SEM_PRE`) and upward (`SEM_POST`)
// visit reported through `down()` and `up()`, and replays those records, in order,
// to the application's callback functions in `translate()`.
// All members are arrow-function instance fields, so they stay bound to the instance
// even when passed around detached from it.
class Ast {
  /* prefix shared by every error message thrown from this class */
  #FILENAME = 'parser.js: Ast(): ';
  /* rule and UDT lists exactly as received by `initGrammar()` */
  #rules;
  #udts;
  /* the input received by `initChars()`; handed unchanged to the callbacks */
  #chars;
  /* callback table: rule callbacks at `rule.index`, UDT callbacks at `rules.length + udt.index` */
  #nodeCallbacks;
  /* record indexes of `down()` calls that have not yet been closed by `up()` */
  #stack = [];
  /* the ordered list of down/up records replayed by `translate()` */
  #records = [];

  /* Replace the callback table with one `undefined` slot per rule and per UDT. */
  #resetCallbacks = () => {
    this.#nodeCallbacks = Array(this.#rules.length + this.#udts.length).fill(undefined);
  };

  // Called by the application to set callback functions for specific rule name or UDT nodes.
  // Names are matched case-insensitively against the `lower` property of each rule/UDT;
  // rules are searched before UDTs.
  // * @param {string | undefined} name - A valid rule or UDT name. If `undefined` all callbacks will be cleared.
  // * @param {function} callback - The callback function to process the named node's phrase.
  // * @throws {Error} if the grammar has not been initialized, if the argument types are
  //   not (string, function), or if `name` matches neither a rule nor a UDT.
  setCallback = (name, callback) => {
    if (!this.#rules) {
      throw new Error(`${this.#FILENAME}cannot set callbacks until Grammar is initialized (Parser.setAst())`);
    }
    if (name === undefined) {
      /* clear all callback functions */
      this.#resetCallbacks();
      return;
    }
    if (typeof name !== 'string' || typeof callback !== 'function') {
      throw new Error(`${this.#FILENAME}setCallback() argument types not "string", "function"`);
    }
    const lower = name.toLowerCase();
    /* see if this is a rule name */
    for (const rule of this.#rules) {
      if (rule.lower === lower) {
        this.#nodeCallbacks[rule.index] = callback;
        return;
      }
    }
    /* see if this is a UDT name - UDT slots follow the rule slots */
    for (const udt of this.#udts) {
      if (udt.lower === lower) {
        this.#nodeCallbacks[this.#rules.length + udt.index] = callback;
        return;
      }
    }
    throw new Error(`${this.#FILENAME}setCallback name not a rule or UDT name: ${name}`);
  };

  // Called by the application to translate the AST after the parser has successfully
  // parsed the input string.
  // Translate means to process each phrase as it occurs during a
  // [depth-first](https://en.wikipedia.org/wiki/Depth-first_search)
  // traversal of the AST nodes.
  // Every record whose node currently has a callback results in one call of the form
  // `callback(state, chars, phraseIndex, phraseLength, data)`,
  // where `state` is `id.SEM_PRE` for a down record and `id.SEM_POST` otherwise.
  // * @param {*} data - optional application-defined data. Unused by AST but passed through
  // to the application callback functions for processing use.
  translate = (data) => {
    for (const record of this.#records) {
      const callback = this.#nodeCallbacks[record.callbackIndex];
      if (callback) {
        const state = record.state === id.SEM_PRE ? id.SEM_PRE : id.SEM_POST;
        callback(state, this.#chars, record.phraseIndex, record.phraseLength, data);
      }
    }
  };

  // Called by the parser to initialize the AST with the rules and UDTs.
  // Clears all node callback functions by setting them to `undefined`.
  // * @param {Array} rules - the rules; each is read for its `lower` and `index` properties.
  // * @param {Array} udts - the UDTs; each is read for its `lower` and `index` properties.
  initGrammar = (rules, udts) => {
    this.#rules = rules;
    this.#udts = udts;
    this.#resetCallbacks();
  };

  // Called by the parser to pass the string characters to the AST.
  // Initializes the AST records and index stack.
  // * @param {*} chars - the input characters; stored and forwarded to the callbacks as is.
  initChars = (chars) => {
    this.#stack.length = 0;
    this.#records.length = 0;
    this.#chars = chars;
  };

  // Called by the parser's `RNM` operator to determine if a callback function
  // is defined for the node or rule index.
  // * @param {number} index - the rule index.
  // * @returns {boolean} `true` if a callback is set for the rule.
  ruleDefined = (index) => Boolean(this.#nodeCallbacks[index]);

  // Called by the parser's `UDT` operator to determine if a callback function
  // is defined for the node or UDT index.
  // * @param {number} index - the UDT index (offset by the number of rules internally).
  // * @returns {boolean} `true` if a callback is set for the UDT.
  udtDefined = (index) => Boolean(this.#nodeCallbacks[this.#rules.length + index]);

  // Called by the parser to reset the length of the records array.
  // Necessary when backtracking over previously matched nodes.
  // The index stack is cut back to the depth saved in the last surviving record.
  // * @param {number} length - the number of records to keep.
  setLength = (length) => {
    this.#records.length = length;
    this.#stack.length = length > 0 ? this.#records[length - 1].stack : 0;
  };

  // Called by the parser to get the length of the records array.
  // * @returns {number} the current number of records.
  getLength = () => this.#records.length;

  // Called by the parser's `RNM` & `UDT` operators.
  // Builds a record for the downward traversal through the node.
  // The phrase and the index of the matching up record are not known yet;
  // they are filled in by the corresponding `up()` call.
  // * @param {number} callbackIndex - index of the node's slot in the callback table.
  // * @param {string} name - the node's name.
  // * @returns {number} the index of the new record.
  down = (callbackIndex, name) => {
    const thisIndex = this.#records.length;
    this.#stack.push(thisIndex);
    this.#records.push({
      name,
      thisIndex,
      thatIndex: undefined,
      state: id.SEM_PRE,
      callbackIndex,
      phraseIndex: undefined,
      phraseLength: undefined,
      /* stack depth including this node - used by `setLength()` when backtracking */
      stack: this.#stack.length,
    });
    return thisIndex;
  };

  // Called by the parser's `RNM` & `UDT` operators.
  // Builds a record for the upward traversal through the node
  // and back-fills the matching down record with the phrase and a link to this record.
  // * @param {number} callbackIndex - index of the node's slot in the callback table.
  // * @param {string} name - the node's name.
  // * @param {number} phraseIndex - index of the first character of the matched phrase.
  // * @param {number} phraseLength - number of characters in the matched phrase.
  // * @returns {number} the index of the new record.
  up = (callbackIndex, name, phraseIndex, phraseLength) => {
    const thisIndex = this.#records.length;
    /* the most recent unclosed down record is the partner of this up record */
    const thatIndex = this.#stack.pop();
    this.#records.push({
      name,
      thisIndex,
      thatIndex,
      state: id.SEM_POST,
      callbackIndex,
      phraseIndex,
      phraseLength,
      /* stack depth after leaving this node */
      stack: this.#stack.length,
    });
    const partner = this.#records[thatIndex];
    partner.thatIndex = thisIndex;
    partner.phraseIndex = phraseIndex;
    partner.phraseLength = phraseLength;
    return thisIndex;
  };
}