UNPKG

apg-unicode

Version:

JavaScript APG parser of Unicode code point arrays

github.com/ldthomas/apg-unicode

ldthomas/apg-unicode

121 lines (108 loc) • 4.25 kB

JavaScript

// ## Identifiers Reference Guide
// This file defines a comprehensive set of numerical identifiers used
// throughout the parsing and translation process.
// Each identifier is designed for memory-efficient representation and fast lookup,
// ensuring robust performance of the parser and the application's callback functions.
// These constants serve as identifiers for:
// - **Parse Tree Node Operators**
//   Representing parse tree node types in the grammar object and in the parser during parsing.
// - **Parser States**
//   Identifies parser states during depth-first traversal of the parse tree.
// - **Abstract Syntax Tree (AST) Traversal Flags**
//   Identifies directional flow during the depth-first traversal of the AST.
// - **Input String Array Types**
//   Allows quick differentiation between the allowed input string array types.
export { identifiers };

const identifiers = {
  // These identify the operator types in the grammar object as well as the parser.
  // **Note:** These must match the values in [apg-js 4.4.0](https://github.com/ldthomas/apg-js) generator.
  /* the original seven ABNF operators */
  ALT: 1 /* alternation */,
  CAT: 2 /* concatenation */,
  REP: 3 /* repetition */,
  RNM: 4 /* rule name */,
  TRG: 5 /* terminal range */,
  TBS: 6 /* terminal binary string, case sensitive */,
  TLS: 7 /* terminal literal string, case insensitive */,
  /* the super set, SABNF operators */
  UDT: 11 /* user-defined terminal */,
  AND: 12 /* positive look ahead */,
  NOT: 13 /* negative look ahead */,

  // Used by the parser and the user's `RNM` and `UDT` callback functions.
  // Identifies the parser state as it traverses the parse tree nodes.
  // - **ACTIVE** - indicates the downward direction through the parse tree node.
  // - **MATCH** - indicates a successfully matched phrase, of length > 0
  // - **EMPTY** - indicates a successfully matched empty phrase, length = 0
  // - **NOMATCH** - indicates failure to match any phrase at all
  ACTIVE: 100,
  MATCH: 101,
  EMPTY: 102,
  NOMATCH: 103,

  // Used by `AST` translator and the application's callback functions
  // to identify the node visitation states during the depth-first traversal.
  // - **SEM_PRE** - indicates the downward (pre-branch) direction through the `AST` node.
  // - **SEM_POST** - indicates the upward (post-branch) direction through the `AST` node.
  SEM_PRE: 200,
  SEM_POST: 201,
  SEM_OK: 300 /* ignored - retained for backwards compatibility */,

  // The input string array type identifiers.
  // - **UINT8** - Uint8Array
  // - **UINT16** - Uint16Array
  // - **UINT32** - Uint32Array
  // - **ARRAY** - Array
  UINT8: 400,
  UINT16: 401,
  UINT32: 402,
  ARRAY: 403,

  // `idName` translates a numerical identifier into a human-readable name.
  // Matching uses strict equality against the constants above, so a value of
  // any other type (e.g. the string '1') is reported as unrecognized.
  // - @param {number} s - an identifier from the list above
  // - @returns {string} - the ASCII name of the identifier, or
  //   'UNRECOGNIZED IDENTIFIER' when `s` matches none of them
  idName: (s) => {
    switch (s) {
      case identifiers.ALT:
        return 'ALT';
      case identifiers.CAT:
        return 'CAT';
      case identifiers.REP:
        return 'REP';
      case identifiers.RNM:
        return 'RNM';
      case identifiers.TRG:
        return 'TRG';
      case identifiers.TBS:
        return 'TBS';
      case identifiers.TLS:
        return 'TLS';
      case identifiers.UDT:
        return 'UDT';
      case identifiers.AND:
        return 'AND';
      case identifiers.NOT:
        return 'NOT';
      case identifiers.ACTIVE:
        return 'ACTIVE';
      case identifiers.EMPTY:
        return 'EMPTY';
      case identifiers.MATCH:
        return 'MATCH';
      case identifiers.NOMATCH:
        return 'NOMATCH';
      case identifiers.SEM_PRE:
        return 'SEM_PRE';
      case identifiers.SEM_POST:
        return 'SEM_POST';
      case identifiers.SEM_OK:
        return 'SEM_OK';
      case identifiers.UINT8:
        return 'UINT8';
      case identifiers.UINT16:
        return 'UINT16';
      case identifiers.UINT32:
        return 'UINT32';
      case identifiers.ARRAY:
        return 'ARRAY';
      default:
        return 'UNRECOGNIZED IDENTIFIER';
    }
  },
};