@holgerengels/compute-engine

/* 0.26.0-alpha2 */
import type { Expression, MathJsonIdentifier } from '../../math-json/types';
import { ParseLatexOptions, LatexToken, Terminator, Parser, SymbolTable, SymbolType } from './public';
import type { IndexedLatexDictionary, IndexedLatexDictionaryEntry, IndexedInfixEntry, IndexedPostfixEntry, IndexedPrefixEntry, IndexedSymbolEntry, IndexedExpressionEntry, IndexedFunctionEntry } from './dictionary/definitions';
/**
 * ## THEORY OF OPERATIONS
 *
 * The parser is a recursive descent parser that uses a dictionary of
 * LaTeX commands to parse a LaTeX string into a MathJSON expression.
 *
 * The parser is a stateful object that keeps track of the current position
 * in the token stream, and the boundaries of the current parsing operation.
 *
 * To correctly parse some constructs, the parser needs to know the context
 * in which it is parsing. For example, `k(2+x)` can be interpreted
 * as a function `k` applied to the sum of `2` and `x`, or as the product
 * of `k` and the sum of `2` and `x`. The parser needs to know that `k` is
 * a function to interpret the expression as a function application.
 *
 * The parser uses the current state of the compute engine, and any
 * identifier that may have been declared, to determine the correct
 * interpretation.
 *
 * Some constructs declare variables or functions while parsing. For example,
 * `\sum_{i=1}^n i` declares the variable `i` as the index of the sum.
 *
 * The parser keeps track of the parsing state with a stack of symbol tables.
 *
 * In addition, the handler `getIdentifierType()` is called when the parser
 * encounters an unknown identifier. This handler can be used to declare the
 * identifier, or to return `unknown` if the identifier is not known.
 *
 * Some functions affect the state of the parser:
 * - `Declare`, `Assign` modify the symbol table
 * - `Block` creates a new symbol table (local scope)
 * - `Function` creates a new symbol table with named arguments
 *
 */
export declare class _Parser implements Parser {
    /** The (read-only) parsing options passed to the constructor. */
    readonly options: Readonly<ParseLatexOptions>;
    // NOTE(review): presumably the backing field for the `index` accessor
    // declared below -- confirm against the implementation.
    _index: number;
    // NOTE(review): presumably the innermost table of the stack of symbol
    // tables described in the theory of operations -- confirm.
    symbolTable: SymbolTable;
    // NOTE(review): presumably these open and close a scope on the stack of
    // symbol tables described in the theory of operations -- confirm.
    pushSymbolTable(): void;
    popSymbolTable(): void;
    // NOTE(review): presumably records `id` with the given `type` in the
    // current symbol table -- confirm.
    addSymbol(id: string, type: SymbolType): void;
    // NOTE(review): presumably the current position in the token stream
    // (see the theory of operations) -- confirm.
    get index(): number;
    set index(val: number);
    private _tokens;
    private _positiveInfinityTokens;
    private _negativeInfinityTokens;
    private _notANumberTokens;
    private _decimalSeparatorTokens;
    private _wholeDigitGroupSeparatorTokens;
    private _fractionalDigitGroupSeparatorTokens;
    private _exponentProductTokens;
    private _beginExponentMarkerTokens;
    private _endExponentMarkerTokens;
    private _truncationMarkerTokens;
    private _imaginaryUnitTokens;
    private readonly _dictionary;
    private _boundaries;
    private _lastPeek;
    private _peekCounter;
    /**
     * @param tokens - The LaTeX tokens to parse.
     * @param dictionary - The indexed LaTeX dictionary used to interpret
     * the tokens.
     * @param options - The parsing options, exposed as `this.options`.
     */
    constructor(tokens: LatexToken[], dictionary: IndexedLatexDictionary, options: Readonly<ParseLatexOptions>);
    // See the theory of operations above for the role of
    // `getIdentifierType()` when an unknown identifier is encountered.
    getIdentifierType(id: MathJsonIdentifier): SymbolType;
    get peek(): LatexToken;
    nextToken(): LatexToken;
    get atEnd(): boolean;
    /**
     * Return true if
     * - at end of the token stream
     * - the `t.condition` function returns true
     *
     * Note: the `minPrec` condition is not checked. It should be checked
     * separately.
     */
    atTerminator(t?: Readonly<Terminator>): boolean;
    /**
     * True if the current token matches any of the boundaries we are
     * waiting for.
     */
    get atBoundary(): boolean;
    addBoundary(boundary: LatexToken[]): void;
    removeBoundary(): void;
    matchBoundary(): boolean;
    boundaryError(msg: string | [string, ...Expression[]]): Expression;
    latex(start: number, end?: number): string;
    private latexAhead;
    /**
     * Return at most `this._dictionary.lookahead` LaTeX tokens.
     *
     * The index in the returned array corresponds to the number of tokens.
     * Note that since a token can be longer than one char ('\\pi', but also
     * some astral plane unicode characters), the length of the string
     * does not match that index. However, knowing the index is important
     * to know by how many tokens to advance.
     *
     * For example:
     *
     * `[empty, '\\sqrt', '\\sqrt{', '\\sqrt{2', '\\sqrt{2}']`
     *
     */
    lookAhead(): [count: number, tokens: string][];
    /** Return all the definitions that match the tokens ahead.
     *
     * The return value is an array of pairs `[def, n]` where `def` is the
     * definition that matches the tokens ahead, and `n` is the number of tokens
     * that matched.
     *
     * Note the 'operator' kind matches infix, prefix and postfix operators.
     *
     */
    peekDefinitions(kind: 'expression'): [IndexedExpressionEntry, number][];
    peekDefinitions(kind: 'function'): [IndexedFunctionEntry, number][];
    peekDefinitions(kind: 'symbol'): [IndexedSymbolEntry, number][];
    peekDefinitions(kind: 'postfix'): [IndexedPostfixEntry, number][];
    peekDefinitions(kind: 'infix'): [IndexedInfixEntry, number][];
    peekDefinitions(kind: 'prefix'): [IndexedPrefixEntry, number][];
    peekDefinitions(kind: 'operator'): [IndexedInfixEntry | IndexedPrefixEntry | IndexedPostfixEntry, number][];
    /** Skip strictly `<space>` tokens.
     * To also skip `{}` see `skipSpace()`.
     * To skip visual space (e.g. `\,`) see `skipVisualSpace()`.
     */
    skipSpaceTokens(): void;
    /** While parsing in math mode, skip applicable spaces, which includes `{}`.
     * Do not use to skip spaces while parsing a string. See `skipSpaceTokens()`
     * instead.
     */
    skipSpace(): boolean;
    skipVisualSpace(): void;
    match(token: LatexToken): boolean;
    matchAll(tokens: LatexToken[]): boolean;
    matchAny(tokens: LatexToken[]): LatexToken;
    /**
     * A LaTeX number can be a decimal, hex or octal number.
     * It is used in some LaTeX commands, such as `\char`.
     *
     * From TeX:8695 (scan_int):
     * > An integer number can be preceded by any number of spaces and `+' or
     * > `-' signs. Then comes either a decimal constant (i.e., radix 10), an
     * > octal constant (i.e., radix 8, preceded by '), a hexadecimal constant
     * > (radix 16, preceded by "), an alphabetic constant (preceded by `), or
     * > an internal variable.
     */
    matchLatexNumber(isInteger?: boolean): null | number;
    matchChar(): string | null;
    /**
     *
     * If the next token matches the open delimiter, set a boundary with
     * the close token and return true.
     *
     * This method handles prefixes like `\left` and `\bigl`.
     *
     * It also handles "shorthand" delimiters, i.e. '(' will match both
     * `(` and `\lparen`. If a shorthand is used for the open delimiter, the
     * corresponding shorthand will be used for the close delimiter.
     * See DELIMITER_SHORTHAND.
     *
     */
    private matchDelimiter;
    parseGroup(): Expression | null;
    parseOptionalGroup(): Expression | null;
    parseToken(): Expression | null;
    /**
     * Parse an expression in a tabular format, where rows are separated by `\\`
     * and columns by `&`.
     *
     * Return rows of sparse columns: empty rows are indicated with `Nothing`,
     * and empty cells are also indicated with `Nothing`.
     */
    parseTabular(): null | Expression[][];
    /** Match a string used as a LaTeX identifier, for example an environment
     * name.
     * Not suitable for general purpose text, e.g. the argument of a `\text{}`
     * command. See `matchChar()` instead.
     */
    private parseStringGroupContent;
    /** Parse a group as a string, for example for `\operatorname` or `\begin` */
    parseStringGroup(optional?: boolean): string | null;
    /** Parse an environment: `\begin{env}...\end{env}` */
    private parseEnvironment;
    /** If the next token matches a `-` sign, return '-', otherwise return '+'
     *
     */
    private parseOptionalSign;
    /** Parse a sequence of decimal digits. The 'part' argument indicates which
     * grouping separator should be expected.
     */
    private parseDecimalDigits;
    /** The 'part' argument is used to determine what grouping separator
     * should be expected.
     */
    private parseSignedInteger;
    private parseExponent;
    parseRepeatingDecimal(): string;
    /**
     * Parse a number, with an optional sign, exponent, decimal marker,
     * repeating decimals, etc...
     */
    parseNumber(): Expression | null;
    private parsePrefixOperator;
    private parseInfixOperator;
    /**
     * This returns an array of arguments (as in a function application),
     * or null if there is no match.
     *
     * - 'enclosure': will look for an argument inside an enclosure
     *   (open/close fence)
     * - 'implicit': either an expression inside a pair of `()`, or just a product
     *   (i.e. we interpret `\cos 2x + 1` as `\cos(2x) + 1`)
     *
     */
    parseArguments(kind?: 'enclosure' | 'implicit', until?: Readonly<Terminator>): ReadonlyArray<Expression> | null;
    /**
     * An enclosure is an opening matchfix operator, an optional expression,
     * optionally followed multiple times by a separator and another expression,
     * and finally a closing matching operator.
     */
    private parseEnclosure;
    /**
     * A generic expression is used for dictionary entries that do
     * some complex (non-standard) parsing. This includes trig functions (to
     * parse implicit arguments), and integrals (to parse the integrand and
     * limits and the "dx" terminator).
     */
    private parseGenericExpression;
    /**
     * A function is an identifier followed by postfix operators
     * (`\prime`...) and some arguments.
     */
    private parseFunction;
    parseSymbol(until?: Readonly<Terminator>): Expression | null;
    /**
     * Parse a sequence of superfix/subfix operators, e.g. `^{*}`
     *
     * Superfix and subfix need special handling:
     *
     * - they act mostly like an infix operator, but they are commutative, i.e.
     *   `x_a^b` should be parsed identically to `x^b_a`.
     *
     * - furthermore, in LaTeX `x^a^b` parses the same as `x^a{}^b`.
     *
     */
    private parseSupsub;
    parsePostfixOperator(lhs: Expression | null, until?: Readonly<Terminator>): Expression | null;
    /**
     * This method can be invoked when we know we're in an error situation,
     * for example when there are tokens remaining after we've finished parsing.
     *
     * In general, if a context does not apply, we return `null` to give
     * the chance to some other option to be considered. However, in some cases
     * we know we've exhausted all possibilities, and in this case this method
     * will return an error expression as informative as possible.
     *
     * We've encountered a LaTeX command or symbol but were not able to match it
     * to any entry in the LaTeX dictionary, or ran into it in an unexpected
     * context (postfix operator lacking an argument, for example).
     */
    parseSyntaxError(): Expression;
    /**
     * <primary> ::=
     *  (<number> | <symbol> | <environment> | <matchfix-expr>)
     *    <subsup>* <postfix-operator>*
     *
     * <symbol> ::=
     *  (<symbol-id> | (<latex-command><latex-arguments>)) <arguments>
     *
     * <matchfix-expr> ::=
     *  <matchfix-op-open>
     *  <expression>
     *  (<matchfix-op-separator> <expression>)*
     *  <matchfix-op-close>
     *
     */
    private parsePrimary;
    /**
     * Parse an expression:
     *
     * <expression> ::=
     *  | <primary>
     *  | <prefix-op> <primary>
     *  | <primary> <infix-op> <expression>
     *
     * Stop when an operator of precedence less than `until.minPrec`
     * is encountered.
     */
    parseExpression(until?: Readonly<Terminator>): Expression | null;
    /**
     * Add LaTeX or other requested metadata to the expression.
     */
    decorate(expr: Expression | null, start: number): Expression | null;
    error(code: string | [string, ...Expression[]], fromToken: number): Expression;
    private isFunctionOperator;
    /** Return all defs of the specified kind.
     * The defs at the end of the dictionary have priority, since they may
     * override previous definitions. (For example, there is a core definition
     * for matchfix[], which maps to a List, and a logic definition which
     * matches to Boole. The logic definition should take precedence.)
     */
    getDefs(kind: string): Iterable<IndexedLatexDictionaryEntry>;
}
/**
 * Parse a LaTeX string into a MathJSON expression, using the provided
 * indexed LaTeX dictionary and parsing options.
 *
 * NOTE(review): the conditions under which `null` is returned are not
 * visible from this declaration -- confirm against the implementation.
 */
export declare function parse(latex: string, dictionary: IndexedLatexDictionary, options: Readonly<ParseLatexOptions>): Expression | null;