@holgerengels/compute-engine
Version:
Symbolic computing and numeric evaluations for JavaScript and Node.js
302 lines (301 loc) • 12.6 kB
TypeScript
/* 0.26.0-alpha2 */
import type { Expression, MathJsonIdentifier } from '../../math-json/types';
import { ParseLatexOptions, LatexToken, Terminator, Parser, SymbolTable, SymbolType } from './public';
import type { IndexedLatexDictionary, IndexedLatexDictionaryEntry, IndexedInfixEntry, IndexedPostfixEntry, IndexedPrefixEntry, IndexedSymbolEntry, IndexedExpressionEntry, IndexedFunctionEntry } from './dictionary/definitions';
/**
* ## THEORY OF OPERATIONS
*
* The parser is a recursive descent parser that uses a dictionary of
* LaTeX commands to parse a LaTeX string into a MathJSON expression.
*
* The parser is a stateful object that keeps track of the current position
* in the token stream, and the boundaries of the current parsing operation.
*
* To parse some constructs correctly, the parser needs to know the context
* in which it is parsing. For example, `k(2+x)` can be interpreted
* as a function `k` applied to the sum of `2` and `x`, or as the product
* of `k` and the sum of `2` and `x`. The parser needs to know that `k` is
* a function to interpret the expression as a function application.
*
* The parser uses the current state of the compute engine, and any
* identifier that may have been declared, to determine the correct
* interpretation.
*
* Some constructs declare variables or functions while parsing. For example,
* `\sum_{i=1}^n i` declares the variable `i` as the index of the sum.
*
* The parser keeps track of the parsing state with a stack of symbol tables.
*
* In addition, the handler `getIdentifierType()` is called when the parser
* encounters an unknown identifier. This handler can be used to declare the
* identifier, or to return `unknown` if the identifier is not known.
*
* Some functions affect the state of the parser:
* - `Declare`, `Assign` modify the symbol table
* - `Block` creates a new symbol table (local scope)
* - `Function` creates a new symbol table with named arguments
*
*
*/
export declare class _Parser implements Parser {
/** Parsing options. NOTE(review): presumably the `options` passed to the constructor — confirm. */
readonly options: Readonly<ParseLatexOptions>;
/** NOTE(review): looks like the backing field of the `index` accessor — confirm. */
_index: number;
/**
* The symbol table currently in effect.
* NOTE(review): presumably the top of the stack of symbol tables maintained
* by `pushSymbolTable()` / `popSymbolTable()` — confirm.
*/
symbolTable: SymbolTable;
/** NOTE(review): presumably enters a new scope by pushing a symbol table — confirm. */
pushSymbolTable(): void;
/** NOTE(review): presumably leaves the scope entered by `pushSymbolTable()` — confirm. */
popSymbolTable(): void;
/**
* Register the identifier `id` with the symbol type `type`.
* NOTE(review): presumably in the current symbol table — confirm.
*/
addSymbol(id: string, type: SymbolType): void;
/** NOTE(review): presumably the current position in the token stream — confirm. */
get index(): number;
set index(val: number);
/*
* Private state. The emitted declaration carries no types for these members,
* so their shape cannot be determined from this file.
* NOTE(review): the `_...Tokens` names suggest token sequences derived from
* the number-formatting options — confirm against the implementation.
*/
private _tokens;
private _positiveInfinityTokens;
private _negativeInfinityTokens;
private _notANumberTokens;
private _decimalSeparatorTokens;
private _wholeDigitGroupSeparatorTokens;
private _fractionalDigitGroupSeparatorTokens;
private _exponentProductTokens;
private _beginExponentMarkerTokens;
private _endExponentMarkerTokens;
private _truncationMarkerTokens;
private _imaginaryUnitTokens;
private readonly _dictionary;
private _boundaries;
private _lastPeek;
private _peekCounter;
/**
* @param tokens - the LaTeX tokens to parse
* @param dictionary - the indexed LaTeX dictionary used to look up definitions
* @param options - the parsing options
*/
constructor(tokens: LatexToken[], dictionary: IndexedLatexDictionary, options: Readonly<ParseLatexOptions>);
/** Return the symbol type of the identifier `id`. */
getIdentifierType(id: MathJsonIdentifier): SymbolType;
/** NOTE(review): presumably the token at the current position, without consuming it — confirm. */
get peek(): LatexToken;
/** NOTE(review): presumably returns the current token and advances past it — confirm. */
nextToken(): LatexToken;
/** NOTE(review): presumably true once the end of the token stream has been reached — confirm. */
get atEnd(): boolean;
/**
* Return true if
* - at end of the token stream
* - the `t.condition` function returns true
* Note: the `minPrec` condition is not checked. It should be checked separately.
*/
atTerminator(t?: Readonly<Terminator>): boolean;
/**
* True if the current token matches any of the boundaries we are
* waiting for.
*/
get atBoundary(): boolean;
/**
* NOTE(review): presumably adds `boundary` (a sequence of tokens) to the
* boundaries being waited for — confirm.
*/
addBoundary(boundary: LatexToken[]): void;
/** NOTE(review): presumably removes the most recently added boundary — confirm. */
removeBoundary(): void;
/**
* NOTE(review): presumably consumes the current boundary if the tokens ahead
* match it, and reports whether they did — confirm.
*/
matchBoundary(): boolean;
/**
* Return an error expression for the message or `[code, ...args]` tuple `msg`.
* NOTE(review): how the current boundary is affected is not visible here — confirm.
*/
boundaryError(msg: string | [string, ...Expression[]]): Expression;
/**
* NOTE(review): presumably the LaTeX string for the tokens from index `start`
* up to `end` — confirm the default and inclusivity of `end`.
*/
latex(start: number, end?: number): string;
private latexAhead;
/**
* Return at most `this._dictionary.lookahead` LaTeX tokens.
*
* The index in the returned array corresponds to the number of tokens.
* Note that since a token can be longer than one char ('\\pi', but also
* some astral plane unicode characters), the length of the string
* does not match that index. However, knowing the index is important
* to know by how many tokens to advance.
*
* For example:
*
* `[empty, '\\sqrt', '\\sqrt{', '\\sqrt{2', '\\sqrt{2}']`
*
*/
lookAhead(): [count: number, tokens: string][];
/** Return all the definitions that match the tokens ahead
*
* The return value is an array of pairs `[def, n]` where `def` is the
* definition that matches the tokens ahead, and `n` is the number of tokens
* that matched.
*
* Note the 'operator' kind matches infix, prefix and postfix operators.
*
*/
peekDefinitions(kind: 'expression'): [IndexedExpressionEntry, number][];
peekDefinitions(kind: 'function'): [IndexedFunctionEntry, number][];
peekDefinitions(kind: 'symbol'): [IndexedSymbolEntry, number][];
peekDefinitions(kind: 'postfix'): [IndexedPostfixEntry, number][];
peekDefinitions(kind: 'infix'): [IndexedInfixEntry, number][];
peekDefinitions(kind: 'prefix'): [IndexedPrefixEntry, number][];
peekDefinitions(kind: 'operator'): [IndexedInfixEntry | IndexedPrefixEntry | IndexedPostfixEntry, number][];
/** Skip strictly `<space>` tokens.
* To also skip `{}` see `skipSpace()`.
* To skip visual space (e.g. `\,`) see `skipVisualSpace()`.
*/
skipSpaceTokens(): void;
/** While parsing in math mode, skip applicable spaces, which includes `{}`.
* Do not use to skip spaces while parsing a string. See `skipSpaceTokens()`
* instead.
*
* NOTE(review): the meaning of the boolean result is not visible here;
* presumably true if something was skipped — confirm.
*/
skipSpace(): boolean;
/** Skip visual space (e.g. `\,`). */
skipVisualSpace(): void;
/**
* NOTE(review): presumably consumes the next token if it equals `token` and
* reports whether it did — confirm.
*/
match(token: LatexToken): boolean;
/**
* NOTE(review): presumably consumes the tokens ahead if they equal the
* sequence `tokens`, and reports whether they did — confirm.
*/
matchAll(tokens: LatexToken[]): boolean;
/**
* NOTE(review): presumably consumes and returns the next token if it is one
* of `tokens`; the value returned when nothing matches is not visible
* here — confirm.
*/
matchAny(tokens: LatexToken[]): LatexToken;
/**
* A LaTeX number can be a decimal, hex or octal number.
* It is used in some LaTeX commands, such as `\char`
*
* From TeX:8695 (scan_int):
* > An integer number can be preceded by any number of spaces and `+' or
* > `-' signs. Then comes either a decimal constant (i.e., radix 10), an
* > octal constant (i.e., radix 8, preceded by '), a hexadecimal constant
* > (radix 16, preceded by "), an alphabetic constant (preceded by `), or
* > an internal variable.
*/
matchLatexNumber(isInteger?: boolean): null | number;
/**
* NOTE(review): presumably matches a single character of general purpose
* text, returning `null` when there is no match — confirm.
*/
matchChar(): string | null;
/**
*
* If the next token matches the open delimiter, set a boundary with
* the close token and return true.
*
* This method handles prefixes like `\left` and `\bigl`.
*
* It also handles "shorthand" delimiters, i.e. '(' will match both
* `(` and `\lparen`. If a shorthand is used for the open delimiter, the
* corresponding shorthand will be used for the close delimiter.
* See DELIMITER_SHORTHAND.
*
*/
private matchDelimiter;
/**
* NOTE(review): presumably parses a `{...}` group, returning `null` when
* there is no match — confirm.
*/
parseGroup(): Expression | null;
/**
* NOTE(review): presumably parses an optional `[...]` argument, returning
* `null` when there is no match — confirm.
*/
parseOptionalGroup(): Expression | null;
/**
* NOTE(review): presumably parses a single token as an expression, returning
* `null` when there is no match — confirm.
*/
parseToken(): Expression | null;
/**
* Parse an expression in a tabular format, where rows are separated by `\\`
* and columns by `&`.
*
* Return rows of sparse columns: empty rows are indicated with `Nothing`,
* and empty cells are also indicated with `Nothing`.
*/
parseTabular(): null | Expression[][];
/** Match a string used as a LaTeX identifier, for example an environment
* name.
* Not suitable for general purpose text, e.g. argument of a `\text{}`
* command. See `matchChar()` instead.
*/
private parseStringGroupContent;
/** Parse a group as a string, for example for `\operatorname` or `\begin` */
parseStringGroup(optional?: boolean): string | null;
/** Parse an environment: `\begin{env}...\end{env}`
*/
private parseEnvironment;
/** If the next token matches a `-` sign, return '-', otherwise return '+'
*
*/
private parseOptionalSign;
/** Parse a sequence of decimal digits. The part indicates which
* grouping separator should be expected.
*/
private parseDecimalDigits;
/** The 'part' argument is used to determine what grouping separator
* should be expected.
*/
private parseSignedInteger;
private parseExponent;
/**
* NOTE(review): presumably parses the repeating part of a decimal number and
* returns it as a string — confirm the returned format.
*/
parseRepeatingDecimal(): string;
/**
* Parse a number, with an optional sign, exponent, decimal marker,
* repeating decimals, etc...
*/
parseNumber(): Expression | null;
private parsePrefixOperator;
private parseInfixOperator;
/**
* This returns an array of arguments (as in a function application),
* or null if there is no match.
*
* - 'enclosure' : will look for an argument inside an enclosure
* (open/close fence)
* - 'implicit': either an expression inside a pair of `()`, or just a product
* (i.e. we interpret `\cos 2x + 1` as `\cos(2x) + 1`)
*
*/
parseArguments(kind?: 'enclosure' | 'implicit', until?: Readonly<Terminator>): ReadonlyArray<Expression> | null;
/**
* An enclosure is an opening matchfix operator, an optional expression,
* optionally followed multiple times by a separator and another expression,
* and finally a closing matching operator.
*/
private parseEnclosure;
/**
* A generic expression is used for dictionary entries that do
* some complex (non-standard) parsing. This includes trig functions (to
* parse implicit arguments), and integrals (to parse the integrand and
* limits and the "dx" terminator).
*/
private parseGenericExpression;
/**
* A function is an identifier followed by postfix operators
* (`\prime`...) and some arguments.
*/
private parseFunction;
/**
* NOTE(review): presumably parses a symbol, returning `null` when there is
* no match; the role of `until` is not visible here — confirm.
*/
parseSymbol(until?: Readonly<Terminator>): Expression | null;
/**
* Parse a sequence of superfix/subfix operators, e.g. `^{*}`
*
* Superfix and subfix need special handling:
*
* - they act mostly like an infix operator, but they are commutative, i.e.
* `x_a^b` should be parsed identically to `x^b_a`.
*
* - furthermore, in LaTeX `x^a^b` parses the same as `x^a{}^b`.
*
*/
private parseSupsub;
/**
* NOTE(review): presumably applies a postfix operator found after `lhs`,
* returning `null` when there is no match — confirm.
*/
parsePostfixOperator(lhs: Expression | null, until?: Readonly<Terminator>): Expression | null;
/**
* This method can be invoked when we know we're in an error situation,
* for example when there are tokens remaining after we've finished parsing.
*
* In general, if a context does not apply, we return `null` to give
* the chance to some other option to be considered. However, in some cases
* we know we've exhausted all possibilities, and in this case this method
* will return an error expression as informative as possible.
*
* We've encountered a LaTeX command or symbol but were not able to match it
* to any entry in the LaTeX dictionary, or ran into it in an unexpected
* context (postfix operator lacking an argument, for example)
*/
parseSyntaxError(): Expression;
/**
* <primary> ::=
* (<number> | <symbol> | <environment> | <matchfix-expr>)
* <subsup>* <postfix-operator>*
*
* <symbol> ::=
* (<symbol-id> | (<latex-command><latex-arguments>)) <arguments>
*
* <matchfix-expr> ::=
* <matchfix-op-open>
* <expression>
* (<matchfix-op-separator> <expression>)*
* <matchfix-op-close>
*
*/
private parsePrimary;
/**
* Parse an expression:
*
* <expression> ::=
* | <primary>
* | <prefix-op> <primary>
* | <primary> <infix-op> <expression>
*
* Stop when an operator of precedence less than `until.minPrec`
* is encountered
*/
parseExpression(until?: Readonly<Terminator>): Expression | null;
/**
* Add LaTeX or other requested metadata to the expression
*
* NOTE(review): `start` is presumably the token index at which the
* expression began — confirm.
*/
decorate(expr: Expression | null, start: number): Expression | null;
/**
* Return an error expression for the message or `[code, ...args]` tuple `code`.
* NOTE(review): `fromToken` is presumably the token index at which the
* erroneous input starts — confirm.
*/
error(code: string | [string, ...Expression[]], fromToken: number): Expression;
private isFunctionOperator;
/** Return all defs of the specified kind.
* The defs at the end of the dictionary have priority, since they may
* override previous definitions. (For example, there is a core definition
* for matchfix[], which maps to a List, and a logic definition which
* matches to Boole. The logic definition should take precedence.)
*/
getDefs(kind: string): Iterable<IndexedLatexDictionaryEntry>;
}
/**
* Parse a LaTeX string into a MathJSON expression.
*
* @param latex - the LaTeX source to parse
* @param dictionary - the indexed LaTeX dictionary to use
* @param options - the parsing options
* @returns an `Expression`, or `null`.
* NOTE(review): the conditions under which `null` is returned are not visible
* in this declaration — confirm against the implementation.
*/
export declare function parse(latex: string, dictionary: IndexedLatexDictionary, options: Readonly<ParseLatexOptions>): Expression | null;