UNPKG

tex2typst — JavaScript library for converting TeX code to Typst

Version: (not captured in this page extract)

1,669 lines (1,662 loc) · 138 kB
// src/utils.ts function isalpha(char) { return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".includes(char); } function assert(condition, message = "Assertion failed.") { if (!condition) { throw new Error(message); } } // src/tex-types.ts var TexToken = class _TexToken { type; value; constructor(type, value) { this.type = type; this.value = value; } eq(token) { return this.type === token.type && this.value === token.value; } toString() { switch (this.type) { case 4 /* COMMENT */: return "%" + this.value; default: return this.value; } } toNode() { return new TexTerminal(this); } static EMPTY = new _TexToken(0 /* EMPTY */, ""); static COMMAND_DISPLAYSTYLE = new _TexToken(2 /* COMMAND */, "\\displaystyle"); static COMMAND_TEXTSTYLE = new _TexToken(2 /* COMMAND */, "\\textstyle"); }; var TexNode = class { type; head; constructor(type, head) { this.type = type; this.head = head ? head : TexToken.EMPTY; } // Note that this is only shallow equality. eq(other) { return this.type === other.type && this.head.eq(other.head); } // Note: toString() is expensive. Do not use it on performance-critical code path. toString() { return this.serialize().reduce(writeTexTokenBuffer, ""); } }; var TexTerminal = class extends TexNode { constructor(head) { super("terminal", head); } serialize() { switch (this.head.type) { case 0 /* EMPTY */: return []; case 1 /* ELEMENT */: case 2 /* COMMAND */: case 3 /* LITERAL */: case 4 /* COMMENT */: case 7 /* CONTROL */: { return [this.head]; } case 5 /* SPACE */: case 6 /* NEWLINE */: { const tokens = []; for (const c of this.head.value) { const token_type = c === " " ? 
5 /* SPACE */ : 6 /* NEWLINE */; tokens.push(new TexToken(token_type, c)); } return tokens; } default: throw new Error(`Unknown terminal token type: ${this.head.type}`); } } }; var TexText = class extends TexNode { constructor(head) { assert(head.type === 3 /* LITERAL */); super("text", head); } serialize() { return [ new TexToken(2 /* COMMAND */, "\\text"), new TexToken(1 /* ELEMENT */, "{"), this.head, new TexToken(1 /* ELEMENT */, "}") ]; } }; var TexGroup = class extends TexNode { items; constructor(items) { super("ordgroup", TexToken.EMPTY); this.items = items; } serialize() { return this.items.map((n) => n.serialize()).flat(); } }; var TexSupSub = class extends TexNode { base; sup; sub; constructor(data) { super("supsub", TexToken.EMPTY); this.base = data.base; this.sup = data.sup; this.sub = data.sub; } serialize() { let tokens = []; const { base, sup, sub } = this; tokens = tokens.concat(base.serialize()); function should_wrap_in_braces(node) { if (node.type === "ordgroup" || node.type === "supsub" || node.head.type === 0 /* EMPTY */) { return true; } else if (node.head.type === 1 /* ELEMENT */ && /\d+(\.\d+)?/.test(node.head.value) && node.head.value.length > 1) { return true; } else { return false; } } if (sub) { tokens.push(new TexToken(7 /* CONTROL */, "_")); if (should_wrap_in_braces(sub)) { tokens.push(new TexToken(1 /* ELEMENT */, "{")); tokens = tokens.concat(sub.serialize()); tokens.push(new TexToken(1 /* ELEMENT */, "}")); } else { tokens = tokens.concat(sub.serialize()); } } if (sup) { tokens.push(new TexToken(7 /* CONTROL */, "^")); if (should_wrap_in_braces(sup)) { tokens.push(new TexToken(1 /* ELEMENT */, "{")); tokens = tokens.concat(sup.serialize()); tokens.push(new TexToken(1 /* ELEMENT */, "}")); } else { tokens = tokens.concat(sup.serialize()); } } return tokens; } }; var TexFuncCall = class extends TexNode { args; // For type="sqrt", it's additional argument wrapped square bracket. e.g. 
3 in \sqrt[3]{x} data; constructor(head, args, data = null) { super("funcCall", head); this.args = args; this.data = data; } serialize() { let tokens = []; tokens.push(this.head); if (this.head.value === "\\sqrt" && this.data) { tokens.push(new TexToken(1 /* ELEMENT */, "[")); tokens = tokens.concat(this.data.serialize()); tokens.push(new TexToken(1 /* ELEMENT */, "]")); } for (const arg of this.args) { tokens.push(new TexToken(1 /* ELEMENT */, "{")); tokens = tokens.concat(arg.serialize()); tokens.push(new TexToken(1 /* ELEMENT */, "}")); } return tokens; } }; var TexLeftRight = class extends TexNode { body; left; right; constructor(data) { super("leftright", TexToken.EMPTY); this.body = data.body; this.left = data.left; this.right = data.right; } serialize() { let tokens = []; tokens.push(new TexToken(2 /* COMMAND */, "\\left")); tokens.push(new TexToken(1 /* ELEMENT */, this.left ? this.left.value : ".")); tokens = tokens.concat(this.body.serialize()); tokens.push(new TexToken(2 /* COMMAND */, "\\right")); tokens.push(new TexToken(1 /* ELEMENT */, this.right ? 
this.right.value : ".")); return tokens; } }; var TexBeginEnd = class extends TexNode { matrix; // for environment="array" or "subarray", there's additional data like {c|c} right after \begin{env} data; constructor(head, matrix, data = null) { assert(head.type === 3 /* LITERAL */); super("beginend", head); this.matrix = matrix; this.data = data; } serialize() { let tokens = []; const matrix = this.matrix; tokens.push(new TexToken(2 /* COMMAND */, "\\begin")); tokens.push(new TexToken(1 /* ELEMENT */, "{")); tokens = tokens.concat(this.head); tokens.push(new TexToken(1 /* ELEMENT */, "}")); tokens.push(new TexToken(6 /* NEWLINE */, "\n")); for (let i = 0; i < matrix.length; i++) { const row = matrix[i]; for (let j = 0; j < row.length; j++) { const cell = row[j]; tokens = tokens.concat(cell.serialize()); if (j !== row.length - 1) { tokens.push(new TexToken(7 /* CONTROL */, "&")); } } if (i !== matrix.length - 1) { tokens.push(new TexToken(7 /* CONTROL */, "\\\\")); } } tokens.push(new TexToken(6 /* NEWLINE */, "\n")); tokens.push(new TexToken(2 /* COMMAND */, "\\end")); tokens.push(new TexToken(1 /* ELEMENT */, "{")); tokens = tokens.concat(this.head); tokens.push(new TexToken(1 /* ELEMENT */, "}")); return tokens; } }; function writeTexTokenBuffer(buffer, token) { const str = token.toString(); let no_need_space = false; if (token.type === 5 /* SPACE */) { no_need_space = true; } else { no_need_space ||= /[{\(\[\|]$/.test(buffer); no_need_space ||= /\\\w+$/.test(buffer) && str === "["; no_need_space ||= /^[\.,;:!\?\(\)\]{}_^]$/.test(str); no_need_space ||= ["\\{", "\\}"].includes(str); no_need_space ||= str === "'"; no_need_space ||= buffer.endsWith("_") || buffer.endsWith("^"); no_need_space ||= /\s$/.test(buffer); no_need_space ||= /^\s/.test(str); no_need_space ||= buffer === ""; no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(buffer) || buffer === "-" || buffer === "+"; no_need_space ||= buffer.endsWith("&") && str === "="; no_need_space ||= /\d$/.test(buffer) && 
/^[a-zA-Z]$/.test(str); } if (!no_need_space) { buffer += " "; } return buffer + str; } // src/generic.ts function array_find(array, item, start = 0) { const index = array.slice(start).findIndex((x) => x.eq(item)); return index === -1 ? -1 : index + start; } function array_includes(array, item) { return array.some((x) => x.eq(item)); } function array_split(array, sep) { const res = []; let current_slice = []; for (const i of array) { if (i.eq(sep)) { res.push(current_slice); current_slice = []; } else { current_slice.push(i); } } res.push(current_slice); return res; } function array_join(arrays, sep) { return arrays.flatMap((arr, i) => i !== arrays.length - 1 ? [...arr, sep] : arr); } function array_intersperse(array, sep) { return array.flatMap((x, i) => i !== array.length - 1 ? [x, sep] : [x]); } // src/jslex.ts var EOF = {}; function matchcompare(m1, m2) { const m1_len = m1.reMatchArray[0].length; const m2_len = m2.reMatchArray[0].length; if (m2_len !== m1_len) { return m2_len - m1_len; } else { return m1.index - m2.index; } } var Scanner = class { _input; _lexer; // position within input stream _pos = 0; // current line number _line = 0; // current column number _col = 0; _offset = 0; _less = null; _go = false; _newstate = null; _state; _text = null; _leng = null; _reMatchArray = null; constructor(input, lexer) { this._input = input; this._lexer = lexer; this._state = lexer.states[0]; } /** * Analogous to yytext and yyleng in lex - will be set during scan. */ text() { return this._text; } leng() { return this._leng; } reMatchArray() { return this._reMatchArray; } /** * Position of in stream, line number and column number of match. */ pos() { return this._pos; } line() { return this._line; } column() { return this._col; } /** * Analogous to input() in lex. * @return {string} The next character in the stream. */ input() { return this._input.charAt(this._pos + this._leng + this._offset++); } /** * Similar to unput() in lex, but does not allow modifying the stream. 
* @return {int} The offset position after the operation. */ unput() { return this._offset = this._offset > 0 ? this._offset-- : 0; } /** * Analogous to yyless(n) in lex - retains the first n characters from this pattern, and returns * the rest to the input stream, such that they will be used in the next pattern-matching operation. * @param {int} n Number of characters to retain. * @return {int} Length of the stream after the operation has completed. */ less(n) { this._less = n; this._offset = 0; this._text = this._text.substring(0, n); return this._leng = this._text.length; } /** * Like less(), but instead of retaining the first n characters, it chops off the last n. * @param {int} n Number of characters to chop. * @return {int} Length of the stream after the operation has completed. */ pushback(n) { return this.less(this._leng - n); } /** * Similar to REJECT in lex, except it doesn't break the current execution context. * TIP: reject() should be the last instruction in a spec callback. */ reject() { this._go = true; } /** * Analogous to BEGIN in lex - sets the named state (start condition). * @param {string|int} state Name of state to switch to, or ordinal number (0 is first, etc). * @return {string} The new state on successful switch, throws exception on failure. */ begin(state) { if (this._lexer.specification[state]) { return this._newstate = state; } const s = this._lexer.states[parseInt(state)]; if (s) { return this._newstate = s; } throw "Unknown state '" + state + "' requested"; } /** * Simple accessor for reading in the current state. * @return {string} The current state. */ state() { return this._state; } /** * Scan method to be returned to caller - grabs the next token and fires appropriate calback. * @return {T} The next token extracted from the stream. 
*/ scan() { if (this._pos >= this._input.length) { return EOF; } const str = this._input.substring(this._pos); const rules = this._lexer.specification[this._state]; const matches = []; for (let i = 0; i < rules.length; i++) { const rule = rules[i]; const mt = str.match(rule.re); if (mt !== null && mt[0].length > 0) { matches.push({ index: i, rule, reMatchArray: mt }); } } if (matches.length === 0) { throw new Error("No match found for input '" + str + "'"); } matches.sort(matchcompare); this._go = true; let result; let matched_text; for (let j = 0, n = matches.length; j < n && this._go; j++) { this._offset = 0; this._less = null; this._go = false; this._newstate = null; const m = matches[j]; matched_text = m.reMatchArray[0]; this._text = matched_text; this._leng = matched_text.length; this._reMatchArray = m.reMatchArray; result = m.rule.action(this); if (this._newstate && this._newstate != this._state) { this._state = this._newstate; break; } } const text = this._less === null ? matched_text : matched_text.substring(0, this._less); const len = text.length; this._pos += len + this._offset; const nlm = text.match(/\n/g); if (nlm !== null) { this._line += nlm.length; this._col = len - text.lastIndexOf("\n") - 1; } else { this._col += len; } return result; } }; var JSLex = class { states; specification; constructor(spec3) { this.states = Object.keys(spec3); this.specification = {}; for (const s of this.states) { const rule_map = spec3[s]; if (s in this.specification) { throw "Duplicate state declaration encountered for state '" + s + "'"; } this.specification[s] = []; for (const [k, v] of rule_map.entries()) { let re; try { re = new RegExp("^" + k); } catch (err) { throw "Invalid regexp '" + k + "' in state '" + s + "' (" + err.message + ")"; } this.specification[s].push({ re, action: v }); } } } /** * Scanner function - makes a new scanner object which is used to get tokens one at a time. * @param {string} input Input text to tokenize. 
* @return {function} Scanner function. */ scanner(input) { return new Scanner(input, this); } /** * Similar to lex's yylex() function, consumes all input, calling calback for each token. * @param {string} input Text to lex. * @param {function} callback Function to execute for each token. */ lex(input, callback) { const scanner = this.scanner(input); while (true) { const token = scanner.scan(); if (token === EOF) { return; } if (token !== void 0) { callback(token); } } } /** * Consumes all input, collecting tokens along the way. * @param {string} input Text to lex. * @return {array} List of tokens, may contain an Error at the end. */ collect(input) { const tokens = []; const callback = function(item) { if (Array.isArray(item)) { tokens.push(...item); } else { tokens.push(item); } }; this.lex(input, callback); return tokens; } }; // src/tex-tokenizer.ts var TEX_UNARY_COMMANDS = [ "sqrt", "text", "bar", "bold", "boldsymbol", "ddot", "dot", "hat", "mathbb", "mathbf", "mathcal", "mathfrak", "mathit", "mathrm", "mathscr", "mathsf", "mathtt", "operatorname", "operatorname*", "overbrace", "overline", "pmb", "rm", "tilde", "underbrace", "underline", "vec", "widehat", "widetilde", "overleftarrow", "overrightarrow", "hspace", "substack", "displaylines", "mathinner", "mathrel", "mathbin", "mathop", "not", // the braket package "bra", "ket", "braket", "set", "Bra", "Ket", "Braket", "Set", "pmod" ]; var TEX_BINARY_COMMANDS = [ "frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom", "overset", "underset", "textcolor" ]; function unescape(str) { const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"]; for (const char of chars) { str = str.replaceAll("\\" + char, char); } return str; } var rules_map = /* @__PURE__ */ new Map([ // match `\begin{array}{cc}` [ String.raw`\\begin{(array|subarry)}{(.+?)}`, (s) => { const match = s.reMatchArray(); return [ new TexToken(2 /* COMMAND */, "\\begin"), new TexToken(7 /* CONTROL */, "{"), new TexToken(3 /* LITERAL */, match[1]), new TexToken(7 
/* CONTROL */, "}"), new TexToken(7 /* CONTROL */, "{"), new TexToken(3 /* LITERAL */, match[2]), new TexToken(7 /* CONTROL */, "}") ]; } ], [ String.raw`\\(text|operatorname\*?|textcolor|begin|end|hspace|array){(.+?)}`, (s) => { const match = s.reMatchArray(); return [ new TexToken(2 /* COMMAND */, "\\" + match[1]), new TexToken(7 /* CONTROL */, "{"), new TexToken(3 /* LITERAL */, unescape(match[2])), new TexToken(7 /* CONTROL */, "}") ]; } ], [String.raw`%[^\n]*`, (s) => new TexToken(4 /* COMMENT */, s.text().substring(1))], [String.raw`[{}_^&]`, (s) => new TexToken(7 /* CONTROL */, s.text())], [String.raw`\\[\\,:;!> ]`, (s) => new TexToken(7 /* CONTROL */, s.text())], [String.raw`~`, (s) => new TexToken(7 /* CONTROL */, s.text())], [String.raw`\r?\n`, (_s) => new TexToken(6 /* NEWLINE */, "\n")], [String.raw`\s+`, (s) => new TexToken(5 /* SPACE */, s.text())], [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(1 /* ELEMENT */, s.text())], // e.g. match `\frac13`, `\frac1 b`, `\frac a b` [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => { const match = s.reMatchArray(); const command = match[1]; if (TEX_BINARY_COMMANDS.includes(command.substring(1))) { const arg1 = match[2].trimStart(); const arg2 = match[3]; return [ new TexToken(2 /* COMMAND */, command), new TexToken(1 /* ELEMENT */, arg1), new TexToken(1 /* ELEMENT */, arg2) ]; } else { s.reject(); return []; } }], // e.g. 
match `\sqrt3`, `\sqrt a` [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => { const match = s.reMatchArray(); const command = match[1]; if (TEX_UNARY_COMMANDS.includes(command.substring(1))) { const arg1 = match[2].trimStart(); return [ new TexToken(2 /* COMMAND */, command), new TexToken(1 /* ELEMENT */, arg1) ]; } else { s.reject(); return []; } }], [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(2 /* COMMAND */, s.text())], // Numbers like "123", "3.14" [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(1 /* ELEMENT */, s.text())], [String.raw`[a-zA-Z]`, (s) => new TexToken(1 /* ELEMENT */, s.text())], [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(1 /* ELEMENT */, s.text())], // non-ASCII characters [String.raw`[^\x00-\x7F]`, (s) => new TexToken(1 /* ELEMENT */, s.text())], [String.raw`.`, (s) => new TexToken(8 /* UNKNOWN */, s.text())] ]); var spec = { "start": rules_map }; function tokenize_tex(input) { const lexer = new JSLex(spec); return lexer.collect(input); } // src/tex-parser.ts var IGNORED_COMMANDS = [ "bigl", "bigr", "bigm", "biggl", "biggr", "biggm", "Bigl", "Bigr", "Bigm", "Biggl", "Biggr", "Biggm" ]; var EMPTY_NODE = TexToken.EMPTY.toNode(); function get_command_param_num(command) { if (TEX_UNARY_COMMANDS.includes(command)) { return 1; } else if (TEX_BINARY_COMMANDS.includes(command)) { return 2; } else { return 0; } } var LEFT_CURLY_BRACKET = new TexToken(7 /* CONTROL */, "{"); var RIGHT_CURLY_BRACKET = new TexToken(7 /* CONTROL */, "}"); var LEFT_SQUARE_BRACKET = new TexToken(1 /* ELEMENT */, "["); var RIGHT_SQUARE_BRACKET = new TexToken(1 /* ELEMENT */, "]"); function eat_whitespaces(tokens, start) { let pos = start; while (pos < tokens.length && [5 /* SPACE */, 6 /* NEWLINE */].includes(tokens[pos].type)) { pos++; } return tokens.slice(start, pos); } function eat_parenthesis(tokens, start) { const firstToken = tokens[start]; if (firstToken.type === 1 /* ELEMENT */ && ["(", ")", "[", "]", "|", "\\{", "\\}", ".", "\\|", "<", 
">"].includes(firstToken.value)) { return firstToken; } else if (firstToken.type === 2 /* COMMAND */ && ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle", "lparen", "rparen", "lbrace", "rbrace"].includes(firstToken.value.slice(1))) { return firstToken; } else { return null; } } function eat_primes(tokens, start) { let pos = start; while (pos < tokens.length && tokens[pos].eq(new TexToken(1 /* ELEMENT */, "'"))) { pos += 1; } return pos - start; } function process_styled_parts(nodes) { let style_token = null; let bucket = []; let res = []; let i = 0; while (true) { if (i === nodes.length || nodes[i].head.eq(TexToken.COMMAND_DISPLAYSTYLE) || nodes[i].head.eq(TexToken.COMMAND_TEXTSTYLE)) { if (bucket.length > 0) { const g = bucket.length === 1 ? bucket[0] : new TexGroup(bucket); res.push(style_token ? new TexFuncCall(style_token, [g]) : g); } if (i === nodes.length) { break; } bucket = []; style_token = nodes[i].head; } else { bucket.push(nodes[i]); } i++; } return res; } var LEFT_COMMAND = new TexToken(2 /* COMMAND */, "\\left"); var RIGHT_COMMAND = new TexToken(2 /* COMMAND */, "\\right"); var BEGIN_COMMAND = new TexToken(2 /* COMMAND */, "\\begin"); var END_COMMAND = new TexToken(2 /* COMMAND */, "\\end"); var CONTROL_LINEBREAK = new TexToken(7 /* CONTROL */, "\\\\"); var LatexParserError = class _LatexParserError extends Error { constructor(message) { super(message); this.name = "LatexParserError"; } static UNMATCHED_LEFT_BRACE = new _LatexParserError("Unmatched '\\{'"); static UNMATCHED_RIGHT_BRACE = new _LatexParserError("Unmatched '\\}'"); static UNMATCHED_LEFT_BRACKET = new _LatexParserError("Unmatched '\\['"); static UNMATCHED_RIGHT_BRACKET = new _LatexParserError("Unmatched '\\]'"); static UNMATCHED_COMMAND_BEGIN = new _LatexParserError("Unmatched '\\begin'"); static UNMATCHED_COMMAND_END = new _LatexParserError("Unmatched '\\end'"); static UNMATCHED_COMMAND_LEFT = new _LatexParserError("Unmatched '\\left'"); static UNMATCHED_COMMAND_RIGHT = new 
_LatexParserError("Unmatched '\\right'"); }; var SUB_SYMBOL = new TexToken(7 /* CONTROL */, "_"); var SUP_SYMBOL = new TexToken(7 /* CONTROL */, "^"); var LatexParser = class { space_sensitive; newline_sensitive; // how many levels of \begin{...} \end{...} are we currently in alignmentDepth = 0; constructor(space_sensitive = false, newline_sensitive = true) { this.space_sensitive = space_sensitive; this.newline_sensitive = newline_sensitive; } parse(tokens) { return this.parseGroup(tokens.slice(0)); } parseGroup(tokens) { const [tree, _] = this.parseClosure(tokens, 0, null); return tree; } // return pos: (position of closingToken) + 1 // pos will be -1 if closingToken is not found parseClosure(tokens, start, closingToken) { const results = []; let pos = start; while (pos < tokens.length) { if (closingToken !== null && tokens[pos].eq(closingToken)) { break; } const [res, newPos] = this.parseNextExpr(tokens, pos); pos = newPos; if (res.head.type === 5 /* SPACE */ || res.head.type === 6 /* NEWLINE */) { if (!this.space_sensitive && res.head.value.replace(/ /g, "").length === 0) { continue; } if (!this.newline_sensitive && res.head.value === "\n") { continue; } } results.push(res); } if (pos >= tokens.length && closingToken !== null) { return [EMPTY_NODE, -1]; } const styledResults = process_styled_parts(results); let node; if (styledResults.length === 1) { node = styledResults[0]; } else { node = new TexGroup(styledResults); } return [node, pos + 1]; } parseNextExpr(tokens, start) { let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start); let sub = null; let sup = null; let num_prime = 0; num_prime += eat_primes(tokens, pos); pos += num_prime; if (pos < tokens.length) { const next_token = tokens[pos]; if (next_token.eq(SUB_SYMBOL)) { [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1); const new_primes = eat_primes(tokens, pos); num_prime += new_primes; pos += new_primes; if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) { [sup, pos] = 
this.parseNextExprWithoutSupSub(tokens, pos + 1); if (eat_primes(tokens, pos) > 0) { throw new LatexParserError("Double superscript"); } } } else if (next_token.eq(SUP_SYMBOL)) { [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1); if (eat_primes(tokens, pos) > 0) { throw new LatexParserError("Double superscript"); } if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) { [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1); if (eat_primes(tokens, pos) > 0) { throw new LatexParserError("Double superscript"); } } } } if (sub !== null || sup !== null || num_prime > 0) { const res = { base, sup: null, sub: null }; if (sub) { res.sub = sub; } if (num_prime > 0) { const items = []; for (let i = 0; i < num_prime; i++) { items.push(new TexToken(1 /* ELEMENT */, "'").toNode()); } if (sup) { items.push(sup); } res.sup = items.length === 1 ? items[0] : new TexGroup(items); } else if (sup) { res.sup = sup; } return [new TexSupSub(res), pos]; } else { return [base, pos]; } } parseNextExprWithoutSupSub(tokens, start) { if (start >= tokens.length) { throw new LatexParserError("Unexpected end of input"); } const firstToken = tokens[start]; switch (firstToken.type) { case 1 /* ELEMENT */: case 3 /* LITERAL */: case 4 /* COMMENT */: case 5 /* SPACE */: case 6 /* NEWLINE */: return [firstToken.toNode(), start + 1]; case 2 /* COMMAND */: const commandName = firstToken.value.slice(1); if (IGNORED_COMMANDS.includes(commandName)) { return this.parseNextExprWithoutSupSub(tokens, start + 1); } if (firstToken.eq(BEGIN_COMMAND)) { return this.parseBeginEndExpr(tokens, start); } else if (firstToken.eq(END_COMMAND)) { throw LatexParserError.UNMATCHED_COMMAND_END; } else if (firstToken.eq(LEFT_COMMAND)) { return this.parseLeftRightExpr(tokens, start); } else if (firstToken.eq(RIGHT_COMMAND)) { throw LatexParserError.UNMATCHED_COMMAND_RIGHT; } else { return this.parseCommandExpr(tokens, start); } case 7 /* CONTROL */: const controlChar = firstToken.value; switch 
(controlChar) { case "{": const [group, newPos] = this.parseClosure(tokens, start + 1, RIGHT_CURLY_BRACKET); if (newPos === -1) { throw LatexParserError.UNMATCHED_LEFT_BRACE; } return [group, newPos]; case "}": throw LatexParserError.UNMATCHED_RIGHT_BRACE; case "\\\\": case "\\!": case "\\,": case "\\:": case "\\;": case "\\>": return [firstToken.toNode(), start + 1]; case "\\ ": case "~": return [firstToken.toNode(), start + 1]; case "_": case "^": return [EMPTY_NODE, start]; case "&": if (this.alignmentDepth <= 0) { throw new LatexParserError("Unexpected & outside of an alignment"); } return [firstToken.toNode(), start + 1]; default: throw new LatexParserError("Unknown control sequence"); } default: throw new LatexParserError("Unknown token type"); } } parseCommandExpr(tokens, start) { assert(tokens[start].type === 2 /* COMMAND */); const command_token = tokens[start]; const command = command_token.value; let pos = start + 1; const paramNum = get_command_param_num(command.slice(1)); switch (paramNum) { case 0: return [command_token.toNode(), pos]; case 1: { if (pos >= tokens.length) { throw new LatexParserError("Expecting argument for " + command); } if (command === "\\sqrt" && pos < tokens.length && tokens[pos].eq(LEFT_SQUARE_BRACKET)) { const [exponent, newPos1] = this.parseClosure(tokens, pos + 1, RIGHT_SQUARE_BRACKET); if (newPos1 === -1) { throw LatexParserError.UNMATCHED_LEFT_BRACKET; } const [arg12, newPos2] = this.parseNextArg(tokens, newPos1); return [new TexFuncCall(command_token, [arg12], exponent), newPos2]; } else if (command === "\\text") { if (pos + 2 >= tokens.length) { throw new LatexParserError("Expecting content for \\text command"); } assert(tokens[pos].eq(LEFT_CURLY_BRACKET)); assert(tokens[pos + 1].type === 3 /* LITERAL */); assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET)); const literal = tokens[pos + 1]; return [new TexText(literal), pos + 3]; } else if (command === "\\displaylines") { assert(tokens[pos].eq(LEFT_CURLY_BRACKET)); const 
[matrix, newPos2] = this.parseAligned(tokens, pos + 1, RIGHT_CURLY_BRACKET); if (newPos2 === -1) { throw LatexParserError.UNMATCHED_LEFT_BRACE; } const group = new TexGroup(array_join(matrix, CONTROL_LINEBREAK.toNode())); return [new TexFuncCall(command_token, [group]), newPos2]; } let [arg1, newPos] = this.parseNextArg(tokens, pos); return [new TexFuncCall(command_token, [arg1]), newPos]; } case 2: { const [arg1, pos1] = this.parseNextArg(tokens, pos); const [arg2, pos2] = this.parseNextArg(tokens, pos1); return [new TexFuncCall(command_token, [arg1, arg2]), pos2]; } default: throw new Error("Invalid number of parameters"); } } /* Extract a non-space argument from the token stream. So that `\frac{12} 3` is parsed as TypstFuncCall{ head: '\frac', args: [ELEMENT_12, ELEMENT_3] } rather than TypstFuncCall{ head: '\frac', args: [ELEMENT_12, SPACE] }, ELEMENT_3 */ parseNextArg(tokens, start) { let pos = start; let arg = null; while (pos < tokens.length) { let node; [node, pos] = this.parseNextExprWithoutSupSub(tokens, pos); if (!(node.head.type === 5 /* SPACE */ || node.head.type === 6 /* NEWLINE */)) { arg = node; break; } } if (arg === null) { throw new LatexParserError("Expecting argument but token stream ended"); } return [arg, pos]; } parseLeftRightExpr(tokens, start) { assert(tokens[start].eq(LEFT_COMMAND)); let pos = start + 1; pos += eat_whitespaces(tokens, pos).length; if (pos >= tokens.length) { throw new LatexParserError("Expecting a delimiter after \\left"); } const leftDelimiter = eat_parenthesis(tokens, pos); if (leftDelimiter === null) { throw new LatexParserError("Invalid delimiter after \\left"); } pos++; const [body, idx] = this.parseClosure(tokens, pos, RIGHT_COMMAND); if (idx === -1) { throw LatexParserError.UNMATCHED_COMMAND_LEFT; } pos = idx; pos += eat_whitespaces(tokens, pos).length; if (pos >= tokens.length) { throw new LatexParserError("Expecting a delimiter after \\right"); } const rightDelimiter = eat_parenthesis(tokens, pos); if 
(rightDelimiter === null) { throw new LatexParserError("Invalid delimiter after \\right"); } pos++; const left = leftDelimiter.value === "." ? null : leftDelimiter; const right = rightDelimiter.value === "." ? null : rightDelimiter; const res = new TexLeftRight({ body, left, right }); return [res, pos]; } parseBeginEndExpr(tokens, start) { assert(tokens[start].eq(BEGIN_COMMAND)); let pos = start + 1; assert(tokens[pos].eq(LEFT_CURLY_BRACKET)); assert(tokens[pos + 1].type === 3 /* LITERAL */); assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET)); const envName = tokens[pos + 1].value; pos += 3; let data = null; if (["array", "subarray"].includes(envName)) { pos += eat_whitespaces(tokens, pos).length; [data, pos] = this.parseNextArg(tokens, pos); } const [body, endIdx] = this.parseAligned(tokens, pos, END_COMMAND); if (endIdx === -1) { throw LatexParserError.UNMATCHED_COMMAND_BEGIN; } pos = endIdx; assert(tokens[pos].eq(LEFT_CURLY_BRACKET)); assert(tokens[pos + 1].type === 3 /* LITERAL */); assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET)); if (tokens[pos + 1].value !== envName) { throw new LatexParserError("\\begin and \\end environments mismatch"); } pos += 3; const res = new TexBeginEnd(new TexToken(3 /* LITERAL */, envName), body, data); return [res, pos]; } // return pos: (position of closingToken) + 1 // pos will be -1 if closingToken is not found parseAligned(tokens, start, closingToken) { this.alignmentDepth++; let pos = start; pos += eat_whitespaces(tokens, pos).length; let closure; [closure, pos] = this.parseClosure(tokens, pos, closingToken); if (pos === -1) { return [[], -1]; } let allRows; if (closure.type === "ordgroup") { const elements = closure.items; while (elements.length > 0 && [5 /* SPACE */, 6 /* NEWLINE */].includes(elements[elements.length - 1].head.type)) { elements.pop(); } allRows = array_split(elements, new TexToken(7 /* CONTROL */, "\\\\").toNode()).map((row) => { return array_split(row, new TexToken(7 /* CONTROL */, "&").toNode()).map((arr) => 
new TexGroup(arr)); }); } else { allRows = [[closure]]; } this.alignmentDepth--; return [allRows, pos]; } }; function passIgnoreWhitespaceBeforeScriptMark(tokens) { const is_script_mark = (token) => token.eq(SUB_SYMBOL) || token.eq(SUP_SYMBOL); let out_tokens = []; for (let i = 0; i < tokens.length; i++) { if (tokens[i].type === 5 /* SPACE */ && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) { continue; } if (tokens[i].type === 5 /* SPACE */ && i - 1 >= 0 && is_script_mark(tokens[i - 1])) { continue; } out_tokens.push(tokens[i]); } return out_tokens; } function passExpandCustomTexMacros(tokens, customTexMacros) { let out_tokens = []; for (const token of tokens) { if (token.type === 2 /* COMMAND */ && customTexMacros[token.value]) { const expanded_tokens = tokenize_tex(customTexMacros[token.value]); out_tokens = out_tokens.concat(expanded_tokens); } else { out_tokens.push(token); } } return out_tokens; } function parseTex(tex, customTexMacros = {}) { const parser = new LatexParser(); let tokens = tokenize_tex(tex); tokens = passIgnoreWhitespaceBeforeScriptMark(tokens); tokens = passExpandCustomTexMacros(tokens, customTexMacros); return parser.parse(tokens); } // src/typst-shorthands.ts var shorthandMap = /* @__PURE__ */ new Map([ // The following snippet is generated with tools/make-short-hand-map.py ["arrow.l.r.double.long", "<==>"], ["arrow.l.r.long", "<-->"], ["arrow.r.bar", "|->"], ["arrow.r.double.bar", "|=>"], ["arrow.r.double.long", "==>"], ["arrow.r.long", "-->"], ["arrow.r.long.squiggly", "~~>"], ["arrow.r.tail", ">->"], ["arrow.r.twohead", "->>"], ["arrow.l.double.long", "<=="], ["arrow.l.long", "<--"], ["arrow.l.long.squiggly", "<~~"], ["arrow.l.tail", "<-<"], ["arrow.l.twohead", "<<-"], ["arrow.l.r.double", "<=>"], ["colon.double.eq", "::="], ["dots.h", "..."], ["gt.triple", ">>>"], ["lt.triple", "<<<"], ["arrow.r", "->"], ["arrow.r.double", "=>"], ["arrow.r.squiggly", "~>"], ["arrow.l", "<-"], ["arrow.l.squiggly", "<~"], ["bar.v.double", "||"], 
["bracket.l.stroked", "[|"], ["bracket.r.stroked", "|]"], ["colon.eq", ":="], ["eq.colon", "=:"], ["eq.not", "!="], ["gt.double", ">>"], ["gt.eq", ">="], ["lt.double", "<<"], ["lt.eq", "<="], ["ast.op", "*"], ["minus", "-"], ["tilde.op", "~"], // Typst's documentation doesn't include this. Wondering why ["arrow.l.r", "<->"] ]); var reverseShorthandMap = /* @__PURE__ */ new Map(); for (const [key, value] of shorthandMap.entries()) { if (value.length > 1) { reverseShorthandMap.set(value, key); } } // src/typst-types.ts var TypstToken = class _TypstToken { type; value; constructor(type, content) { this.type = type; this.value = content; } eq(other) { return this.type === other.type && this.value === other.value; } isOneOf(tokens) { return array_includes(tokens, this); } toNode() { return new TypstTerminal(this); } toString() { switch (this.type) { case 4 /* TEXT */: return `"${this.value}"`; case 5 /* COMMENT */: return `//${this.value}`; default: return this.value; } } static NONE = new _TypstToken(0 /* NONE */, "#none"); static EMPTY = new _TypstToken(2 /* ELEMENT */, ""); static LEFT_BRACE = new _TypstToken(2 /* ELEMENT */, "{"); static RIGHT_BRACE = new _TypstToken(2 /* ELEMENT */, "}"); static LEFT_PAREN = new _TypstToken(2 /* ELEMENT */, "("); static RIGHT_PAREN = new _TypstToken(2 /* ELEMENT */, ")"); static LEFT_ANGLE = new _TypstToken(1 /* SYMBOL */, "chevron.l"); static RIGHT_ANGLE = new _TypstToken(1 /* SYMBOL */, "chevron.r"); static VERTICAL_BAR = new _TypstToken(2 /* ELEMENT */, "|"); static PLUS = new _TypstToken(2 /* ELEMENT */, "+"); static MINUS = new _TypstToken(2 /* ELEMENT */, "-"); static LR = new _TypstToken(1 /* SYMBOL */, "lr"); static LEFT_DELIMITERS = [ _TypstToken.LEFT_PAREN, new _TypstToken(2 /* ELEMENT */, "["), _TypstToken.LEFT_BRACE, _TypstToken.VERTICAL_BAR, _TypstToken.LEFT_ANGLE, new _TypstToken(1 /* SYMBOL */, "paren.l"), new _TypstToken(1 /* SYMBOL */, "brace.l") ]; static RIGHT_DELIMITERS = [ _TypstToken.RIGHT_PAREN, new 
_TypstToken(2 /* ELEMENT */, "]"), _TypstToken.RIGHT_BRACE, _TypstToken.VERTICAL_BAR, _TypstToken.RIGHT_ANGLE, new _TypstToken(1 /* SYMBOL */, "paren.r"), new _TypstToken(1 /* SYMBOL */, "brace.r") ]; }; // completes RIGHT_DELIMITERS and the TypstToken class begun on the previous line

// Error type that carries the AST node which triggered the failure.
var TypstWriterError = class extends Error {
  node;
  constructor(message, node) {
    super(message);
    this.name = "TypstWriterError";
    this.node = node;
  }
};

// Removable space marker; TypstTokenQueue trims redundant/trailing soft spaces.
var SOFT_SPACE = new TypstToken(7 /* CONTROL */, " ");

/**
 * Base class of all Typst AST nodes. `type` is a string tag ("terminal",
 * "group", "supsub", ...); `head` is the node's principal token.
 */
var TypstNode = class {
  type;
  head;
  // Some Typst functions accept additional options. e.g. mat() has option "delim", op() has option "limits"
  options;
  constructor(type, head) {
    // Fall back to the NONE token when no head is supplied.
    this.head = head ? head : TypstToken.NONE;
    this.type = type;
  }
  setOptions(options) {
    this.options = options;
  }
  // Note that this is only shallow equality.
  eq(other) {
    return this.type === other.type && this.head.eq(other.head);
  }
  toString() {
    throw new Error(`Unimplemented toString() in base class TypstNode`);
  }
};

/**
 * Leaf node wrapping a single TypstToken.
 * isLeftSpaceful / isRightSpaceful decide whether a soft space may be placed
 * before / after this node during serialization.
 */
var TypstTerminal = class extends TypstNode {
  constructor(head) {
    super("terminal", head);
  }
  isOverHigh() {
    return false;
  }
  isLeftSpaceful() {
    switch (this.head.type) {
      case 6 /* SPACE */:
      case 8 /* NEWLINE */:
        return false;
      case 4 /* TEXT */:
        return true;
      case 1 /* SYMBOL */:
      case 2 /* ELEMENT */: {
        // No space before punctuation or closing delimiters.
        if (["(", "!", ",", ")", "}", "]"].includes(this.head.value)) {
          return false;
        }
        return true;
      }
      default:
        return true;
    }
  }
  isRightSpaceful() {
    switch (this.head.type) {
      case 6 /* SPACE */:
      case 8 /* NEWLINE */:
        return false;
      case 4 /* TEXT */:
        return true;
      case 1 /* SYMBOL */:
      case 2 /* ELEMENT */: {
        // Only these tokens want a space after them.
        return ["+", "=", ",", "\\/", "dot", "dot.op", "arrow", "arrow.r"].includes(this.head.value);
      }
      default:
        return false;
    }
  }
  toString() {
    return this.head.toString();
  }
  // Emit the output token(s) for this terminal; the body continues on the
  // next source line.
  serialize(env, options) {
    if (this.head.type === 2 /* ELEMENT */) {
      // A bare "," inside a function call would read as an argument separator,
      // so emit the "comma" symbol instead.
      if (this.head.value === "," && env.insideFunctionDepth > 0) {
        return [SOFT_SPACE, new TypstToken(1 /* SYMBOL */, "comma")];
      }
    } else if (this.head.type === 1 /* SYMBOL */) {
      let symbol_name = this.head.value;
      if (options.preferShorthands) {
        if
(shorthandMap.has(symbol_name)) { symbol_name = shorthandMap.get(symbol_name); } } // completes the shorthand substitution begun on the previous line
      if (options.inftyToOo && symbol_name === "infinity") {
        symbol_name = "oo";
      }
      return [new TypstToken(1 /* SYMBOL */, symbol_name)];
    } else if (this.head.type === 6 /* SPACE */ || this.head.type === 8 /* NEWLINE */) {
      const queue = [];
      for (const c of this.head.value) {
        if (c === " ") {
          // Spaces are only emitted when the caller asked to keep them.
          if (options.keepSpaces) {
            queue.push(new TypstToken(6 /* SPACE */, c));
          }
        } else if (c === "\n") {
          queue.push(new TypstToken(1 /* SYMBOL */, c));
        } else {
          throw new TypstWriterError(`Unexpected whitespace character: ${c}`, this);
        }
      }
      return queue;
    }
    return [this.head];
  }
};

/**
 * Output buffer that manages SOFT_SPACE placement: no space at the start of
 * the buffer, none after an opening bracket, none before a closing bracket,
 * and never two in a row.
 */
var TypstTokenQueue = class {
  queue = [];
  // Append one SOFT_SPACE unless it would be redundant.
  pushSoftSpace() {
    if (this.queue.length === 0) {
      return;
    } else if (this.queue.at(-1).eq(SOFT_SPACE)) {
      return;
    } else if (["(", "{", "["].includes(this.queue.at(-1).value)) {
      return;
    }
    this.queue.push(SOFT_SPACE);
  }
  // Append tokens, dropping a soft space that would start the buffer or
  // precede a closing bracket.
  // NOTE(review): `==` is used here where the file elsewhere uses `===`;
  // harmless for a length comparison but inconsistent.
  pushAll(tokens) {
    if (tokens.length == 0) {
      return;
    } else if (tokens[0].eq(SOFT_SPACE) && this.queue.length === 0) {
      this.queue.push(...tokens.slice(1));
    } else {
      if ([")", "}", "]"].includes(tokens[0].value)) {
        while (this.queue.at(-1)?.eq(SOFT_SPACE)) {
          this.queue.pop();
        }
      }
      this.queue.push(...tokens);
    }
  }
  // Snapshot of the buffered tokens with trailing soft spaces removed.
  getQueue() {
    const res = Array.from(this.queue);
    while (res.at(-1)?.eq(SOFT_SPACE)) {
      res.pop();
    }
    return res;
  }
};

/**
 * A sequence of sibling nodes, serialized in order with soft spaces inserted
 * between space-ful neighbors.
 */
var TypstGroup = class extends TypstNode {
  items;
  constructor(items) {
    super("group", TypstToken.NONE);
    this.items = items;
  }
  isOverHigh() {
    return this.items.some((n) => n.isOverHigh());
  }
  isLeftSpaceful() {
    if (this.items.length === 0) {
      return false;
    }
    return this.items[0].isLeftSpaceful();
  }
  isRightSpaceful() {
    if (this.items.length === 0) {
      return false;
    }
    return this.items.at(-1).isRightSpaceful();
  }
  // Serialization of the group; the body continues on the next source line.
  serialize(env, options) {
    if (this.items.length === 0) {
      return [];
    }
    const q = new TypstTokenQueue();
    for (let i = 0; i < this.items.length; i++) {
      const n = this.items[i];
      const tokens = n.serialize(env, options);
      if (n.isLeftSpaceful()) {
q.pushSoftSpace(); // completes the loop body of TypstGroup.serialize begun on the previous line
      }
      q.pushAll(tokens);
      if (n.isRightSpaceful()) {
        q.pushSoftSpace();
      }
    }
    const queue = q.getQueue();
    // A leading "+"/"-" acts as a sign, not a binary operator: remove the soft
    // space after it so it stays attached to its operand.
    if (queue.length > 0 && (queue[0].eq(TypstToken.MINUS) || queue[0].eq(TypstToken.PLUS))) {
      while (queue.length > 1 && queue[1].eq(SOFT_SPACE)) {
        queue.splice(1, 1);
      }
    }
    return queue;
  }
};

/**
 * Subscript/superscript node: a base with optional `sup` and `sub` children.
 */
var TypstSupsub = class extends TypstNode {
  base;
  sup;
  sub;
  constructor(data) {
    super("supsub", TypstToken.NONE);
    this.base = data.base;
    this.sup = data.sup;
    this.sub = data.sub;
  }
  isOverHigh() {
    return this.base.isOverHigh();
  }
  isLeftSpaceful() {
    return true;
  }
  isRightSpaceful() {
    return true;
  }
  serialize(env, options) {
    const queue = [];
    let { base, sup, sub } = this;
    queue.push(...base.serialize(env, options));
    // A prime superscript is written as a bare apostrophe, not as "^'".
    const has_prime = sup && sup.head.eq(new TypstToken(2 /* ELEMENT */, "'"));
    if (has_prime) {
      queue.push(new TypstToken(2 /* ELEMENT */, "'"));
    }
    if (sub) {
      queue.push(new TypstToken(2 /* ELEMENT */, "_"));
      queue.push(...sub.serialize(env, options));
    }
    if (sup && !has_prime) {
      queue.push(new TypstToken(2 /* ELEMENT */, "^"));
      queue.push(...sup.serialize(env, options));
    }
    return queue;
  }
};

/**
 * Function-call node, e.g. frac(a, b). Optional `options` are appended as
 * trailing named arguments. Serialization completes on the next source line.
 */
var TypstFuncCall = class extends TypstNode {
  args;
  constructor(head, args) {
    super("funcCall", head);
    this.args = args;
  }
  isOverHigh() {
    if (this.head.value === "frac") {
      return true;
    }
    return this.args.some((n) => n.isOverHigh());
  }
  isLeftSpaceful() {
    return true;
  }
  isRightSpaceful() {
    return !["op", "bold", "dot"].includes(this.head.value);
  }
  serialize(env, options) {
    const queue = [];
    const func_symbol = this.head;
    queue.push(func_symbol);
    // Track nesting so terminals can escape "," inside argument lists.
    env.insideFunctionDepth++;
    queue.push(TYPST_LEFT_PARENTHESIS);
    for (let i = 0; i < this.args.length; i++) {
      queue.push(...this.args[i].serialize(env, options));
      if (i < this.args.length - 1) {
        queue.push(new TypstToken(2 /* ELEMENT */, ","));
        queue.push(SOFT_SPACE);
      }
    }
    if (this.options) {
      for (const [key, value] of Object.entries(this.options)) {
        queue.push(new TypstToken(3 /* LITERAL */, `, ${key}: ${value.toString()}`));
      }
    }
queue.push(TYPST_RIGHT_PARENTHESIS); // completes TypstFuncCall.serialize begun on the previous line
    env.insideFunctionDepth--;
    return queue;
  }
};

/**
 * Inline fraction rendered with "/" between numerator and denominator.
 * `args` is a two-element array [numerator, denominator].
 */
var TypstFraction = class extends TypstNode {
  args;
  constructor(args) {
    super("fraction", TypstToken.NONE);
    this.args = args;
  }
  isOverHigh() {
    return true;
  }
  isLeftSpaceful() {
    return true;
  }
  isRightSpaceful() {
    return true;
  }
  serialize(env, options) {
    const queue = [];
    const [numerator, denominator] = this.args;
    queue.push(...numerator.serialize(env, options));
    queue.push(new TypstToken(2 /* ELEMENT */, "/"));
    queue.push(...denominator.serialize(env, options));
    return queue;
  }
};

var TYPST_LEFT_PARENTHESIS = new TypstToken(2 /* ELEMENT */, "(");
var TYPST_RIGHT_PARENTHESIS = new TypstToken(2 /* ELEMENT */, ")");

/**
 * Delimited expression, optionally wrapped in Typst's lr(...) so the
 * delimiters scale with the body height.
 */
var TypstLeftright = class extends TypstNode {
  body;
  left;
  right;
  // head is either null or 'lr'
  constructor(head, data) {
    super("leftright", head);
    this.body = data.body;
    this.left = data.left;
    this.right = data.right;
  }
  isOverHigh() {
    return this.body.isOverHigh();
  }
  isLeftSpaceful() {
    return true;
  }
  isRightSpaceful() {
    return true;
  }
  serialize(env, options) {
    const queue = [];
    const LR = new TypstToken(1 /* SYMBOL */, "lr");
    const { left, right } = this;
    if (this.head.eq(LR)) {
      queue.push(LR);
      queue.push(TYPST_LEFT_PARENTHESIS);
    }
    if (left) {
      queue.push(left);
      // Alphabetic delimiter names (e.g. "paren.l") need a separating space.
      if (isalpha(left.value[0])) {
        queue.push(SOFT_SPACE);
      }
    }
    queue.push(...this.body.serialize(env, options));
    if (right) {
      if (isalpha(right.value[0])) {
        queue.push(SOFT_SPACE);
      }
      queue.push(right);
    }
    if (this.head.eq(LR)) {
      queue.push(TYPST_RIGHT_PARENTHESIS);
    }
    return queue;
  }
};

/**
 * Matrix-like 2D node: mat(...), cases(...), or a bare alignment block
 * (head === TypstToken.NONE). `matrix` is a row-major array of cell nodes.
 * Serialization completes on the next source line.
 */
var TypstMatrixLike = class _TypstMatrixLike extends TypstNode {
  matrix;
  // head is 'mat', 'cases' or null
  constructor(head, data) {
    super("matrixLike", head);
    this.matrix = data;
  }
  isOverHigh() {
    return true;
  }
  isLeftSpaceful() {
    return true;
  }
  isRightSpaceful() {
    return false;
  }
  serialize(env, options) {
    const queue = [];
    // Separators depend on the flavor: mat uses "," / ";",
    // cases uses "&" / ",", bare alignment uses "&" / "\".
    let cell_sep;
    let row_sep;
    if (this.head.eq(_TypstMatrixLike.MAT)) {
      cell_sep = new TypstToken(2 /* ELEMENT */, ",");
row_sep = new TypstToken(2 /* ELEMENT */, ";"); // completes the MAT branch of TypstMatrixLike.serialize begun on the previous line
    } else if (this.head.eq(_TypstMatrixLike.CASES)) {
      cell_sep = new TypstToken(2 /* ELEMENT */, "&");
      row_sep = new TypstToken(2 /* ELEMENT */, ",");
    } else if (this.head.eq(TypstToken.NONE)) {
      cell_sep = new TypstToken(2 /* ELEMENT */, "&");
      row_sep = new TypstToken(1 /* SYMBOL */, "\\");
    }
    // A named flavor (mat/cases) is emitted as a function call; the bare
    // alignment flavor has no wrapper.
    if (!this.head.eq(TypstToken.NONE)) {
      queue.push(this.head);
      env.insideFunctionDepth++;
      queue.push(TYPST_LEFT_PARENTHESIS);
      // Named options (e.g. delim) come before the cells.
      if (this.options) {
        for (const [key, value] of Object.entries(this.options)) {
          queue.push(new TypstToken(3 /* LITERAL */, `${key}: ${value.toString()}, `));
        }
      }
    }
    this.matrix.forEach((row, i) => {
      row.forEach((cell, j) => {
        queue.push(...cell.serialize(env, options));
        if (j < row.length - 1) {
          // "&" separators get a space on both sides; "," only after.
          if (cell_sep.value === "&") {
            queue.push(SOFT_SPACE);
          }
          queue.push(cell_sep);
          queue.push(SOFT_SPACE);
        } else {
          // Last cell of a row: emit the row separator unless this is the
          // final row.
          if (i < this.matrix.length - 1) {
            if (row_sep.value === "\\") {
              queue.push(SOFT_SPACE);
            }
            queue.push(row_sep);
            queue.push(SOFT_SPACE);
          }
        }
      });
    });
    if (!this.head.eq(TypstToken.NONE)) {
      queue.push(TYPST_RIGHT_PARENTHESIS);
      env.insideFunctionDepth--;
    }
    return queue;
  }
  static MAT = new TypstToken(1 /* SYMBOL */, "mat");
  static CASES = new TypstToken(1 /* SYMBOL */, "cases");
};

// ---- head of TypstMarkupFunc (definition continues beyond this chunk; kept verbatim) ----
var TypstMarkupFunc = class extends TypstNode { /* In idealized situations, for `#heading([some text and math $x + y$ example])`, fragments would be [TypstMarkup{"some text and math "}, TypstNode{"x + y"