UNPKG

tex2typst

Version:

JavaScript library for converting TeX code to Typst

444 lines (387 loc) 16.9 kB
import { array_find } from "./generic";
import { TypstFraction, TypstFuncCall, TypstGroup, TypstLeftright, TypstLeftRightData, TypstMarkupFunc, TypstMatrixLike, TypstNode, TypstSupsub, TypstTerminal } from "./typst-types";
import { TypstNamedParams } from "./typst-types";
import { TypstSupsubData } from "./typst-types";
import { TypstToken } from "./typst-types";
import { TypstTokenType } from "./typst-types";
import { tokenize_typst } from "./typst-tokenizer";
import { assert, isalpha } from "./utils";


// TODO: In Typst, y' ' is not the same as y''.
// The parser should be able to parse the former correctly.
/**
 * Counts the run of consecutive prime (`'`) element tokens beginning at `start`.
 *
 * @param tokens - Token stream being parsed.
 * @param start - Index of the first token to inspect.
 * @returns The number of consecutive prime tokens (0 if `tokens[start]` is not a prime
 *          or `start` is past the end).
 */
function eat_primes(tokens: TypstToken[], start: number): number {
    // Build the comparison token once instead of once per loop iteration.
    const prime = new TypstToken(TypstTokenType.ELEMENT, "'");
    let pos = start;
    while (pos < tokens.length && tokens[pos].eq(prime)) {
        pos += 1;
    }
    return pos - start;
}


/**
 * Finds the index of the token that closes the bracket opened at `start`.
 *
 * All tokens in `leftBrackets` increase the nesting count and all tokens in
 * `rightBrackets` decrease it; bracket kinds are not matched against each other.
 *
 * @param tokens - Token stream being parsed.
 * @param start - Index of the opening token; asserted to be one of `leftBrackets`.
 * @param leftBrackets - Tokens that open a nesting level.
 * @param rightBrackets - Tokens that close a nesting level.
 * @returns Index of the closing token.
 * @throws TypstParserError if the end of `tokens` is reached before the count returns to 0.
 */
function _find_closing_match(tokens: TypstToken[], start: number,
    leftBrackets: TypstToken[], rightBrackets: TypstToken[]): number {
    assert(tokens[start].isOneOf(leftBrackets));
    let count = 1;
    let pos = start + 1;

    while (count > 0) {
        if (pos >= tokens.length) {
            throw new TypstParserError('Unmatched brackets or parentheses');
        }
        // The closing test must come first: callers may pass the same token as both
        // opener and closer (e.g. `$ ... $`), in which case the next occurrence closes.
        if (tokens[pos].isOneOf(rightBrackets)) {
            count -= 1;
        } else if (tokens[pos].isOneOf(leftBrackets)) {
            count += 1;
        }
        pos += 1;
    }

    return pos - 1;
}


/**
 * Finds the token closing the `(`, `[` or `{` located at `start`.
 *
 * @param tokens - Token stream being parsed.
 * @param start - Index of the opening bracket.
 * @returns Index of the matching closing bracket.
 * @throws TypstParserError if no closing bracket is found.
 */
function find_closing_match(tokens: TypstToken[], start: number): number {
    return _find_closing_match(tokens, start, OPENING_BRACKETS, CLOSING_BRACKETS);
}


/**
 * Finds the first occurrence of `sep` in `tokens[start, end)` that is not nested inside
 * any `(`, `[` or `{` group. Nesting is counted the same way as in `find_closing_match`.
 *
 * @param tokens - Token stream being parsed.
 * @param sep - Separator token to look for.
 * @param start - First index to inspect (inclusive).
 * @param end - Last index to inspect (exclusive).
 * @returns Index of the separator, or -1 if there is none at nesting depth 0.
 */
function find_top_level_separator(tokens: TypstToken[], sep: TypstToken, start: number, end: number): number {
    let depth = 0;
    for (let i = start; i < end; i++) {
        const token = tokens[i];
        if (depth === 0 && token.eq(sep)) {
            return i;
        }
        if (token.isOneOf(OPENING_BRACKETS)) {
            depth += 1;
        } else if (token.isOneOf(CLOSING_BRACKETS)) {
            depth -= 1;
        }
    }
    return -1;
}


/**
 * Pulls named parameters (`name: value` groups) out of a list of parsed arguments.
 *
 * Only group nodes containing a `:` element at an index greater than 0 are treated as
 * named parameters. Currently only `delim` is supported.
 *
 * @param arr - Parsed arguments. The named-parameter entries are removed from this array in place.
 * @returns A tuple of the (same, now filtered) array and the collected named parameters.
 * @throws TypstParserError if a `delim` group does not have exactly 3 items, or if the
 *         parameter name is anything other than `delim`.
 */
function extract_named_params(arr: TypstNode[]): [TypstNode[], TypstNamedParams] {
    const COLON = new TypstToken(TypstTokenType.ELEMENT, ':').toNode();
    const DELIM = new TypstToken(TypstTokenType.SYMBOL, 'delim').toNode();
    const np: TypstNamedParams = {};

    const to_delete: number[] = [];
    for (let i = 0; i < arr.length; i++) {
        if (arr[i].type !== 'group') {
            continue;
        }

        const g = arr[i] as TypstGroup;
        const pos_colon = array_find(g.items, COLON);
        if (pos_colon === -1 || pos_colon === 0) {
            continue;
        }
        to_delete.push(i);
        const param_name = g.items[pos_colon - 1];
        if (param_name.eq(DELIM)) {
            if (g.items.length !== 3) {
                throw new TypstParserError('Invalid number of arguments for delim');
            }
            np['delim'] = g.items[pos_colon + 1];
        } else {
            throw new TypstParserError('Not implemented for other named parameters');
        }
    }
    // Delete from the back so the earlier recorded indices stay valid.
    for (let i = to_delete.length - 1; i >= 0; i--) {
        arr.splice(to_delete[i], 1);
    }
    return [arr, np];
}


/**
 * Builds `num` fresh prime (`'`) nodes.
 *
 * @param num - Number of prime nodes to create.
 * @returns An array of `num` newly created nodes.
 */
function primes(num: number): TypstNode[] {
    const res: TypstNode[] = [];
    for (let i = 0; i < num; i++) {
        res.push(new TypstToken(TypstTokenType.ELEMENT, "'").toNode());
    }
    return res;
}


/** Node form of the `/` element, used to detect the fraction operator. */
const DIV = new TypstToken(TypstTokenType.ELEMENT, '/').toNode();


/** Returns true when the node's head token is of type SPACE or NEWLINE. */
function is_whitespace_node(node: TypstNode): boolean {
    const type = node.head.type;
    return type === TypstTokenType.SPACE || type === TypstTokenType.NEWLINE;
}


/**
 * Returns the first node at or after `start` that is not a space/newline node.
 *
 * @param nodes - Nodes to scan.
 * @param start - Index at which scanning begins.
 * @returns The node found, or `null` if only whitespace remains.
 */
function next_non_whitespace(nodes: TypstNode[], start: number): TypstNode | null {
    let pos = start;
    while (pos < nodes.length && is_whitespace_node(nodes[pos])) {
        pos++;
    }
    return pos === nodes.length ? null : nodes[pos];
}


/**
 * Drops whitespace nodes that directly precede or follow a `/` operator so that
 * `a / b` is seen by `process_operators` as the three nodes `a`, `/`, `b`.
 *
 * @param nodes - Nodes of one group, in order.
 * @returns A new array without the whitespace adjacent to `/`.
 */
function trim_whitespace_around_operators(nodes: TypstNode[]): TypstNode[] {
    let after_operator = false;
    const res: TypstNode[] = [];
    for (let i = 0; i < nodes.length; i++) {
        const current = nodes[i];
        if (is_whitespace_node(current)) {
            if (after_operator) {
                // Whitespace following '/': skip it (the flag stays set for further whitespace).
                continue;
            }
            if (next_non_whitespace(nodes, i + 1)?.eq(DIV)) {
                // Whitespace leading up to '/': skip it.
                continue;
            }
        }
        after_operator = current.eq(DIV);
        res.push(current);
    }
    return res;
}


/**
 * Folds `/` operators in a flat node list into `TypstFraction` nodes.
 *
 * The operand before and after each `/` become numerator and denominator; an operand
 * of type 'leftright' is replaced by its body. Fractions associate to the left.
 * A `/` that is never followed by an operand is left on the internal stack and ignored.
 *
 * @param nodes - Nodes of one group, in order.
 * @returns The single resulting node, or a `TypstGroup` wrapping the results when there
 *          is not exactly one.
 * @throws TypstParserError if a `/` has no operand before it.
 */
function process_operators(nodes: TypstNode[]): TypstNode {
    nodes = trim_whitespace_around_operators(nodes);

    const stack: TypstNode[] = [];
    const args: TypstNode[] = [];
    let pos = 0;
    while (pos < nodes.length) {
        const current_tree = nodes[pos];
        if (current_tree.eq(DIV)) {
            stack.push(current_tree);
        } else {
            if (stack.length > 0 && stack[stack.length - 1].eq(DIV)) {
                let denominator = current_tree;
                if (args.length === 0) {
                    throw new TypstParserError("Unexpected '/' operator, no numerator before it");
                }
                let numerator = args.pop()!;

                if (denominator.type === 'leftright') {
                    denominator = (denominator as TypstLeftright).body;
                }
                if (numerator.type === 'leftright') {
                    numerator = (numerator as TypstLeftright).body;
                }

                args.push(new TypstFraction([numerator, denominator]));
                stack.pop(); // drop the '/' operator
            } else {
                args.push(current_tree);
            }
        }
        pos++;
    }
    return args.length === 1 ? args[0] : new TypstGroup(args);
}


/**
 * Converts `name: value` groups into a named-parameter map.
 *
 * Each group is asserted to consist of exactly three items with a `:` in the middle.
 * The value is stored as a LITERAL terminal built from the value item's string form.
 *
 * @param groups - Groups of the form `[name, ':', value]`.
 * @returns Map from parameter name (string form of the first item) to its value node.
 */
function parse_named_params(groups: TypstGroup[]): TypstNamedParams {
    const COLON = new TypstToken(TypstTokenType.ELEMENT, ':').toNode();

    const np: TypstNamedParams = {};
    for (const group of groups) {
        assert(group.items.length === 3);
        assert(group.items[1].eq(COLON));
        np[group.items[0].toString()] = new TypstTerminal(new TypstToken(TypstTokenType.LITERAL, group.items[2].toString()));
    }
    return np;
}


/** Error raised for malformed Typst input encountered while parsing. */
export class TypstParserError extends Error {
    constructor(message: string) {
        super(message);
        this.name = 'TypstParserError';
    }
}


/** A parsed node together with the index at which parsing should resume. */
type TypstParseResult = [TypstNode, number];

const SUB_SYMBOL: TypstToken = new TypstToken(TypstTokenType.CONTROL, '_');
const SUP_SYMBOL: TypstToken = new TypstToken(TypstTokenType.CONTROL, '^');
const LEFT_PARENTHESES: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '(');
const RIGHT_PARENTHESES: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ')');
const LEFT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '[');
const RIGHT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ']');
const LEFT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '{');
const RIGHT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '}');
const COMMA = new TypstToken(TypstTokenType.ELEMENT, ',');
const SEMICOLON = new TypstToken(TypstTokenType.ELEMENT, ';');
const CONTROL_AND = new TypstToken(TypstTokenType.CONTROL, '&');

/** Tokens that open a nesting level for bracket matching. */
const OPENING_BRACKETS: TypstToken[] = [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET];
/** Tokens that close a nesting level for bracket matching. */
const CLOSING_BRACKETS: TypstToken[] = [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET];


/** Per-call whitespace handling options for `TypstParser.parseUntil`. */
interface TexParseEnv {
    spaceSensitive: boolean;
    newlineSensitive: boolean;
}


/**
 * Parser turning a `TypstToken` stream into a `TypstNode` tree.
 *
 * `space_sensitive` / `newline_sensitive` are the defaults used by `parseUntil` when
 * the caller does not override them; when false, the corresponding whitespace nodes
 * are dropped from the result.
 */
export class TypstParser {
    space_sensitive: boolean;
    newline_sensitive: boolean;

    constructor(space_sensitive: boolean = true, newline_sensitive: boolean = true) {
        this.space_sensitive = space_sensitive;
        this.newline_sensitive = newline_sensitive;
    }

    /**
     * Parses a complete token stream.
     *
     * @param tokens - Tokens to parse.
     * @returns The root node of the parsed tree.
     */
    parse(tokens: TypstToken[]): TypstNode {
        const [tree] = this.parseGroup(tokens, 0, tokens.length);
        return tree;
    }

    /**
     * Parses `tokens[start, end)` as one group.
     *
     * @param tokens - Token stream.
     * @param start - First index of the group (inclusive).
     * @param end - Last index of the group (exclusive).
     * @returns The parsed node and a position that is relative to the sliced range,
     *          not to `tokens`.
     */
    parseGroup(tokens: TypstToken[], start: number, end: number): TypstParseResult {
        return this.parseUntil(tokens.slice(start, end), 0, null);
    }

    /**
     * Parses one expression together with its trailing primes and optional
     * subscript/superscript (in either order).
     *
     * @param tokens - Token stream.
     * @param start - Index of the first token of the expression.
     * @returns The parsed node (a `TypstSupsub` when a script is present) and the index
     *          of the next unconsumed token.
     */
    parseNextExpr(tokens: TypstToken[], start: number): TypstParseResult {
        let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
        let sub: TypstNode | null = null;
        let sup: TypstNode | null = null;

        const num_base_prime = eat_primes(tokens, pos);
        if (num_base_prime > 0) {
            base = new TypstGroup([base].concat(primes(num_base_prime)));
            pos += num_base_prime;
        }
        if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
            [sub, pos] = this.parseSupOrSub(tokens, pos + 1);
            if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) {
                [sup, pos] = this.parseSupOrSub(tokens, pos + 1);
            }
        } else if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) {
            [sup, pos] = this.parseSupOrSub(tokens, pos + 1);
            if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
                [sub, pos] = this.parseSupOrSub(tokens, pos + 1);
            }
        }

        if (sub !== null || sup !== null) {
            const res: TypstSupsubData = { base, sup, sub };
            return [new TypstSupsub(res), pos];
        } else {
            return [base, pos];
        }
    }

    // return pos: (position of stopToken) + 1
    // pos will be -1 if stopToken is not found
    /**
     * Parses expressions from `start` until `stopToken` (or the end of `tokens` when
     * `stopToken` is null) and combines them with `process_operators`.
     *
     * @param tokens - Token stream.
     * @param start - Index of the first token to parse.
     * @param stopToken - Token that terminates the run, or null to consume everything.
     * @param env - Optional whitespace options; missing fields fall back to the parser's
     *              defaults. The object is not modified.
     * @returns The parsed node and the index just past the stop token; `-1` as position
     *          (with a NONE node) when a stop token was requested but not found.
     */
    parseUntil(tokens: TypstToken[], start: number, stopToken: TypstToken | null, env: Partial<TexParseEnv> = {}): TypstParseResult {
        // Resolve defaults into locals so the caller's env object is left untouched.
        const spaceSensitive = env.spaceSensitive ?? this.space_sensitive;
        const newlineSensitive = env.newlineSensitive ?? this.newline_sensitive;

        const results: TypstNode[] = [];
        let pos = start;

        while (pos < tokens.length) {
            if (stopToken !== null && tokens[pos].eq(stopToken)) {
                break;
            }
            const [res, newPos] = this.parseNextExpr(tokens, pos);
            pos = newPos;
            if (res.head.type === TypstTokenType.SPACE || res.head.type === TypstTokenType.NEWLINE) {
                if (!spaceSensitive && res.head.value.replace(/ /g, '').length === 0) {
                    continue;
                }
                if (!newlineSensitive && res.head.value === '\n') {
                    continue;
                }
            }
            results.push(res);
        }
        if (pos >= tokens.length && stopToken !== null) {
            return [TypstToken.NONE.toNode(), -1];
        }
        const node = process_operators(results);
        return [node, pos + 1];
    }

    /**
     * Parses the operand of a `_` or `^`: either a parenthesised run or a single
     * expression, followed by any primes.
     *
     * @param tokens - Token stream.
     * @param start - Index of the first token after the `_` / `^`.
     * @returns The parsed operand and the index of the next unconsumed token.
     * @throws TypstParserError if the input ends before the operand or a `(` is unmatched.
     */
    parseSupOrSub(tokens: TypstToken[], start: number): TypstParseResult {
        if (start >= tokens.length) {
            // e.g. input ending in "x^": report a parser error instead of crashing on undefined.
            throw new TypstParserError('Unexpected end of input: expected a subscript or superscript');
        }

        let node: TypstNode;
        let end: number;
        if (tokens[start].eq(LEFT_PARENTHESES)) {
            [node, end] = this.parseUntil(tokens, start + 1, RIGHT_PARENTHESES);
            if (end === -1) {
                throw new TypstParserError("Unmatched '('");
            }
        } else {
            [node, end] = this.parseNextExprWithoutSupSub(tokens, start);
        }
        const num_prime = eat_primes(tokens, end);
        if (num_prime > 0) {
            node = new TypstGroup([node].concat(primes(num_prime)));
            end += num_prime;
        }
        return [node, end];
    }

    /**
     * Parses one expression without looking at trailing primes or scripts: a
     * parenthesised group, a function-like call (`mat`, `cases`, `lr`, `#heading`,
     * `#text`, or a generic call), or a single token.
     *
     * @param tokens - Token stream.
     * @param start - Index of the first token of the expression.
     * @returns The parsed node and the index of the next unconsumed token.
     * @throws TypstParserError if `start` is past the end of `tokens` or a `(` is unmatched.
     */
    parseNextExprWithoutSupSub(tokens: TypstToken[], start: number): TypstParseResult {
        if (start >= tokens.length) {
            throw new TypstParserError('Unexpected end of input');
        }

        const firstToken = tokens[start];
        const node = firstToken.toNode();
        if (firstToken.eq(LEFT_PARENTHESES)) {
            const [body, end] = this.parseUntil(tokens, start + 1, RIGHT_PARENTHESES);
            if (end === -1) {
                throw new TypstParserError("Unmatched '('");
            }
            const data: TypstLeftRightData = { body: body, left: LEFT_PARENTHESES, right: RIGHT_PARENTHESES };
            return [new TypstLeftright(null, data), end];
        }
        // Non-alphabetic elements (operators, digits, punctuation) never start a call.
        if (firstToken.type === TypstTokenType.ELEMENT && !isalpha(firstToken.value[0])) {
            return [node, start + 1];
        }
        if ([TypstTokenType.ELEMENT, TypstTokenType.SYMBOL].includes(firstToken.type)) {
            if (start + 1 < tokens.length && tokens[start + 1].eq(LEFT_PARENTHESES)) {
                if (firstToken.value === 'mat') {
                    const [matrix, named_params, newPos] = this.parseMatrix(tokens, start + 1, SEMICOLON, COMMA);
                    const mat = new TypstMatrixLike(firstToken, matrix);
                    mat.setOptions(named_params);
                    return [mat, newPos];
                }
                if (firstToken.value === 'cases') {
                    const [cases, named_params, newPos] = this.parseMatrix(tokens, start + 1, COMMA, CONTROL_AND);
                    const casesNode = new TypstMatrixLike(firstToken, cases);
                    casesNode.setOptions(named_params);
                    return [casesNode, newPos];
                }
                if (firstToken.value === 'lr') {
                    return this.parseLrArguments(tokens, start + 1);
                }
                if (['#heading', '#text'].includes(firstToken.value)) {
                    // Expected shape: #func(named: args)[$ body $]
                    const [args, newPos] = this.parseArguments(tokens, start + 1);
                    const named_params = parse_named_params(args as TypstGroup[]);
                    assert(tokens[newPos].eq(LEFT_BRACKET));
                    const DOLLAR = new TypstToken(TypstTokenType.ELEMENT, '$');
                    const end = _find_closing_match(tokens, newPos + 1, [DOLLAR], [DOLLAR]);
                    const [group] = this.parseGroup(tokens, newPos + 2, end);
                    assert(tokens[end + 1].eq(RIGHT_BRACKET));
                    const markup_func = new TypstMarkupFunc(firstToken, [group]);
                    markup_func.setOptions(named_params);
                    return [markup_func, end + 2];
                }
                const [args, newPos] = this.parseArguments(tokens, start + 1);
                const func_call = new TypstFuncCall(firstToken, args);
                return [func_call, newPos];
            }
        }

        return [node, start + 1];
    }

    // start: the position of the left parentheses
    /**
     * Parses a comma-separated argument list.
     *
     * @param tokens - Token stream.
     * @param start - Index of the opening parenthesis.
     * @returns The parsed arguments and the index just past the closing bracket.
     * @throws TypstParserError if the opening bracket is never closed.
     */
    parseArguments(tokens: TypstToken[], start: number): [TypstNode[], number] {
        const end = find_closing_match(tokens, start);
        return [this.parseArgumentsWithSeparator(tokens, start + 1, end, COMMA), end + 1];
    }

    // start: the position of the left parentheses
    /**
     * Parses the argument of `lr(...)`. A leading token from
     * `TypstToken.LEFT_DELIMITERS` and a trailing token from
     * `TypstToken.RIGHT_DELIMITERS` are taken as the delimiters; the rest is the body.
     *
     * @param tokens - Token stream.
     * @param start - Index of the opening parenthesis.
     * @returns The `TypstLeftright` node and the index just past the closing bracket.
     * @throws TypstParserError if the opening bracket is never closed.
     */
    parseLrArguments(tokens: TypstToken[], start: number): [TypstNode, number] {
        const lr_token = new TypstToken(TypstTokenType.SYMBOL, 'lr');
        const end = find_closing_match(tokens, start);

        let left: TypstToken | null = null;
        let right: TypstToken | null = null;
        let inner_start = start + 1;
        let inner_end = end;
        if (inner_end > inner_start && tokens[inner_start].isOneOf(TypstToken.LEFT_DELIMITERS)) {
            left = tokens[inner_start];
            inner_start += 1;
        }
        if (inner_end - 1 > inner_start && tokens[inner_end - 1].isOneOf(TypstToken.RIGHT_DELIMITERS)) {
            right = tokens[inner_end - 1];
            inner_end -= 1;
        }

        const [inner_args] = this.parseGroup(tokens, inner_start, inner_end);
        return [
            new TypstLeftright(lr_token, { body: inner_args, left: left, right: right }),
            end + 1,
        ];
    }

    // start: the position of the left parentheses
    /**
     * Parses the argument list of a matrix-like call (`mat`, `cases`) into rows of cells
     * and collects any named parameters found in the rows.
     *
     * Rows are split only at separators that are not nested inside an inner bracket
     * group, so `cases(f(a, b), c)` yields two rows rather than three.
     *
     * @param tokens - Token stream.
     * @param start - Index of the opening parenthesis.
     * @param rowSepToken - Token separating rows.
     * @param cellSepToken - Token separating cells within a row.
     * @returns The rows, the named parameters, and the index just past the closing bracket.
     * @throws TypstParserError if the opening bracket is never closed or a named
     *         parameter is invalid (see `extract_named_params`).
     */
    parseMatrix(tokens: TypstToken[], start: number, rowSepToken: TypstToken, cellSepToken: TypstToken): [TypstNode[][], TypstNamedParams, number] {
        const end = find_closing_match(tokens, start);

        const matrix: TypstNode[][] = [];
        const named_params: TypstNamedParams = {};

        let pos = start + 1;
        while (pos < end) {
            let next_stop = find_top_level_separator(tokens, rowSepToken, pos, end);
            if (next_stop === -1) {
                // Last row: it runs up to the closing bracket.
                next_stop = end;
            }

            const parsed_row = this.parseArgumentsWithSeparator(tokens, pos, next_stop, cellSepToken);
            const [row, np] = extract_named_params(parsed_row);
            matrix.push(row);
            Object.assign(named_params, np);
            pos = next_stop + 1;
        }

        return [matrix, named_params, end + 1];
    }

    // start: the position of the first token of arguments
    /**
     * Parses `tokens[start, end)` as a list of arguments separated by `sepToken`.
     * Spaces are dropped and newlines are kept while parsing each argument.
     *
     * @param tokens - Token stream.
     * @param start - Index of the first token of the arguments.
     * @param end - Index one past the last token of the arguments.
     * @param sepToken - Token separating the arguments.
     * @returns The parsed arguments in order.
     */
    parseArgumentsWithSeparator(tokens: TypstToken[], start: number, end: number, sepToken: TypstToken): TypstNode[] {
        const args: TypstNode[] = [];
        const env: TexParseEnv = { spaceSensitive: false, newlineSensitive: true };
        // Copy the bounded prefix once; parseUntil must not see tokens at or beyond `end`.
        const bounded = tokens.slice(0, end);

        let pos = start;
        while (pos < end) {
            let [arg, newPos] = this.parseUntil(bounded, pos, sepToken, env);
            if (newPos === -1) {
                // No further separator: the remainder is the last argument.
                [arg, newPos] = this.parseUntil(bounded, pos, null, env);
            }
            args.push(arg);
            pos = newPos;
        }
        return args;
    }
}


/**
 * Tokenizes and parses a Typst math string with a default `TypstParser`.
 *
 * @param typst - Typst source text.
 * @returns The root node of the parsed tree.
 */
export function parseTypst(typst: string): TypstNode {
    const parser = new TypstParser();
    const tokens = tokenize_typst(typst);
    return parser.parse(tokens);
}