// tex2typst
// Version: (not given)
// JavaScript library for converting TeX code to Typst
// 444 lines (387 loc) • 16.9 kB
// text/typescript
import { array_find } from "./generic";
import { TypstFraction, TypstFuncCall, TypstGroup, TypstLeftright, TypstLeftRightData, TypstMarkupFunc, TypstMatrixLike, TypstNode, TypstSupsub, TypstTerminal } from "./typst-types";
import { TypstNamedParams } from "./typst-types";
import { TypstSupsubData } from "./typst-types";
import { TypstToken } from "./typst-types";
import { TypstTokenType } from "./typst-types";
import { tokenize_typst } from "./typst-tokenizer";
import { assert, isalpha } from "./utils";
// TODO: In Typst, y' ' is not the same as y''.
// The parser should be able to parse the former correctly.
/**
 * Counts the run of consecutive prime (`'`) element tokens that begins at `start`.
 *
 * @param tokens - Token list to inspect.
 * @param start - Index at which the run is expected to begin.
 * @returns How many consecutive prime tokens were found (0 when there are none
 *          or when `start` is at/after the end of `tokens`).
 */
function eat_primes(tokens: TypstToken[], start: number): number {
    let count = 0;
    for (let i = start; i < tokens.length; i++) {
        if (!tokens[i].eq(new TypstToken(TypstTokenType.ELEMENT, "'"))) {
            break;
        }
        count++;
    }
    return count;
}
/**
 * Finds the token that closes the bracket opened at `tokens[start]`.
 *
 * Nesting is tracked with a single depth counter: any token in `rightBrackets`
 * lowers it and any token in `leftBrackets` raises it. Closers are tested
 * first, so passing the same token as both opener and closer (as done for `$`)
 * makes the next occurrence the match.
 *
 * @param tokens - Token list to scan.
 * @param start - Index of the opening token; asserted to be one of `leftBrackets`.
 * @param leftBrackets - Tokens that open a nesting level.
 * @param rightBrackets - Tokens that close a nesting level.
 * @returns Index of the token at which the depth returns to zero.
 * @throws Error if the end of `tokens` is reached before the depth returns to zero.
 */
function _find_closing_match(tokens: TypstToken[], start: number,
    leftBrackets: TypstToken[], rightBrackets: TypstToken[]): number {
    assert(tokens[start].isOneOf(leftBrackets));

    let depth = 1;
    for (let i = start + 1; i < tokens.length; i++) {
        const tok = tokens[i];
        if (tok.isOneOf(rightBrackets)) {
            depth--;
        } else if (tok.isOneOf(leftBrackets)) {
            depth++;
        }
        if (depth === 0) {
            return i;
        }
    }
    throw new Error('Unmatched brackets or parentheses');
}
/**
 * Finds the closing token for the round, square or curly bracket opened at
 * `tokens[start]`.
 *
 * All three bracket kinds share one depth counter in `_find_closing_match`,
 * so the kind of the closing token is not required to equal the kind of the
 * opening one.
 *
 * @param tokens - Token list to scan.
 * @param start - Index of the opening bracket token.
 * @returns Index of the matching closing token.
 */
function find_closing_match(tokens: TypstToken[], start: number): number {
    const openers = [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET];
    const closers = [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET];
    return _find_closing_match(tokens, start, openers, closers);
}
/**
 * Pulls named parameters (`name: value`) out of a list of parsed arguments.
 *
 * An argument is treated as a named parameter when it is a group containing a
 * `:` element that is not its first item. Only `delim` is supported; it must
 * have exactly the shape `delim : value`.
 *
 * `arr` is modified in place (the named-parameter groups are removed) and the
 * same array instance is returned.
 *
 * @param arr - Parsed arguments; mutated.
 * @returns A tuple of the remaining positional arguments and the named parameters.
 * @throws TypstParserError if a `delim` group is not exactly `delim : value`,
 *         or if any other parameter name is used.
 */
function extract_named_params(arr: TypstNode[]): [TypstNode[], TypstNamedParams] {
    const COLON = new TypstToken(TypstTokenType.ELEMENT, ':').toNode();
    const DELIM = new TypstToken(TypstTokenType.SYMBOL, 'delim').toNode();

    const np: TypstNamedParams = {};
    const to_delete: number[] = [];

    for (let i = 0; i < arr.length; i++) {
        if (arr[i].type !== 'group') {
            continue;
        }

        const g = arr[i] as TypstGroup;
        const pos_colon = array_find(g.items, COLON);
        if (pos_colon === -1 || pos_colon === 0) {
            continue;
        }
        to_delete.push(i);

        const param_name = g.items[pos_colon - 1];
        if (!param_name.eq(DELIM)) {
            throw new TypstParserError('Not implemented for other named parameters');
        }
        // Require the colon to be the middle item. Checking only the length
        // would accept `x delim :` and store an undefined value for `delim`.
        if (g.items.length !== 3 || pos_colon !== 1) {
            throw new TypstParserError('Invalid number of arguments for delim');
        }
        np['delim'] = g.items[2];
    }

    // Remove from the back so that earlier indices stay valid.
    for (let i = to_delete.length - 1; i >= 0; i--) {
        arr.splice(to_delete[i], 1);
    }
    return [arr, np];
}
/**
 * Builds a list of `num` prime (`'`) element nodes.
 *
 * @param num - Number of prime nodes to create.
 * @returns A fresh array of prime nodes (empty when `num` is not positive).
 */
function primes(num: number): TypstNode[] {
    const nodes: TypstNode[] = [];
    let remaining = num;
    while (remaining > 0) {
        nodes.push(new TypstToken(TypstTokenType.ELEMENT, "'").toNode());
        remaining--;
    }
    return nodes;
}
// Node form of the '/' element token. The operator-processing helpers in this
// file compare nodes against it to recognise the fraction operator.
const DIV = new TypstToken(TypstTokenType.ELEMENT, '/').toNode();
/**
 * Returns the first node at or after `start` whose head token is neither a
 * SPACE nor a NEWLINE token.
 *
 * @param nodes - Node list to scan.
 * @param start - Index at which scanning begins.
 * @returns The first non-whitespace node, or `null` when the scan reaches the
 *          end of `nodes` without finding one.
 */
function next_non_whitespace(nodes: TypstNode[], start: number): TypstNode | null {
    const is_blank = (node: TypstNode): boolean => {
        const kind = node.head.type;
        return kind === TypstTokenType.SPACE || kind === TypstTokenType.NEWLINE;
    };

    let idx = start;
    for (; idx < nodes.length; idx++) {
        if (!is_blank(nodes[idx])) {
            return nodes[idx];
        }
    }
    // Same end-of-list test as before: `null` exactly when the scan stopped at
    // `nodes.length`.
    return idx === nodes.length ? null : nodes[idx];
}
/**
 * Removes SPACE/NEWLINE nodes that sit directly next to a `/` operator.
 *
 * A whitespace node is dropped when the last kept node was `/`, or when the
 * next non-whitespace node after it is `/`. All other nodes are kept in order.
 *
 * @param nodes - Node list to filter; not modified.
 * @returns A new array without the whitespace surrounding `/`.
 */
function trim_whitespace_around_operators(nodes: TypstNode[]): TypstNode[] {
    const kept: TypstNode[] = [];
    let prev_was_div = false;

    nodes.forEach((node, idx) => {
        const kind = node.head.type;
        const is_blank = kind === TypstTokenType.SPACE || kind === TypstTokenType.NEWLINE;

        // Skipped whitespace leaves `prev_was_div` untouched, so a whole run
        // of whitespace after '/' is removed.
        if (is_blank && (prev_was_div || next_non_whitespace(nodes, idx + 1)?.eq(DIV))) {
            return;
        }

        prev_was_div = node.eq(DIV);
        kept.push(node);
    });

    return kept;
}
/**
 * Folds `/` operators in a flat node list into `TypstFraction` nodes.
 *
 * Whitespace around `/` is trimmed first. Each operand that follows a pending
 * `/` becomes the denominator of a fraction whose numerator is the most
 * recently collected operand; a `leftright` operand on either side is replaced
 * by its body.
 *
 * @param nodes - Nodes of one parsed sequence.
 * @returns The single resulting node, or a `TypstGroup` of the results when
 *          there is not exactly one.
 * @throws TypstParserError if an operand follows `/` but there is no numerator.
 */
function process_operators(nodes: TypstNode[]): TypstNode {
    const operands: TypstNode[] = [];
    // Number of '/' operators seen that have not yet been given a denominator.
    let pending_divs = 0;

    for (const node of trim_whitespace_around_operators(nodes)) {
        if (node.eq(DIV)) {
            pending_divs++;
            continue;
        }
        if (pending_divs === 0) {
            operands.push(node);
            continue;
        }

        if (operands.length === 0) {
            throw new TypstParserError("Unexpected '/' operator, no numerator before it");
        }
        const lhs = operands.pop()!;
        const denominator = node.type === 'leftright' ? (node as TypstLeftright).body : node;
        const numerator = lhs.type === 'leftright' ? (lhs as TypstLeftright).body : lhs;

        operands.push(new TypstFraction([numerator, denominator]));
        pending_divs--; // this '/' has been consumed
    }

    if (operands.length === 1) {
        return operands[0];
    }
    return new TypstGroup(operands);
}
/**
 * Converts groups of the form `name : value` into a named-parameter map.
 *
 * Each value is stored as a `TypstTerminal` wrapping a LITERAL token whose
 * text is the string form of the value node.
 *
 * @param groups - Groups that must each hold exactly three items with `:` in the middle
 *                 (both conditions are asserted).
 * @returns Map from the string form of each name to its literal value node.
 */
function parse_named_params(groups: TypstGroup[]): TypstNamedParams {
    const COLON = new TypstToken(TypstTokenType.ELEMENT, ':').toNode();
    const np: TypstNamedParams = {};

    groups.forEach((group) => {
        const items = group.items;
        assert(items.length == 3);
        assert(items[1].eq(COLON));

        const key = items[0].toString();
        const literal = new TypstToken(TypstTokenType.LITERAL, items[2].toString());
        np[key] = new TypstTerminal(literal);
    });

    return np;
}
/**
 * Error raised by the Typst parser for input it cannot handle.
 *
 * Its `name` property is set to `'TypstParserError'`.
 */
export class TypstParserError extends Error {
    /**
     * @param message - Human-readable description of the parse failure.
     */
    constructor(message: string) {
        super(message);
        // When compiled to ES5, `super(...)` on a built-in returns a plain
        // Error and drops this class's prototype; restore it so that
        // `instanceof TypstParserError` (and subclasses) keep working.
        Object.setPrototypeOf(this, new.target.prototype);
        this.name = 'TypstParserError';
    }
}
// Result of one parse step: the parsed node plus a token position. How the
// position is defined depends on the method returning it.
type TypstParseResult = [TypstNode, number];
// Control tokens that introduce a subscript ('_') and a superscript ('^').
const SUB_SYMBOL: TypstToken = new TypstToken(TypstTokenType.CONTROL, '_');
const SUP_SYMBOL: TypstToken = new TypstToken(TypstTokenType.CONTROL, '^');
// Bracket element tokens. find_closing_match treats all three kinds as
// interchangeable openers/closers.
const LEFT_PARENTHESES: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '(');
const RIGHT_PARENTHESES: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ')');
const LEFT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '[');
const RIGHT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ']');
const LEFT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '{');
const RIGHT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '}');
// Separators: ',' splits function arguments, `mat` cells and `cases` rows;
// ';' splits `mat` rows.
const COMMA = new TypstToken(TypstTokenType.ELEMENT, ',');
const SEMICOLON = new TypstToken(TypstTokenType.ELEMENT, ';');
// '&' control token; used as the cell separator inside `cases(...)`.
const CONTROL_AND = new TypstToken(TypstTokenType.CONTROL, '&');
// Whitespace-handling options accepted by TypstParser.parseUntil.
// NOTE(review): the name says "Tex" although it is used by the Typst parser.
interface TexParseEnv {
// When false, parseUntil drops whitespace results whose text consists only of spaces.
spaceSensitive: boolean;
// When false, parseUntil drops whitespace results whose text is exactly '\n'.
newlineSensitive: boolean;
}
/**
 * Parser that turns a flat list of Typst tokens into a `TypstNode` tree.
 *
 * `space_sensitive` and `newline_sensitive` are the defaults `parseUntil`
 * falls back to when its `env` argument leaves the corresponding flag
 * undefined.
 */
export class TypstParser {
    space_sensitive: boolean;
    newline_sensitive: boolean;

    /**
     * @param space_sensitive - Default for keeping space-only whitespace nodes.
     * @param newline_sensitive - Default for keeping newline nodes.
     */
    constructor(space_sensitive: boolean = true, newline_sensitive: boolean = true) {
        this.space_sensitive = space_sensitive;
        this.newline_sensitive = newline_sensitive;
    }

    /**
     * Parses the whole token list.
     *
     * @param tokens - Tokens to parse.
     * @returns The root node of the parsed tree.
     */
    parse(tokens: TypstToken[]): TypstNode {
        const [tree] = this.parseGroup(tokens, 0, tokens.length);
        return tree;
    }

    /**
     * Parses `tokens[start, end)` as one sequence.
     *
     * The position in the result is relative to the sliced sub-array, not an
     * index into `tokens`; the callers inside this class ignore it.
     */
    parseGroup(tokens: TypstToken[], start: number, end: number): TypstParseResult {
        return this.parseUntil(tokens.slice(start, end), 0, null);
    }

    /**
     * Parses one expression starting at `start`, including any primes directly
     * after the base and an optional subscript and/or superscript (in either
     * order).
     *
     * @returns The parsed node and the index of the first token after it.
     */
    parseNextExpr(tokens: TypstToken[], start: number): TypstParseResult {
        let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
        let sub: TypstNode | null = null;
        let sup: TypstNode | null = null;

        // Primes directly after the base are grouped together with it.
        const num_base_prime = eat_primes(tokens, pos);
        if (num_base_prime > 0) {
            base = new TypstGroup([base].concat(primes(num_base_prime)));
            pos += num_base_prime;
        }

        if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
            [sub, pos] = this.parseSupOrSub(tokens, pos + 1);
            if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) {
                [sup, pos] = this.parseSupOrSub(tokens, pos + 1);
            }
        } else if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) {
            [sup, pos] = this.parseSupOrSub(tokens, pos + 1);
            if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
                [sub, pos] = this.parseSupOrSub(tokens, pos + 1);
            }
        }

        if (sub !== null || sup !== null) {
            const res: TypstSupsubData = { base, sup, sub };
            return [new TypstSupsub(res), pos];
        }
        return [base, pos];
    }

    /**
     * Parses expressions from `start` until `stopToken` is met or `tokens` ends.
     *
     * @param tokens - Token list; parsing never reads past its end.
     * @param start - Index of the first token to parse.
     * @param stopToken - Token that ends the sequence, or `null` to parse to the end.
     * @param env - Whitespace options; undefined flags fall back to the parser's
     *              own settings. The object is not modified.
     * @returns The parsed node and (position of `stopToken`) + 1. When
     *          `stopToken` is non-null and was not found the position is -1.
     *          When `stopToken` is `null` the position is one past the index
     *          at which parsing stopped.
     */
    parseUntil(tokens: TypstToken[], start: number, stopToken: TypstToken | null, env: Partial<TexParseEnv> = {}): TypstParseResult {
        // Resolve the flags locally instead of writing them back into the
        // caller's `env` object.
        const spaceSensitive = env.spaceSensitive ?? this.space_sensitive;
        const newlineSensitive = env.newlineSensitive ?? this.newline_sensitive;

        const results: TypstNode[] = [];
        let pos = start;
        while (pos < tokens.length) {
            if (stopToken !== null && tokens[pos].eq(stopToken)) {
                break;
            }
            const [res, newPos] = this.parseNextExpr(tokens, pos);
            pos = newPos;

            const headType = res.head.type;
            if (headType === TypstTokenType.SPACE || headType === TypstTokenType.NEWLINE) {
                if (!spaceSensitive && res.head.value.replace(/ /g, '').length === 0) {
                    continue;
                }
                if (!newlineSensitive && res.head.value === '\n') {
                    continue;
                }
            }
            results.push(res);
        }

        // Ran off the end while a stop token was still expected.
        if (pos >= tokens.length && stopToken !== null) {
            return [TypstToken.NONE.toNode(), -1];
        }
        return [process_operators(results), pos + 1];
    }

    /**
     * Parses the operand of a `_` or `^`: either a parenthesised sequence
     * (whose content alone becomes the node) or a single expression, followed
     * by any primes.
     *
     * @param start - Index of the first token after the `_` / `^`.
     * @returns The operand node and the index of the first token after it.
     * @throws Error if there is no token at `start`, or if `(` is never closed.
     */
    parseSupOrSub(tokens: TypstToken[], start: number): TypstParseResult {
        // A trailing '_' or '^' would otherwise dereference an undefined token.
        if (start >= tokens.length) {
            throw new Error("Unexpected end of input: expected an expression after '_' or '^'");
        }

        let node: TypstNode;
        let end: number;
        if (tokens[start].eq(LEFT_PARENTHESES)) {
            [node, end] = this.parseUntil(tokens, start + 1, RIGHT_PARENTHESES);
            if (end === -1) {
                throw new Error("Unmatched '('");
            }
        } else {
            [node, end] = this.parseNextExprWithoutSupSub(tokens, start);
        }

        const num_prime = eat_primes(tokens, end);
        if (num_prime > 0) {
            node = new TypstGroup([node].concat(primes(num_prime)));
            end += num_prime;
        }
        return [node, end];
    }

    /**
     * Parses one expression at `start` without looking for primes or
     * sub/superscripts: a parenthesised group, a `mat` / `cases` / `lr` call,
     * a `#heading` / `#text` markup call, a generic function call, or a single
     * token.
     *
     * @returns The parsed node and the index of the first token after it.
     * @throws Error if a `(` is never closed.
     */
    parseNextExprWithoutSupSub(tokens: TypstToken[], start: number): TypstParseResult {
        const firstToken = tokens[start];
        const node = firstToken.toNode();

        if (firstToken.eq(LEFT_PARENTHESES)) {
            const [body, end] = this.parseUntil(tokens, start + 1, RIGHT_PARENTHESES);
            if (end === -1) {
                throw new Error("Unmatched '('");
            }
            const res = new TypstLeftright(null, { body: body, left: LEFT_PARENTHESES, right: RIGHT_PARENTHESES } as TypstLeftRightData);
            return [res, end];
        }

        // Element tokens that do not start with a letter are never function names.
        if (firstToken.type === TypstTokenType.ELEMENT && !isalpha(firstToken.value[0])) {
            return [node, start + 1];
        }

        if ([TypstTokenType.ELEMENT, TypstTokenType.SYMBOL].includes(firstToken.type)) {
            if (start + 1 < tokens.length && tokens[start + 1].eq(LEFT_PARENTHESES)) {
                if (firstToken.value === 'mat') {
                    const [matrix, named_params, newPos] = this.parseMatrix(tokens, start + 1, SEMICOLON, COMMA);
                    const mat = new TypstMatrixLike(firstToken, matrix);
                    mat.setOptions(named_params);
                    return [mat, newPos];
                }
                if (firstToken.value === 'cases') {
                    const [cases, named_params, newPos] = this.parseMatrix(tokens, start + 1, COMMA, CONTROL_AND);
                    const casesNode = new TypstMatrixLike(firstToken, cases);
                    casesNode.setOptions(named_params);
                    return [casesNode, newPos];
                }
                if (firstToken.value === 'lr') {
                    return this.parseLrArguments(tokens, start + 1);
                }
                if (['#heading', '#text'].includes(firstToken.value)) {
                    // Expected shape: #func(name: value, ...)[$ ... $]
                    const [args, newPos] = this.parseArguments(tokens, start + 1);
                    const named_params = parse_named_params(args as TypstGroup[]);
                    assert(tokens[newPos].eq(LEFT_BRACKET));
                    const DOLLAR = new TypstToken(TypstTokenType.ELEMENT, '$');
                    const end = _find_closing_match(tokens, newPos + 1, [DOLLAR], [DOLLAR]);
                    const [group] = this.parseGroup(tokens, newPos + 2, end);
                    assert(tokens[end + 1].eq(RIGHT_BRACKET));
                    const markup_func = new TypstMarkupFunc(firstToken, [group]);
                    markup_func.setOptions(named_params);
                    return [markup_func, end + 2];
                }
                const [args, newPos] = this.parseArguments(tokens, start + 1);
                const func_call = new TypstFuncCall(firstToken, args);
                return [func_call, newPos];
            }
        }
        return [node, start + 1];
    }

    /**
     * Parses a comma-separated argument list.
     *
     * @param start - Position of the left parenthesis.
     * @returns The argument nodes and the index just after the closing bracket.
     */
    parseArguments(tokens: TypstToken[], start: number): [TypstNode[], number] {
        const end = find_closing_match(tokens, start);
        return [this.parseArgumentsWithSeparator(tokens, start + 1, end, COMMA), end + 1];
    }

    /**
     * Parses the argument of `lr(...)`. A leading left delimiter and a trailing
     * right delimiter are split off; the rest becomes the body.
     *
     * @param start - Position of the left parenthesis.
     * @returns The `TypstLeftright` node and the index just after the closing bracket.
     */
    parseLrArguments(tokens: TypstToken[], start: number): [TypstNode, number] {
        const lr_token = new TypstToken(TypstTokenType.SYMBOL, 'lr');
        const end = find_closing_match(tokens, start);

        let left: TypstToken | null = null;
        let right: TypstToken | null = null;
        let inner_start = start + 1;
        let inner_end = end;
        if (inner_end > inner_start && tokens[inner_start].isOneOf(TypstToken.LEFT_DELIMITERS)) {
            left = tokens[inner_start];
            inner_start += 1;
        }
        if (inner_end - 1 > inner_start && tokens[inner_end - 1].isOneOf(TypstToken.RIGHT_DELIMITERS)) {
            right = tokens[inner_end - 1];
            inner_end -= 1;
        }

        const [inner_args] = this.parseGroup(tokens, inner_start, inner_end);
        return [
            new TypstLeftright(lr_token, { body: inner_args, left: left, right: right }),
            end + 1,
        ];
    }

    /**
     * Parses the body of a matrix-like call into rows of cells and collects
     * named parameters found among the cells.
     *
     * @param start - Position of the left parenthesis.
     * @param rowSepToken - Token separating rows.
     * @param cellSepToken - Token separating cells within a row.
     * @returns The rows, the merged named parameters, and the index just after
     *          the closing bracket.
     */
    parseMatrix(tokens: TypstToken[], start: number, rowSepToken: TypstToken, cellSepToken: TypstToken): [TypstNode[][], TypstNamedParams, number] {
        const end = find_closing_match(tokens, start);
        const matrix: TypstNode[][] = [];
        const named_params: TypstNamedParams = {};

        let pos = start + 1;
        while (pos < end) {
            const next_stop = this.findRowSeparator(tokens, pos, end, rowSepToken);
            const cells = this.parseArgumentsWithSeparator(tokens, pos, next_stop, cellSepToken);
            const [row, np] = extract_named_params(cells);
            matrix.push(row);
            Object.assign(named_params, np);
            pos = next_stop + 1;
        }
        return [matrix, named_params, end + 1];
    }

    /**
     * Finds the next row separator in `tokens[start, end)` that is not inside
     * parentheses, so that e.g. the comma in `cases(f(a, b) & x, ...)` does
     * not cut the row in the middle of the call.
     *
     * Only round parentheses are tracked; an unmatched `)` is ignored rather
     * than driving the depth negative.
     *
     * @returns Index of the separator, or `end` when the row runs to `end`.
     */
    private findRowSeparator(tokens: TypstToken[], start: number, end: number, rowSepToken: TypstToken): number {
        let depth = 0;
        for (let i = start; i < end; i++) {
            const tok = tokens[i];
            if (tok.eq(LEFT_PARENTHESES)) {
                depth++;
            } else if (tok.eq(RIGHT_PARENTHESES)) {
                if (depth > 0) {
                    depth--;
                }
            } else if (depth === 0 && tok.eq(rowSepToken)) {
                return i;
            }
        }
        return end;
    }

    /**
     * Parses `tokens[start, end)` as a list of arguments split by `sepToken`.
     * Space-only whitespace is dropped; newlines are kept.
     *
     * @param start - Position of the first token of the arguments.
     * @param end - Position just past the last token of the arguments.
     * @returns One node per argument.
     */
    parseArgumentsWithSeparator(tokens: TypstToken[], start: number, end: number, sepToken: TypstToken): TypstNode[] {
        const args: TypstNode[] = [];
        // Truncate once so that parsing cannot read past `end`.
        const bounded = tokens.slice(0, end);
        const env = { spaceSensitive: false, newlineSensitive: true };

        let pos = start;
        while (pos < end) {
            let [arg, newPos] = this.parseUntil(bounded, pos, sepToken, env);
            if (newPos === -1) {
                // No further separator: the last argument runs to `end`.
                [arg, newPos] = this.parseUntil(bounded, pos, null, env);
            }
            args.push(arg);
            pos = newPos;
        }
        return args;
    }
}
/**
 * Tokenizes a Typst source string and parses it with a default `TypstParser`.
 *
 * @param typst - Typst source text.
 * @returns The root node of the parsed tree.
 */
export function parseTypst(typst: string): TypstNode {
    return new TypstParser().parse(tokenize_typst(typst));
}