UNPKG

hyperformula

Version:

HyperFormula is a JavaScript engine for efficient processing of spreadsheet-like data and formulas

385 lines 14.8 kB
/**
 * @license
 * Copyright (c) 2025 Handsoncode. All rights reserved.
 */

import { tokenMatcher } from 'chevrotain';
import { ErrorType } from "../Cell.mjs";
import { AstNodeType, buildParsingErrorAst, CellAddress, collectDependencies } from ".//index.mjs";
import { cellAddressFromString, columnAddressFromString, rowAddressFromString } from "./addressRepresentationConverters.mjs";
import { imageWithWhitespace, ParsingErrorType, RangeSheetReferenceType } from "./Ast.mjs";
import { binaryOpTokenMap } from "./binaryOpTokenMap.mjs";
import { Cache } from "./Cache.mjs";
import { FormulaLexer, FormulaParser } from "./FormulaParser.mjs";
import { buildLexerConfig, CellReference, ColumnRange, ProcedureName, RowRange } from "./LexerConfig.mjs";
import { formatNumber } from "./Unparser.mjs";
import { ColumnAddress } from "./ColumnAddress.mjs";
import { RowAddress } from "./RowAddress.mjs";

/**
 * Parses formula using caching if feasible.
 *
 * A formula is tokenized, a hash is derived from its tokens, and the AST is
 * looked up in (or stored into) a cache keyed by that hash.
 */
export class ParserWithCaching {
  /**
   * @param config - parser configuration; passed to `buildLexerConfig` and read for
   *   `decimalSeparator`, `functionArgSeparator` and `translationPackage`
   * @param functionRegistry - passed to the cache and to `collectDependencies`
   * @param resolveSheetReference - passed to the formula parser and to the address-from-string converters
   */
  constructor(config, functionRegistry, resolveSheetReference) {
    this.config = config;
    this.functionRegistry = functionRegistry;
    this.resolveSheetReference = resolveSheetReference;
    // Counts how many times parse() was served from the cache.
    this.statsCacheUsed = 0;
    this.lexerConfig = buildLexerConfig(config);
    this.lexer = new FormulaLexer(this.lexerConfig);
    this.formulaParser = new FormulaParser(this.lexerConfig, this.resolveSheetReference);
    this.cache = new Cache(this.functionRegistry);
  }

  /**
   * Parses a formula.
   *
   * @param text - formula to parse
   * @param formulaAddress - address with regard to which formula should be parsed. Impacts computed addresses in R0C0 format.
   * @returns an object with `ast`, `errors`, `hasVolatileFunction`, `hasStructuralChangeFunction`
   *   and `dependencies`; on lexing or parsing errors both flags are `false` and `dependencies` is empty
   */
  parse(text, formulaAddress) {
    // Remembered on the instance because the range-ordering helpers read it.
    this.formulaAddress = formulaAddress;

    const lexerResult = this.tokenizeFormula(text);
    if (lexerResult.errors.length > 0) {
      const errors = lexerResult.errors.map(e => ({
        type: ParsingErrorType.LexingError,
        message: e.message
      }));
      return {
        ast: buildParsingErrorAst(),
        errors,
        hasVolatileFunction: false,
        hasStructuralChangeFunction: false,
        dependencies: []
      };
    }

    const hash = this.computeHashFromTokens(lexerResult.tokens, formulaAddress);
    let cacheResult = this.cache.get(hash);
    if (cacheResult !== undefined) {
      ++this.statsCacheUsed;
    } else {
      const processedTokens = this.bindWhitespacesToTokens(lexerResult.tokens);
      const parsingResult = this.formulaParser.parseFromTokens(processedTokens, formulaAddress);
      if (parsingResult.errors.length > 0) {
        // Results with parsing errors are returned directly and never cached.
        return Object.assign({}, parsingResult, {
          hasVolatileFunction: false,
          hasStructuralChangeFunction: false,
          dependencies: []
        });
      }
      cacheResult = this.cache.set(hash, parsingResult.ast);
    }

    const { ast, hasVolatileFunction, hasStructuralChangeFunction } = cacheResult;
    const astWithNoReversedRanges = this.convertReversedRangesToRegularRanges(ast);
    const dependencies = collectDependencies(astWithNoReversedRanges, this.functionRegistry);
    return {
      ast: astWithNoReversedRanges,
      errors: [],
      hasVolatileFunction,
      hasStructuralChangeFunction,
      dependencies
    };
  }

  /**
   * Returns a copy of the AST in which the `start`/`end` of every cell, column and row
   * range have been put in order by the `order*RangeEnds` helpers. Leaf nodes are
   * returned as-is; composite nodes are shallow-copied with their children converted
   * recursively.
   *
   * @param ast - AST node to convert
   * @returns the converted node (no value is returned for a node type not listed below)
   */
  convertReversedRangesToRegularRanges(ast) {
    switch (ast.type) {
      case AstNodeType.EMPTY:
      case AstNodeType.NUMBER:
      case AstNodeType.STRING:
      case AstNodeType.ERROR:
      case AstNodeType.ERROR_WITH_RAW_INPUT:
      case AstNodeType.CELL_REFERENCE:
      case AstNodeType.NAMED_EXPRESSION:
        return ast;
      case AstNodeType.CELL_RANGE: {
        const orderedEnds = this.orderCellRangeEnds(ast.start, ast.end);
        return Object.assign({}, ast, { start: orderedEnds.start, end: orderedEnds.end });
      }
      case AstNodeType.COLUMN_RANGE: {
        const orderedEnds = this.orderColumnRangeEnds(ast.start, ast.end);
        return Object.assign({}, ast, { start: orderedEnds.start, end: orderedEnds.end });
      }
      case AstNodeType.ROW_RANGE: {
        const orderedEnds = this.orderRowRangeEnds(ast.start, ast.end);
        return Object.assign({}, ast, { start: orderedEnds.start, end: orderedEnds.end });
      }
      case AstNodeType.PERCENT_OP:
      case AstNodeType.PLUS_UNARY_OP:
      case AstNodeType.MINUS_UNARY_OP: {
        const valueFixed = this.convertReversedRangesToRegularRanges(ast.value);
        return Object.assign({}, ast, { value: valueFixed });
      }
      case AstNodeType.CONCATENATE_OP:
      case AstNodeType.EQUALS_OP:
      case AstNodeType.NOT_EQUAL_OP:
      case AstNodeType.LESS_THAN_OP:
      case AstNodeType.GREATER_THAN_OP:
      case AstNodeType.LESS_THAN_OR_EQUAL_OP:
      case AstNodeType.GREATER_THAN_OR_EQUAL_OP:
      case AstNodeType.MINUS_OP:
      case AstNodeType.PLUS_OP:
      case AstNodeType.TIMES_OP:
      case AstNodeType.DIV_OP:
      case AstNodeType.POWER_OP: {
        const leftFixed = this.convertReversedRangesToRegularRanges(ast.left);
        const rightFixed = this.convertReversedRangesToRegularRanges(ast.right);
        return Object.assign({}, ast, { left: leftFixed, right: rightFixed });
      }
      case AstNodeType.PARENTHESIS: {
        const exprFixed = this.convertReversedRangesToRegularRanges(ast.expression);
        return Object.assign({}, ast, { expression: exprFixed });
      }
      case AstNodeType.FUNCTION_CALL: {
        const argsFixed = ast.args.map(arg => this.convertReversedRangesToRegularRanges(arg));
        return Object.assign({}, ast, { args: argsFixed });
      }
      case AstNodeType.ARRAY: {
        const argsFixed = ast.args.map(argsRow => argsRow.map(arg => this.convertReversedRangesToRegularRanges(arg)));
        return Object.assign({}, ast, { args: argsFixed });
      }
    }
  }

  /**
   * Orders the two ends of a cell range. Columns, rows and sheets are sorted
   * independently of each other, then recombined into a start and an end address.
   *
   * @param endA - one end of the range
   * @param endB - the other end of the range
   * @returns `{ start, end }` built with `CellAddress.fromColAndRow`
   */
  orderCellRangeEnds(endA, endB) {
    const ends = [endA, endB];
    const [startCol, endCol] = ends
      .map(e => e.toColumnAddress())
      .sort(ColumnAddress.compareByAbsoluteAddress(this.formulaAddress));
    const [startRow, endRow] = ends
      .map(e => e.toRowAddress())
      .sort(RowAddress.compareByAbsoluteAddress(this.formulaAddress));
    const [startSheet, endSheet] = ends
      .map(e => e.sheet)
      .sort(ParserWithCaching.compareSheetIds);
    return {
      start: CellAddress.fromColAndRow(startCol, startRow, startSheet),
      end: CellAddress.fromColAndRow(endCol, endRow, endSheet)
    };
  }

  /**
   * Orders the two ends of a column range; columns and sheets are sorted independently.
   *
   * @param endA - one end of the range
   * @param endB - the other end of the range
   * @returns `{ start, end }` as freshly constructed `ColumnAddress` instances
   */
  orderColumnRangeEnds(endA, endB) {
    // The array is local, so sorting it in place does not affect the caller's objects.
    const ends = [endA, endB];
    const [startCol, endCol] = ends.sort(ColumnAddress.compareByAbsoluteAddress(this.formulaAddress));
    const [startSheet, endSheet] = ends
      .map(e => e.sheet)
      .sort(ParserWithCaching.compareSheetIds);
    return {
      start: new ColumnAddress(startCol.type, startCol.col, startSheet),
      end: new ColumnAddress(endCol.type, endCol.col, endSheet)
    };
  }

  /**
   * Orders the two ends of a row range; rows and sheets are sorted independently.
   *
   * @param endA - one end of the range
   * @param endB - the other end of the range
   * @returns `{ start, end }` as freshly constructed `RowAddress` instances
   */
  orderRowRangeEnds(endA, endB) {
    // The array is local, so sorting it in place does not affect the caller's objects.
    const ends = [endA, endB];
    const [startRow, endRow] = ends.sort(RowAddress.compareByAbsoluteAddress(this.formulaAddress));
    const [startSheet, endSheet] = ends
      .map(e => e.sheet)
      .sort(ParserWithCaching.compareSheetIds);
    return {
      start: new RowAddress(startRow.type, startRow.row, startSheet),
      end: new RowAddress(endRow.type, endRow.row, endSheet)
    };
  }

  /**
   * Sort comparator for sheet ids. A `null`/`undefined` id is treated as `Infinity`,
   * so it sorts after every numeric id.
   *
   * @param sheetA - first sheet id, or null/undefined
   * @param sheetB - second sheet id, or null/undefined
   * @returns negative, zero or positive number, as expected by `Array.prototype.sort`
   */
  static compareSheetIds(sheetA, sheetB) {
    const a = sheetA != null ? sheetA : Infinity;
    const b = sheetB != null ? sheetB : Infinity;
    // Equal values (including two missing ids) are reported as 0 explicitly;
    // `Infinity - Infinity` would otherwise evaluate to NaN.
    if (a === b) {
      return 0;
    }
    return a - b;
  }

  /**
   * Looks up the cache entry for the hash computed from the given AST.
   *
   * @param ast - AST whose cache entry should be fetched
   * @returns see {@link fetchCachedResult}
   * @throws Error when the cache holds no entry for the computed hash
   */
  fetchCachedResultForAst(ast) {
    const hash = this.computeHashFromAst(ast);
    return this.fetchCachedResult(hash);
  }

  /**
   * Looks up a cache entry by hash.
   *
   * @param hash - cache key
   * @returns an object with `ast`, an empty `errors` array, both function flags, and
   *   `dependencies` taken from the entry's `relativeDependencies`
   * @throws Error when the cache holds no entry for `hash`
   */
  fetchCachedResult(hash) {
    const cacheResult = this.cache.get(hash);
    if (cacheResult === undefined) {
      throw new Error('There is no AST with such key in the cache');
    }
    const { ast, hasVolatileFunction, hasStructuralChangeFunction, relativeDependencies } = cacheResult;
    return {
      ast,
      errors: [],
      hasVolatileFunction,
      hasStructuralChangeFunction,
      dependencies: relativeDependencies
    };
  }

  /**
   * Builds the cache key for a token sequence by concatenating one fragment per token:
   * the address hash for cell references and column/row ranges, the canonical name
   * followed by `(` for procedure names, and the raw token image otherwise.
   *
   * @param tokens - tokens produced by the lexer
   * @param baseAddress - address passed to the address-from-string converters
   * @returns the hash string (empty for an empty token list)
   */
  computeHashFromTokens(tokens, baseAddress) {
    // Shared by column and row ranges: hashes both ends, or yields '!REF' if either end cannot be resolved.
    const hashRangeImage = (image, addressFromString) => {
      const [start, end] = image.split(':');
      const startAddress = addressFromString(start, baseAddress, this.resolveSheetReference);
      const endAddress = addressFromString(end, baseAddress, this.resolveSheetReference);
      if (startAddress === undefined || endAddress === undefined) {
        return '!REF';
      }
      return ''.concat(startAddress.hash(true), ':', endAddress.hash(true));
    };

    let hash = '';
    for (const token of tokens) {
      if (tokenMatcher(token, CellReference)) {
        const cellAddress = cellAddressFromString(token.image, baseAddress, this.resolveSheetReference);
        // An unresolvable reference contributes its raw image instead of an address hash.
        hash = hash.concat(cellAddress === undefined ? token.image : cellAddress.hash(true));
      } else if (tokenMatcher(token, ProcedureName)) {
        // The last character of the image is dropped and '(' is appended below
        // (presumably the image ends with the opening parenthesis — confirm in LexerConfig).
        const procedureName = token.image.toUpperCase().slice(0, -1);
        const mappedName = this.lexerConfig.functionMapping[procedureName];
        const canonicalProcedureName = mappedName != null ? mappedName : procedureName;
        hash = hash.concat(canonicalProcedureName, '(');
      } else if (tokenMatcher(token, ColumnRange)) {
        hash = hash.concat(hashRangeImage(token.image, columnAddressFromString));
      } else if (tokenMatcher(token, RowRange)) {
        hash = hash.concat(hashRangeImage(token.image, rowAddressFromString));
      } else {
        hash = hash.concat(token.image);
      }
    }
    return hash;
  }

  /**
   * Computes the hash of the given AST and hands both to `cache.maybeSetAndThenGet`.
   *
   * @param ast - AST to remember
   * @returns whatever `cache.maybeSetAndThenGet` returns
   */
  rememberNewAst(ast) {
    const hash = this.computeHashFromAst(ast);
    return this.cache.maybeSetAndThenGet(hash, ast);
  }

  /**
   * Computes the cache key for an AST: `=` followed by the hash of its root node.
   *
   * @param ast - root AST node
   * @returns the hash string
   */
  computeHashFromAst(ast) {
    return '=' + this.computeHashOfAstNode(ast);
  }

  /**
   * Recursively renders an AST node into its hash fragment. Each fragment is combined
   * with the node's `leadingWhitespace` via `imageWithWhitespace`. Node types without
   * a dedicated case are handled as binary operators using `binaryOpTokenMap`.
   *
   * @param ast - AST node to hash
   * @returns the hash fragment for the node
   */
  computeHashOfAstNode(ast) {
    switch (ast.type) {
      case AstNodeType.EMPTY: {
        return ast.leadingWhitespace || '';
      }
      case AstNodeType.NUMBER: {
        return imageWithWhitespace(formatNumber(ast.value, this.config.decimalSeparator), ast.leadingWhitespace);
      }
      case AstNodeType.STRING: {
        return imageWithWhitespace('"' + ast.value + '"', ast.leadingWhitespace);
      }
      case AstNodeType.NAMED_EXPRESSION: {
        return imageWithWhitespace(ast.expressionName, ast.leadingWhitespace);
      }
      case AstNodeType.FUNCTION_CALL: {
        const args = ast.args
          .map(arg => this.computeHashOfAstNode(arg))
          .join(this.config.functionArgSeparator);
        const rightPart = ast.procedureName + '(' + args + imageWithWhitespace(')', ast.internalWhitespace);
        return imageWithWhitespace(rightPart, ast.leadingWhitespace);
      }
      case AstNodeType.CELL_REFERENCE: {
        return imageWithWhitespace(ast.reference.hash(true), ast.leadingWhitespace);
      }
      case AstNodeType.COLUMN_RANGE:
      case AstNodeType.ROW_RANGE:
      case AstNodeType.CELL_RANGE: {
        // The flag passed to hash() is true for the start unless the sheet reference is RELATIVE,
        // and true for the end only when it is BOTH_ABSOLUTE.
        const start = ast.start.hash(ast.sheetReferenceType !== RangeSheetReferenceType.RELATIVE);
        const end = ast.end.hash(ast.sheetReferenceType === RangeSheetReferenceType.BOTH_ABSOLUTE);
        return imageWithWhitespace(start + ':' + end, ast.leadingWhitespace);
      }
      case AstNodeType.MINUS_UNARY_OP: {
        return imageWithWhitespace('-' + this.computeHashOfAstNode(ast.value), ast.leadingWhitespace);
      }
      case AstNodeType.PLUS_UNARY_OP: {
        return imageWithWhitespace('+' + this.computeHashOfAstNode(ast.value), ast.leadingWhitespace);
      }
      case AstNodeType.PERCENT_OP: {
        return this.computeHashOfAstNode(ast.value) + imageWithWhitespace('%', ast.leadingWhitespace);
      }
      case AstNodeType.ERROR: {
        const image = this.config.translationPackage.getErrorTranslation(ast.error ? ast.error.type : ErrorType.ERROR);
        return imageWithWhitespace(image, ast.leadingWhitespace);
      }
      case AstNodeType.ERROR_WITH_RAW_INPUT: {
        return imageWithWhitespace(ast.rawInput, ast.leadingWhitespace);
      }
      case AstNodeType.ARRAY: {
        const args = ast.args
          .map(row => row.map(val => this.computeHashOfAstNode(val)).join(','))
          .join(';');
        return imageWithWhitespace('{' + args + imageWithWhitespace('}', ast.internalWhitespace), ast.leadingWhitespace);
      }
      case AstNodeType.PARENTHESIS: {
        const expression = this.computeHashOfAstNode(ast.expression);
        const rightPart = '(' + expression + imageWithWhitespace(')', ast.internalWhitespace);
        return imageWithWhitespace(rightPart, ast.leadingWhitespace);
      }
      default: {
        // Remaining node types are treated as binary operators with `left` and `right` operands.
        return this.computeHashOfAstNode(ast.left)
          + imageWithWhitespace(binaryOpTokenMap[ast.type], ast.leadingWhitespace)
          + this.computeHashOfAstNode(ast.right);
      }
    }
  }

  /**
   * Removes whitespace tokens from the token list. A non-whitespace token that directly
   * follows a whitespace token gets that whitespace token assigned to its
   * `leadingWhitespace` property (the token object is mutated).
   *
   * @param tokens - tokens produced by the lexer
   * @returns a new array containing only the non-whitespace tokens
   */
  bindWhitespacesToTokens(tokens) {
    const processedTokens = [];
    // With no tokens, tokens[0] is undefined and must not be handed to tokenMatcher.
    if (tokens.length === 0) {
      return processedTokens;
    }

    const first = tokens[0];
    if (!tokenMatcher(first, this.lexerConfig.WhiteSpace)) {
      processedTokens.push(first);
    }
    for (let i = 1; i < tokens.length; ++i) {
      const current = tokens[i];
      if (tokenMatcher(current, this.lexerConfig.WhiteSpace)) {
        continue;
      }
      const previous = tokens[i - 1];
      if (tokenMatcher(previous, this.lexerConfig.WhiteSpace)) {
        current.leadingWhitespace = previous;
      }
      processedTokens.push(current);
    }
    return processedTokens;
  }

  /**
   * Runs the lexer on the formula text.
   *
   * @param text - formula text
   * @returns the lexer result; `parse` reads its `tokens` and `errors`
   */
  tokenizeFormula(text) {
    return this.lexer.tokenizeFormula(text);
  }
}