cspell-grammar: tokenizeLine.js

import assert from 'node:assert';
import { extractScope } from './grammarNormalizer.js';
import { applyCaptureToBeginOrMatch, applyCaptureToEnd } from './processors/procMatchingRule.js';

// Tokenize a single line of text starting from the given rule context.
// Returns the tokens for the line plus a `parse` continuation for the next line.
export function tokenizeLine(line, rule) {
    const text = line.text;
    const lineLen = line.text.length;
    const parsedText = [];
    let ctx = buildContext({ ...line, offset: 0, anchor: -1 }, rule);
    // Note: the begin rule has already been matched and applied.
    // Does it need to end or go deeper?
    while (ctx.line.offset <= lineLen) {
        // We are at an end.
        let endMatch = ctx.rule.end?.(ctx.line);
        // Close every rule whose end pattern matches at the current offset.
        while (endMatch?.index === ctx.line.offset) {
            parsedText.push(...applyCaptureToEnd(ctx.rule, endMatch));
            ctx = findParentWithEnd(ctx);
            ctx.line.offset = endMatch.index + endMatch.match.length;
            endMatch = ctx.rule.end?.(ctx.line);
        }
        if (ctx.line.offset >= lineLen) break;
        const { line, rule } = ctx;
        const offset = line.offset;
        const match = rule.findNext?.(line);
        const limit = endMatch?.index ?? lineLen;
        const emitTil = match ? Math.min(match.match.index, limit) : limit;
        // Emit the plain text between the current offset and the next match (or the end).
        if (offset < emitTil) {
            const scope = extractScope(rule);
            const start = offset;
            const end = emitTil;
            parsedText.push({
                scope,
                text: text.slice(start, end),
                range: [start, end, line.lineNumber],
            });
            ctx.line.offset = emitTil;
        }
        if (!match || (endMatch && endMatch.index <= match.match.index)) {
            continue; // We are at an end, process it at the beginning of the loop.
        }
        // Process the beginning of the match.
        parsedText.push(...applyCaptureToBeginOrMatch(match));
        line.anchor = match.match.index + match.match.match.length;
        line.offset = line.anchor;
        ctx = findNearestWithEnd(buildContext(line, match.rule));
    }
    return toParseLineResult(line, ctx.rule, parsedText);
}

export function tokenizeText(text, grammar) {
    return [...tokenizeTextIterable(text, grammar)];
}

// Lazily tokenize a document line by line.
// The split keeps the line terminators attached to each line, and the parser
// state is threaded from one line to the next via the returned `parse` function.
export function* tokenizeTextIterable(text, grammar) {
    const lines = text.split(/(?<=\r\n|\n|\r(?!\n))/);
    const rule = grammar.begin();
    let documentOffset = 0;
    let tr = tokenizeLine({ text: lines[0], lineNumber: 0, documentOffset }, rule);
    documentOffset += lines[0].length;
    yield toParsedLine(tr);
    for (let i = 1; i < lines.length; ++i) {
        const line = { text: lines[i], lineNumber: i, documentOffset };
        documentOffset += line.text.length;
        tr = tr.parse(line);
        yield toParsedLine(tr);
    }
}

function toParsedLine(pr) {
    const { tokens: parsedText, line, offset } = pr;
    return { tokens: parsedText, line, offset };
}

function toParseLineResult(line, rule, parsedText) {
    return {
        tokens: parsedText,
        line,
        offset: line.documentOffset,
        parse: (line) => tokenizeLine(line, rule),
    };
}

// Build the context chain for a rule by walking its parent chain, root first.
function buildContext(line, rule) {
    const rules = calcRuleStack(rule);
    const numRules = rules.length;
    const rootNum = numRules - 1;
    const rootRule = rules[rootNum];
    const rootContext = {
        line,
        rule: rootRule,
    };
    let ctx = rootContext;
    for (let i = rootNum - 1; i >= 0; --i) {
        const rule = rules[i];
        const line = ctx.line;
        ctx = {
            line,
            rule,
            parent: ctx,
        };
        // Check while here.
    }
    return ctx;
}

// Collect the rule and all of its ancestors, innermost first.
function calcRuleStack(rule) {
    const rules = [];
    let r = rule;
    while (r) {
        rules.push(r);
        r = r.parent;
    }
    return rules;
}

// Assert that a value is defined and return it.
function must(t, msg = 'Must be defined') {
    assert(t !== undefined && t !== null, msg);
    return t;
}

function findParentWithEnd(ctx) {
    return findNearestWithEnd(must(ctx.parent));
}

// Walk up the context chain to the nearest rule that has an `end` matcher.
function findNearestWithEnd(ctx) {
    while (!ctx.rule.end) {
        ctx = must(ctx.parent);
    }
    return ctx;
}
//# sourceMappingURL=tokenizeLine.js.map
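
Below is a minimal usage sketch, not part of the package source. It assumes a normalized grammar object exposing begin() (as used by tokenizeTextIterable above) has been built elsewhere, that every emitted token carries the { scope, text, range: [start, end, lineNumber] } shape produced in tokenizeLine, and the printTokens helper and its output format are purely illustrative.

// Usage sketch (illustrative only): print each token produced for a document.
// `grammar` is assumed to be a normalized grammar with a begin() rule, built elsewhere.
import { tokenizeTextIterable } from './tokenizeLine.js';

export function printTokens(text, grammar) {
    for (const parsedLine of tokenizeTextIterable(text, grammar)) {
        for (const token of parsedLine.tokens) {
            // Assumes the [start, end, lineNumber] range shape used in tokenizeLine above.
            const [start, end, lineNumber] = token.range;
            console.log(`${lineNumber}:${start}-${end} ${token.scope} ${JSON.stringify(token.text)}`);
        }
    }
}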