cm-tarnation
An alternative parser for CodeMirror 6
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { LanguageDescription, ParseContext } from "@codemirror/language";
import { parseMixed, Parser as CodeMirrorParser, Tree } from "@lezer/common";
import { ChunkBuffer } from "./compiler/buffer";
import { Compiler } from "./compiler/compiler";
import { DISABLED_NESTED, LIMIT_TO_VIEWPORT, MARGIN_AFTER, MARGIN_BEFORE, NodeID, REUSE_LEFT, REUSE_RIGHT } from "./constants";
import { ParseRegion } from "./region";
import { EmbeddedParserProp, perfy } from "./util";
/**
* Factory for correctly instantiating {@link Parser} instances. To
* CodeMirror, this class is the `parser`, while a {@link Parser} is a
* single running parse spawned by that parser.
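*
* @example
* // Rough usage sketch, illustrative only: `lang` stands in for a
* // TarnationLanguage instance configured elsewhere. Because this class
* // extends the Lezer Parser class, it can be used like any Lezer parser.
* const factory = new ParserFactory(lang)
* const tree = factory.parse("some document text")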
*/
export class ParserFactory extends CodeMirrorParser {
language;
wrapper;
/**
* @param language - The {@link TarnationLanguage} that this factory
* passes to the {@link Parser} instances it constructs.
*/
constructor(language) {
super();
this.language = language;
this.wrapper = parseMixed(this.nest.bind(this));
}
createParse(input, fragments, ranges) {
const parser = new Parser(this.language, input, fragments, ranges);
return DISABLED_NESTED ? parser : this.wrapper(parser, input, fragments, ranges);
}
/**
* Special "nest" function provided to the `parseMixed` function.
* Determines whether a node indicates a nested parsing region and, if
* so, returns a `NestedParser` for that region.
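*
* @example
* // Hypothetical shape of a user-provided `configure.nest` callback; the
* // node name "CodeBlock" and the language name "javascript" are purely
* // illustrative. Returning a `name` (and optionally an `overlay`) marks
* // the node as a nested region handled by the language matching that name.
* const nest = (cursor, input) => {
*   if (cursor.type.name === "CodeBlock") return { name: "javascript" }
*   return null
* }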
*/
nest(cursor, input) {
// don't bother with empty nodes
if (cursor.from === cursor.to)
return null;
let name;
let overlay;
// let's try the configured function first
if (this.language.configure.nest) {
const result = this.language.configure.nest(cursor, input);
if (result)
({ name, overlay } = result);
}
// didn't work (or didn't exist), try the default
// get name from the per-node property, use entire node as range
if (!name)
name = cursor.type.prop(EmbeddedParserProp);
// nothing found
if (!name)
return null;
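// the list of nestable languages may be given either directly as an
// array or as a facet that has to be read from the editor state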
let langs;
if (!(this.language.nestLanguages instanceof Array)) {
const context = ParseContext.get();
langs = context ? context.state.facet(this.language.nestLanguages) : [];
}
else {
langs = this.language.nestLanguages;
}
const lang = LanguageDescription.matchLanguageName(langs, name);
// language doesn't exist
if (!lang)
return null;
// language already loaded
if (lang.support)
return { parser: lang.support.language.parser, overlay };
// language not loaded yet
return { parser: ParseContext.getSkippingParser(lang.load()), overlay };
}
}
/**
* The `Parser` is the main interface for tokenizing and parsing, and what
* CodeMirror directly interacts with.
*
* Additionally, the `Parser` handles the recovery of grammar state from
* the stale trees provided by CodeMirror, and then uses this data to
* restart tokenization with reused tokens.
*
* Note that the `Parser` is not a persistent object. It is discarded as
* soon as the parse is done, which means that its startup time is very
* significant.
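*
* @example
* // Sketch of the loop CodeMirror effectively drives, illustrative only:
* // `factory` stands in for a ParserFactory. `advance()` returns null
* // until the parse is finished, at which point it yields the final Tree.
* const parse = factory.createParse(input, fragments, ranges)
* let tree = null
* while (!tree) tree = parse.advance()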
*/
export class Parser {
/**
* @param language - The language containing the grammar to use.
* @param input - The input document to parse.
* @param fragments - The fragments to be used for determining reuse of
* previous parses.
* @param ranges - The ranges of the document to parse.
*/
constructor(language, input, fragments, ranges) {
this.measurePerformance = perfy();
this.language = language;
this.stoppedAt = null;
const context = ParseContext.get();
const viewport = context?.viewport ?? undefined;
this.region = new ParseRegion(input, ranges, fragments, viewport);
// find cached data, if possible
if (REUSE_LEFT && fragments?.length) {
for (let idx = 0; idx < fragments.length; idx++) {
const f = fragments[idx];
// make sure fragment is within the region of the document we care about
if (f.from > this.region.from || f.to < this.region.from)
continue;
// try to find the buffer for this fragment's tree in the cache
const buffer = this.find(f.tree, this.region.from, f.to);
if (buffer) {
// try to find a suitable chunk from the buffer to restart tokenization from
const { chunk, index } = buffer.search(this.region.edit.from, -1);
if (chunk && index !== null) {
// split the buffer, reuse the left side,
// but keep the right side around for reuse as well
const { left, right } = buffer.split(index);
this.previousRight = right;
this.region.from = chunk.from;
this.buffer = left;
this.state = chunk.state.clone();
}
}
}
}
this.parsedPos = this.region.from;
// if we couldn't reuse state, we'll need to start things off with a default state
if (!this.buffer || !this.state) {
this.buffer = new ChunkBuffer();
this.state = this.language.grammar.startState();
}
this.compiler = new Compiler(this.language, this.buffer);
// if we reused left, we'll catch the compiler up to the current position
if (this.buffer.chunks.length)
this.compiler.advanceFully();
}
/** True if the parser is done. */
get done() {
return this.parsedPos >= this.region.to;
}
/**
* Notifies the parser to not progress past the given position.
*
* @param pos - The position to stop at.
*/
stopAt(pos) {
this.region.to = pos;
this.stoppedAt = pos;
}
/** Advances tokenization one step. */
advance() {
// if we're told to stop, we need to BAIL
if (this.stoppedAt && this.parsedPos >= this.stoppedAt) {
return this.finish();
}
this.nextChunk();
this.compiler.step();
if (this.done)
return this.finish();
return null;
}
finish() {
const start = this.region.original.from;
const length = this.region.original.length;
const tree = this.compiler.compile(start, length);
if (LIMIT_TO_VIEWPORT) {
const context = ParseContext.get();
// inform editor that we skipped everything past the viewport
if (context &&
!this.stoppedAt &&
this.parsedPos > context.viewport.to &&
this.parsedPos < this.region.original.to) {
context.skipUntilInView(this.parsedPos, this.region.original.to);
}
}
this.performance = this.measurePerformance();
this.language.performance = this.performance;
return tree;
}
/** Advances the parser to the next chunk. */
nextChunk() {
// this condition is a little misleading,
// as we're actually going to break out when any chunk is emitted.
// however, if we're at the "last chunk", this condition catches that
while (this.parsedPos < this.region.to) {
if (REUSE_RIGHT) {
// try to reuse ahead state
const reused = this.previousRight && this.tryToReuse(this.previousRight);
// can't reuse the buffer more than once (pointless)
if (reused)
this.previousRight = undefined;
}
const pos = this.parsedPos;
const startState = this.state.clone();
let matchTokens = null;
let length = 0;
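// read a window of text around the current position, padded by
// MARGIN_BEFORE and MARGIN_AFTER so the grammar has surrounding context,
// then match against it starting from the current position's offset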
const start = Math.max(pos - MARGIN_BEFORE, this.region.from);
const startCompensated = this.region.compensate(pos, start - pos);
const str = this.region.read(startCompensated, MARGIN_AFTER, this.region.to);
const match = this.language.grammar.match(this.state, str, pos - start, pos);
if (match) {
this.state = match.state;
matchTokens = match.compile();
length = match.length;
}
// if we didn't match, we'll advance with an error token to prevent getting stuck
else {
matchTokens = [[NodeID.ERROR_ADVANCE, pos, pos + 1]];
length = 1;
}
this.parsedPos = this.region.compensate(pos, length);
let addedChunk = false;
for (let idx = 0; idx < matchTokens.length; idx++) {
const t = matchTokens[idx];
if (!this.region.contiguous) {
const from = this.region.compensate(pos, t[1] - pos);
const end = this.region.compensate(pos, t[2] - pos);
t[1] = from;
t[2] = end;
}
if (this.buffer.add(startState, t))
addedChunk = true;
}
if (addedChunk)
return true;
}
return false;
}
/**
* Tries to reuse a buffer _ahead_ of the current position. Returns true
* if this was successful, otherwise false.
*
* @param right - The buffer to try and reuse.
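*
* @example
* // Illustrative only: this mirrors how `nextChunk` uses the method with
* // the right-hand buffer that was split off during startup.
* // if (this.previousRight && this.tryToReuse(this.previousRight)) {
* //   this.previousRight = undefined
* // }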
*/
tryToReuse(right) {
// can't reuse if we don't know the safe regions
if (!this.region.edit)
return false;
// can only safely reuse if we're ahead of the edited region
if (this.parsedPos <= this.region.edit.to)
return false;
// check every chunk and see if we can reuse it
for (let idx = 0; idx < right.chunks.length; idx++) {
const chunk = right.chunks[idx];
if (chunk.isReusable(this.state, this.parsedPos, this.region.edit.offset)) {
right.slide(idx, this.region.edit.offset, true);
this.buffer.link(right, this.region.original.length);
this.buffer.ensureLast(this.parsedPos, this.state);
this.state = this.buffer.last.state.clone();
this.parsedPos = this.buffer.last.from;
return true;
}
}
return false;
}
/**
* Returns the first chunk buffer found within a tree, if any.
*
* @param tree - The tree to search through, recursively.
* @param from - The start of the search area.
* @param to - The end of the search area.
* @param offset - An offset added to the tree's positions, so that they
* may match some other source's positions.
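*
* @example
* // Illustrative only: this mirrors how the constructor recovers a cached
* // chunk buffer from a stale fragment handed over by CodeMirror.
* // const buffer = this.find(fragment.tree, this.region.from, fragment.to)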
*/
find(tree, from, to, offset = 0) {
const bundle = offset >= from && offset + tree.length >= to
? tree.prop(this.language.stateProp)
: undefined;
if (bundle)
return bundle;
// recursively check children
for (let i = tree.children.length - 1; i >= 0; i--) {
const child = tree.children[i];
const pos = offset + tree.positions[i];
if (!(child instanceof Tree && pos < to))
continue;
const found = this.find(child, from, to, pos);
if (found)
return found;
}
return null;
}
}
//# sourceMappingURL=parser.js.map