@digitak/grubber
Version:
Parse code files and patch them without having to use an AST
155 lines (154 loc) • 5.93 kB
JavaScript
import Fragment from "./Fragment.js";
import languages from "./languages/index.js";
/**
 * Scans a source string with regular-expression rules and reports the
 * matched fragments, without building a syntax tree.
 *
 * As used by this class, a rule is an object of one of two shapes:
 *  - standalone: `{ expression, onExpressionMatch? }` — the fragment is the
 *    text matched by `expression`;
 *  - delimited: `{ startAt, stopAt, onStartMatch?, onStopMatch? }` — the
 *    fragment runs from the `startAt` match to the end of the first accepted
 *    `stopAt` match.
 * Patterns may be strings or RegExp objects; only their source is used, so
 * flags on a RegExp are ignored. Only rules whose `type` is `"match"` are
 * reported to the `onMatch` callback; every other rule is consumed silently.
 * Any `on...Match` callback can veto a match by returning exactly `false`.
 *
 * Errors raised by this class are thrown as plain strings, which is kept for
 * backward compatibility with callers comparing against them.
 */
export default class Parser {
  /**
   * @param {string} content - The text to scan.
   * @param {string | object[]} [rules] - Either a key of the imported
   *   `languages` table, or an explicit list of rules. When omitted (or
   *   null) the parser starts with no rules.
   */
  constructor(content, rules) {
    this.content = content;
    // Compiled stop expressions, keyed by rule object.
    this.stopExpressionCache = new Map();
    this._rules = [];
    if (typeof rules === "string") {
      this.language = rules;
    } else {
      // Fall back to an empty list so that spreading `this.rules` in
      // find()/replace() never fails on undefined.
      this._rules = rules == null ? [] : rules;
    }
  }

  /**
   * The base rules of this parser. When a language was given, a new
   * language object is instantiated on every access and its `rules` are
   * returned; otherwise the list passed to the constructor is returned.
   */
  get rules() {
    if (this.language) {
      return new languages[this.language]().rules;
    }
    return this._rules;
  }

  /**
   * Collects every fragment matched by one of the given expressions. The
   * base rules still apply, so text consumed by a base rule is not searched.
   *
   * @param {...(string | RegExp)} expressions - Patterns to look for.
   * @returns {Fragment[]} The reported fragments, in order of appearance.
   */
  find(...expressions) {
    const result = [];
    const rules = [
      ...expressions.map((expression) => ({ type: "match", expression })),
      ...this.rules,
    ];
    this.parse(rules, (fragment) => result.push(fragment));
    return result;
  }

  /**
   * Returns a copy of the content in which every fragment matched by a
   * `from` pattern is rewritten with the corresponding `to` replacement.
   *
   * @param {...{ from: string | RegExp, to: * }} fromTos - Replacement
   *   pairs; `to` is handed to `String.prototype.replace` as-is.
   * @returns {string} The patched content.
   */
  replace(...fromTos) {
    let result = "";
    let offset = 0;
    const rules = [
      ...fromTos.map(({ from, to }) => {
        const source = typeof from === "string" ? from : from.source;
        return {
          type: "match",
          expression: from,
          // The non-capturing group makes both anchors apply to the whole
          // pattern, even when it is an alternation such as `a|b`.
          from: new RegExp(`^(?:${source})$`),
          to,
        };
      }),
      ...this.rules,
    ];
    this.parse(rules, (fragment, rule) => {
      // Copy the untouched text that sits before this fragment.
      result += this.content.slice(offset, fragment.start);
      if (rule.from !== undefined && rule.to !== undefined) {
        result += fragment.slice.replace(rule.from, rule.to);
      } else {
        // A base rule of type "match": keep its text unchanged.
        result += fragment.slice;
      }
      offset = fragment.end;
    });
    result += this.content.slice(offset);
    return result;
  }

  /**
   * Finds the fragments matched by the `importExpression` of the parser's
   * language.
   *
   * @returns {Fragment[]}
   * @throws {string} When no language was given or when the language has no
   *   `importExpression`.
   */
  findDependencies() {
    if (!this.language) {
      throw "[findDependencies] No language specified";
    }
    const importExpression = languages[this.language].importExpression;
    if (!importExpression) {
      throw `[findDependencies] No importExpression for language ${this.language}`;
    }
    return this.find(importExpression);
  }

  /**
   * Rewrites every fragment matched by the `importExpression` of the
   * parser's language.
   *
   * @param {*} to - Replacement handed to `String.prototype.replace`.
   * @returns {string} The patched content.
   * @throws {string} When no language was given or when the language has no
   *   `importExpression`.
   */
  replaceDependencies(to) {
    if (!this.language) {
      throw "[replaceDependencies] No language specified";
    }
    const importExpression = languages[this.language].importExpression;
    if (!importExpression) {
      throw `[replaceDependencies] No importExpression for language ${this.language}`;
    }
    return this.replace({ from: importExpression, to });
  }

  /**
   * Walks the content from left to right. At each position the first rule
   * that accepts the text wins, its whole fragment is consumed, and
   * `onMatch(fragment, rule)` is called when the rule's type is "match".
   *
   * @param {object[]} rules - Rules in priority order.
   * @param {(fragment: Fragment, rule: object) => void} onMatch
   */
  parse(rules, onMatch) {
    // Without rules nothing can match; the combined expression would be
    // empty and match everywhere, so stop before building it.
    if (rules.length === 0) {
      return;
    }
    const nextMatch = this.getNextMatchExpression(rules);
    let match;
    while ((match = nextMatch.exec(this.content))) {
      const matchingRule = this.getMatchingRule(rules, match);
      if (matchingRule == null) {
        // Every candidate rule vetoed this match. exec() does not advance
        // past a zero-length match, so step forward manually.
        if (nextMatch.lastIndex <= match.index) {
          nextMatch.lastIndex = match.index + 1;
        }
        continue;
      }
      const [rule, ruleMatch] = matchingRule;
      const fragment = this.resolveFragment(rule, match, ruleMatch.slice(1));
      if (!fragment) {
        // the fragment has no end (good chances it is a syntax error)
        console.warn("No stop found for rule", rule);
        break;
      }
      // Resume after the fragment, always moving forward by at least one
      // character so a zero-length fragment cannot be matched forever.
      nextMatch.lastIndex =
        fragment.end > match.index ? fragment.end : match.index + 1;
      if (rule.type === "match") {
        onMatch(fragment, rule);
      }
    }
  }

  /**
   * @param {object} rule
   * @returns {string} The regex source that starts the rule: its
   *   `expression` when it has one, its `startAt` otherwise.
   */
  getRuleExpression(rule) {
    const expression = "expression" in rule ? rule.expression : rule.startAt;
    return typeof expression === "string" ? expression : expression.source;
  }

  /**
   * Returns the compiled (global, multiline) `stopAt` expression of a
   * delimited rule, compiling it once per rule object.
   *
   * @param {object} rule
   * @returns {RegExp}
   * @throws {string} When the rule is a standalone expression.
   */
  getRuleStopExpression(rule) {
    let expression = this.stopExpressionCache.get(rule);
    if (expression) {
      return expression;
    }
    if ("expression" in rule) {
      throw "A standalone expression has no stop delimiter";
    }
    const source =
      typeof rule.stopAt === "string" ? rule.stopAt : rule.stopAt.source;
    expression = new RegExp(source, "gm");
    this.stopExpressionCache.set(rule, expression);
    return expression;
  }

  /**
   * Builds one global, multiline expression that is the alternation of the
   * start expression of every rule, in rule order.
   *
   * @param {object[]} rules
   * @returns {RegExp}
   */
  getNextMatchExpression(rules) {
    const alternatives = rules.map(
      (rule) => `(?:${this.getRuleExpression(rule)})`
    );
    return new RegExp(alternatives.join("|"), "gm");
  }

  /**
   * Finds which rule produced a match of the combined expression by testing
   * each rule's own expression against the matched text.
   *
   * @param {object[]} rules - Rules in priority order.
   * @param {RegExpExecArray} match - A match of the combined expression.
   * @returns {[object, RegExpMatchArray] | null} The first rule covering the
   *   whole matched text and not vetoed by its callback, with its own match;
   *   `null` when every covering rule was vetoed.
   * @throws {string} When no rule covers the matched text at all.
   */
  getMatchingRule(rules, match) {
    const [input] = match;
    let noMatch = true;
    for (const rule of rules) {
      const ruleExpression = this.getRuleExpression(rule);
      const ruleMatch = input.match(ruleExpression);
      // The rule must cover the matched text from its first to last character.
      if (
        !ruleMatch ||
        ruleMatch.index !== 0 ||
        ruleMatch[0].length !== input.length
      ) {
        continue;
      }
      noMatch = false;
      if ("expression" in rule) {
        if (
          "onExpressionMatch" in rule &&
          rule.onExpressionMatch &&
          rule.onExpressionMatch(ruleMatch) === false
        ) {
          continue;
        }
      } else if (rule.onStartMatch && rule.onStartMatch(ruleMatch) === false) {
        continue;
      }
      return [rule, ruleMatch];
    }
    if (noMatch) {
      console.log("[getMatchingRule] No rules matched with match:", match);
      throw "[getMatchingRule] No rules matched";
    }
    return null;
  }

  /**
   * Builds the fragment opened by `match` for the given rule.
   *
   * @param {object} rule - The rule that accepted the match.
   * @param {RegExpExecArray} match - The match in the content.
   * @param {string[]} [groups] - Capture groups forwarded to the fragment.
   * @returns {Fragment | null} The fragment, or `null` when a delimited rule
   *   finds no accepted stop before the end of the content.
   */
  resolveFragment(rule, match, groups = []) {
    const start = match.index;
    const input = match[0];
    const offset = start + input.length;
    if ("expression" in rule) {
      return new Fragment(this.content, start, offset, groups);
    }
    const nextStop = this.getRuleStopExpression(rule);
    // The stop search begins on the last character of the start match, not
    // right after it. NOTE(review): presumably so a stop pattern can consume
    // that character — confirm against the language rule definitions.
    nextStop.lastIndex = offset - 1;
    let stop;
    while ((stop = nextStop.exec(this.content))) {
      if (!rule.onStopMatch || rule.onStopMatch(stop) !== false) {
        return new Fragment(
          this.content,
          start,
          stop.index + stop[0].length,
          groups
        );
      }
    }
    return null;
  }
}