eslint-plugin-regexp
Version:
ESLint plugin for finding RegExp mistakes and RegExp style guide violations.
400 lines (399 loc) • 15.3 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
const refa_1 = require("refa");
const regexp_ast_analysis_1 = require("regexp-ast-analysis");
const utils_1 = require("../utils");
const lexicographically_smallest_1 = require("../utils/lexicographically-smallest");
const refa_2 = require("../utils/refa");
const cache = new Map();
function getAllowedChars(flags) {
(0, refa_2.assertValidFlags)(flags);
const cacheKey = (flags.ignoreCase ? "i" : "") +
(flags.unicode ? "u" : "") +
(flags.unicodeSets ? "v" : "");
let result = cache.get(cacheKey);
if (result === undefined) {
result = {
allowed: refa_1.JS.createCharSet([
{ kind: "word", negate: false },
{ min: utils_1.CP_SPACE, max: utils_1.CP_SPACE },
{ min: utils_1.CP_PLUS, max: utils_1.CP_PLUS },
{ min: utils_1.CP_MINUS, max: utils_1.CP_MINUS },
{ min: utils_1.CP_STAR, max: utils_1.CP_STAR },
{ min: utils_1.CP_SLASH, max: utils_1.CP_SLASH },
{ min: utils_1.CP_APOSTROPHE, max: utils_1.CP_APOSTROPHE },
{ min: utils_1.CP_QUESTION, max: utils_1.CP_QUESTION },
], flags),
required: regexp_ast_analysis_1.Chars.word(flags),
};
cache.set(cacheKey, result);
}
return result;
}
function containsOnlyLiterals(element) {
return !(0, regexp_ast_analysis_1.hasSomeDescendant)(element, (d) => {
return (d.type === "Backreference" ||
d.type === "CharacterSet" ||
(d.type === "Quantifier" && d.max === Infinity) ||
(d.type === "CharacterClass" && d.negate) ||
(d.type === "ExpressionCharacterClass" && d.negate));
}, (d) => d.type !== "Assertion");
}
const lssCache = new WeakMap();
function cachedApproximateLexicographicallySmallest(alternative, parser, flags) {
let cached = lssCache.get(alternative);
if (cached === undefined) {
cached = approximateLexicographicallySmallest(alternative, parser, flags);
lssCache.set(alternative, cached);
}
return cached;
}
const LONGEST_PREFIX_OPTIONS = {
includeAfter: true,
onlyInside: true,
looseGroups: true,
};
function approximateLexicographicallySmallest(alternative, parser, flags) {
const lss = getLexicographicallySmallestFromAlternative(alternative, parser, flags);
if (lss !== undefined)
return lss;
const prefix = (0, regexp_ast_analysis_1.getLongestPrefix)(alternative, "ltr", flags, LONGEST_PREFIX_OPTIONS);
return getLexicographicallySmallestFromCharSets(prefix);
}
function getLexicographicallySmallestFromAlternative(alternative, parser, flags) {
if (alternative.type === "StringAlternative" ||
hasOnlyCharacters(alternative, flags)) {
const smallest = [];
for (const e of alternative.elements) {
const cs = (0, regexp_ast_analysis_1.toUnicodeSet)(e, flags).chars;
if (cs.isEmpty)
return undefined;
smallest.push(cs.ranges[0].min);
}
return smallest;
}
if (isOnlyCharacterElements(alternative.elements)) {
return (0, lexicographically_smallest_1.getLexicographicallySmallestInConcatenation)(alternative.elements.map((e) => (0, regexp_ast_analysis_1.toUnicodeSet)(e, flags)));
}
try {
const result = parser.parseElement(alternative, {
assertions: "unknown",
backreferences: "disable",
maxBackreferenceWords: 4,
maxNodes: 1000,
});
const expression = (0, refa_1.transform)({
onConcatenation(concat) {
concat.elements = concat.elements.filter((e) => e.type !== "Unknown");
},
}, result.expression);
const nfa = refa_1.NFA.fromRegex(expression, { maxCharacter: result.maxCharacter }, {}, new refa_1.NFA.LimitedNodeFactory(1000));
return getLexicographicallySmallestFromNfa(nfa.initial, nfa.finals);
}
catch (_error) {
return undefined;
}
}
function isOnlyCharacterElements(nodes) {
return nodes.every((e) => e.type === "Character" ||
e.type === "CharacterClass" ||
e.type === "CharacterSet" ||
e.type === "ExpressionCharacterClass");
}
function hasOnlyCharacters(alternative, flags) {
return (isOnlyCharacterElements(alternative.elements) &&
alternative.elements.every((e) => !(0, regexp_ast_analysis_1.hasStrings)(e, flags)));
}
function getLexicographicallySmallestFromNfa(initial, finals) {
const smallest = [];
let currentStates = [initial];
const newStatesSet = new Set();
const MAX_LENGTH = 1000;
for (let i = 0; i < MAX_LENGTH; i++) {
if (currentStates.some((n) => finals.has(n))) {
return smallest;
}
let min = Infinity;
for (const state of currentStates) {
state.out.forEach((charSet) => {
if (!charSet.isEmpty) {
min = Math.min(min, charSet.ranges[0].min);
}
});
}
if (min === Infinity) {
return undefined;
}
smallest.push(min);
const newStates = [];
newStatesSet.clear();
for (const state of currentStates) {
state.out.forEach((charSet, to) => {
if (charSet.has(min) && !newStatesSet.has(to)) {
newStates.push(to);
newStatesSet.add(to);
}
});
}
currentStates = newStates;
}
return undefined;
}
function getLexicographicallySmallestFromCharSets(word) {
const result = [];
for (const set of word) {
if (set.isEmpty)
break;
result.push(set.ranges[0].min);
}
return result;
}
function compareByteOrder(a, b) {
if (a === b) {
return 0;
}
return a < b ? -1 : +1;
}
function compareCharSets(a, b) {
const aRanges = a.ranges;
const bRanges = b.ranges;
for (let i = 0; i < aRanges.length && i < bRanges.length; i++) {
const aR = aRanges[i];
const bR = bRanges[i];
if (aR.min !== bR.min)
return aR.min - bR.min;
if (aR.max !== bR.max) {
if (aR.max < bR.max) {
return i + 1 < aRanges.length ? +1 : -1;
}
else {
return i + 1 < bRanges.length ? -1 : +1;
}
}
}
return aRanges.length - bRanges.length;
}
function compareCharSetStrings(a, b) {
const l = Math.min(a.length, b.length);
for (let i = 0; i < l; i++) {
const diff = compareCharSets(a[i], b[i]);
if (diff !== 0) {
return diff;
}
}
return a.length - b.length;
}
function compareWords(a, b) {
const l = Math.min(a.length, b.length);
for (let i = 0; i < l; i++) {
const aI = a[i];
const bI = b[i];
if (aI !== bI)
return aI - bI;
}
return a.length - b.length;
}
function sortAlternatives(alternatives, parser, flags) {
alternatives.sort((a, b) => {
const lssDiff = compareWords(cachedApproximateLexicographicallySmallest(a, parser, flags), cachedApproximateLexicographicallySmallest(b, parser, flags));
if (lssDiff !== 0) {
return lssDiff;
}
const prefixDiff = compareCharSetStrings((0, regexp_ast_analysis_1.getLongestPrefix)(a, "ltr", flags, LONGEST_PREFIX_OPTIONS), (0, regexp_ast_analysis_1.getLongestPrefix)(b, "ltr", flags, LONGEST_PREFIX_OPTIONS));
if (prefixDiff !== 0) {
return prefixDiff;
}
if (flags.ignoreCase) {
return (compareByteOrder(a.raw.toUpperCase(), b.raw.toUpperCase()) ||
compareByteOrder(a.raw, b.raw));
}
return compareByteOrder(a.raw, b.raw);
});
}
function sortStringAlternatives(alternatives, parser, flags) {
alternatives.sort((a, b) => {
const lssDiff = compareWords(getLexicographicallySmallestFromAlternative(a, parser, flags), getLexicographicallySmallestFromAlternative(b, parser, flags));
return lssDiff;
});
}
function isIntegerString(str) {
return /^(?:0|[1-9]\d*)$/u.test(str);
}
function trySortNumberAlternatives(alternatives) {
const runs = getRuns(alternatives, (a) => isIntegerString(a.raw));
for (const { startIndex, elements } of runs) {
elements.sort((a, b) => {
return Number(a.raw) - Number(b.raw);
});
alternatives.splice(startIndex, elements.length, ...elements);
}
}
function getReorderingBounds(original, reorder) {
if (original.length !== reorder.length) {
return undefined;
}
const len = original.length;
let first = 0;
for (; first < len && original[first] === reorder[first]; first++)
;
if (first === len) {
return undefined;
}
let last = len - 1;
for (; last >= 0 && original[last] === reorder[last]; last--)
;
return [first, last];
}
function getRuns(iter, condFn) {
const runs = [];
let elements = [];
let index = 0;
for (const item of iter) {
if (condFn(item)) {
elements.push(item);
}
else {
if (elements.length > 0) {
runs.push({ startIndex: index - elements.length, elements });
elements = [];
}
}
index++;
}
if (elements.length > 0) {
runs.push({ startIndex: index - elements.length, elements });
elements = [];
}
return runs;
}
exports.default = (0, utils_1.createRule)("sort-alternatives", {
meta: {
docs: {
description: "sort alternatives if order doesn't matter",
category: "Best Practices",
recommended: false,
},
fixable: "code",
schema: [],
messages: {
sort: "The {{alternatives}} can be sorted without affecting the regex.",
},
type: "suggestion",
},
create(context) {
const sliceMinLength = 3;
function createVisitor(regexpContext) {
const { node, getRegexpLocation, fixReplaceNode, flags } = regexpContext;
const allowedChars = getAllowedChars(flags);
const possibleCharsCache = new Map();
const parser = (0, refa_2.getParser)(regexpContext);
function getPossibleChars(a) {
let chars = possibleCharsCache.get(a);
if (chars === undefined) {
chars = (0, regexp_ast_analysis_1.getConsumedChars)(a, flags).chars;
}
return chars;
}
function trySortRun(run) {
const alternatives = run.elements;
if ((0, regexp_ast_analysis_1.canReorder)(alternatives, flags)) {
sortAlternatives(alternatives, parser, flags);
trySortNumberAlternatives(alternatives);
}
else {
const consumedChars = regexp_ast_analysis_1.Chars.empty(flags).union(...alternatives.map(getPossibleChars));
if (!consumedChars.isDisjointWith(regexp_ast_analysis_1.Chars.digit(flags))) {
const runs = getRuns(alternatives, (a) => isIntegerString(a.raw));
for (const { startIndex: index, elements } of runs) {
if (elements.length > 1 &&
(0, regexp_ast_analysis_1.canReorder)(elements, flags)) {
trySortNumberAlternatives(elements);
alternatives.splice(index, elements.length, ...elements);
}
}
}
}
enforceSorted(run, "alternatives of this group");
}
function enforceSorted(run, alternatives) {
const sorted = run.elements;
const parent = sorted[0].parent;
const unsorted = parent.alternatives.slice(run.startIndex, run.startIndex + sorted.length);
const bounds = getReorderingBounds(unsorted, sorted);
if (!bounds) {
return;
}
const loc = getRegexpLocation({
start: unsorted[bounds[0]].start,
end: unsorted[bounds[1]].end,
});
context.report({
node,
loc,
messageId: "sort",
data: { alternatives },
fix: fixReplaceNode(parent, () => {
const prefix = parent.raw.slice(0, unsorted[0].start - parent.start);
const suffix = parent.raw.slice(unsorted[unsorted.length - 1].end - parent.start);
return (prefix + sorted.map((a) => a.raw).join("|") + suffix);
}),
});
}
function onParent(parent) {
if (parent.alternatives.length < 2) {
return;
}
const runs = getRuns(parent.alternatives, (a) => {
if (!containsOnlyLiterals(a)) {
return false;
}
const consumedChars = getPossibleChars(a);
if (consumedChars.isEmpty) {
return false;
}
if (!consumedChars.isSubsetOf(allowedChars.allowed)) {
return false;
}
if (consumedChars.isDisjointWith(allowedChars.required)) {
return false;
}
return true;
});
if (runs.length === 1 &&
runs[0].elements.length === parent.alternatives.length) {
trySortRun(runs[0]);
}
else {
for (const run of runs) {
if (run.elements.length >= sliceMinLength &&
run.elements.length >= 2) {
trySortRun(run);
}
}
}
}
function onClassStringDisjunction(parent) {
if (parent.alternatives.length < 2) {
return;
}
const alternatives = [...parent.alternatives];
sortStringAlternatives(alternatives, parser, flags);
trySortNumberAlternatives(alternatives);
const run = {
startIndex: 0,
elements: [...alternatives],
};
enforceSorted(run, "string alternatives");
}
return {
onGroupEnter: onParent,
onPatternEnter: onParent,
onCapturingGroupEnter: onParent,
onClassStringDisjunctionEnter: onClassStringDisjunction,
};
}
return (0, utils_1.defineRegexpVisitor)(context, {
createVisitor,
});
},
});
;