@reliverse/rematch
Version:
@reliverse/rematch is a high-performance, minimal glob matcher with micromatch-level power, zepto-level size, and reliverse-grade DX.
779 lines (778 loc) • 24.5 kB
JavaScript
import {
MAX_LENGTH,
POSIX_REGEX_SOURCE,
REGEX_NON_SPECIAL_CHARS,
REGEX_SPECIAL_CHARS_BACKREF,
REPLACEMENTS,
globChars,
extglobChars
} from "./constants.js";
import * as utils from "./utils.js";
/**
 * Produce the regex source for a brace range (the values found between the
 * braces of a `{a..b}` style pattern).
 *
 * When `options.expandRange` is a function it is called with the range values
 * spread out followed by `options`, and its result is returned untouched.
 * Otherwise the values are sorted in place and joined with "-" inside a
 * character class; if that class does not compile as a RegExp, the
 * regex-escaped values joined with ".." are returned instead.
 *
 * @param {string[]} args - Range values; sorted in place on the default path.
 * @param {object} options - Parser options (only `expandRange` is read here).
 * @returns {*} Regex source string, or the custom expander's return value.
 */
const expandRange = (args, options) => {
  const hasCustomExpander = typeof options.expandRange === "function";
  if (hasCustomExpander) {
    return options.expandRange(...args, options);
  }

  args.sort();
  const charClass = "[" + args.join("-") + "]";

  // Probe whether the character class is a valid regular expression.
  let compiles = true;
  try {
    new RegExp(charClass);
  } catch (_err) {
    compiles = false;
  }

  if (compiles) {
    return charClass;
  }
  return args.map((item) => utils.escapeRegex(item)).join("..");
};
/**
 * Format the message used when a delimiter is unbalanced.
 *
 * @param {string} type - Which side is missing (callers pass "opening" or "closing").
 * @param {string} char - The delimiter character in question.
 * @returns {string} Human-readable message including an escaping hint.
 */
const syntaxError = (type, char) =>
  `Missing ${type}: "${char}" - use "\\\\${char}" to match literal characters`;
/**
 * Parse a glob pattern into a token list and a regex source string.
 *
 * The pattern is scanned one character at a time. Each character (or run of
 * characters) becomes a token whose `output` is appended to `state.output`.
 * Patterns without leading `*`/`!` and without slashes, brackets, braces,
 * parens or quotes take a single-pass "fast path" based on string replacement.
 *
 * @param {string} input - Glob pattern to parse.
 * @param {object} [options] - Parser options. Options read by this function:
 *   `maxLength`, `prepend`, `capture`, `windows`, `dot`, `bash`, `noext`,
 *   `noextglob`, `fastpaths`, `unescape`, `contains`, `posix`, `keepQuotes`,
 *   `strictBrackets`, `nobracket`, `literalBrackets`, `nobrace`, `expandRange`,
 *   `nonegate`, `regex`, `noglobstar`, `strictSlashes`.
 * @returns {object} The parser state; `state.output` holds the regex source and
 *   `state.tokens` the token list.
 * @throws {TypeError} If `input` is not a string.
 * @throws {SyntaxError} If `input` exceeds the maximum length, or if
 *   `strictBrackets` is enabled and a delimiter is unbalanced.
 */
const parse = (input, options) => {
  if (typeof input !== "string") {
    throw new TypeError("Expected a string");
  }
  // Only honor own entries of REPLACEMENTS. A bare `REPLACEMENTS[input]` on a
  // plain object also resolves inherited keys ("constructor", "toString", ...)
  // to functions, which would replace the pattern with a non-string value.
  if (Object.prototype.hasOwnProperty.call(REPLACEMENTS, input)) {
    input = REPLACEMENTS[input] || input;
  }
  const opts = { ...options };
  const max = typeof opts.maxLength === "number" ? Math.min(MAX_LENGTH, opts.maxLength) : MAX_LENGTH;
  let len = input.length;
  if (len > max) {
    throw new SyntaxError(
      `Input length: ${len}, exceeds maximum allowed length: ${max}`
    );
  }
  // "bos" (beginning of string) is always the first token.
  const bos = { type: "bos", value: "", output: opts.prepend || "" };
  const tokens = [bos];
  // Groups are non-capturing unless `opts.capture` is set.
  const capture = opts.capture ? "" : "?:";
  const PLATFORM_CHARS = globChars(opts.windows);
  const EXTGLOB_CHARS = extglobChars(PLATFORM_CHARS);
  const {
    DOT_LITERAL,
    PLUS_LITERAL,
    SLASH_LITERAL,
    ONE_CHAR,
    DOTS_SLASH,
    NO_DOT,
    NO_DOT_SLASH,
    NO_DOTS_SLASH,
    QMARK,
    QMARK_NO_DOT,
    STAR,
    START_ANCHOR
  } = PLATFORM_CHARS;
  // Regex source used for a globstar (`**`).
  const globstar = (opts2) => {
    return `(${capture}(?:(?!${START_ANCHOR}${opts2.dot ? DOTS_SLASH : DOT_LITERAL}).)*?)`;
  };
  const nodot = opts.dot ? "" : NO_DOT;
  const qmarkNoDot = opts.dot ? QMARK : QMARK_NO_DOT;
  let star = opts.bash === true ? globstar(opts) : STAR;
  if (opts.capture) {
    star = `(${star})`;
  }
  // `noext` is accepted as an alias for `noextglob`.
  if (typeof opts.noext === "boolean") {
    opts.noextglob = opts.noext;
  }
  const state = {
    input,
    index: -1,
    start: 0,
    dot: opts.dot === true,
    consumed: "",
    output: "",
    prefix: "",
    backtrack: false,
    negated: false,
    brackets: 0,
    braces: 0,
    parens: 0,
    quotes: 0,
    globstar: false,
    tokens
  };
  input = utils.removePrefix(input, state);
  len = input.length;
  const extglobs = [];
  const braces = [];
  const stack = [];
  let prev = bos;
  let value;

  /*
   * Tokenizing helpers
   */

  // True once the cursor sits on the last character.
  const eos = () => state.index === len - 1;
  // Look ahead `n` characters without moving the cursor.
  const peek = state.peek = (n = 1) => input[state.index + n];
  // Move the cursor forward one character and return it ("" past the end).
  const advance = state.advance = () => input[++state.index] || "";
  // Everything after the cursor.
  const remaining = () => input.slice(state.index + 1);
  const consume = (value2 = "", num = 0) => {
    state.consumed += value2;
    state.index += num;
  };
  // Add a token's output (or its value when output is null/undefined) to the result.
  const append = (token) => {
    state.output += token.output != null ? token.output : token.value;
    consume(token.value);
  };
  // Consume a run of leading "!" characters. `count` starts at 1 for the "!"
  // already read by the caller; an odd total marks the pattern as negated.
  const negate = () => {
    let count = 1;
    while (peek() === "!" && (peek(2) !== "(" || peek(3) === "?")) {
      advance();
      state.start++;
      count++;
    }
    if (count % 2 === 0) {
      return false;
    }
    state.negated = true;
    state.start++;
    return true;
  };
  // Track nesting depth per delimiter type, plus the order they were opened in.
  const increment = (type) => {
    state[type]++;
    stack.push(type);
  };
  const decrement = (type) => {
    state[type]--;
    stack.pop();
  };
  // Push a token onto the token list. Consecutive "text" tokens are merged
  // into the previous token instead of creating a new one, and a preceding
  // "globstar" is downgraded to a plain "star" when the new token is not one
  // of the kinds allowed to follow a globstar.
  const push = (tok) => {
    if (prev.type === "globstar") {
      const isBrace = state.braces > 0 && (tok.type === "comma" || tok.type === "brace");
      const isExtglob = tok.extglob === true || extglobs.length && (tok.type === "pipe" || tok.type === "paren");
      if (tok.type !== "slash" && tok.type !== "paren" && !isBrace && !isExtglob) {
        state.output = state.output.slice(0, -prev.output.length);
        prev.type = "star";
        prev.value = "*";
        prev.output = star;
        state.output += prev.output;
      }
    }
    // Record the raw text seen inside the innermost open extglob.
    if (extglobs.length && tok.type !== "paren") {
      extglobs[extglobs.length - 1].inner += tok.value;
    }
    if (tok.value || tok.output) append(tok);
    if (prev && prev.type === "text" && tok.type === "text") {
      prev.output = (prev.output || prev.value) + tok.value;
      prev.value += tok.value;
      return;
    }
    tok.prev = prev;
    tokens.push(tok);
    prev = tok;
  };
  // Open an extglob: `value2` is the prefix character and the "(" that follows
  // it is consumed here via advance().
  const extglobOpen = (type, value2) => {
    const token = { ...EXTGLOB_CHARS[value2], conditions: 1, inner: "" };
    token.prev = prev;
    token.parens = state.parens;
    token.output = state.output;
    const output = (opts.capture ? "(" : "") + token.open;
    increment("parens");
    push({ type, value: value2, output: state.output ? "" : ONE_CHAR });
    push({ type: "paren", extglob: true, value: advance(), output });
    extglobs.push(token);
  };
  // Close an extglob. Negation extglobs need a different closing sequence
  // depending on what they contain and on what follows them.
  const extglobClose = (token) => {
    let output = token.close + (opts.capture ? ")" : "");
    let rest;
    if (token.type === "negate") {
      let extglobStar = star;
      if (token.inner && token.inner.length > 1 && token.inner.includes("/")) {
        extglobStar = globstar(opts);
      }
      if (extglobStar !== star || eos() || /^\)+$/.test(remaining())) {
        output = token.close = `)$))${extglobStar}`;
      }
      if (token.inner.includes("*") && (rest = remaining()) && /^\.[^\\/.]+$/.test(rest)) {
        // The remainder is a simple ".ext" suffix: parse it separately (with
        // fast paths disabled) and embed its output in the closing sequence.
        const expression = parse(rest, { ...options, fastpaths: false }).output;
        output = token.close = `)${expression})${extglobStar})`;
      }
      if (token.prev.type === "bos") {
        state.negatedExtglob = true;
      }
    }
    push({ type: "paren", extglob: true, value, output });
    decrement("parens");
  };

  /*
   * Fast path: no leading "*" or "!", and no slashes, parens, brackets,
   * braces or double quotes anywhere in the pattern.
   */

  if (opts.fastpaths !== false && !/(^[*!]|[/()[\]{}"])/.test(input)) {
    let backslashes = false;
    let output = input.replace(
      REGEX_SPECIAL_CHARS_BACKREF,
      (m, esc, chars, first, rest, index) => {
        if (first === "\\") {
          backslashes = true;
          return m;
        }
        if (first === "?") {
          if (esc) {
            return esc + first + (rest ? QMARK.repeat(rest.length) : "");
          }
          if (index === 0) {
            return qmarkNoDot + (rest ? QMARK.repeat(rest.length) : "");
          }
          return QMARK.repeat(chars.length);
        }
        if (first === ".") {
          return DOT_LITERAL.repeat(chars.length);
        }
        if (first === "*") {
          if (esc) {
            return esc + first + (rest ? star : "");
          }
          return star;
        }
        return esc ? m : `\\${m}`;
      }
    );
    if (backslashes) {
      if (opts.unescape) {
        output = output.replace(/\\/g, "");
      } else {
        // Collapse each run of backslashes: even-length runs become two
        // backslashes, odd-length runs become one.
        output = output.replace(/\\+/g, (m) => {
          return m.length % 2 === 0 ? "\\\\" : m ? "\\" : "";
        });
      }
    }
    if (output === input && opts.contains === true) {
      state.output = input;
      return state;
    }
    state.output = utils.wrapOutput(output, state, options);
    return state;
  }

  /*
   * Tokenize input until we reach end-of-string
   */

  while (!eos()) {
    value = advance();
    if (value === "\0") {
      continue;
    }

    /*
     * Escaped characters
     */

    if (value === "\\") {
      const next = peek();
      if (next === "/" && opts.bash !== true) {
        continue;
      }
      if (next === "." || next === ";") {
        continue;
      }
      if (!next) {
        value += "\\";
        push({ type: "text", value });
        continue;
      }
      // Skip over runs of more than two further backslashes, keeping one
      // extra backslash when the run length is odd.
      const match = /^\\+/.exec(remaining());
      let slashes = 0;
      if (match && match[0].length > 2) {
        slashes = match[0].length;
        state.index += slashes;
        if (slashes % 2 !== 0) {
          value += "\\";
        }
      }
      if (opts.unescape === true) {
        value = advance();
      } else {
        value += advance();
      }
      if (state.brackets === 0) {
        push({ type: "text", value });
        continue;
      }
    }

    /*
     * Inside a bracket expression: keep accumulating into the bracket token
     * until the closing "]" is reached.
     */

    if (state.brackets > 0 && (value !== "]" || prev.value === "[" || prev.value === "[^")) {
      if (opts.posix !== false && value === ":") {
        const inner = prev.value.slice(1);
        if (inner.includes("[")) {
          prev.posix = true;
          if (inner.includes(":")) {
            // Look up the accumulated class name in POSIX_REGEX_SOURCE and,
            // when it is known, substitute the mapped regex source.
            const idx = prev.value.lastIndexOf("[");
            const pre = prev.value.slice(0, idx);
            const rest2 = prev.value.slice(idx + 2);
            const posix = POSIX_REGEX_SOURCE[rest2];
            if (posix) {
              prev.value = pre + posix;
              state.backtrack = true;
              advance();
              if (!bos.output && tokens.indexOf(prev) === 1) {
                bos.output = ONE_CHAR;
              }
              continue;
            }
          }
        }
      }
      if (value === "[" && peek() !== ":" || value === "-" && peek() === "]") {
        value = `\\${value}`;
      }
      if (value === "]" && (prev.value === "[" || prev.value === "[^")) {
        value = `\\${value}`;
      }
      if (opts.posix === true && value === "!" && prev.value === "[") {
        value = "^";
      }
      prev.value += value;
      append({ value });
      continue;
    }

    /*
     * Inside a double-quoted string: escape everything until the closing quote.
     */

    if (state.quotes === 1 && value !== '"') {
      value = utils.escapeRegex(value);
      prev.value += value;
      append({ value });
      continue;
    }

    /*
     * Double quotes
     */

    if (value === '"') {
      state.quotes = state.quotes === 1 ? 0 : 1;
      if (opts.keepQuotes === true) {
        push({ type: "text", value });
      }
      continue;
    }

    /*
     * Parentheses
     */

    if (value === "(") {
      increment("parens");
      push({ type: "paren", value });
      continue;
    }
    if (value === ")") {
      if (state.parens === 0 && opts.strictBrackets === true) {
        throw new SyntaxError(syntaxError("opening", "("));
      }
      // This ")" closes the innermost extglob when the paren depth matches
      // the depth recorded at the time that extglob was opened.
      const extglob = extglobs[extglobs.length - 1];
      if (extglob && state.parens === extglob.parens + 1) {
        extglobClose(extglobs.pop());
        continue;
      }
      push({ type: "paren", value, output: state.parens ? ")" : "\\)" });
      decrement("parens");
      continue;
    }

    /*
     * Square brackets
     */

    if (value === "[") {
      if (opts.nobracket === true || !remaining().includes("]")) {
        if (opts.nobracket !== true && opts.strictBrackets === true) {
          throw new SyntaxError(syntaxError("closing", "]"));
        }
        value = `\\${value}`;
      } else {
        increment("brackets");
      }
      push({ type: "bracket", value });
      continue;
    }
    if (value === "]") {
      if (opts.nobracket === true || prev && prev.type === "bracket" && prev.value.length === 1) {
        push({ type: "text", value, output: `\\${value}` });
        continue;
      }
      if (state.brackets === 0) {
        if (opts.strictBrackets === true) {
          throw new SyntaxError(syntaxError("opening", "["));
        }
        push({ type: "text", value, output: `\\${value}` });
        continue;
      }
      decrement("brackets");
      const prevValue = prev.value.slice(1);
      // A negated class that does not mention "/" gets "/" added to it.
      if (prev.posix !== true && prevValue.startsWith("^") && !prevValue.includes("/")) {
        value = `/${value}`;
      }
      prev.value += value;
      append({ value });
      // literalBrackets === false (or regex characters inside the class):
      // keep the character class as-is.
      if (opts.literalBrackets === false || utils.hasRegexChars(prevValue)) {
        continue;
      }
      const escaped = utils.escapeRegex(prev.value);
      state.output = state.output.slice(0, -prev.value.length);
      // literalBrackets === true: emit only the escaped (literal) form.
      if (opts.literalBrackets === true) {
        state.output += escaped;
        prev.value = escaped;
        continue;
      }
      // Unspecified: emit an alternation of the literal form and the class.
      prev.value = `(${capture}${escaped}|${prev.value})`;
      state.output += prev.value;
      continue;
    }

    /*
     * Braces
     */

    if (value === "{" && opts.nobrace !== true) {
      increment("braces");
      const open = {
        type: "brace",
        value,
        output: "(",
        outputIndex: state.output.length,
        tokensIndex: state.tokens.length
      };
      braces.push(open);
      push(open);
      continue;
    }
    if (value === "}") {
      const brace = braces[braces.length - 1];
      if (opts.nobrace === true || !brace) {
        push({ type: "text", value, output: value });
        continue;
      }
      let output = ")";
      if (brace.dots === true) {
        // Range brace: pop tokens back to the opening brace, collecting the
        // non-"dots" values, and let expandRange() build the output.
        const arr = tokens.slice();
        const range = [];
        for (let i = arr.length - 1; i >= 0; i--) {
          tokens.pop();
          if (arr[i].type === "brace") {
            break;
          }
          if (arr[i].type !== "dots") {
            range.unshift(arr[i].value);
          }
        }
        output = expandRange(range, opts);
        state.backtrack = true;
      }
      if (brace.comma !== true && brace.dots !== true) {
        // Neither a comma list nor a range: treat both braces as literals and
        // rebuild the output from the opening brace onwards.
        const out = state.output.slice(0, brace.outputIndex);
        const toks = state.tokens.slice(brace.tokensIndex);
        brace.value = brace.output = "\\{";
        value = output = "\\}";
        state.output = out;
        for (const t of toks) {
          state.output += t.output || t.value;
        }
      }
      push({ type: "brace", value, output });
      decrement("braces");
      braces.pop();
      continue;
    }

    /*
     * Pipes
     */

    if (value === "|") {
      if (extglobs.length > 0) {
        extglobs[extglobs.length - 1].conditions++;
      }
      push({ type: "text", value });
      continue;
    }

    /*
     * Commas
     */

    if (value === ",") {
      let output = value;
      const brace = braces[braces.length - 1];
      // A comma becomes an alternation only when a brace is the innermost
      // open delimiter.
      if (brace && stack[stack.length - 1] === "braces") {
        brace.comma = true;
        output = "|";
      }
      push({ type: "comma", value, output });
      continue;
    }

    /*
     * Slashes
     */

    if (value === "/") {
      // A leading "./" is dropped entirely: reset the output and move the
      // start position past it.
      if (prev.type === "dot" && state.index === state.start + 1) {
        state.start = state.index + 1;
        state.consumed = "";
        state.output = "";
        tokens.pop();
        prev = bos;
        continue;
      }
      push({ type: "slash", value, output: SLASH_LITERAL });
      continue;
    }

    /*
     * Dots
     */

    if (value === ".") {
      // Two dots in a row inside braces mark the brace as a range.
      if (state.braces > 0 && prev.type === "dot") {
        if (prev.value === ".") prev.output = DOT_LITERAL;
        const brace = braces[braces.length - 1];
        prev.type = "dots";
        prev.output += value;
        prev.value += value;
        brace.dots = true;
        continue;
      }
      if (state.braces + state.parens === 0 && prev.type !== "bos" && prev.type !== "slash") {
        push({ type: "text", value, output: DOT_LITERAL });
        continue;
      }
      push({ type: "dot", value, output: DOT_LITERAL });
      continue;
    }

    /*
     * Question marks
     */

    if (value === "?") {
      const isGroup = prev && prev.value === "(";
      if (!isGroup && opts.noextglob !== true && peek() === "(" && peek(2) !== "?") {
        extglobOpen("qmark", value);
        continue;
      }
      if (prev && prev.type === "paren") {
        const next = peek();
        let output = value;
        // Escape the "?" unless it starts a regex group construct.
        if (prev.value === "(" && !/[!=<:]/.test(next) || next === "<" && !/<([!=]|\w+>)/.test(remaining())) {
          output = `\\${value}`;
        }
        push({ type: "text", value, output });
        continue;
      }
      if (opts.dot !== true && (prev.type === "slash" || prev.type === "bos")) {
        push({ type: "qmark", value, output: QMARK_NO_DOT });
        continue;
      }
      push({ type: "qmark", value, output: QMARK });
      continue;
    }

    /*
     * Exclamation
     */

    if (value === "!") {
      if (opts.noextglob !== true && peek() === "(") {
        if (peek(2) !== "?" || !/[!=<:]/.test(peek(3))) {
          extglobOpen("negate", value);
          continue;
        }
      }
      if (opts.nonegate !== true && state.index === 0) {
        negate();
        continue;
      }
      // Otherwise fall through: "!" is handled as plain text below.
    }

    /*
     * Plus
     */

    if (value === "+") {
      if (opts.noextglob !== true && peek() === "(" && peek(2) !== "?") {
        extglobOpen("plus", value);
        continue;
      }
      if (prev && prev.value === "(" || opts.regex === false) {
        push({ type: "plus", value, output: PLUS_LITERAL });
        continue;
      }
      if (prev && (prev.type === "bracket" || prev.type === "paren" || prev.type === "brace") || state.parens > 0) {
        push({ type: "plus", value });
        continue;
      }
      push({ type: "plus", value: PLUS_LITERAL });
      continue;
    }

    /*
     * At sign
     */

    if (value === "@") {
      if (opts.noextglob !== true && peek() === "(" && peek(2) !== "?") {
        push({ type: "at", extglob: true, value, output: "" });
        continue;
      }
      push({ type: "text", value });
      continue;
    }

    /*
     * Plain text
     */

    if (value !== "*") {
      if (value === "$" || value === "^") {
        value = `\\${value}`;
      }
      // Take whatever REGEX_NON_SPECIAL_CHARS matches at the cursor in one go.
      const match = REGEX_NON_SPECIAL_CHARS.exec(remaining());
      if (match) {
        value += match[0];
        state.index += match[0].length;
      }
      push({ type: "text", value });
      continue;
    }

    /*
     * Stars
     */

    // A third (or later) consecutive star collapses back to a single star.
    if (prev && (prev.type === "globstar" || prev.star === true)) {
      prev.type = "star";
      prev.star = true;
      prev.value += value;
      prev.output = star;
      state.backtrack = true;
      state.globstar = true;
      consume(value);
      continue;
    }
    let rest = remaining();
    if (opts.noextglob !== true && /^\([^?]/.test(rest)) {
      extglobOpen("star", value);
      continue;
    }
    // Second consecutive star: decide whether this is a real globstar.
    if (prev.type === "star") {
      if (opts.noglobstar === true) {
        consume(value);
        continue;
      }
      const prior = prev.prev;
      const before = prior.prev;
      const isStart = prior.type === "slash" || prior.type === "bos";
      const afterStar = before && (before.type === "star" || before.type === "globstar");
      if (opts.bash === true && (!isStart || rest[0] && rest[0] !== "/")) {
        push({ type: "star", value, output: "" });
        continue;
      }
      const isBrace = state.braces > 0 && (prior.type === "comma" || prior.type === "brace");
      const isExtglob = extglobs.length && (prior.type === "pipe" || prior.type === "paren");
      if (!isStart && prior.type !== "paren" && !isBrace && !isExtglob) {
        push({ type: "star", value, output: "" });
        continue;
      }
      // Strip consecutive "/**" segments that follow this globstar.
      while (rest.slice(0, 3) === "/**") {
        const after = input[state.index + 4];
        if (after && after !== "/") {
          break;
        }
        rest = rest.slice(3);
        consume("/**", 3);
      }
      // The globstar directly follows "bos" and ends the input.
      if (prior.type === "bos" && eos()) {
        prev.type = "globstar";
        prev.value += value;
        prev.output = globstar(opts);
        state.output = prev.output;
        state.globstar = true;
        consume(value);
        continue;
      }
      // Trailing "/**" after some other segment.
      if (prior.type === "slash" && prior.prev.type !== "bos" && !afterStar && eos()) {
        state.output = state.output.slice(
          0,
          -(prior.output + prev.output).length
        );
        prior.output = `(?:${prior.output}`;
        prev.type = "globstar";
        prev.output = globstar(opts) + (opts.strictSlashes ? ")" : "|$)");
        prev.value += value;
        state.globstar = true;
        state.output += prior.output + prev.output;
        consume(value);
        continue;
      }
      // "/**/" in the middle of the pattern.
      if (prior.type === "slash" && prior.prev.type !== "bos" && rest[0] === "/") {
        const end = rest[1] !== void 0 ? "|$" : "";
        state.output = state.output.slice(
          0,
          -(prior.output + prev.output).length
        );
        prior.output = `(?:${prior.output}`;
        prev.type = "globstar";
        prev.output = `${globstar(opts)}${SLASH_LITERAL}|${SLASH_LITERAL}${end})`;
        prev.value += value;
        state.output += prior.output + prev.output;
        state.globstar = true;
        consume(value + advance());
        push({ type: "slash", value: "/", output: "" });
        continue;
      }
      // Leading "**/".
      if (prior.type === "bos" && rest[0] === "/") {
        prev.type = "globstar";
        prev.value += value;
        prev.output = `(?:^|${SLASH_LITERAL}|${globstar(opts)}${SLASH_LITERAL})`;
        state.output = prev.output;
        state.globstar = true;
        consume(value + advance());
        push({ type: "slash", value: "/", output: "" });
        continue;
      }
      // Default: replace the single star already in the output with a globstar.
      state.output = state.output.slice(0, -prev.output.length);
      prev.type = "globstar";
      prev.output = globstar(opts);
      prev.value += value;
      state.output += prev.output;
      state.globstar = true;
      consume(value);
      continue;
    }
    // First star of a run.
    const token = { type: "star", value, output: star };
    if (opts.bash === true) {
      token.output = ".*?";
      if (prev.type === "bos" || prev.type === "slash") {
        token.output = nodot + token.output;
      }
      push(token);
      continue;
    }
    if (prev && (prev.type === "bracket" || prev.type === "paren") && opts.regex === true) {
      token.output = value;
      push(token);
      continue;
    }
    // At the start of the pattern or of a path segment (or right after a dot),
    // add the platform's dot/slash guard before the star's own output.
    if (state.index === state.start || prev.type === "slash" || prev.type === "dot") {
      if (prev.type === "dot") {
        state.output += NO_DOT_SLASH;
        prev.output += NO_DOT_SLASH;
      } else if (opts.dot === true) {
        state.output += NO_DOTS_SLASH;
        prev.output += NO_DOTS_SLASH;
      } else {
        state.output += nodot;
        prev.output += nodot;
      }
      if (peek() !== "*") {
        state.output += ONE_CHAR;
        prev.output += ONE_CHAR;
      }
    }
    push(token);
  }

  /*
   * Unbalanced delimiters: throw in strict mode, otherwise escape the last
   * opening character so that it is matched literally.
   */

  while (state.brackets > 0) {
    if (opts.strictBrackets === true) {
      throw new SyntaxError(syntaxError("closing", "]"));
    }
    state.output = utils.escapeLast(state.output, "[", state.output.length - 1);
    decrement("brackets");
  }
  while (state.parens > 0) {
    if (opts.strictBrackets === true) {
      throw new SyntaxError(syntaxError("closing", ")"));
    }
    state.output = utils.escapeLast(state.output, "(", state.output.length - 1);
    decrement("parens");
  }
  while (state.braces > 0) {
    if (opts.strictBrackets === true) {
      throw new SyntaxError(syntaxError("closing", "}"));
    }
    state.output = utils.escapeLast(state.output, "{", state.output.length - 1);
    decrement("braces");
  }
  // Allow an optional trailing slash after a final star or bracket.
  if (opts.strictSlashes !== true && (prev.type === "star" || prev.type === "bracket")) {
    push({ type: "maybe_slash", value: "", output: `${SLASH_LITERAL}?` });
  }
  // Rebuild the output from the tokens if we had to backtrack at any point.
  if (state.backtrack === true) {
    state.output = "";
    for (const token of state.tokens) {
      state.output += token.output != null ? token.output : token.value;
      if (token.suffix) {
        state.output += token.suffix;
      }
    }
  }
  return state;
};
/**
 * Return pre-built regex source for a small set of very common glob patterns
 * (`*`, `.*`, `*.*`, `*\/*`, `**`, `**\/*`, `**\/*.*`, `**\/.*`, and any of
 * those followed by a `.ext` suffix made of word characters), bypassing the
 * tokenizer.
 *
 * @param {string} input - Glob pattern.
 * @param {object} [options] - Options read here: `maxLength`, `windows`,
 *   `dot`, `capture`, `bash`, `noglobstar`, `strictSlashes`.
 * @returns {string|undefined} Regex source, or `undefined` when the pattern is
 *   not one of the supported shapes.
 * @throws {SyntaxError} If `input` exceeds the maximum allowed length.
 */
parse.fastpaths = (input, options) => {
  const opts = { ...options };
  const max = typeof opts.maxLength === "number" ? Math.min(MAX_LENGTH, opts.maxLength) : MAX_LENGTH;
  const len = input.length;
  if (len > max) {
    throw new SyntaxError(
      `Input length: ${len}, exceeds maximum allowed length: ${max}`
    );
  }
  // Only honor own entries of REPLACEMENTS. A bare `REPLACEMENTS[input]` on a
  // plain object also resolves inherited keys ("constructor", "toString", ...)
  // to functions, which would replace the pattern with a non-string value.
  if (Object.prototype.hasOwnProperty.call(REPLACEMENTS, input)) {
    input = REPLACEMENTS[input] || input;
  }
  const {
    DOT_LITERAL,
    SLASH_LITERAL,
    ONE_CHAR,
    DOTS_SLASH,
    NO_DOT,
    NO_DOTS,
    NO_DOTS_SLASH,
    STAR,
    START_ANCHOR
  } = globChars(opts.windows);
  const nodot = opts.dot ? NO_DOTS : NO_DOT;
  const slashDot = opts.dot ? NO_DOTS_SLASH : NO_DOT;
  // Groups are non-capturing unless `opts.capture` is set.
  const capture = opts.capture ? "" : "?:";
  const state = { negated: false, prefix: "" };
  let star = opts.bash === true ? ".*?" : STAR;
  if (opts.capture) {
    star = `(${star})`;
  }
  // Regex source for `**`; degrades to a single star when globstars are disabled.
  const globstar = (opts2) => {
    if (opts2.noglobstar === true) return star;
    return `(${capture}(?:(?!${START_ANCHOR}${opts2.dot ? DOTS_SLASH : DOT_LITERAL}).)*?)`;
  };
  // Map a known pattern to its regex source; returns undefined otherwise.
  const create = (str) => {
    switch (str) {
      case "*":
        return `${nodot}${ONE_CHAR}${star}`;
      case ".*":
        return `${DOT_LITERAL}${ONE_CHAR}${star}`;
      case "*.*":
        return `${nodot}${star}${DOT_LITERAL}${ONE_CHAR}${star}`;
      case "*/*":
        return `${nodot}${star}${SLASH_LITERAL}${ONE_CHAR}${slashDot}${star}`;
      case "**":
        return nodot + globstar(opts);
      case "**/*":
        return `(?:${nodot}${globstar(opts)}${SLASH_LITERAL})?${slashDot}${ONE_CHAR}${star}`;
      case "**/*.*":
        return `(?:${nodot}${globstar(opts)}${SLASH_LITERAL})?${slashDot}${star}${DOT_LITERAL}${ONE_CHAR}${star}`;
      case "**/.*":
        return `(?:${nodot}${globstar(opts)}${SLASH_LITERAL})?${DOT_LITERAL}${ONE_CHAR}${star}`;
      default: {
        // "<known pattern>.<ext>": resolve the prefix recursively and append
        // the literal extension.
        const match = /^(.*?)\.(\w+)$/.exec(str);
        if (!match) return;
        const source2 = create(match[1]);
        if (!source2) return;
        return source2 + DOT_LITERAL + match[2];
      }
    }
  };
  const output = utils.removePrefix(input, state);
  let source = create(output);
  // Allow an optional trailing slash unless strict slash matching is requested.
  if (source && opts.strictSlashes !== true) {
    source += `${SLASH_LITERAL}?`;
  }
  return source;
};
export default parse;