UNPKG

@reliverse/rematch

Version:

@reliverse/rematch is a high-performance minimal glob matcher, with micromatch-level power, zepto-level size, and reliverse-grade dx.

779 lines (778 loc) 24.5 kB
import { MAX_LENGTH, POSIX_REGEX_SOURCE, REGEX_NON_SPECIAL_CHARS, REGEX_SPECIAL_CHARS_BACKREF, REPLACEMENTS, globChars, extglobChars } from "./constants.js"; import * as utils from "./utils.js"; const expandRange = (args, options) => { if (typeof options.expandRange === "function") { return options.expandRange(...args, options); } args.sort(); const value = `[${args.join("-")}]`; try { new RegExp(value); } catch (ex) { return args.map((v) => utils.escapeRegex(v)).join(".."); } return value; }; const syntaxError = (type, char) => { return `Missing ${type}: "${char}" - use "\\\\${char}" to match literal characters`; }; const parse = (input, options) => { if (typeof input !== "string") { throw new TypeError("Expected a string"); } input = REPLACEMENTS[input] || input; const opts = { ...options }; const max = typeof opts.maxLength === "number" ? Math.min(MAX_LENGTH, opts.maxLength) : MAX_LENGTH; let len = input.length; if (len > max) { throw new SyntaxError( `Input length: ${len}, exceeds maximum allowed length: ${max}` ); } const bos = { type: "bos", value: "", output: opts.prepend || "" }; const tokens = [bos]; const capture = opts.capture ? "" : "?:"; const PLATFORM_CHARS = globChars(opts.windows); const EXTGLOB_CHARS = extglobChars(PLATFORM_CHARS); const { DOT_LITERAL, PLUS_LITERAL, SLASH_LITERAL, ONE_CHAR, DOTS_SLASH, NO_DOT, NO_DOT_SLASH, NO_DOTS_SLASH, QMARK, QMARK_NO_DOT, STAR, START_ANCHOR } = PLATFORM_CHARS; const globstar = (opts2) => { return `(${capture}(?:(?!${START_ANCHOR}${opts2.dot ? DOTS_SLASH : DOT_LITERAL}).)*?)`; }; const nodot = opts.dot ? "" : NO_DOT; const qmarkNoDot = opts.dot ? QMARK : QMARK_NO_DOT; let star = opts.bash === true ? globstar(opts) : STAR; if (opts.capture) { star = `(${star})`; } if (typeof opts.noext === "boolean") { opts.noextglob = opts.noext; } const state = { input, index: -1, start: 0, dot: opts.dot === true, consumed: "", output: "", prefix: "", backtrack: false, negated: false, brackets: 0, braces: 0, parens: 0, quotes: 0, globstar: false, tokens }; input = utils.removePrefix(input, state); len = input.length; const extglobs = []; const braces = []; const stack = []; let prev = bos; let value; const eos = () => state.index === len - 1; const peek = state.peek = (n = 1) => input[state.index + n]; const advance = state.advance = () => input[++state.index] || ""; const remaining = () => input.slice(state.index + 1); const consume = (value2 = "", num = 0) => { state.consumed += value2; state.index += num; }; const append = (token) => { state.output += token.output != null ? token.output : token.value; consume(token.value); }; const negate = () => { let count = 1; while (peek() === "!" && (peek(2) !== "(" || peek(3) === "?")) { advance(); state.start++; count++; } if (count % 2 === 0) { return false; } state.negated = true; state.start++; return true; }; const increment = (type) => { state[type]++; stack.push(type); }; const decrement = (type) => { state[type]--; stack.pop(); }; const push = (tok) => { if (prev.type === "globstar") { const isBrace = state.braces > 0 && (tok.type === "comma" || tok.type === "brace"); const isExtglob = tok.extglob === true || extglobs.length && (tok.type === "pipe" || tok.type === "paren"); if (tok.type !== "slash" && tok.type !== "paren" && !isBrace && !isExtglob) { state.output = state.output.slice(0, -prev.output.length); prev.type = "star"; prev.value = "*"; prev.output = star; state.output += prev.output; } } if (extglobs.length && tok.type !== "paren") { extglobs[extglobs.length - 1].inner += tok.value; } if (tok.value || tok.output) append(tok); if (prev && prev.type === "text" && tok.type === "text") { prev.output = (prev.output || prev.value) + tok.value; prev.value += tok.value; return; } tok.prev = prev; tokens.push(tok); prev = tok; }; const extglobOpen = (type, value2) => { const token = { ...EXTGLOB_CHARS[value2], conditions: 1, inner: "" }; token.prev = prev; token.parens = state.parens; token.output = state.output; const output = (opts.capture ? "(" : "") + token.open; increment("parens"); push({ type, value: value2, output: state.output ? "" : ONE_CHAR }); push({ type: "paren", extglob: true, value: advance(), output }); extglobs.push(token); }; const extglobClose = (token) => { let output = token.close + (opts.capture ? ")" : ""); let rest; if (token.type === "negate") { let extglobStar = star; if (token.inner && token.inner.length > 1 && token.inner.includes("/")) { extglobStar = globstar(opts); } if (extglobStar !== star || eos() || /^\)+$/.test(remaining())) { output = token.close = `)$))${extglobStar}`; } if (token.inner.includes("*") && (rest = remaining()) && /^\.[^\\/.]+$/.test(rest)) { const expression = parse(rest, { ...options, fastpaths: false }).output; output = token.close = `)${expression})${extglobStar})`; } if (token.prev.type === "bos") { state.negatedExtglob = true; } } push({ type: "paren", extglob: true, value, output }); decrement("parens"); }; if (opts.fastpaths !== false && !/(^[*!]|[/()[\]{}"])/.test(input)) { let backslashes = false; let output = input.replace( REGEX_SPECIAL_CHARS_BACKREF, (m, esc, chars, first, rest, index) => { if (first === "\\") { backslashes = true; return m; } if (first === "?") { if (esc) { return esc + first + (rest ? QMARK.repeat(rest.length) : ""); } if (index === 0) { return qmarkNoDot + (rest ? QMARK.repeat(rest.length) : ""); } return QMARK.repeat(chars.length); } if (first === ".") { return DOT_LITERAL.repeat(chars.length); } if (first === "*") { if (esc) { return esc + first + (rest ? star : ""); } return star; } return esc ? m : `\\${m}`; } ); if (backslashes) { if (opts.unescape) { output = output.replace(/\\/g, ""); } else { output = output.replace(/\\+/g, (m) => { return m.length % 2 === 0 ? "\\\\" : m ? "\\" : ""; }); } } if (output === input && opts.contains === true) { state.output = input; return state; } state.output = utils.wrapOutput(output, state, options); return state; } while (!eos()) { value = advance(); if (value === "\0") { continue; } if (value === "\\") { const next = peek(); if (next === "/" && opts.bash !== true) { continue; } if (next === "." || next === ";") { continue; } if (!next) { value += "\\"; push({ type: "text", value }); continue; } const match = /^\\+/.exec(remaining()); let slashes = 0; if (match && match[0].length > 2) { slashes = match[0].length; state.index += slashes; if (slashes % 2 !== 0) { value += "\\"; } } if (opts.unescape === true) { value = advance(); } else { value += advance(); } if (state.brackets === 0) { push({ type: "text", value }); continue; } } if (state.brackets > 0 && (value !== "]" || prev.value === "[" || prev.value === "[^")) { if (opts.posix !== false && value === ":") { const inner = prev.value.slice(1); if (inner.includes("[")) { prev.posix = true; if (inner.includes(":")) { const idx = prev.value.lastIndexOf("["); const pre = prev.value.slice(0, idx); const rest2 = prev.value.slice(idx + 2); const posix = POSIX_REGEX_SOURCE[rest2]; if (posix) { prev.value = pre + posix; state.backtrack = true; advance(); if (!bos.output && tokens.indexOf(prev) === 1) { bos.output = ONE_CHAR; } continue; } } } } if (value === "[" && peek() !== ":" || value === "-" && peek() === "]") { value = `\\${value}`; } if (value === "]" && (prev.value === "[" || prev.value === "[^")) { value = `\\${value}`; } if (opts.posix === true && value === "!" && prev.value === "[") { value = "^"; } prev.value += value; append({ value }); continue; } if (state.quotes === 1 && value !== '"') { value = utils.escapeRegex(value); prev.value += value; append({ value }); continue; } if (value === '"') { state.quotes = state.quotes === 1 ? 0 : 1; if (opts.keepQuotes === true) { push({ type: "text", value }); } continue; } if (value === "(") { increment("parens"); push({ type: "paren", value }); continue; } if (value === ")") { if (state.parens === 0 && opts.strictBrackets === true) { throw new SyntaxError(syntaxError("opening", "(")); } const extglob = extglobs[extglobs.length - 1]; if (extglob && state.parens === extglob.parens + 1) { extglobClose(extglobs.pop()); continue; } push({ type: "paren", value, output: state.parens ? ")" : "\\)" }); decrement("parens"); continue; } if (value === "[") { if (opts.nobracket === true || !remaining().includes("]")) { if (opts.nobracket !== true && opts.strictBrackets === true) { throw new SyntaxError(syntaxError("closing", "]")); } value = `\\${value}`; } else { increment("brackets"); } push({ type: "bracket", value }); continue; } if (value === "]") { if (opts.nobracket === true || prev && prev.type === "bracket" && prev.value.length === 1) { push({ type: "text", value, output: `\\${value}` }); continue; } if (state.brackets === 0) { if (opts.strictBrackets === true) { throw new SyntaxError(syntaxError("opening", "[")); } push({ type: "text", value, output: `\\${value}` }); continue; } decrement("brackets"); const prevValue = prev.value.slice(1); if (prev.posix !== true && prevValue.startsWith("^") && !prevValue.includes("/")) { value = `/${value}`; } prev.value += value; append({ value }); if (opts.literalBrackets === false || utils.hasRegexChars(prevValue)) { continue; } const escaped = utils.escapeRegex(prev.value); state.output = state.output.slice(0, -prev.value.length); if (opts.literalBrackets === true) { state.output += escaped; prev.value = escaped; continue; } prev.value = `(${capture}${escaped}|${prev.value})`; state.output += prev.value; continue; } if (value === "{" && opts.nobrace !== true) { increment("braces"); const open = { type: "brace", value, output: "(", outputIndex: state.output.length, tokensIndex: state.tokens.length }; braces.push(open); push(open); continue; } if (value === "}") { const brace = braces[braces.length - 1]; if (opts.nobrace === true || !brace) { push({ type: "text", value, output: value }); continue; } let output = ")"; if (brace.dots === true) { const arr = tokens.slice(); const range = []; for (let i = arr.length - 1; i >= 0; i--) { tokens.pop(); if (arr[i].type === "brace") { break; } if (arr[i].type !== "dots") { range.unshift(arr[i].value); } } output = expandRange(range, opts); state.backtrack = true; } if (brace.comma !== true && brace.dots !== true) { const out = state.output.slice(0, brace.outputIndex); const toks = state.tokens.slice(brace.tokensIndex); brace.value = brace.output = "\\{"; value = output = "\\}"; state.output = out; for (const t of toks) { state.output += t.output || t.value; } } push({ type: "brace", value, output }); decrement("braces"); braces.pop(); continue; } if (value === "|") { if (extglobs.length > 0) { extglobs[extglobs.length - 1].conditions++; } push({ type: "text", value }); continue; } if (value === ",") { let output = value; const brace = braces[braces.length - 1]; if (brace && stack[stack.length - 1] === "braces") { brace.comma = true; output = "|"; } push({ type: "comma", value, output }); continue; } if (value === "/") { if (prev.type === "dot" && state.index === state.start + 1) { state.start = state.index + 1; state.consumed = ""; state.output = ""; tokens.pop(); prev = bos; continue; } push({ type: "slash", value, output: SLASH_LITERAL }); continue; } if (value === ".") { if (state.braces > 0 && prev.type === "dot") { if (prev.value === ".") prev.output = DOT_LITERAL; const brace = braces[braces.length - 1]; prev.type = "dots"; prev.output += value; prev.value += value; brace.dots = true; continue; } if (state.braces + state.parens === 0 && prev.type !== "bos" && prev.type !== "slash") { push({ type: "text", value, output: DOT_LITERAL }); continue; } push({ type: "dot", value, output: DOT_LITERAL }); continue; } if (value === "?") { const isGroup = prev && prev.value === "("; if (!isGroup && opts.noextglob !== true && peek() === "(" && peek(2) !== "?") { extglobOpen("qmark", value); continue; } if (prev && prev.type === "paren") { const next = peek(); let output = value; if (prev.value === "(" && !/[!=<:]/.test(next) || next === "<" && !/<([!=]|\w+>)/.test(remaining())) { output = `\\${value}`; } push({ type: "text", value, output }); continue; } if (opts.dot !== true && (prev.type === "slash" || prev.type === "bos")) { push({ type: "qmark", value, output: QMARK_NO_DOT }); continue; } push({ type: "qmark", value, output: QMARK }); continue; } if (value === "!") { if (opts.noextglob !== true && peek() === "(") { if (peek(2) !== "?" || !/[!=<:]/.test(peek(3))) { extglobOpen("negate", value); continue; } } if (opts.nonegate !== true && state.index === 0) { negate(); continue; } } if (value === "+") { if (opts.noextglob !== true && peek() === "(" && peek(2) !== "?") { extglobOpen("plus", value); continue; } if (prev && prev.value === "(" || opts.regex === false) { push({ type: "plus", value, output: PLUS_LITERAL }); continue; } if (prev && (prev.type === "bracket" || prev.type === "paren" || prev.type === "brace") || state.parens > 0) { push({ type: "plus", value }); continue; } push({ type: "plus", value: PLUS_LITERAL }); continue; } if (value === "@") { if (opts.noextglob !== true && peek() === "(" && peek(2) !== "?") { push({ type: "at", extglob: true, value, output: "" }); continue; } push({ type: "text", value }); continue; } if (value !== "*") { if (value === "$" || value === "^") { value = `\\${value}`; } const match = REGEX_NON_SPECIAL_CHARS.exec(remaining()); if (match) { value += match[0]; state.index += match[0].length; } push({ type: "text", value }); continue; } if (prev && (prev.type === "globstar" || prev.star === true)) { prev.type = "star"; prev.star = true; prev.value += value; prev.output = star; state.backtrack = true; state.globstar = true; consume(value); continue; } let rest = remaining(); if (opts.noextglob !== true && /^\([^?]/.test(rest)) { extglobOpen("star", value); continue; } if (prev.type === "star") { if (opts.noglobstar === true) { consume(value); continue; } const prior = prev.prev; const before = prior.prev; const isStart = prior.type === "slash" || prior.type === "bos"; const afterStar = before && (before.type === "star" || before.type === "globstar"); if (opts.bash === true && (!isStart || rest[0] && rest[0] !== "/")) { push({ type: "star", value, output: "" }); continue; } const isBrace = state.braces > 0 && (prior.type === "comma" || prior.type === "brace"); const isExtglob = extglobs.length && (prior.type === "pipe" || prior.type === "paren"); if (!isStart && prior.type !== "paren" && !isBrace && !isExtglob) { push({ type: "star", value, output: "" }); continue; } while (rest.slice(0, 3) === "/**") { const after = input[state.index + 4]; if (after && after !== "/") { break; } rest = rest.slice(3); consume("/**", 3); } if (prior.type === "bos" && eos()) { prev.type = "globstar"; prev.value += value; prev.output = globstar(opts); state.output = prev.output; state.globstar = true; consume(value); continue; } if (prior.type === "slash" && prior.prev.type !== "bos" && !afterStar && eos()) { state.output = state.output.slice( 0, -(prior.output + prev.output).length ); prior.output = `(?:${prior.output}`; prev.type = "globstar"; prev.output = globstar(opts) + (opts.strictSlashes ? ")" : "|$)"); prev.value += value; state.globstar = true; state.output += prior.output + prev.output; consume(value); continue; } if (prior.type === "slash" && prior.prev.type !== "bos" && rest[0] === "/") { const end = rest[1] !== void 0 ? "|$" : ""; state.output = state.output.slice( 0, -(prior.output + prev.output).length ); prior.output = `(?:${prior.output}`; prev.type = "globstar"; prev.output = `${globstar(opts)}${SLASH_LITERAL}|${SLASH_LITERAL}${end})`; prev.value += value; state.output += prior.output + prev.output; state.globstar = true; consume(value + advance()); push({ type: "slash", value: "/", output: "" }); continue; } if (prior.type === "bos" && rest[0] === "/") { prev.type = "globstar"; prev.value += value; prev.output = `(?:^|${SLASH_LITERAL}|${globstar(opts)}${SLASH_LITERAL})`; state.output = prev.output; state.globstar = true; consume(value + advance()); push({ type: "slash", value: "/", output: "" }); continue; } state.output = state.output.slice(0, -prev.output.length); prev.type = "globstar"; prev.output = globstar(opts); prev.value += value; state.output += prev.output; state.globstar = true; consume(value); continue; } const token = { type: "star", value, output: star }; if (opts.bash === true) { token.output = ".*?"; if (prev.type === "bos" || prev.type === "slash") { token.output = nodot + token.output; } push(token); continue; } if (prev && (prev.type === "bracket" || prev.type === "paren") && opts.regex === true) { token.output = value; push(token); continue; } if (state.index === state.start || prev.type === "slash" || prev.type === "dot") { if (prev.type === "dot") { state.output += NO_DOT_SLASH; prev.output += NO_DOT_SLASH; } else if (opts.dot === true) { state.output += NO_DOTS_SLASH; prev.output += NO_DOTS_SLASH; } else { state.output += nodot; prev.output += nodot; } if (peek() !== "*") { state.output += ONE_CHAR; prev.output += ONE_CHAR; } } push(token); } while (state.brackets > 0) { if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", "]")); state.output = utils.escapeLast(state.output, "[", state.output.length - 1); decrement("brackets"); } while (state.parens > 0) { if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", ")")); state.output = utils.escapeLast(state.output, "(", state.output.length - 1); decrement("parens"); } while (state.braces > 0) { if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", "}")); state.output = utils.escapeLast(state.output, "{", state.output.length - 1); decrement("braces"); } if (opts.strictSlashes !== true && (prev.type === "star" || prev.type === "bracket")) { push({ type: "maybe_slash", value: "", output: `${SLASH_LITERAL}?` }); } if (state.backtrack === true) { state.output = ""; for (const token of state.tokens) { state.output += token.output != null ? token.output : token.value; if (token.suffix) { state.output += token.suffix; } } } return state; }; parse.fastpaths = (input, options) => { const opts = { ...options }; const max = typeof opts.maxLength === "number" ? Math.min(MAX_LENGTH, opts.maxLength) : MAX_LENGTH; const len = input.length; if (len > max) { throw new SyntaxError( `Input length: ${len}, exceeds maximum allowed length: ${max}` ); } input = REPLACEMENTS[input] || input; const { DOT_LITERAL, SLASH_LITERAL, ONE_CHAR, DOTS_SLASH, NO_DOT, NO_DOTS, NO_DOTS_SLASH, STAR, START_ANCHOR } = globChars(opts.windows); const nodot = opts.dot ? NO_DOTS : NO_DOT; const slashDot = opts.dot ? NO_DOTS_SLASH : NO_DOT; const capture = opts.capture ? "" : "?:"; const state = { negated: false, prefix: "" }; let star = opts.bash === true ? ".*?" : STAR; if (opts.capture) { star = `(${star})`; } const globstar = (opts2) => { if (opts2.noglobstar === true) return star; return `(${capture}(?:(?!${START_ANCHOR}${opts2.dot ? DOTS_SLASH : DOT_LITERAL}).)*?)`; }; const create = (str) => { switch (str) { case "*": return `${nodot}${ONE_CHAR}${star}`; case ".*": return `${DOT_LITERAL}${ONE_CHAR}${star}`; case "*.*": return `${nodot}${star}${DOT_LITERAL}${ONE_CHAR}${star}`; case "*/*": return `${nodot}${star}${SLASH_LITERAL}${ONE_CHAR}${slashDot}${star}`; case "**": return nodot + globstar(opts); case "**/*": return `(?:${nodot}${globstar(opts)}${SLASH_LITERAL})?${slashDot}${ONE_CHAR}${star}`; case "**/*.*": return `(?:${nodot}${globstar(opts)}${SLASH_LITERAL})?${slashDot}${star}${DOT_LITERAL}${ONE_CHAR}${star}`; case "**/.*": return `(?:${nodot}${globstar(opts)}${SLASH_LITERAL})?${DOT_LITERAL}${ONE_CHAR}${star}`; default: { const match = /^(.*?)\.(\w+)$/.exec(str); if (!match) return; const source2 = create(match[1]); if (!source2) return; return source2 + DOT_LITERAL + match[2]; } } }; const output = utils.removePrefix(input, state); let source = create(output); if (source && opts.strictSlashes !== true) { source += `${SLASH_LITERAL}?`; } return source; }; export default parse;