UNPKG

kuvio

Version:

Create string patterns and derive things from them, such as regexes

1,067 lines (1,048 loc) 24.9 kB
"use strict";
// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every entry of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties of `from` onto `to` as getters, skipping keys that
// already exist on `to` and the single key named by `except`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
        });
  }
  return to;
};
// Wraps a namespace object into an object flagged with `__esModule: true`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var src_exports = {};
__export(src_exports, {
  alnum: () => alnum,
  alpha: () => alpha,
  and: () => and,
  andThen: () => andThen,
  anyNumber: () => anyNumber,
  anything: () => anything,
  atLeast: () => atLeast,
  atLeastOne: () => atLeastOne,
  atMost: () => atMost,
  between: () => between,
  blank: () => blank,
  char: () => char,
  characterClass: () => characterClass,
  digit: () => digit,
  empty: () => empty,
  exactString: () => exactString,
  exactly: () => exactly,
  graph: () => graph,
  hexDigit: () => hexDigit,
  integerRange: () => integerRange,
  lower: () => lower,
  maybe: () => maybe,
  non: () => non,
  oneOf: () => oneOf,
  or: () => or,
  patterns: () => patterns_exports,
  pipe: () => pipe,
  print: () => print,
  punct: () => punct,
  regexFromPattern: () => regexFromPattern,
  sequence: () => sequence,
  space: () => space,
  subgroup: () => subgroup,
  times: () => times,
  upper: () => upper,
  word: () => word,
  xdigit: () => xdigit
});
// Guarded so that evaluating this bundle where no CommonJS `module` binding
// exists (e.g. as an ES module) does not throw a ReferenceError. Under
// CommonJS the assignment runs exactly as before.
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(src_exports);
}

// src/base.ts

/** Atom matching the single character `c`. */
var char = (c) => ({ tag: "atom", kind: "character", char: c });

/** Atom rendered as `.` by the regex backend. */
var anything = { tag: "atom", kind: "anything" };

/**
 * Normalizes character-class range arguments into `{ lower, upper }` pairs of
 * UTF-16 code units.
 *
 * Each entry is either a string (only its first code unit is used, giving a
 * one-element range) or a `[from, to]` pair whose members are strings (first
 * code unit used) or numbers (used as-is).
 */
var convertRanges = (ranges) => ranges.map((range) => {
  if (typeof range === "string") {
    return { lower: range.charCodeAt(0), upper: range.charCodeAt(0) };
  }
  const [c1, c2] = range;
  const lower2 = typeof c1 === "string" ? c1.charCodeAt(0) : c1;
  const upper2 = typeof c2 === "string" ? c2.charCodeAt(0) : c2;
  return { lower: lower2, upper: upper2 };
});

/**
 * Builds a character-class atom.
 *
 * @param exclude when true the class is rendered negated (`[^...]`)
 * @param ranges  single characters and/or `[from, to]` pairs, see `convertRanges`
 */
var characterClass = (exclude, ...ranges) => ({
  tag: "atom",
  kind: "characterClass",
  exclude,
  ranges: convertRanges(ranges)
});

/** Wraps a non-atom pattern in a group atom; atoms are returned unchanged. */
var subgroup = (subpattern) => subpattern.tag === "atom" ? subpattern : { tag: "atom", kind: "subgroup", subpattern };

/** `*` quantifier; rendered lazy (`*?`) unless `opts.greedy` is truthy. */
var anyNumber = (opts = { greedy: false }) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: opts.greedy,
  kind: "star"
});

/** `+` quantifier; rendered lazy (`+?`) unless `opts.greedy` is truthy. */
var atLeastOne = (opts = { greedy: false }) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: opts.greedy,
  kind: "plus"
});

/** `?` quantifier. */
var maybe = (atom) => ({ tag: "quantifiedAtom", atom, greedy: false, kind: "question" });

/** `{count}` quantifier. */
var times = (count) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "exactly",
  count
});

/** Alias of `times`. */
var exactly = times;

/**
 * `{min,}` quantifier. Carries `greedy: true` like `times`/`between` so every
 * quantified atom exposes the same fields (the regex backend does not read
 * `greedy` for this kind).
 */
var atLeast = (min) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "minimum",
  min
});

/** `{min,max}` quantifier. */
var between = (min, max) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "between",
  min,
  max
});

/** `{0,max}` quantifier; carries `greedy: true` for consistency with `between`. */
var atMost = (max) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "between",
  min: 0,
  max
});

/** Curried alternation: `pipe(left, or(right))` yields `left|right`. */
var or = (right) => (left) => ({ tag: "disjunction", left, right });

/**
 * Flattens a term or term sequence into an array of terms.
 * Only the tags `termSequence`, `atom` and `quantifiedAtom` are handled; any
 * other tag (e.g. `disjunction`) yields `undefined`, so wrap alternations in
 * `subgroup` before sequencing them.
 */
var getTerms = (termOrSeq) => {
  switch (termOrSeq.tag) {
    case "termSequence":
      return termOrSeq.terms;
    case "atom":
      return [termOrSeq];
    case "quantifiedAtom":
      return [termOrSeq];
  }
};

/** Curried concatenation: `pipe(alt, andThen(term))` yields `alt` followed by `term`. */
var andThen = (term) => (alt) => ({
  tag: "termSequence",
  terms: [...getTerms(alt), ...getTerms(term)]
});

/** Group atom matching the string `s` one UTF-16 code unit at a time. */
var exactString = (s) => subgroup({ tag: "termSequence", terms: s.split("").map(char) });

/** Sequence of one or more terms, matched in order. */
var sequence = (term, ...terms) => ({ tag: "termSequence", terms: [term, ...terms] });

/**
 * Curried character-class union. The arguments are either range specs
 * (strings / `[from, to]` arrays, as accepted by `characterClass`) or a single
 * existing character class whose ranges are appended. The `exclude` flag of
 * the piped-in class is kept.
 */
var and = (first, ...addl) => (cc) => ({
  tag: "atom",
  kind: "characterClass",
  exclude: cc.exclude,
  ranges: cc.ranges.concat(
    typeof first === "string" || first instanceof Array ? convertRanges([first, ...addl]) : first.ranges
  )
});

/** Returns a copy of the character class with its `exclude` flag inverted. */
var non = (cc) => ({ ...cc, exclude: !cc.exclude });

/** Atom that renders as the empty string. */
var empty = { tag: "atom", kind: "character", char: "" };

// src/util/pipe.ts

/**
 * Threads `initial` through `fns` left to right:
 * `pipe(x, f, g)` is `g(f(x))`; `pipe(x)` is `x`.
 */
function pipe(initial, ...fns) {
  return fns.reduce((acc, fn) => fn(acc), initial);
}

// src/character-classes.ts
var upper = characterClass(false, ["A", "Z"]);
var lower = characterClass(false, ["a", "z"]);
var alpha = pipe(upper, and(lower));
var digit = characterClass(false, ["0", "9"]);
var xdigit = pipe(digit, and(["A", "F"], ["a", "f"]));
var hexDigit = xdigit;
var alnum = pipe(alpha, and(digit));
var word = pipe(alnum, and("_"));
// The third range ends at "`" (96) so the backtick is included, matching the
// POSIX punct set; it previously stopped at "_" (95).
var punct = characterClass(
  false,
  ["!", "/"],
  [":", "@"],
  ["[", "`"],
  ["{", "~"]
);
// Space and horizontal tab. The second entry used to be a duplicate space,
// which also left `space` (built on `blank`) without a tab.
var blank = characterClass(false, " ", "\t");
var space = pipe(blank, and("\n", "\r", "\f", "\v"));
// Visible ASCII, 33 ("!") through 126 ("~"); 127 (DEL) is a control character
// and is no longer included.
var graph = characterClass(false, [33, 126]);
var print = pipe(graph, and(" "));

// src/combinators.ts

/** Alternation over one or more patterns, left-associated. */
var oneOf = (pattern, ...patterns) => patterns.reduce((ored, next) => pipe(ored, or(next)), pattern);

/**
 * Recursive worker for `integerRange`. `min` and `max` are decimal strings of
 * equal length; the result matches the digit strings between them, built
 * digit by digit from the left.
 *
 * When `omitInitialZeros` is true and the current minimum digit is 0, the
 * leading zero is dropped rather than matched.
 */
var integerRange_ = (min, max, omitInitialZeros = false) => {
  const curMinDigit = Number(min[0] ?? "0");
  const restMin = min.slice(1);
  const curMaxDigit = Number(max[0] ?? "9");
  const restMax = max.slice(1);
  const res = restMin.length === 0
    // Last digit: a single character or a one-range class.
    ? curMinDigit === curMaxDigit ? char(min) : characterClass(false, [min, max])
    : curMinDigit === curMaxDigit
      // Same leading digit: emit it and recurse on the remaining digits.
      ? pipe(
        char(curMinDigit.toString(10)),
        andThen(subgroup(integerRange_(restMin, restMax)))
      )
      : oneOf(
        // Lowest leading digit: the rest runs from restMin up to all nines.
        curMinDigit === 0 && omitInitialZeros
          ? integerRange_(restMin, restMax.replace(/./g, "9"), true)
          : pipe(
            char(curMinDigit.toString(10)),
            andThen(
              subgroup(integerRange_(restMin, restMin.replace(/./g, "9")))
            )
          ),
        // Leading digits strictly in between: any digits may follow.
        ...(curMaxDigit - curMinDigit > 1
          ? [
            pipe(
              characterClass(false, [
                (curMinDigit + 1).toString(10),
                (curMaxDigit - 1).toString(10)
              ]),
              andThen(
                sequence(empty, ...restMin.split("").map(() => digit))
              )
            )
          ]
          : []),
        // Highest leading digit: the rest runs from all zeros up to restMax.
        pipe(
          char(curMaxDigit.toString(10)),
          andThen(
            subgroup(integerRange_(restMin.replace(/./g, "0"), restMax))
          )
        )
      );
  return res;
};

/**
 * Pattern matching the decimal integers from `min` to `max` inclusive.
 * Returns `empty` when either bound is NaN, non-integer or negative, or when
 * `min > max`.
 */
var integerRange = (min, max) => {
  if (min > max || Number.isNaN(min) || Number.isNaN(max) || !Number.isInteger(min) || !Number.isInteger(max) || min < 0 || max < 0) {
    return empty;
  }
  const maxStr = max.toString(10);
  const minStr = min.toString(10).padStart(maxStr.length, "0");
  return integerRange_(minStr, maxStr, true);
};

// src/regex.ts

/**
 * Renders code unit `n` for use inside a character class, escaping as `\xNN`
 * (or `\uNNNN` above 255) when needed.
 */
var repr = (n) => (
  // < 32 -> control characters
  // 45 -> '-'.. seems like `/[--z]/` for example actually works, but looks
  //   weird.
  // 47 -> '/' doesn't need to be escaped in JS, but it's helpful for copying
  //   into regex debuggers since it must be escaped in some languages
  // 92 -> '\' just to avoid any issues with escaping
  // 93 -> ']' which needs to be escaped
  // 94 -> '^' which might get parsed as class exclusion marker, so escape just in case
  // 127 -> del
  // >127 -> outside normal ascii range. escape 'em
  n < 32 || n === 45 || n === 47 || n === 92 || n === 93 || n === 94 || n >= 127
    ? n > 255
      ? `\\u${n.toString(16).padStart(4, "0")}`
      : `\\x${n.toString(16).padStart(2, "0")}`
    : String.fromCharCode(n)
);

// Escapes for every regex syntax character that can appear as a literal
// `char(...)` outside a character class. The entries for \ ^ $ * ? { } | were
// missing, so e.g. `char("*")` produced an invalid regex and `char("|")`
// produced an empty alternation instead of a literal bar.
var charEscapes = /* @__PURE__ */ new Map([
  ["\\", "\\\\"],
  ["^", "\\^"],
  ["$", "\\$"],
  [".", "\\."],
  ["*", "\\*"],
  ["+", "\\+"],
  ["?", "\\?"],
  ["(", "\\("],
  [")", "\\)"],
  ["[", "\\["],
  ["]", "\\]"],
  ["{", "\\{"],
  ["}", "\\}"],
  ["|", "\\|"]
]);

/** Renders a single atom as regex source text. */
var regexStringFromAtom = (atom) => {
  switch (atom.kind) {
    case "anything":
      return ".";
    case "character":
      return charEscapes.get(atom.char) ?? atom.char;
    case "characterClass": {
      const { exclude, ranges } = atom;
      const isDigitClass = ranges.length === 1 && ranges[0].lower === 48 && ranges[0].upper === 57;
      if (isDigitClass) {
        // Shorthand for the lone 0-9 range. It must honor `exclude`:
        // previously a negated digit class was also rendered as `\d`.
        return exclude ? `\\D` : `\\d`;
      }
      const body = ranges.map(
        ({ lower: lower2, upper: upper2 }) => lower2 === upper2 ? repr(lower2) : `${repr(lower2)}-${repr(upper2)}`
      ).join("");
      return `[${exclude ? "^" : ""}${body}]`;
    }
    case "subgroup":
      return `(${regexStringFromPattern(atom.subpattern)})`;
  }
};

/** Renders a quantified atom; only `star` and `plus` consult `greedy`. */
var regexStringFromQuantifiedAtom = (quantifiedAtom) => {
  switch (quantifiedAtom.kind) {
    case "star":
      return `${regexStringFromAtom(quantifiedAtom.atom)}*${quantifiedAtom.greedy ? "" : "?"}`;
    case "plus":
      return `${regexStringFromAtom(quantifiedAtom.atom)}+${quantifiedAtom.greedy ? "" : "?"}`;
    case "question":
      return `${regexStringFromAtom(quantifiedAtom.atom)}?`;
    case "exactly":
      return `${regexStringFromAtom(quantifiedAtom.atom)}{${quantifiedAtom.count}}`;
    case "between":
      return `${regexStringFromAtom(quantifiedAtom.atom)}{${quantifiedAtom.min},${quantifiedAtom.max}}`;
    case "minimum":
      return `${regexStringFromAtom(quantifiedAtom.atom)}{${quantifiedAtom.min},}`;
  }
};

/** Renders one member of a term sequence. */
var regexStringFromTerm = (term) => {
  switch (term.tag) {
    case "atom":
      return regexStringFromAtom(term);
    case "quantifiedAtom":
      return regexStringFromQuantifiedAtom(term);
  }
};

/** Renders any pattern node as regex source text (no anchors, no flags). */
var regexStringFromPattern = (pattern) => {
  switch (pattern.tag) {
    case "atom":
      return regexStringFromAtom(pattern);
    case "disjunction":
      return `${regexStringFromPattern(pattern.left)}|${regexStringFromPattern(
        pattern.right
      )}`;
    case "quantifiedAtom":
      return regexStringFromQuantifiedAtom(pattern);
    case "termSequence":
      return pattern.terms.map(regexStringFromTerm).join("");
  }
};

/**
 * Compiles a pattern into a `RegExp`.
 *
 * Unless `global` is true the source is wrapped as `^(...)$` so the regex must
 * match the whole input. `global`, `caseInsensitive` and `multiline` add the
 * `g`, `i` and `m` flags respectively.
 */
var regexFromPattern = (pattern, caseInsensitive = false, global = false, multiline = false) => new RegExp(
  `${global ? "" : "^("}${regexStringFromPattern(pattern)}${global ? "" : ")$"}`,
  `${global ? "g" : ""}${caseInsensitive ? "i" : ""}${multiline ? "m" : ""}`
);

// src/patterns/index.ts
var patterns_exports = {};
__export(patterns_exports, {
  anyUUID: () => anyUUID,
  base64: () => base64,
  base64Character: () => base64Character,
  base64Url: () => base64Url,
  creditCard: () => creditCard,
  emailAddress: () => emailAddress,
  hexColor: () => hexColor,
  hexadecimal: () => hexadecimal,
  hslColor: () => hslColor,
  jwt: () => jwt,
  latLong: () => latLong,
  rgbColor: () => rgbColor,
  rgbColorDecimal: () => rgbColorDecimal,
  rgbColorPercent: () => rgbColorPercent,
  rgbColorWithAlphaDecimal: () => rgbColorWithAlphaDecimal,
  rgbColorWithAlphaPercent: () => rgbColorWithAlphaPercent,
  uuidV1: () => uuidV1,
  uuidV2: () => uuidV2,
  uuidV3: () => uuidV3,
  uuidV4: () => uuidV4,
  uuidV5: () => uuidV5
});

// src/patterns/base64.ts
var base64Character = pipe(alnum, and(characterClass(false, "+", "/")));
// Any number of 4-character groups, optionally followed by a padded tail
// (2 characters + "==" or 3 characters + "=").
var base64 = pipe(
  base64Character,
  exactly(4),
  subgroup,
  anyNumber(),
  andThen(
    maybe(
      subgroup(
        oneOf(
          sequence(exactly(2)(base64Character), exactly(2)(char("="))),
          sequence(exactly(3)(base64Character), char("="))
        )
      )
    )
  )
);

// src/patterns/base64url.ts
// Zero or more of: letters, digits, "_" and "-".
var base64Url = pipe(
  word,
  and("-"),
  anyNumber({ greedy: true })
);

// src/patterns/credit-card.ts
// "4" followed by 12 or 15 digits.
var visa = pipe(
  char("4"),
  andThen(pipe(exactly(12)(digit), or(exactly(15)(digit)), subgroup))
);
// One of several 6-digit prefixes followed by 10 digits.
var mastercard = pipe(
  subgroup(
    pipe(
      sequence(char("5"), characterClass(false, ["1", "5"]), exactly(4)(digit)),
      or(
        sequence(
          exactString("222"),
          characterClass(false, ["1", "9"]),
          exactly(2)(digit)
        )
      ),
      or(
        sequence(
          exactString("22"),
          characterClass(false, ["3", "9"]),
          exactly(3)(digit)
        )
      ),
      or(
        sequence(
          exactString("2"),
          characterClass(false, ["3", "6"]),
          exactly(4)(digit)
        )
      ),
      or(
        sequence(
          exactString("27"),
          characterClass(false, "0", "1"),
          exactly(3)(digit)
        )
      ),
      or(sequence(exactString("2720"), exactly(2)(digit)))
    )
  ),
  andThen(exactly(10)(digit))
);
// "3", then "4" or "7", then 13 digits.
var amex = sequence(
  char("3"),
  characterClass(false, "4", "7"),
  exactly(13)(digit)
);
var dinersClub = pipe(
  sequence(
    char("3"),
    subgroup(
      pipe(
        sequence(
          char("0"),
          subgroup(
            pipe(
              sequence(characterClass(false, ["0", "5"]), exactly(5)(digit)),
              or(sequence(exactString("95"), exactly(4)(digit)))
            )
          )
        ),
        or(sequence(characterClass(false, "8", "9"), exactly(6)(digit)))
      )
    ),
    between(8, 11)(digit)
  ),
  or(sequence(exactString("36"), exactly(6)(digit), between(6, 11)(digit))),
  subgroup
);
// One of several 8-digit prefixes followed by 8 to 11 digits.
var discover = pipe(
  oneOf(
    pipe(
      exactString("6011"),
      andThen(
        subgroup(
          oneOf(
            sequence(
              char("0"),
              characterClass(false, ["5", "9"]),
              exactly(2)(digit)
            ),
            sequence(characterClass(false, ["2", "4"]), exactly(3)(digit)),
            sequence(exactString("74"), exactly(2)(digit)),
            sequence(
              exactString("7"),
              characterClass(false, ["7", "9"]),
              exactly(2)(digit)
            ),
            sequence(
              exactString("8"),
              characterClass(false, ["6", "9"]),
              exactly(2)(digit)
            ),
            sequence(exactString("9"), exactly(3)(digit))
          )
        )
      )
    ),
    sequence(
      exactString("64"),
      characterClass(false, ["4", "9"]),
      exactly(5)(digit)
    ),
    sequence(
      exactString("650"),
      characterClass(false, ["0", "5"]),
      exactly(4)(digit)
    ),
    sequence(
      exactString("65060"),
      characterClass(false, ["1", "9"]),
      exactly(2)(digit)
    ),
    sequence(
      exactString("65061"),
      characterClass(false, ["1", "9"]),
      exactly(2)(digit)
    ),
    sequence(
      exactString("6506"),
      characterClass(false, ["2", "9"]),
      exactly(3)(digit)
    ),
    sequence(
      exactString("650"),
      characterClass(false, ["7", "9"]),
      exactly(4)(digit)
    ),
    sequence(
      exactString("65"),
      characterClass(false, ["1", "9"]),
      exactly(5)(digit)
    )
  ),
  subgroup,
  andThen(between(8, 11)(digit))
);
var jcb = pipe(
  sequence(
    exactString("352"),
    characterClass(false, "8", "9"),
    exactly(4)(digit)
  ),
  or(
    sequence(
      exactString("35"),
      characterClass(false, ["3", "8"]),
      exactly(5)(digit)
    )
  ),
  subgroup,
  andThen(between(8, 11)(digit))
);
var rupay = subgroup(
  oneOf(
    sequence(
      subgroup(
        oneOf(
          exactString("60"),
          exactString("65"),
          exactString("81"),
          exactString("82")
        )
      ),
      exactly(14)(digit)
    ),
    sequence(exactString("508"), exactly(14)(digit))
  )
);
// "62", then a 6-digit continuation (8-digit prefix in total), then 8 to 11
// digits.
var unionPay = sequence(
  exactString("62"),
  subgroup(
    oneOf(
      sequence(
        char("2"),
        subgroup(
          oneOf(
            sequence(
              exactString("12"),
              characterClass(false, ["6", "9"]),
              exactly(2)(digit)
            ),
            sequence(
              char("1"),
              characterClass(false, ["3", "9"]),
              exactly(3)(digit)
            ),
            // Five digits like every sibling alternative. This branch used to
            // consume only two ([2-8] + one digit), which shifted the accepted
            // overall length for 622[2-8]... numbers down by three.
            sequence(characterClass(false, ["2", "8"]), exactly(4)(digit)),
            sequence(
              exactString("9"),
              characterClass(false, "0", "1"),
              exactly(3)(digit)
            ),
            sequence(
              exactString("92"),
              characterClass(false, ["0", "5"]),
              exactly(2)(digit)
            )
          )
        )
      ),
      sequence(characterClass(false, ["4", "6"]), exactly(5)(digit)),
      sequence(
        exactString("8"),
        characterClass(false, ["2", "8"]),
        exactly(4)(digit)
      )
    )
  ),
  between(8, 11)(digit)
);
var creditCard = oneOf(
  visa,
  mastercard,
  amex,
  dinersClub,
  discover,
  jcb,
  rupay,
  unionPay
);

// src/patterns/email-address.ts
// Double-quoted local part: one or more characters other than `"` and the
// control characters 0-31.
var localPartQuoted = pipe(
  char('"'),
  andThen(atLeastOne({ greedy: true })(characterClass(true, '"', [0, 31]))),
  andThen(char('"'))
);
var localPartUnquotedAllowedCharacters = characterClass(
  false,
  ["A", "Z"],
  ["a", "z"],
  ["0", "9"],
  "!",
  "#",
  "$",
  "%",
  "&",
  "'",
  "*",
  "+",
  "-",
  "/",
  "=",
  "?",
  "^",
  "_",
  "`",
  "{",
  "|",
  "}",
  "~"
);
// Dot-separated runs of allowed characters (no leading, trailing or doubled dot).
var localPartUnquoted = pipe(
  atLeastOne({ greedy: true })(localPartUnquotedAllowedCharacters),
  andThen(
    pipe(
      char("."),
      andThen(atLeastOne({ greedy: true })(localPartUnquotedAllowedCharacters)),
      subgroup,
      anyNumber({ greedy: true })
    )
  )
);
var localPart = pipe(localPartUnquoted, or(localPartQuoted), subgroup);
var ipAddressByte = between(1, 3)(digit);
// Bracketed dotted quad; each part is 1-3 digits with no numeric range check.
var domainIpAddress = pipe(
  sequence(
    char("["),
    ipAddressByte,
    char("."),
    ipAddressByte,
    char("."),
    ipAddressByte,
    char("."),
    ipAddressByte,
    char("]")
  )
);
// One or more labels (up to 63 letters/digits/"-" each, followed by "."),
// then at least two letters.
var domainName = pipe(
  alnum,
  and("-"),
  atMost(63),
  andThen(char(".")),
  subgroup,
  atLeastOne({ greedy: true }),
  andThen(atLeast(2)(alpha))
);
var domain = pipe(domainIpAddress, or(domainName), subgroup);
var emailAddress = pipe(
  localPart,
  andThen(char("@")),
  andThen(domain)
);

// src/patterns/hex-color.ts
// Optional "#", then 3, 4, 6 or 8 hex digits.
var hexColor = pipe(
  maybe(char("#")),
  andThen(
    subgroup(
      pipe(
        between(3, 4)(hexDigit),
        or(exactly(6)(hexDigit)),
        or(exactly(8)(hexDigit))
      )
    )
  )
);

// src/patterns/hexadecimal.ts
// Optional "0x" / "0X" / "0h" / "0H" prefix, then one or more hex digits.
var hexadecimal = pipe(
  exactString("0x"),
  or(exactString("0X")),
  or(exactString("0h")),
  or(exactString("0H")),
  subgroup,
  maybe,
  andThen(atLeastOne()(xdigit))
);

// src/patterns/hsl-color.ts
var anyDecimal = subgroup(
  sequence(char("."), atLeastOne({ greedy: true })(digit))
);
var zeroDecimal = subgroup(
  sequence(char("."), atLeastOne({ greedy: true })(char("0")))
);
var exponential = subgroup(
  sequence(
    char("e"),
    maybe(subgroup(oneOf(char("+"), char("-")))),
    atLeastOne({ greedy: true })(digit)
  )
);
var hue = subgroup(
  sequence(
    maybe(subgroup(oneOf(char("+"), char("-")))),
    subgroup(
      oneOf(
        pipe(atLeastOne({ greedy: true })(digit), andThen(maybe(anyDecimal))),
        anyDecimal
      )
    ),
    maybe(exponential),
    maybe(
      subgroup(
        oneOf(
          exactString("deg"),
          exactString("grad"),
          exactString("rad"),
          exactString("turn")
        )
      )
    )
  )
);
var percentage = subgroup(
  sequence(
    maybe(char("+")),
    anyNumber({ greedy: true })(char("0")),
    subgroup(
      oneOf(
        pipe(exactString("100"), andThen(maybe(zeroDecimal))),
        pipe(subgroup(integerRange(0, 99)), andThen(maybe(anyDecimal))),
        anyDecimal
      )
    ),
    maybe(exponential),
    char("%")
  )
);
var alpha2 = subgroup(
  sequence(
    anyNumber({ greedy: true })(digit),
    subgroup(oneOf(digit, anyDecimal)),
    maybe(exponential),
    maybe(char("%"))
  )
);
var anySpace = anyNumber({ greedy: true })(blank);
var commaDelimiter = subgroup(sequence(anySpace, char(","), anySpace));
var slashDelimiter = subgroup(sequence(anySpace, char("/"), anySpace));
// "hsl(" or "hsla(", a hue, then either the comma-separated form or the
// space-separated form (with "/" before the optional alpha).
var hslColor = sequence(
  exactString("hsl"),
  maybe(char("a")),
  char("("),
  anySpace,
  hue,
  subgroup(
    oneOf(
      sequence(
        commaDelimiter,
        percentage,
        commaDelimiter,
        percentage,
        maybe(subgroup(sequence(commaDelimiter, alpha2)))
      ),
      sequence(
        anySpace,
        percentage,
        anySpace,
        percentage,
        maybe(subgroup(sequence(slashDelimiter, alpha2)))
      )
    )
  ),
  anySpace,
  char(")")
);

// src/patterns/jwt.ts
// Two dot-separated base64url segments plus an optional third.
var jwt = sequence(
  subgroup(base64Url),
  char("."),
  subgroup(base64Url),
  maybe(subgroup(sequence(char("."), subgroup(base64Url))))
);

// src/patterns/lat-long.ts
// Optional sign, then "90" (optionally ".0...") or 0-89 with optional decimals.
var latPattern = pipe(
  maybe(characterClass(false, "+", "-")),
  andThen(
    subgroup(
      oneOf(
        sequence(
          char("9"),
          char("0"),
          maybe(
            subgroup(
              pipe(char("."), andThen(atLeastOne({ greedy: true })(char("0"))))
            )
          )
        ),
        pipe(
          integerRange(0, 89),
          subgroup,
          andThen(
            maybe(
              subgroup(
                pipe(char("."), andThen(atLeastOne({ greedy: true })(digit)))
              )
            )
          )
        )
      )
    )
  )
);
// Optional sign, then "180" (optionally ".0...") or 0-179 with optional decimals.
var longPattern = pipe(
  maybe(characterClass(false, "+", "-")),
  andThen(
    subgroup(
      oneOf(
        sequence(
          char("1"),
          char("8"),
          char("0"),
          maybe(
            subgroup(
              pipe(char("."), andThen(atLeastOne({ greedy: true })(char("0"))))
            )
          )
        ),
        pipe(
          integerRange(0, 179),
          subgroup,
          andThen(
            maybe(
              subgroup(
                pipe(char("."), andThen(atLeastOne({ greedy: true })(digit)))
              )
            )
          )
        )
      )
    )
  )
);
// "lat,long" with optional whitespace after the comma, bare or parenthesized.
var latLong = oneOf(
  pipe(
    latPattern,
    andThen(char(",")),
    andThen(anyNumber({ greedy: true })(space)),
    andThen(longPattern)
  ),
  pipe(
    char("("),
    andThen(latPattern),
    andThen(char(",")),
    andThen(anyNumber({ greedy: true })(space)),
    andThen(longPattern),
    andThen(char(")"))
  )
);

// src/patterns/rgb-color.ts
var rgbColorDecimal = sequence(
  exactString("rgb("),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(")")
);
var rgbColorWithAlphaDecimal = sequence(
  exactString("rgba("),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(
    oneOf(
      char("0"),
      char("1"),
      exactString("1.0"),
      sequence(
        maybe(char("0")),
        char("."),
        atLeastOne({ greedy: true })(digit)
      )
    )
  ),
  char(")")
);
var rgbColorPercent = sequence(
  exactString("rgb("),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%)")
);
var rgbColorWithAlphaPercent = sequence(
  exactString("rgba("),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(
    oneOf(
      char("0"),
      char("1"),
      exactString("1.0"),
      sequence(
        maybe(char("0")),
        char("."),
        atLeastOne({ greedy: true })(digit)
      )
    )
  ),
  char(")")
);
var rgbColor = oneOf(
  rgbColorDecimal,
  rgbColorWithAlphaDecimal,
  rgbColorPercent,
  rgbColorWithAlphaPercent
);

// src/patterns/uuid.ts
var nHexDigits = (n) => exactly(n)(hexDigit);
// The first character of the third group is the literal version digit.
var uuidV1 = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  char("1"),
  nHexDigits(3),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(12)
);
var uuidV2 = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  char("2"),
  nHexDigits(3),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(12)
);
var uuidV3 = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  char("3"),
  nHexDigits(3),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(12)
);
// V4 and V5 additionally require the fourth group to start with 8, 9, A or B
// (either case).
var uuidV4 = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  char("4"),
  nHexDigits(3),
  char("-"),
  characterClass(false, "A", "a", "B", "b", "8", "9"),
  nHexDigits(3),
  char("-"),
  nHexDigits(12)
);
var uuidV5 = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  char("5"),
  nHexDigits(3),
  char("-"),
  characterClass(false, "A", "a", "B", "b", "8", "9"),
  nHexDigits(3),
  char("-"),
  nHexDigits(12)
);
var anyUUID = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(12)
);
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  alnum,
  alpha,
  and,
  andThen,
  anyNumber,
  anything,
  atLeast,
  atLeastOne,
  atMost,
  between,
  blank,
  char,
  characterClass,
  digit,
  empty,
  exactString,
  exactly,
  graph,
  hexDigit,
  integerRange,
  lower,
  maybe,
  non,
  oneOf,
  or,
  patterns,
  pipe,
  print,
  punct,
  regexFromPattern,
  sequence,
  space,
  subgroup,
  times,
  upper,
  word,
  xdigit
});