UNPKG

cspell-lib

Version:

A library of useful functions used across various cspell tools.

130 lines 5.05 kB
// cspell:ignore ings ning gimuy anrvtbf gimuxy

/** An upper-case run followed by a common English suffix (e.g. `s`, `ing`, `ed`) that is not followed by a lower-case letter. */
export const regExUpperSOrIng = /([\p{Lu}\p{M}]+(?:\\?['’])?(?:s|ing|ies|es|ings|ed|ning))(?!\p{Ll})/gu;
/** A lower-case letter directly followed by an upper-case letter. */
export const regExSplitWords = /(\p{Ll}\p{M}?)(\p{Lu})/gu;
/** An upper-case letter followed by an upper-case letter and then a lower-case letter. */
export const regExSplitWords2 = /(\p{Lu}\p{M}?)((\p{Lu}\p{M}?)\p{Ll})/gu;
/** Zero-width camelCase break positions, skipping breaks that would split an English suffix off an upper-case run. */
export const regExpCamelCaseWordBreaksWithEnglishSuffix = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})(?!\p{Lu}\p{M}?(?:s|ing|ies|es|ings|ed|ning)(?!\p{Ll}))/gu;
/** Zero-width camelCase break positions: lower|Upper and Upper|UpperLower. */
export const regExpCamelCaseWordBreaks = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})/gu;
/** Zero-width positions covering the camelCase breaks plus every letter/non-letter boundary. */
export const regExpAllPossibleWordBreaks = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})|(?<=\p{Lu}\p{M}?\p{Lu}\p{M}?)(?=\p{Ll})|(?<=\p{L}\p{M}?)(?=\P{L})|(?<=\P{L})(?=\p{L})/gu;
/** Runs of letters (each optionally followed by a mark), optionally joined by a single quote. */
export const regExWords = /\p{L}\p{M}?(?:(?:\\?['’])?\p{L}\p{M}?)*/gu;
// Words can be made of letters, numbers, period, underscore, dash, plus, and single quote
export const regExWordsAndDigits = /[\p{L}\w'’`.+-](?:(?:\\(?=[']))?[\p{L}\p{M}\w'’`.+-])*/gu;
/** Characters in the Hiragana, Han, Katakana (and U+30A0-U+30FF), and Hangul scripts. */
export const regExIgnoreCharacters = /[\p{sc=Hiragana}\p{sc=Han}\p{sc=Katakana}\u30A0-\u30FF\p{sc=Hangul}]/gu;
/** A single upper-case letter followed only by lower-case letters. */
export const regExFirstUpper = /^\p{Lu}\p{M}?\p{Ll}+$/u;
/** A string made entirely of upper-case letters (with optional marks). */
export const regExAllUpper = /^(?:\p{Lu}\p{M}?)+$/u;
/** A string made entirely of lower-case letters (with optional marks). */
export const regExAllLower = /^(?:\p{Ll}\p{M}?)+$/u;
/** Single characters at which a word may be broken: dash, plus, underscore, quotes, backtick, period, whitespace. */
export const regExPossibleWordBreaks = /[-+_’'`.\s]/g;
/** Splits a `/pattern/flags` string into its pattern (group 1) and flags (group 2). */
export const regExMatchRegExParts = /^\s*\/([\s\S]*?)\/([gimuxy]*)\s*$/;
/** Any combining mark. */
export const regExAccents = /\p{M}/gu;
/** One of the letters `a n r v t b f` (case-insensitive) that directly follows a backslash. */
export const regExEscapeCharacters = /(?<=\\)[anrvtbf]/gi;
/** Matches against leading `'` or `{single letter}'` */
export const regExDanglingQuote = /(?<=(?:^|(?!\p{M})\P{L})(?:\p{L}\p{M}?)?)[']/gu;
/** Match trailing endings after CAPS words */
export const regExTrailingEndings = /(?<=(?:\p{Lu}\p{M}?){2})['’]?(?:s|d|ings?|ies|e[ds]?|ning|th|nth)(?!\p{Ll})/gu;
/** An optionally signed decimal number with optional fraction and exponent. */
export const regExNumericLiteral = /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$/;

/**
 * Convert a pattern into a `RegExp`.
 *
 * - A `RegExp` instance is returned unchanged.
 * - A string of the form `/pattern/flags` is split into pattern and flags. The
 *   non-standard `x` flag requests verbose mode: whitespace and `#` comments are
 *   stripped from the pattern before it is compiled.
 * - Any other string is trimmed and compiled with `defaultFlags`.
 *
 * `forceFlags` are always added to the resulting flags. Flags other than
 * `g`, `i`, `m`, `u`, `y` are dropped before compiling.
 *
 * @param pattern - a RegExp or a string to convert
 * @param defaultFlags - flags used when the string is not in `/pattern/flags` form
 * @param forceFlags - flags that are always applied
 * @returns the RegExp, or `undefined` if the pattern is empty or cannot be compiled
 */
export function stringToRegExp(pattern, defaultFlags = 'gimu', forceFlags = 'g') {
    if (pattern instanceof RegExp) {
        return pattern;
    }
    try {
        const parts = pattern.match(regExMatchRegExParts);
        const [, pat, flag] = parts || ['', pattern.trim(), defaultFlags];
        if (!pat) {
            return undefined;
        }
        const regPattern = flag.includes('x') ? removeVerboseFromRegExp(pat) : pat;
        // Make sure the flags are unique and drop anything RegExp would not accept (e.g. `x`).
        const flags = [...new Set(forceFlags + flag)].join('').replaceAll(/[^gimuy]/g, '');
        return new RegExp(regPattern, flags);
    } catch {
        // Best effort: a non-string pattern or an invalid expression yields `undefined`.
        return undefined;
    }
}

/** Characters treated as insignificant whitespace in a verbose pattern. */
const SPACES = new Set([' ', '\n', '\r', '\t']);

/**
 * Remove all whitespace and comments from a regexp string. The format follows Python's Verbose.
 * Note: this is a best attempt. Special cases for comments: `#` and spaces should be preceded by a `\`
 *
 * All significant spaces must be preceded by a `\` or be inside a character class `[]`
 *
 * @param pattern - the pattern to clean
 * @returns the pattern with unescaped whitespace and `#` comments removed
 */
function removeVerboseFromRegExp(pattern) {
    // Each reducer inspects `pattern[acc.idx]`. If it handles the character it
    // advances `acc.idx`, appends to `acc.result` as needed, and returns `acc`;
    // otherwise it returns `undefined` so the next reducer gets a turn.

    /** Handle `\`-escapes: `\#` becomes `#`, escaped whitespace is kept literally, anything else is copied as is. */
    function escape(acc) {
        if (pattern[acc.idx] !== '\\') return undefined;
        const next = pattern[acc.idx + 1];
        if (next === undefined) {
            // A lone trailing backslash: keep it as is so that the RegExp
            // constructor rejects the pattern, instead of emitting `\undefined`.
            acc.result += '\\';
            acc.idx = pattern.length;
            return acc;
        }
        acc.idx += 2;
        if (next === '#') {
            acc.result += '#';
            return acc;
        }
        if (!SPACES.has(next)) {
            acc.result += '\\' + next;
            return acc;
        }
        acc.result += next;
        // Treat an escaped `\r\n` pair as a single escaped line break.
        if (next === '\r' && pattern[acc.idx] === '\n') {
            acc.result += '\n';
            acc.idx++;
        }
        return acc;
    }

    /** Copy a character class `[...]` verbatim, up to the first `]` not preceded by an odd number of backslashes. */
    function braces(acc) {
        if (pattern[acc.idx] !== '[') return undefined;
        acc.result += '[';
        acc.idx++;
        let escCount = 0;
        while (acc.idx < pattern.length) {
            const char = pattern[acc.idx];
            acc.result += char;
            acc.idx++;
            if (char === ']' && !(escCount & 1)) break;
            escCount = char === '\\' ? escCount + 1 : 0;
        }
        return acc;
    }

    /** Drop an unescaped whitespace character. */
    function spaces(acc) {
        if (!SPACES.has(pattern[acc.idx])) return undefined;
        acc.idx++;
        return acc;
    }

    /** Drop a `#` comment up to, but not including, the end of the line. */
    function comments(acc) {
        if (pattern[acc.idx] !== '#') return undefined;
        while (acc.idx < pattern.length && pattern[acc.idx] !== '\n') {
            acc.idx++;
        }
        return acc;
    }

    /** Fallback: copy the current character unchanged. */
    function copy(acc) {
        acc.result += pattern[acc.idx++];
        return acc;
    }

    // Order matters: escapes and character classes must be recognized before
    // whitespace and comments are stripped; `copy` always succeeds.
    const reducers = [escape, braces, spaces, comments, copy];
    const state = { idx: 0, result: '' };
    while (state.idx < pattern.length) {
        for (const reduce of reducers) {
            if (reduce(state)) break;
        }
    }
    return state.result;
}
//# sourceMappingURL=textRegex.js.map