@leeoniya/ufuzzy
Version:
A tiny, efficient fuzzy matcher that doesn't suck
211 lines (161 loc) • 7.14 kB
TypeScript
declare class uFuzzy {
constructor(opts?: uFuzzy.Options);
/** search API composed of filter/info/sort, with a info/ranking threshold (1e3) and fast outOfOrder impl */
search(
haystack: string[],
needle: string,
/** limit how many terms will be permuted, default = 0; 5 will result in up to 5! (120) search iterations. be careful with this! */
outOfOrder?: number,
/** default = 1e3 */
infoThresh?: number,
preFiltered?: uFuzzy.HaystackIdxs | null
): uFuzzy.SearchResult;
/** initial haystack filter, can accept idxs from previous prefix/typeahead match as optimization */
filter(
haystack: string[],
needle: string,
idxs?: uFuzzy.HaystackIdxs
): uFuzzy.HaystackIdxs | null;
/** collects stats about pre-filtered matches, does additional filtering based on term boundary settings, finds highlight ranges */
info(
idxs: uFuzzy.HaystackIdxs,
haystack: string[],
needle: string
): uFuzzy.Info;
/** performs final result sorting via Array.sort(), relying on Info */
sort(
info: uFuzzy.Info,
haystack: string[],
needle: string
): uFuzzy.InfoIdxOrder;
/** utility for splitting needle into terms following defined interSplit/intraSplit opts. useful for out-of-order permutes */
split(needle: string, keepCase?: boolean): uFuzzy.Terms;
/** util for creating out-of-order permutations of a needle terms array */
static permute(arr: unknown[]): unknown[][];
/** util for replacing common diacritics/accents */
static latinize<T extends string[] | string>(strings: T): T;
/** util for highlighting matched substr parts of a result */
static highlight<TAccum = string, TMarkedPart = string>(
match: string,
ranges: number[],
mark?: (part: string, matched: boolean) => TMarkedPart,
accum?: TAccum,
append?: (accum: TAccum, part: TMarkedPart) => TAccum | undefined
): TAccum;
}
export default uFuzzy;
declare namespace uFuzzy {
/** needle's terms */
export type Terms = string[];
/** subset of idxs of a haystack array */
export type HaystackIdxs = number[];
/** sorted order in which info facets should be iterated */
export type InfoIdxOrder = number[];
export type AbortedResult = [null, null, null];
export type FilteredResult = [uFuzzy.HaystackIdxs, null, null];
export type RankedResult = [
uFuzzy.HaystackIdxs,
uFuzzy.Info,
uFuzzy.InfoIdxOrder
];
export type SearchResult = FilteredResult | RankedResult | AbortedResult;
/** partial RegExp */
type PartialRegExp = string;
/** what should be considered acceptable term bounds */
export const enum BoundMode {
/** will match 'man' substr anywhere. e.g. tasmania */
Any = 0,
/** will match 'man' at whitespace, punct, case-change, and alpha-num boundaries. e.g. mantis, SuperMan, fooManBar, 0007man */
Loose = 1,
/** will match 'man' at whitespace, punct boundaries only. e.g. mega man, walk_man, man-made, foo.man.bar */
Strict = 2,
}
export const enum IntraMode {
/** allows any number of extra char insertions within a term, but all term chars must be present for a match */
MultiInsert = 0,
/** allows for a single-char substitution, transposition, insertion, or deletion within terms (excluding first and last chars) */
SingleError = 1,
}
export type IntraSliceIdxs = [from: number, to: number];
type CompareFn = (a: string, b: string) => number;
export interface Options {
// whether regexps use a /u unicode flag
unicode?: boolean; // false
/** @deprecated renamed to opts.alpha */
letters?: PartialRegExp | null; // a-z
// regexp character class [] of chars which should be treated as letters (case insensitive)
alpha?: PartialRegExp | null; // a-z
/** term segmentation & punct/whitespace merging */
interSplit?: PartialRegExp; // '[^A-Za-z\\d']+'
intraSplit?: PartialRegExp | null; // '[a-z][A-Z]'
/** inter bounds that will be used to increase lft2/rgt2 info counters */
interBound?: PartialRegExp | null; // '[^A-Za-z\\d]'
/** intra bounds that will be used to increase lft1/rgt1 info counters */
intraBound?: PartialRegExp | null; // '[A-Za-z][0-9]|[0-9][A-Za-z]|[a-z][A-Z]'
/** inter-term modes, during .info() can discard matches when bounds conditions are not met */
interLft?: BoundMode; // 0
interRgt?: BoundMode; // 0
/** allowance between terms */
interChars?: PartialRegExp; // '.'
interIns?: number; // Infinity
/** allowance between chars within terms */
intraChars?: PartialRegExp; // '[a-z\\d]'
intraIns?: number; // 0
/** contractions detection */
intraContr?: PartialRegExp; // "'[a-z]{1,2}\\b"
/** error tolerance mode within terms. will clamp intraIns to 1 when set to SingleError */
intraMode?: IntraMode; // 0
/** which part of each term should tolerate errors (when intraMode: 1) */
intraSlice?: IntraSliceIdxs; // [1, Infinity]
/** max substitutions (when intraMode: 1) */
intraSub?: 0 | 1; // 0
/** max transpositions (when intraMode: 1) */
intraTrn?: 0 | 1; // 0
/** max omissions/deletions (when intraMode: 1) */
intraDel?: 0 | 1; // 0
/** can dynamically adjust error tolerance rules per term in needle (when intraMode: 1) */
intraRules?: (term: string) => {
intraSlice?: IntraSliceIdxs;
intraIns: 0 | 1;
intraSub: 0 | 1;
intraTrn: 0 | 1;
intraDel: 0 | 1;
};
/** post-filters matches during .info() based on cmp of term in needle vs partial match */
intraFilt?: (term: string, match: string, index: number) => boolean; // should this also accept WIP info?
/** default: toLocaleUpperCase() */
toUpper?: (str: string) => string;
/** default: toLocaleLowerCase() */
toLower?: (str: string) => string;
/** final sorting cmp when all other match metrics are equal */
compare?: CompareFn;
sort?: (info: Info, haystack: string[], needle: string, compare?: CompareFn) => InfoIdxOrder;
}
export interface Info {
/** matched idxs from haystack */
idx: HaystackIdxs;
/** match offsets */
start: number[];
/** number of left BoundMode.Strict term boundaries found */
interLft2: number[];
/** number of right BoundMode.Strict term boundaries found */
interRgt2: number[];
/** number of left BoundMode.Loose term boundaries found */
interLft1: number[];
/** number of right BoundMode.Loose term boundaries found */
interRgt1: number[];
/** total number of extra chars matched within all terms. higher = matched terms have more fuzz in them */
intraIns: number[];
/** total number of chars found in between matched terms. higher = terms are more sparse, have more fuzz in between them */
interIns: number[];
/** total number of matched contiguous chars (substrs but not necessarily full terms) */
chars: number[];
/** number of exactly-matched terms (intra = 0) where both lft and rgt landed on a BoundMode.Loose or BoundMode.Strict boundary */
terms: number[];
/** number of needle terms with case-sensitive partial matches */
cases: number[];
/** offset ranges within match for highlighting: [startIdx0, endIdx0, startIdx1, endIdx1,...] */
ranges: number[][];
}
}
export as namespace uFuzzy;