UNPKG

@polgubau/utils

Version:

A collection of utility functions for TypeScript

1 lines 7.63 kB
{"version":3,"sources":["../../../../src/functions/fuzzy-finder/fuzzyFinder.ts"],"sourcesContent":["// The scores are arranged so that a continuous match of characters will\n// result in a total score of 1.\n//\n// The best case, this character is a match, and either this is the start\n// of the string, or the previous character was also a match.\nconst MATCH_CONTINUOUS = 1;\n// A new match at the start of a word scores better than a new match\n// elsewhere as it's more likely that the user will type the starts\n// of fragments.\n// NOTE: We score word jumps between spaces slightly higher than slashes, brackets\n// hyphens, etc.\nconst MATCH_NEW_WORD_SPACE = 0.9;\nconst MATCH_NEW_WORD_NON_SPACE = 0.8;\n// Any other match isn't ideal, but we include it for completeness.\nconst MATCH_CHARACTER_JUMP = 0.17;\n// If the user transposed two letters, it should be significantly penalized.\n//\n// i.e. \"ouch\" is more likely than \"curtain\" when \"uc\" is typed.\nconst PENALTY_TRANSPOSITION = 0.1;\n// The goodness of a match should decay slightly with each missing\n// character.\n//\n// i.e. \"bad\" is more likely than \"bard\" when \"bd\" is typed.\n//\n// This will not change the order of suggestions based on SCORE_* until\n// 100 characters are inserted between matches.\nconst PENALTY_SKIPPED_CHAR = 0.999;\n// The goodness of an exact-case match should be higher than a\n// case-insensitive match by a small amount.\n//\n// i.e. \"HTML\" is more likely than \"haml\" when \"HM\" is typed.\n//\n// This will not change the order of suggestions based on SCORE_* until\n// 1000 characters are inserted between matches.\nconst PENALTY_CASE_DIFFERENCE = 0.9999;\n// Match higher for letters closer to the beginning of the word\n// If the word has more characters than the user typed, it should\n// be penalised slightly.\n//\n// i.e. \"html\" is more likely than \"html5\" if I type \"html\".\n//\n// However, it may well be the case that there's a sensible secondary\n// ordering (like alphabetical) that it makes sense to rely on when\n// there are many prefix matches, so we don't make the penalty increase\n// with the number of tokens.\nconst PENALTY_INCOMPLETE_MATCH = 0.99;\n\nconst IS_GAP_REGEXP = /[\\\\\\/_+.#\"@\\[\\(\\{&]/;\nconst IS_SPACE_REGEXP = /[\\s-]/;\nconst COUNT_SPACE_REGEXP = /[\\s-]/g;\n\nexport function fuzzyFinderInner(\n string: string,\n abbreviation: string,\n lowerString: string,\n lowerAbbreviation: string,\n stringIndex: number,\n abbreviationIndex: number,\n memoizedResults: Record<string, number>,\n) {\n if (abbreviationIndex === abbreviation.length) {\n if (stringIndex === string.length) {\n return MATCH_CONTINUOUS;\n }\n return PENALTY_INCOMPLETE_MATCH;\n }\n\n const memoizeKey = `${stringIndex},${abbreviationIndex}`;\n if (memoizedResults[memoizeKey] !== undefined) {\n return memoizedResults[memoizeKey];\n }\n\n const abbreviationChar = lowerAbbreviation.charAt(abbreviationIndex);\n let index = lowerString.indexOf(abbreviationChar, stringIndex);\n let highScore = 0;\n\n let score: number;\n let transposedScore: number;\n let spaceBreaks: RegExpMatchArray | null;\n\n while (index >= 0) {\n score = fuzzyFinderInner(\n string,\n abbreviation,\n lowerString,\n lowerAbbreviation,\n index + 1,\n abbreviationIndex + 1,\n memoizedResults,\n );\n if (score > highScore) {\n if (index === stringIndex) {\n score *= MATCH_CONTINUOUS;\n } else if (IS_GAP_REGEXP.test(string.charAt(index - 1))) {\n score *= MATCH_NEW_WORD_NON_SPACE;\n } else if (IS_SPACE_REGEXP.test(string.charAt(index - 1))) {\n score *= MATCH_NEW_WORD_SPACE;\n spaceBreaks = string.slice(stringIndex, index - 1).match(COUNT_SPACE_REGEXP);\n\n if (spaceBreaks && stringIndex > 0) {\n score *= PENALTY_SKIPPED_CHAR ** spaceBreaks.length;\n }\n } else {\n score *= MATCH_CHARACTER_JUMP;\n if (stringIndex > 0) {\n score *= PENALTY_SKIPPED_CHAR ** (index - stringIndex);\n }\n }\n\n if (string.charAt(index) !== abbreviation.charAt(abbreviationIndex)) {\n score *= PENALTY_CASE_DIFFERENCE;\n }\n }\n\n if (\n (score < PENALTY_TRANSPOSITION &&\n lowerString.charAt(index - 1) === lowerAbbreviation.charAt(abbreviationIndex + 1)) ||\n (lowerAbbreviation.charAt(abbreviationIndex + 1) === lowerAbbreviation.charAt(abbreviationIndex) && // allow duplicate letters. Ref #7428\n lowerString.charAt(index - 1) !== lowerAbbreviation.charAt(abbreviationIndex))\n ) {\n transposedScore = fuzzyFinderInner(\n string,\n abbreviation,\n lowerString,\n lowerAbbreviation,\n index + 1,\n abbreviationIndex + 2,\n memoizedResults,\n );\n\n if (transposedScore * PENALTY_TRANSPOSITION > score) {\n score = transposedScore * PENALTY_TRANSPOSITION;\n }\n }\n\n if (score > highScore) {\n highScore = score;\n }\n\n index = lowerString.indexOf(abbreviationChar, index + 1);\n }\n\n memoizedResults[memoizeKey] = highScore;\n return highScore;\n}\n\nfunction formatInput(string: string) {\n return string.toLowerCase().replace(COUNT_SPACE_REGEXP, \" \"); // convert all valid space characters to space so they match each other\n}\n\nexport function fuzzyFinder(string: string, abbreviation: string, aliases: string[]): number {\n const s = aliases && aliases.length > 0 ? `${`${string} ${aliases.join(\" \")}`}` : string;\n return fuzzyFinderInner(s, abbreviation, formatInput(string), formatInput(abbreviation), 0, 0, {});\n}\n"],"mappings":";AAKA,IAAM,mBAAmB;AAMzB,IAAM,uBAAuB;AAC7B,IAAM,2BAA2B;AAEjC,IAAM,uBAAuB;AAI7B,IAAM,wBAAwB;AAQ9B,IAAM,uBAAuB;AAQ7B,IAAM,0BAA0B;AAWhC,IAAM,2BAA2B;AAEjC,IAAM,gBAAgB;AACtB,IAAM,kBAAkB;AACxB,IAAM,qBAAqB;AAEpB,SAAS,iBACd,QACA,cACA,aACA,mBACA,aACA,mBACA,iBACA;AACA,MAAI,sBAAsB,aAAa,QAAQ;AAC7C,QAAI,gBAAgB,OAAO,QAAQ;AACjC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,GAAG,WAAW,IAAI,iBAAiB;AACtD,MAAI,gBAAgB,UAAU,MAAM,QAAW;AAC7C,WAAO,gBAAgB,UAAU;AAAA,EACnC;AAEA,QAAM,mBAAmB,kBAAkB,OAAO,iBAAiB;AACnE,MAAI,QAAQ,YAAY,QAAQ,kBAAkB,WAAW;AAC7D,MAAI,YAAY;AAEhB,MAAI;AACJ,MAAI;AACJ,MAAI;AAEJ,SAAO,SAAS,GAAG;AACjB,YAAQ;AAAA,MACN;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ;AAAA,MACR,oBAAoB;AAAA,MACpB;AAAA,IACF;AACA,QAAI,QAAQ,WAAW;AACrB,UAAI,UAAU,aAAa;AACzB,iBAAS;AAAA,MACX,WAAW,cAAc,KAAK,OAAO,OAAO,QAAQ,CAAC,CAAC,GAAG;AACvD,iBAAS;AAAA,MACX,WAAW,gBAAgB,KAAK,OAAO,OAAO,QAAQ,CAAC,CAAC,GAAG;AACzD,iBAAS;AACT,sBAAc,OAAO,MAAM,aAAa,QAAQ,CAAC,EAAE,MAAM,kBAAkB;AAE3E,YAAI,eAAe,cAAc,GAAG;AAClC,mBAAS,wBAAwB,YAAY;AAAA,QAC/C;AAAA,MACF,OAAO;AACL,iBAAS;AACT,YAAI,cAAc,GAAG;AACnB,mBAAS,yBAAyB,QAAQ;AAAA,QAC5C;AAAA,MACF;AAEA,UAAI,OAAO,OAAO,KAAK,MAAM,aAAa,OAAO,iBAAiB,GAAG;AACnE,iBAAS;AAAA,MACX;AAAA,IACF;AAEA,QACG,QAAQ,yBACP,YAAY,OAAO,QAAQ,CAAC,MAAM,kBAAkB,OAAO,oBAAoB,CAAC,KACjF,kBAAkB,OAAO,oBAAoB,CAAC,MAAM,kBAAkB,OAAO,iBAAiB;AAAA,IAC7F,YAAY,OAAO,QAAQ,CAAC,MAAM,kBAAkB,OAAO,iBAAiB,GAC9E;AACA,wBAAkB;AAAA,QAChB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,QAAQ;AAAA,QACR,oBAAoB;AAAA,QACpB;AAAA,MACF;AAEA,UAAI,kBAAkB,wBAAwB,OAAO;AACnD,gBAAQ,kBAAkB;AAAA,MAC5B;AAAA,IACF;AAEA,QAAI,QAAQ,WAAW;AACrB,kBAAY;AAAA,IACd;AAEA,YAAQ,YAAY,QAAQ,kBAAkB,QAAQ,CAAC;AAAA,EACzD;AAEA,kBAAgB,UAAU,IAAI;AAC9B,SAAO;AACT;AAEA,SAAS,YAAY,QAAgB;AACnC,SAAO,OAAO,YAAY,EAAE,QAAQ,oBAAoB,GAAG;AAC7D;AAEO,SAAS,YAAY,QAAgB,cAAsB,SAA2B;AAC3F,QAAM,IAAI,WAAW,QAAQ,SAAS,IAAI,GAAG,GAAG,MAAM,IAAI,QAAQ,KAAK,GAAG,CAAC,EAAE,KAAK;AAClF,SAAO,iBAAiB,GAAG,cAAc,YAAY,MAAM,GAAG,YAAY,YAAY,GAAG,GAAG,GAAG,CAAC,CAAC;AACnG;","names":[]}