bits-ui
Version:
The headless components for Svelte.
161 lines (160 loc) • 7.81 kB
JavaScript
/* eslint-disable @typescript-eslint/ban-ts-comment */
// @ts-nocheck
// The scores are arranged so that a continuous match of characters will
// result in a total score of 1.
//
// The best case, this character is a match, and either this is the start
// of the string, or the previous character was also a match.
const SCORE_CONTINUE_MATCH = 1;
// A new match at the start of a word scores better than a new match
// elsewhere as it's more likely that the user will type the starts
// of fragments.
// NOTE: We score word jumps between spaces slightly higher than slashes, brackets
// hyphens, etc.
const SCORE_SPACE_WORD_JUMP = 0.9;
const SCORE_NON_SPACE_WORD_JUMP = 0.8;
// Any other match isn't ideal, but we include it for completeness.
const SCORE_CHARACTER_JUMP = 0.17;
// If the user transposed two letters, it should be significantly penalized.
//
// i.e. "ouch" is more likely than "curtain" when "uc" is typed.
const SCORE_TRANSPOSITION = 0.1;
// The goodness of a match should decay slightly with each missing
// character.
//
// i.e. "bad" is more likely than "bard" when "bd" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 100 characters are inserted between matches.
const PENALTY_SKIPPED = 0.999;
// The goodness of an exact-case match should be higher than a
// case-insensitive match by a small amount.
//
// i.e. "HTML" is more likely than "haml" when "HM" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 1000 characters are inserted between matches.
const PENALTY_CASE_MISMATCH = 0.9999;
// Match higher for letters closer to the beginning of the word
// If the word has more characters than the user typed, it should
// be penalized slightly.
//
// i.e. "html" is more likely than "html5" if I type "html".
//
// However, it may well be the case that there's a sensible secondary
// ordering (like alphabetical) that it makes sense to rely on when
// there are many prefix matches, so we don't make the penalty increase
// with the number of tokens.
const PENALTY_NOT_COMPLETE = 0.99;
const IS_GAP_REGEXP = /[\\/_+.#"@[({&]/;
const COUNT_GAPS_REGEXP = /[\\/_+.#"@[({&]/g;
const IS_SPACE_REGEXP = /[\s-]/;
const COUNT_SPACE_REGEXP = /[\s-]/g;
function computeCommandScoreInner(string, abbreviation, lowerString, lowerAbbreviation, stringIndex, abbreviationIndex, memoizedResults) {
if (abbreviationIndex === abbreviation.length) {
if (stringIndex === string.length)
return SCORE_CONTINUE_MATCH;
return PENALTY_NOT_COMPLETE;
}
const memoizeKey = `${stringIndex},${abbreviationIndex}`;
if (memoizedResults[memoizeKey] !== undefined)
return memoizedResults[memoizeKey];
const abbreviationChar = lowerAbbreviation.charAt(abbreviationIndex);
let index = lowerString.indexOf(abbreviationChar, stringIndex);
let highScore = 0;
let score, transposedScore, wordBreaks, spaceBreaks;
while (index >= 0) {
score = computeCommandScoreInner(string, abbreviation, lowerString, lowerAbbreviation, index + 1, abbreviationIndex + 1, memoizedResults);
if (score > highScore) {
if (index === stringIndex) {
score *= SCORE_CONTINUE_MATCH;
}
else if (IS_GAP_REGEXP.test(string.charAt(index - 1))) {
score *= SCORE_NON_SPACE_WORD_JUMP;
wordBreaks = string.slice(stringIndex, index - 1).match(COUNT_GAPS_REGEXP);
if (wordBreaks && stringIndex > 0) {
score *= PENALTY_SKIPPED ** wordBreaks.length;
}
}
else if (IS_SPACE_REGEXP.test(string.charAt(index - 1))) {
score *= SCORE_SPACE_WORD_JUMP;
spaceBreaks = string.slice(stringIndex, index - 1).match(COUNT_SPACE_REGEXP);
if (spaceBreaks && stringIndex > 0) {
score *= PENALTY_SKIPPED ** spaceBreaks.length;
}
}
else {
score *= SCORE_CHARACTER_JUMP;
if (stringIndex > 0) {
score *= PENALTY_SKIPPED ** (index - stringIndex);
}
}
if (string.charAt(index) !== abbreviation.charAt(abbreviationIndex)) {
score *= PENALTY_CASE_MISMATCH;
}
}
if ((score < SCORE_TRANSPOSITION &&
lowerString.charAt(index - 1) ===
lowerAbbreviation.charAt(abbreviationIndex + 1)) ||
(lowerAbbreviation.charAt(abbreviationIndex + 1) ===
lowerAbbreviation.charAt(abbreviationIndex) &&
lowerString.charAt(index - 1) !== lowerAbbreviation.charAt(abbreviationIndex))) {
transposedScore = computeCommandScoreInner(string, abbreviation, lowerString, lowerAbbreviation, index + 1, abbreviationIndex + 2, memoizedResults);
if (transposedScore * SCORE_TRANSPOSITION > score) {
score = transposedScore * SCORE_TRANSPOSITION;
}
}
if (score > highScore) {
highScore = score;
}
index = lowerString.indexOf(abbreviationChar, index + 1);
}
memoizedResults[memoizeKey] = highScore;
return highScore;
}
/**
*
* @param string
* @returns
*/
function formatInput(string) {
// convert all valid space characters to space so they match each other
return string.toLowerCase().replace(COUNT_SPACE_REGEXP, " ");
}
/**
* Given a command, a search query, and (optionally) a list of keywords for the command,
* computes a score between 0 and 1 that represents how well the search query matches the
* abbreviation and keywords. 1 is a perfect match, 0 is no match.
*
* The score is calculated based on the following rules:
* - The scores are arranged so that a continuous match of characters will result in a total
* score of 1. The best case, this character is a match, and either this is the start of the string
* or the previous character was also a match.
* - A new match at the start of a word scores better than a new match elsewhere as it's more likely
* that the user will type the starts of fragments.
* - Word jumps between spaces are scored slightly higher than slashes, brackets, hyphens, etc.
* - A continuous match of characters will result in a total score of 1.
* - A new match at the start of a word scores better than a new match elsewhere as it's more likely that the user will type the starts of fragments.
* - Any other match isn't ideal, but we include it for completeness.
* - If the user transposed two letters, it should be significantly penalized.
* - The goodness of a match should decay slightly with each missing character.
* - Match higher for letters closer to the beginning of the word.
*
* @param command - The value to score against the search string (e.g. a command name like "Calculator")
* @param search - The search string to score against the value/aliases
* @param commandKeywords - An optional list of aliases/keywords to score against the search string - e.g. ["math", "add", "divide", "multiply", "subtract"]
* @returns A score between 0 and 1 that represents how well the search string matches the
* command (and keywords)
*/
export function computeCommandScore(command, search, commandKeywords) {
/**
* NOTE: We used to do lower-casing on each recursive call, but this meant that `toLowerCase()`
* was the dominating cost in the algorithm. Passing both is a little ugly, but considerably
* faster.
*/
command =
commandKeywords && commandKeywords.length > 0
? `${`${command} ${commandKeywords?.join(" ")}`}`
: command;
return computeCommandScoreInner(command, search, formatInput(command), formatInput(search), 0, 0, {});
}