UNPKG

quick-score

Version:

A JavaScript string-scoring and fuzzy-matching library based on the Quicksilver algorithm, designed for smart auto-complete.

928 lines (826 loc) 28.7 kB
/** * A class representing a half-open interval of characters. A range's `location` * property and `max()` value can be used as arguments for the `substring()` * method to extract a range of characters. */ class Range { /** * @memberOf Range.prototype * @member {number} location Starting index of the range. */ /** * @memberOf Range.prototype * @member {number} length Number of characters in the range. */ /** * @param {number} [location=-1] Starting index of the range. * @param {number} [length=0] Number of characters in the range. */ constructor( location = -1, length = 0) { this.location = location; this.length = length; } /* eslint no-inline-comments: 0 */ /** * Gets the end index of the range, which indicates the character * immediately after the last one in the range. * * @returns {number} */ /** * Sets the end index of the range, which indicates the character * immediately after the last one in the range. * * @param {number} [value] End of the range. * * @returns {number} */ max( value) { if (typeof value == "number") { this.length = value - this.location; } // the NSMaxRange() function in Objective-C returns this value return this.location + this.length; } /** * Returns whether the range contains a location >= 0. * * @returns {boolean} */ isValid() { return (this.location > -1); } /** * Returns an array of the range's start and end indexes. * * @returns {RangeTuple} */ toArray() { return [this.location, this.max()]; } /** * Returns a string representation of the range's open interval. * * @returns {string} */ toString() { if (this.location == -1) { return "invalid range"; } else { return "[" + this.location + "," + this.max() + ")"; } } } const BaseConfigDefaults = { wordSeparators: "-/\\:()<>%._=&[]+ \t\n\r", uppercaseLetters: (() => { const charCodeA = "A".charCodeAt(0); const uppercase = []; for (let i = 0; i < 26; i++) { uppercase.push(String.fromCharCode(charCodeA + i)); } return uppercase.join(""); })(), ignoredScore: 0.9, skippedScore: 0.15, emptyQueryScore: 0, // long, nearly-matching queries can generate up to 2^queryLength loops, // so support worst-case queries up to 16 characters and then give up // and return 0 for longer queries that may or may not actually match maxIterations: Math.pow(2, 16) }; const QSConfigDefaults = { longStringLength: 150, maxMatchStartPct: 0.15, minMatchDensityPct: 0.75, maxMatchDensityPct: 0.95, beginningOfStringPct: 0.1 }; class Config { constructor( options) { Object.assign(this, BaseConfigDefaults, options); } useSkipReduction() { return true; } adjustRemainingScore( string, query, remainingScore, skippedSpecialChar, searchRange, remainingSearchRange, matchedRange, fullMatchedRange) { // use the original Quicksilver expression for the remainingScore return remainingScore * remainingSearchRange.length; } } class QuickScoreConfig extends Config { constructor( options) { super(Object.assign({}, QSConfigDefaults, options)); } useSkipReduction( string, query, remainingScore, searchRange, remainingSearchRange, matchedRange, fullMatchedRange) { const len = string.length; const isShortString = len <= this.longStringLength; const matchStartPercentage = fullMatchedRange.location / len; return isShortString || matchStartPercentage < this.maxMatchStartPct; } adjustRemainingScore( string, query, remainingScore, skippedSpecialChar, searchRange, remainingSearchRange, matchedRange, fullMatchedRange) { const isShortString = string.length <= this.longStringLength; const matchStartPercentage = fullMatchedRange.location / string.length; let matchRangeDiscount = 1; let matchStartDiscount = (1 - matchStartPercentage); // discount the remainingScore based on how much larger the match is // than the query, unless the match is in the first 10% of the // string, the match range isn't too sparse and the whole string is // not too long. also only discount if we didn't skip any whitespace // or capitals. if (!skippedSpecialChar) { matchRangeDiscount = query.length / fullMatchedRange.length; matchRangeDiscount = (isShortString && matchStartPercentage <= this.beginningOfStringPct && matchRangeDiscount >= this.minMatchDensityPct) ? 1 : matchRangeDiscount; matchStartDiscount = matchRangeDiscount >= this.maxMatchDensityPct ? 1 : matchStartDiscount; } // discount the scores of very long strings return remainingScore * Math.min(remainingSearchRange.length, this.longStringLength) * matchRangeDiscount * matchStartDiscount; } } function createConfig( options) { if (options instanceof Config) { // this is a full-fledged Config instance, so we don't need to do // anything to it return options; } else { // create a complete config from this return new QuickScoreConfig(options); } } const DefaultConfig = createConfig(); const BaseConfig = new Config(); const QuicksilverConfig = new Config({ // the Quicksilver algorithm returns .9 for empty queries emptyQueryScore: 0.9, adjustRemainingScore: function( string, query, remainingScore, skippedSpecialChar, searchRange, remainingSearchRange, matchedRange, fullMatchedRange) { let score = remainingScore * remainingSearchRange.length; if (!skippedSpecialChar) { // the current QuickSilver algorithm reduces the score by half // this value when no special chars are skipped, so add the half // back in to match it score += ((matchedRange.location - searchRange.location) / 2.0); } return score; } }); /** * Scores a string against a query. * * @param {string} string The string to score. * * @param {string} query The query string to score the `string` parameter against. * * @param {Array<RangeTuple>} [matches] If supplied, `quickScore()` will push onto * `matches` an array with start and end indexes for each substring range of * `string` that matches `query`. These indexes can be used to highlight the * matching characters in an auto-complete UI. * * @param {string} [transformedString] A transformed version of the string that * will be used for matching. This defaults to a lowercase version of `string`, * but it could also be used to match against a string with all the diacritics * removed, so an unaccented character in the query would match an accented one * in the string. * * @param {string} [transformedQuery] A transformed version of `query`. The * same transformation applied to `transformedString` should be applied to this * parameter, or both can be left as `undefined` for the default lowercase * transformation. * * @param {object} [config] A configuration object that can modify how the * `quickScore` algorithm behaves. * * @param {Range} [stringRange] The range of characters in `string` that should * be checked for matches against `query`. Defaults to the entire `string` * parameter. * * @returns {number} A number between 0 and 1 that represents how well the * `query` matches the `string`. */ function quickScore( string, query, matches, transformedString = string.toLocaleLowerCase(), transformedQuery = query.toLocaleLowerCase(), config = DefaultConfig, stringRange = new Range(0, string.length)) { let iterations = 0; if (query) { return calcScore(stringRange, new Range(0, query.length), new Range()); } else { return config.emptyQueryScore; } function calcScore( searchRange, queryRange, fullMatchedRange) { if (!queryRange.length) { // deduct some points for all remaining characters return config.ignoredScore; } else if (queryRange.length > searchRange.length) { return 0; } const initialMatchesLength = matches && matches.length; for (let i = queryRange.length; i > 0; i--) { if (iterations > config.maxIterations) { // a long query that matches the string except for the last // character can generate 2^queryLength iterations of this // loop before returning 0, so short-circuit that when we've // seen too many iterations (bit of an ugly kludge, but it // avoids locking up the UI if the user somehow types an // edge-case query) return 0; } iterations++; const querySubstring = transformedQuery.substring(queryRange.location, queryRange.location + i); // reduce the length of the search range by the number of chars // we're skipping in the query, to make sure there's enough string // left to possibly contain the skipped chars const matchedRange = getRangeOfSubstring(transformedString, querySubstring, new Range(searchRange.location, searchRange.length - queryRange.length + i)); if (!matchedRange.isValid()) { // we didn't find the query substring, so try again with a // shorter substring continue; } if (!fullMatchedRange.isValid()) { fullMatchedRange.location = matchedRange.location; } else { fullMatchedRange.location = Math.min(fullMatchedRange.location, matchedRange.location); } fullMatchedRange.max(matchedRange.max()); if (matches) { matches.push(matchedRange.toArray()); } const remainingSearchRange = new Range(matchedRange.max(), searchRange.max() - matchedRange.max()); const remainingQueryRange = new Range(queryRange.location + i, queryRange.length - i); const remainingScore = calcScore(remainingSearchRange, remainingQueryRange, fullMatchedRange); if (remainingScore) { let score = remainingSearchRange.location - searchRange.location; // default to true since we only want to apply a discount if // we hit the final else clause below, and we won't get to // any of them if the match is right at the start of the // searchRange let skippedSpecialChar = true; const useSkipReduction = config.useSkipReduction(string, query, remainingScore, remainingSearchRange, searchRange, remainingSearchRange, matchedRange, fullMatchedRange); if (matchedRange.location > searchRange.location) { // some letters were skipped when finding this match, so // adjust the score based on whether spaces or capital // letters were skipped if (useSkipReduction && config.wordSeparators.indexOf(string[matchedRange.location - 1]) > -1) { for (let j = matchedRange.location - 2; j >= searchRange.location; j--) { if (config.wordSeparators.indexOf(string[j]) > -1) { score--; } else { score -= config.skippedScore; } } } else if (useSkipReduction && config.uppercaseLetters.indexOf(string[matchedRange.location]) > -1) { for (let j = matchedRange.location - 1; j >= searchRange.location; j--) { if (config.uppercaseLetters.indexOf(string[j]) > -1) { score--; } else { score -= config.skippedScore; } } } else { // reduce the score by the number of chars we've // skipped since the beginning of the search range score -= matchedRange.location - searchRange.location; skippedSpecialChar = false; } } score += config.adjustRemainingScore(string, query, remainingScore, skippedSpecialChar, searchRange, remainingSearchRange, matchedRange, fullMatchedRange); score /= searchRange.length; return score; } else if (matches) { // the remaining query does not appear in the remaining // string, so strip off any matches we've added during the // current call, as they'll be invalid when we start over // with a shorter piece of the query matches.length = initialMatchesLength; } } return 0; } } // make createConfig() available on quickScore so that the QuickScore // constructor has access to it quickScore.createConfig = createConfig; function getRangeOfSubstring( string, query, searchRange) { const index = string.indexOf(query, searchRange.location); const result = new Range(); if (index > -1 && index < searchRange.max()) { result.location = index; result.length = query.length; } return result; } /** * A class for scoring and sorting a list of items against a query string. Each * item receives a floating point score between `0` and `1`. */ class QuickScore { /** * @memberOf QuickScore.prototype * @member {Array<object>} items The array of items to search, which * should only be modified via the [setItems()]{@link QuickScore#setItems} * method. * @readonly */ /** * @memberOf QuickScore.prototype * @member {Array<ItemKey>} keys The keys to search on each item, which * should only be modified via the [setItems()]{@link QuickScore#setKeys} * method. * @readonly */ /** * @param {Array<string|object>} [items] The list of items to score. If * the list is not a flat array of strings, a `keys` array must be supplied * via the second parameter. QuickScore makes a shallow copy of the `items` * array, so changes to it won't have any affect, but changes to the objects * referenced by the array need to be passed to the instance by a call to * its [setItems()]{@link QuickScore#setItems} method. * * @param {Array<ItemKey>|Options} [options] If the `items` parameter * is an array of flat strings, the `options` parameter can be left out. If * it is a list of objects containing keys that should be scored, the * `options` parameter must either be an array of key names or an object * containing a `keys` property. * * @param {Array<ItemKey>} [options.keys] In the simplest case, an array of * key names to score on the objects in the `items` array. * * The key names can point to a nested key by passing either a dot-delimited * string or an array of sub-keys that specify the path to the value. So a * key `name` of `"foo.bar"` would evaluate to `"baz"` given an object like * `{ foo: { bar: "baz" } }`. Alternatively, that path could be passed as * an array, like `["foo", "bar"]`. In either case, if this sub-key's match * produces the highest score for an item in the search results, its * `scoreKey` name will be `"foo.bar"`. * * If your items have keys that contain periods, e.g., `"first.name"`, but * you don't want these names to be treated as paths to nested keys, simply * wrap the name in an array, like `{ keys: ["ssn", ["first.name"], * ["last.name"]] }`. * * Instead of a string or string array, an item in `keys` can also be passed * as a `{name, scorer}` object, which lets you specify a different scoring * function for each key. The scoring function should behave as described * next. * * @param {string} [options.sortKey=options.keys[0]] An optional key name * that will be used to sort items with identical scores. Defaults to the * name of the first item in the `keys` parameter. If `sortKey` points to * a nested key, use a dot-delimited string instead of an array to specify * the path. * * @param {number} [options.minimumScore=0] An optional value that * specifies the minimum score an item must have to appear in the results * returned from [search()]{@link QuickScore#search}. Defaults to `0`, * so items that don't match the full `query` will not be returned. This * value is ignored if the `query` is empty or undefined, in which case all * items are returned, sorted alphabetically and case-insensitively on the * `sortKey`, if any. * * @param {TransformStringFunction} [options.transformString] An optional * function that takes a `string` parameter and returns a transformed * version of that string. This function will be called on each of the * searchable keys in the `items` array as well as on the `query` * parameter to the `search()` method. The default function calls * `toLocaleLowerCase()` on each string, for a case-insensitive search. The * result of this function is cached for each searchable key on each item. * * You can pass a function here to do other kinds of preprocessing, such as * removing diacritics from all the strings or converting Chinese characters * to pinyin. For example, you could use the * [`latinize`](https://www.npmjs.com/package/latinize) npm package to * convert characters with diacritics to the base character so that your * users can type an unaccented character in the query while still matching * items that have accents or diacritics. Pass in an `options` object like * this to use a custom `transformString()` function: * `{ transformString: s => latinize(s.toLocaleLowerCase()) }` * * @param {ScorerFunction} [options.scorer] An optional function that takes * `string` and `query` parameters and returns a floating point number * between 0 and 1 that represents how well the `query` matches the * `string`. It defaults to the [quickScore()]{@link quickScore} function * in this library. * * If the function gets a third `matches` parameter, it should fill the * passed-in array with indexes corresponding to where the query * matches the string, as described in the [search()]{@link QuickScore#search} * method. * * @param {Config} [options.config] An optional object that is passed to * the scorer function to further customize its behavior. If the * `scorer` function has a `createConfig()` method on it, the `QuickScore` * instance will call that with the `config` value and store the result. * This can be used to extend the `config` parameter with default values. */ constructor( items = [], options = {}) { const { scorer = quickScore, transformString = toLocaleLowerCase, keys = [], sortKey = "", minimumScore = 0, config } = Array.isArray(options) ? { keys: options } : options; this.scorer = scorer; this.minimumScore = minimumScore; this.config = config; this.transformStringFunc = transformString; if (typeof scorer.createConfig === "function") { // let the scorer fill out the config with default values this.config = scorer.createConfig(config); } this.setKeys(keys, sortKey); this.setItems(items); // the scoring function needs access to this.sortKey this.compareScoredStrings = this.compareScoredStrings.bind(this); } /** * Scores the instance's items against the `query` and sorts them from * highest to lowest. * * @param {string} query The string to score each item against. The * instance's `transformString()` function is called on this string before * it's matched against each item. * * @returns {Array<ScoredString|ScoredObject>} When the instance's `items` * are flat strings, an array of [`ScoredString`]{@link ScoredString} * objects containing the following properties is returned: * * - `item`: the string that was scored * - `score`: the floating point score of the string for the current query * - `matches`: an array of arrays that specify the character ranges * where the query matched the string * * When the `items` are objects, an array of [`ScoredObject`]{@link ScoredObject} * results is returned: * * - `item`: the object that was scored * - `score`: the highest score from among the individual key scores * - `scoreKey`: the name of the key with the highest score, which will be * an empty string if they're all zero * - `scoreValue`: the value of the key with the highest score, which makes * it easier to access if it's a nested string * - `scores`: a hash of the individual scores for each key * - `matches`: a hash of arrays that specify the character ranges of the * query match for each key * * The results array is sorted high to low on each item's score. Items with * identical scores are sorted alphabetically and case-insensitively on the * `sortKey` option. Items with scores that are <= the `minimumScore` option * (defaults to `0`) are not returned, unless the `query` is falsy, in which * case all of the items are returned, sorted alphabetically. * * The start and end indices in each [`RangeTuple`]{@link RangeTuple} in the * `matches` array can be used as parameters to the `substring()` method to * extract the characters from each string that match the query. This can * then be used to format the matching characters with a different color or * style. * * Each `ScoredObject` item also has a `_` property, which caches transformed * versions of the item's strings, and might contain additional internal * metadata in the future. It can be ignored. */ search( query) { const results = []; const {items, transformedItems, keys: sharedKeys, config} = this; // if the query is empty, we want to return all items, so make the // minimum score less than 0 const minScore = query ? this.minimumScore : -1; const transformedQuery = this.transformString(query); const itemCount = items.length; const sharedKeyCount = sharedKeys.length; if (typeof items[0] === "string") { // items is an array of strings for (let i = 0; i < itemCount; i++) { const item = items[i]; const transformedItem = transformedItems[i]; const matches = []; const score = this.scorer(item, query, matches, transformedItem, transformedQuery, config); if (score > minScore) { results.push({ item, score, matches, _: transformedItem }); } } } else { for (let i = 0; i < itemCount; i++) { const item = items[i]; const transformedItem = transformedItems[i]; const result = { item, score: 0, scoreKey: "", scoreValue: "", scores: {}, matches: {}, _: transformedItem }; // if an empty keys array was passed into the constructor, // score all of the non-empty string keys on the object const keys = sharedKeyCount ? sharedKeys : Object.keys(transformedItem); const keyCount = keys.length; let highScore = 0; let scoreKey = ""; let scoreValue = ""; // find the highest score for each keyed string on this item for (let j = 0; j < keyCount; j++) { const key = keys[j]; // use the key as the name if it's just a string, and // default to the instance's scorer function const {name = key, scorer = this.scorer} = key; const transformedString = transformedItem[name]; // setItems() checks for non-strings and empty strings // when creating the transformed objects, so if the key // doesn't exist there, we can skip the processing // below for this key in this item if (transformedString) { const string = this.getItemString(item, key); const matches = []; const newScore = scorer(string, query, matches, transformedString, transformedQuery, config); result.scores[name] = newScore; result.matches[name] = matches; if (newScore > highScore) { highScore = newScore; scoreKey = name; scoreValue = string; } } } if (highScore > minScore) { result.score = highScore; result.scoreKey = scoreKey; result.scoreValue = scoreValue; results.push(result); } } } results.sort(this.compareScoredStrings); return results; } /** * Sets the `keys` configuration. `setItems()` must be called after * changing the keys so that the items' transformed strings get cached. * * @param {Array<ItemKey>} keys List of keys to score, as either strings * or `{name, scorer}` objects. * * @param {string} [sortKey=keys[0]] Name of key on which to sort * identically scored items. Defaults to the first `keys` item. */ setKeys( keys, sortKey) { // create a shallow copy of the keys array so that changes to its // order outside of this instance won't affect searching this.keys = keys.slice(); this.sortKey = sortKey; if (this.keys.length) { const {scorer} = this; // associate each key with the scorer function, if it isn't already this.keys = this.keys.map(itemKey => { // items in the keys array should either be a string or // array specifying a key name, or a { name, scorer } object const key = itemKey.length ? { name: itemKey, scorer } : itemKey; if (Array.isArray(key.name)) { if (key.name.length > 1) { key.path = key.name; key.name = key.path.join("."); } else { // this path consists of just one key name, which was // probably wrapped in an array because it contains // dots but isn't intended as a key path. so don't // create a path array on this key, so that we're not // constantly calling reduce() to get this one key. [key.name] = key.name; } } else if (key.name.indexOf(".") > -1) { key.path = key.name.split("."); } return key; }); this.sortKey = this.sortKey || this.keys[0].name; } } /** * Sets the `items` array and caches a transformed copy of all the item * strings specified by the `keys` parameter to the constructor, using the * `transformString` option (which defaults to `toLocaleLowerCase()`). * * @param {Array<string|object>} items List of items to score. */ setItems( items) { // create a shallow copy of the items array so that changes to its // order outside of this instance won't affect searching const itemArray = items.slice(); const itemCount = itemArray.length; const transformedItems = []; const sharedKeys = this.keys; const sharedKeyCount = sharedKeys.length; if (typeof itemArray[0] === "string") { for (let i = 0; i < itemCount; i++) { transformedItems.push(this.transformString(itemArray[i])); } } else { for (let i = 0; i < itemCount; i++) { const item = itemArray[i]; const transformedItem = {}; const keys = sharedKeyCount ? sharedKeys : Object.keys(item); const keyCount = keys.length; for (let j = 0; j < keyCount; j++) { const key = keys[j]; const string = this.getItemString(item, key); if (string && typeof string === "string") { transformedItem[key.name || key] = this.transformString(string); } } transformedItems.push(transformedItem); } } this.items = itemArray; this.transformedItems = transformedItems; } /** * Gets an item's key, possibly at a nested path. * * @private * @param {object} item An object with multiple string properties. * @param {object|string} key A key object with * the name of the string to get from `item`, or a plain string when all * keys on an item are being matched. * @returns {string} */ getItemString( item, key) { const {name, path} = key; if (path) { return path.reduce((value, prop) => value && value[prop], item); } else { // if this instance is scoring all the keys on each item, key // will just be a string, not a { name, scorer } object return item[name || key]; } } /** * Transforms a string into a canonical form for scoring. * * @private * @param {string} string The string to transform. * @returns {string} */ transformString( string) { return this.transformStringFunc(string); } /** * Compares two items based on their scores, or on their `sortKey` if the * scores are identical. * * @private * @param {object} a First item. * @param {object} b Second item. * @returns {number} */ compareScoredStrings( a, b) { // use the transformed versions of the strings for sorting const itemA = a._; const itemB = b._; const itemAString = typeof itemA === "string" ? itemA : itemA[this.sortKey]; const itemBString = typeof itemB === "string" ? itemB : itemB[this.sortKey]; if (a.score === b.score) { // sort undefineds to the end of the array, as per the ES spec if (itemAString === undefined || itemBString === undefined) { if (itemAString === undefined && itemBString === undefined) { return 0; } else if (itemAString === undefined) { return 1; } else { return -1; } } else if (itemAString === itemBString) { return 0; } else if (itemAString < itemBString) { return -1; } else { return 1; } } else { return b.score - a.score; } } } /** * Default function for transforming each string to be searched. * * @private * @param {string} string The string to transform. * @returns {string} The transformed string. */ function toLocaleLowerCase( string) { return string.toLocaleLowerCase(); } export { BaseConfig, DefaultConfig, QuickScore, QuicksilverConfig, Range, createConfig, quickScore };