UNPKG

kelvinnnotkevinn-match-sorter

Version:

Simple, expected, and deterministic best-match sorting of an array in JavaScript

438 lines (412 loc) 13.3 kB
import removeAccents from 'remove-accents';

/**
 * @name match-sorter
 * @license MIT license.
 * @copyright (c) 2020 Kent C. Dodds
 * @author Kent C. Dodds <me@kentcdodds.com> (https://kentcdodds.com)
 */

/**
 * Match qualities, from the strongest (highest number) to no match at all.
 * `getClosenessRanking` produces fractional values above `MATCHES`.
 */
const rankings = {
  CASE_SENSITIVE_EQUAL: 7,
  EQUAL: 6,
  STARTS_WITH: 5,
  WORD_STARTS_WITH: 4,
  CONTAINS: 3,
  ACRONYM: 2,
  MATCHES: 1,
  NO_MATCH: 0
};

/**
 * Default tie-breaker: compares the stringified `rankedValue` of two ranked
 * entries with `localeCompare`.
 * @param {Object} a - first ranked entry
 * @param {Object} b - second ranked entry
 * @return {Number} the `localeCompare` result
 */
const defaultBaseSortFn = (a, b) => {
  const left = String(a.rankedValue);
  const right = String(b.rankedValue);
  return left.localeCompare(right);
};

/**
 * Ranks every item against `value`, drops the items ranked below their
 * threshold, sorts the rest and returns the surviving items.
 * @param {Array} items - the items to rank and sort
 * @param {String} value - the value to rank the items against
 * @param {Object} options - `keys`, `threshold`, `baseSort`, `sorter`, plus
 * whatever the ranking helpers read (e.g. `keepDiacritics`)
 * @return {Array} - a new array holding the matching items in sorted order
 */
function matchSorter(items, value, options = {}) {
  const {
    keys,
    threshold = rankings.MATCHES,
    baseSort = defaultBaseSortFn
  } = options;
  // The default sorter orders by rank, then key index, then `baseSort`.
  const {
    sorter = ranked =>
      ranked.sort((left, right) => sortRankedValues(left, right, baseSort))
  } = options;

  const accepted = items.reduce((kept, item, index) => {
    const info = getHighestRanking(item, keys, value, options);
    // A threshold set on the winning key overrides the global threshold.
    const { rank, keyThreshold = threshold } = info;
    if (!(rank >= keyThreshold)) {
      return kept;
    }
    kept.push({ ...info, item, index });
    return kept;
  }, []);

  return sorter(accepted).map(entry => entry.item);
}

matchSorter.rankings = rankings;

/**
 * Finds the best ranking `value` achieves against `item`, looking at the
 * values selected by `keys` (or at the item itself when `keys` is falsy).
 * @param {*} item - the item to rank
 * @param {Array} keys - the keys whose values take part in the ranking
 * @param {String} value - the value to rank against
 * @param {Object} options - options to control the ranking
 * @return {{rankedValue: *, rank: Number, keyIndex: Number, keyThreshold: Number}}
 * the best ranking found; `keyIndex` is -1 when no keyed value won
 */
function getHighestRanking(item, keys, value, options) {
  if (!keys) {
    // Without keys the item itself is what gets matched.
    return {
      rankedValue: item,
      rank: getMatchRanking(item, value, options),
      keyIndex: -1,
      keyThreshold: options.threshold
    };
  }

  const candidates = getAllValuesToRank(item, keys);
  let bestValue = item;
  let bestRank = rankings.NO_MATCH;
  let bestKeyIndex = -1;
  let bestThreshold = options.threshold;

  for (let position = 0; position < candidates.length; position += 1) {
    const { itemValue, attributes } = candidates[position];
    let candidateRank = getMatchRanking(itemValue, value, options);
    const { minRanking, maxRanking, threshold } = attributes;

    // Clamp into the key's [minRanking, maxRanking] window; the lower bound
    // only lifts values that matched at all.
    if (candidateRank < minRanking && candidateRank >= rankings.MATCHES) {
      candidateRank = minRanking;
    } else if (candidateRank > maxRanking) {
      candidateRank = maxRanking;
    }

    // Strictly greater, so the earliest candidate wins a tie.
    if (candidateRank > bestRank) {
      bestRank = candidateRank;
      bestKeyIndex = position;
      bestThreshold = threshold;
      bestValue = itemValue;
    }
  }

  return {
    rankedValue: bestValue,
    rank: bestRank,
    keyIndex: bestKeyIndex,
    keyThreshold: bestThreshold
  };
}

/**
 * Lazily yields each index at which `stringToRank` occurs in `testString`,
 * in ascending order. The generator's return value is -1, so the first
 * `next()` reports `value: -1` when there is no occurrence.
 * @param {String} testString - the string to search in
 * @param {String} stringToRank - the string to search for
 */
function* indexesOf(testString, stringToRank) {
  let position = testString.indexOf(stringToRank, 0);
  while (position > -1) {
    yield position;
    position = testString.indexOf(stringToRank, position + 1);
  }
  return -1;
}

/**
 * Scores how well `stringToRank` matches `testString`.
 * @param {String} testString - the string to test against
 * @param {String} stringToRank - the string to rank
 * @param {Object} options - options for the match (like keepDiacritics for comparison)
 * @returns {Number} one of the `rankings` values, or a fractional value above
 * `rankings.MATCHES` produced by `getClosenessRanking`
 */
function getMatchRanking(testString, stringToRank, options) {
  const exactHaystack = prepareValueForComparison(testString, options);
  const exactNeedle = prepareValueForComparison(stringToRank, options);

  // A needle longer than the haystack can never match.
  if (exactNeedle.length > exactHaystack.length) {
    return rankings.NO_MATCH;
  }
  if (exactHaystack === exactNeedle) {
    return rankings.CASE_SENSITIVE_EQUAL;
  }

  // Everything below is case-insensitive.
  const haystack = exactHaystack.toLowerCase();
  const needle = exactNeedle.toLowerCase();

  const occurrences = indexesOf(haystack, needle);
  const firstHit = occurrences.next();
  const firstIndex = firstHit.value;

  if (firstIndex === 0) {
    return haystack.length === needle.length
      ? rankings.EQUAL
      : rankings.STARTS_WITH;
  }

  // Any occurrence directly preceded by a space counts as a word start.
  for (let hit = firstHit; !hit.done; hit = occurrences.next()) {
    if (hit.value > 0 && haystack[hit.value - 1] === ' ') {
      return rankings.WORD_STARTS_WITH;
    }
  }

  if (firstIndex > 0) {
    return rankings.CONTAINS;
  }
  if (needle.length === 1) {
    // A single character that is not contained cannot match any other way.
    return rankings.NO_MATCH;
  }

  if (getAcronym(haystack).includes(needle)) {
    return rankings.ACRONYM;
  }

  return getClosenessRanking(haystack, needle);
}

/**
 * Builds an acronym from the first character of every segment of `string`,
 * where segments are separated by runs of spaces and/or hyphens.
 *
 * @example
 * getAcronym('The Tail-spin Test') // → "TTsT"
 * getAcronym('edge-case')          // → "ec"
 *
 * @param {String} string the string for which to produce the acronym
 * @returns {String} the acronym
 */
function getAcronym(string) {
  // Leading/trailing delimiters produce empty segments, which contribute ''.
  return string
    .split(/[ -]+/)
    .map(segment => segment.charAt(0))
    .join('');
}

/**
 * Scores an in-order (but not necessarily contiguous) match of the characters
 * of `stringToRank` inside `testString`. The tighter the matched characters
 * sit together, the higher the score.
 * @param {String} testString - the string to test against
 * @param {String} stringToRank - the string to rank
 * @returns {Number} `rankings.NO_MATCH` when some character cannot be found in
 * order, otherwise `rankings.MATCHES` plus a fraction based on the spread
 */
function getClosenessRanking(testString, stringToRank) {
  const haystackLength = testString.length;
  let inOrderCount = 0;

  // Returns the index just past the first occurrence of `unit` at or after
  // `start`, or -1 when there is none.
  const consume = (unit, start) => {
    let at = start;
    while (at < haystackLength && testString[at] !== unit) {
      at += 1;
    }
    if (at >= haystackLength) {
      return -1;
    }
    inOrderCount += 1;
    return at + 1;
  };

  const firstEnd = consume(stringToRank[0], 0);
  if (firstEnd < 0) {
    return rankings.NO_MATCH;
  }

  let lastEnd = firstEnd;
  for (let n = 1; n < stringToRank.length; n += 1) {
    lastEnd = consume(stringToRank[n], lastEnd);
    if (lastEnd < 0) {
      return rankings.NO_MATCH;
    }
  }

  const spread = lastEnd - firstEnd;
  const inOrderRatio = inOrderCount / stringToRank.length;
  return rankings.MATCHES + inOrderRatio * (1 / spread);
}

/**
 * Comparator for ranked entries: higher rank first, then lower key index,
 * then whatever `baseSort` decides.
 * @param {Object} a - the first ranked entry
 * @param {Object} b - the second ranked entry
 * @param {Function} baseSort - tie-breaker called with `(a, b)`
 * @return {Number} -1 if a should come first, 1 if b should come first,
 * otherwise the result of `baseSort`
 */
function sortRankedValues(a, b, baseSort) {
  const { rank: leftRank, keyIndex: leftKey } = a;
  const { rank: rightRank, keyIndex: rightKey } = b;

  if (leftRank !== rightRank) {
    return leftRank > rightRank ? -1 : 1;
  }
  if (leftKey !== rightKey) {
    return leftKey < rightKey ? -1 : 1;
  }
  return baseSort(a, b);
}

/**
 * Turns a value into a comparable string: stringifies it and, unless
 * `keepDiacritics` is set, strips accents with `removeAccents`.
 * @param {String} value - the value to clean
 * @param {Object} options - {keepDiacritics: truthy to leave diacritics in place}
 * @return {String} the prepared value
 */
function prepareValueForComparison(value, { keepDiacritics }) {
  // The caller may hand us a non-string; the template literal stringifies it.
  const text = `${value}`;
  return keepDiacritics ? text : removeAccents(text);
}

/**
 * Reads the value(s) a key selects from an item.
 * @param {Object} item - the item
 * @param {Object|Function|String} key - a property name, a dotted keypath, a
 * callback receiving the item, or an object carrying one of those as `key`
 * @return {Array} - the selected value(s); empty when nothing was found
 */
function getItemValues(item, key) {
  const accessor = typeof key === 'object' ? key.key : key;

  let found = null;
  if (typeof accessor === 'function') {
    found = accessor(item);
  } else if (item != null) {
    if (Object.prototype.hasOwnProperty.call(item, accessor)) {
      found = item[accessor];
    } else if (accessor.includes('.')) {
      // Not an own property but it looks like a keypath: walk it.
      return getNestedValues(accessor, item);
    }
  }

  // `found` may be null or undefined here.
  if (found == null) {
    return [];
  }
  return Array.isArray(found) ? found : [String(found)];
}

/**
 * Given path: "foo.bar.baz"
 * And item: {foo: {bar: {baz: 'buzz'}}}
 * -> ['buzz']
 * A `*` segment that is not an own property of the current value expands that
 * value (e.g. an array) into the next level.
 * @param path a dot-separated set of keys
 * @param item the item to get the value from
 * @return {Array} the values found at the end of the path
 */
function getNestedValues(path, item) {
  let level = [item];

  for (const segment of path.split('.')) {
    let next = [];
    for (const node of level) {
      if (node == null) {
        continue;
      }
      if (Object.prototype.hasOwnProperty.call(node, segment)) {
        const child = node[segment];
        if (child != null) {
          next.push(child);
        }
      } else if (segment === '*') {
        // `concat` spreads an array node and appends anything else as-is.
        next = next.concat(node);
      }
    }
    level = next;
  }

  // Implicit wildcard: a path ending in arrays is flattened by one level.
  // `.flat()` is avoided because it is not available in node.js v10.
  return Array.isArray(level[0]) ? [].concat(...level) : level;
}

/**
 * Collects every value selected by `keys` from `item`, each paired with the
 * attributes of the key that produced it.
 * @param item - the item from which the values will be retrieved
 * @param keys - the keys to use to retrieve the values
 * @return objects with {itemValue, attributes}
 */
function getAllValuesToRank(item, keys) {
  const collected = [];
  for (let keyPosition = 0; keyPosition < keys.length; keyPosition += 1) {
    const key = keys[keyPosition];
    const attributes = getKeyAttributes(key);
    for (const itemValue of getItemValues(item, key)) {
      collected.push({ itemValue, attributes });
    }
  }
  return collected;
}

/** Ranking window applied to keys that do not declare their own. */
const defaultKeyAttributes = {
  maxRanking: Infinity,
  minRanking: -Infinity
};

/**
 * Gets all the attributes for the given key: the defaults for a plain string
 * key, otherwise the defaults overlaid with the key's own properties.
 * @param key - the key from which the attributes will be retrieved
 * @return object containing the key's attributes
 */
function getKeyAttributes(key) {
  return typeof key === 'string'
    ? defaultKeyAttributes
    : { ...defaultKeyAttributes, ...key };
}

/*
eslint
  no-continue: "off",
*/

export { defaultBaseSortFn, matchSorter, rankings };