kelvinnnotkevinn-match-sorter
Version:
Simple, expected, and deterministic best-match sorting of an array in JavaScript
438 lines (412 loc) • 13.3 kB
JavaScript
import removeAccents from 'remove-accents';
/**
* @name match-sorter
* @license MIT license.
* @copyright (c) 2020 Kent C. Dodds
* @author Kent C. Dodds <me@kentcdodds.com> (https://kentcdodds.com)
*/
const rankings = {
CASE_SENSITIVE_EQUAL: 7,
EQUAL: 6,
STARTS_WITH: 5,
WORD_STARTS_WITH: 4,
CONTAINS: 3,
ACRONYM: 2,
MATCHES: 1,
NO_MATCH: 0
};
const defaultBaseSortFn = (a, b) => String(a.rankedValue).localeCompare(String(b.rankedValue));
/**
* Takes an array of items and a value and returns a new array with the items that match the given value
* @param {Array} items - the items to sort
* @param {String} value - the value to use for ranking
* @param {Object} options - Some options to configure the sorter
* @return {Array} - the new sorted array
*/
function matchSorter(items, value, options = {}) {
const {
keys,
threshold = rankings.MATCHES,
baseSort = defaultBaseSortFn,
sorter = matchedItems => matchedItems.sort((a, b) => sortRankedValues(a, b, baseSort))
} = options;
const matchedItems = items.reduce(reduceItemsToRanked, []);
return sorter(matchedItems).map(({
item
}) => item);
function reduceItemsToRanked(matches, item, index) {
const rankingInfo = getHighestRanking(item, keys, value, options);
const {
rank,
keyThreshold = threshold
} = rankingInfo;
if (rank >= keyThreshold) {
matches.push({
...rankingInfo,
item,
index
});
}
return matches;
}
}
matchSorter.rankings = rankings;
/**
* Gets the highest ranking for value for the given item based on its values for the given keys
* @param {*} item - the item to rank
* @param {Array} keys - the keys to get values from the item for the ranking
* @param {String} value - the value to rank against
* @param {Object} options - options to control the ranking
* @return {{rank: Number, keyIndex: Number, keyThreshold: Number}} - the highest ranking
*/
function getHighestRanking(item, keys, value, options) {
if (!keys) {
// if keys is not specified, then we assume the item given is ready to be matched
const stringItem = item;
return {
// ends up being duplicate of 'item' in matches but consistent
rankedValue: stringItem,
rank: getMatchRanking(stringItem, value, options),
keyIndex: -1,
keyThreshold: options.threshold
};
}
const valuesToRank = getAllValuesToRank(item, keys);
return valuesToRank.reduce(({
rank,
rankedValue,
keyIndex,
keyThreshold
}, {
itemValue,
attributes
}, i) => {
let newRank = getMatchRanking(itemValue, value, options);
let newRankedValue = rankedValue;
const {
minRanking,
maxRanking,
threshold
} = attributes;
if (newRank < minRanking && newRank >= rankings.MATCHES) {
newRank = minRanking;
} else if (newRank > maxRanking) {
newRank = maxRanking;
}
if (newRank > rank) {
rank = newRank;
keyIndex = i;
keyThreshold = threshold;
newRankedValue = itemValue;
}
return {
rankedValue: newRankedValue,
rank,
keyIndex,
keyThreshold
};
}, {
rankedValue: item,
rank: rankings.NO_MATCH,
keyIndex: -1,
keyThreshold: options.threshold
});
}
function* indexesOf(testString, stringToRank) {
let index = -1;
while ((index = testString.indexOf(stringToRank, index + 1)) > -1) {
yield index;
}
return -1;
}
/**
* Gives a rankings score based on how well the two strings match.
* @param {String} testString - the string to test against
* @param {String} stringToRank - the string to rank
* @param {Object} options - options for the match (like keepDiacritics for comparison)
* @returns {Number} the ranking for how well stringToRank matches testString
*/
function getMatchRanking(testString, stringToRank, options) {
testString = prepareValueForComparison(testString, options);
stringToRank = prepareValueForComparison(stringToRank, options);
// too long
if (stringToRank.length > testString.length) {
return rankings.NO_MATCH;
}
// case sensitive equals
if (testString === stringToRank) {
return rankings.CASE_SENSITIVE_EQUAL;
}
// Lower casing before further comparison
testString = testString.toLowerCase();
stringToRank = stringToRank.toLowerCase();
// Use indexOf to check for equality/includes
const indexesOfStringToRankInTestString = indexesOf(testString, stringToRank);
const firstIndexOfStringToRankInTestStringResult = indexesOfStringToRankInTestString.next();
const indexOfStringToRankInTestString = firstIndexOfStringToRankInTestStringResult.value;
// case insensitive equals
if (testString.length === stringToRank.length && indexOfStringToRankInTestString === 0) {
return rankings.EQUAL;
}
// starts with
if (indexOfStringToRankInTestString === 0) {
return rankings.STARTS_WITH;
}
// word starts with
let indexOfStringToRankInTestStringResult = firstIndexOfStringToRankInTestStringResult;
while (!indexOfStringToRankInTestStringResult.done) {
if (indexOfStringToRankInTestStringResult.value > 0 && testString[indexOfStringToRankInTestStringResult.value - 1] === ' ') {
return rankings.WORD_STARTS_WITH;
}
indexOfStringToRankInTestStringResult = indexesOfStringToRankInTestString.next();
}
// contains
if (indexOfStringToRankInTestString > 0) {
return rankings.CONTAINS;
} else if (stringToRank.length === 1) {
// If the only character in the given stringToRank
// isn't even contained in the testString, then
// it's definitely not a match.
return rankings.NO_MATCH;
}
// acronym
if (getAcronym(testString).includes(stringToRank)) {
return rankings.ACRONYM;
}
// will return a number between rankings.MATCHES and
// rankings.MATCHES + 1 depending on how close of a match it is.
return getClosenessRanking(testString, stringToRank);
}
/**
* Generates an acronym for a string.
*
* Segment starts ︱ at the beginning of the phrase, after a **space**, or after a **hyphen**.
* We capture the first non-delimiter character of every segment and skip runs of delimiters.
*
* @example
* getAcronym('The Tail-spin Test') // → "TTsT"
* getAcronym('edge-case') // → "ec"
* getAcronym('multiple spaces') // → "ms"
*
* @param {String} string the string for which to produce the acronym
* @returns {String} the acronym
*/
function getAcronym(string) {
let acronym = '';
let prev = ' '; // virtual delimiter so the very first char qualifies
for (let i = 0; i < string.length; i++) {
const ch = string.charAt(i);
const prevWasDelimiter = prev === ' ' || prev === '-';
const currIsDelimiter = ch === ' ' || ch === '-';
if (prevWasDelimiter && !currIsDelimiter) {
acronym += ch;
}
prev = ch;
}
return acronym;
}
/**
* Returns a score based on how spread apart the
* characters from the stringToRank are within the testString.
* A number close to rankings.MATCHES represents a loose match. A number close
* to rankings.MATCHES + 1 represents a tighter match.
* @param {String} testString - the string to test against
* @param {String} stringToRank - the string to rank
* @returns {Number} the number between rankings.MATCHES and
* rankings.MATCHES + 1 for how well stringToRank matches testString
*/
function getClosenessRanking(testString, stringToRank) {
let matchingInOrderCharCount = 0;
let charNumber = 0;
function findMatchingCharacter(matchChar, string, index) {
for (let j = index, J = string.length; j < J; j++) {
const stringChar = string[j];
if (stringChar === matchChar) {
matchingInOrderCharCount += 1;
return j + 1;
}
}
return -1;
}
function getRanking(spread) {
const spreadPercentage = 1 / spread;
const inOrderPercentage = matchingInOrderCharCount / stringToRank.length;
const ranking = rankings.MATCHES + inOrderPercentage * spreadPercentage;
return ranking;
}
const firstIndex = findMatchingCharacter(stringToRank[0], testString, 0);
if (firstIndex < 0) {
return rankings.NO_MATCH;
}
charNumber = firstIndex;
for (let i = 1, I = stringToRank.length; i < I; i++) {
const matchChar = stringToRank[i];
charNumber = findMatchingCharacter(matchChar, testString, charNumber);
const found = charNumber > -1;
if (!found) {
return rankings.NO_MATCH;
}
}
const spread = charNumber - firstIndex;
return getRanking(spread);
}
/**
* Sorts items that have a rank, index, and keyIndex
* @param {Object} a - the first item to sort
* @param {Object} b - the second item to sort
* @return {Number} -1 if a should come first, 1 if b should come first, 0 if equal
*/
function sortRankedValues(a, b, baseSort) {
const aFirst = -1;
const bFirst = 1;
const {
rank: aRank,
keyIndex: aKeyIndex
} = a;
const {
rank: bRank,
keyIndex: bKeyIndex
} = b;
const same = aRank === bRank;
if (same) {
if (aKeyIndex === bKeyIndex) {
// use the base sort function as a tie-breaker
return baseSort(a, b);
} else {
return aKeyIndex < bKeyIndex ? aFirst : bFirst;
}
} else {
return aRank > bRank ? aFirst : bFirst;
}
}
/**
* Prepares value for comparison by stringifying it, removing diacritics (if specified)
* @param {String} value - the value to clean
* @param {Object} options - {keepDiacritics: whether to remove diacritics}
* @return {String} the prepared value
*/
function prepareValueForComparison(value, {
keepDiacritics
}) {
// value might not actually be a string at this point (we don't get to choose)
// so part of preparing the value for comparison is ensure that it is a string
value = `${value}`; // toString
if (!keepDiacritics) {
value = removeAccents(value);
}
return value;
}
/**
* Gets value for key in item at arbitrarily nested keypath
* @param {Object} item - the item
* @param {Object|Function} key - the potentially nested keypath or property callback
* @return {Array} - an array containing the value(s) at the nested keypath
*/
function getItemValues(item, key) {
if (typeof key === 'object') {
key = key.key;
}
let value;
if (typeof key === 'function') {
value = key(item);
} else if (item == null) {
value = null;
} else if (Object.hasOwnProperty.call(item, key)) {
value = item[key];
} else if (key.includes('.')) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-call
return getNestedValues(key, item);
} else {
value = null;
}
// because `value` can also be undefined
if (value == null) {
return [];
}
if (Array.isArray(value)) {
return value;
}
return [String(value)];
}
/**
* Given path: "foo.bar.baz"
* And item: {foo: {bar: {baz: 'buzz'}}}
* -> 'buzz'
* @param path a dot-separated set of keys
* @param item the item to get the value from
*/
function getNestedValues(path, item) {
const keys = path.split('.');
let values = [item];
for (let i = 0, I = keys.length; i < I; i++) {
const nestedKey = keys[i];
let nestedValues = [];
for (let j = 0, J = values.length; j < J; j++) {
const nestedItem = values[j];
if (nestedItem == null) continue;
if (Object.hasOwnProperty.call(nestedItem, nestedKey)) {
const nestedValue = nestedItem[nestedKey];
if (nestedValue != null) {
nestedValues.push(nestedValue);
}
} else if (nestedKey === '*') {
// ensure that values is an array
nestedValues = nestedValues.concat(nestedItem);
}
}
values = nestedValues;
}
if (Array.isArray(values[0])) {
// keep allowing the implicit wildcard for an array of strings at the end of
// the path; don't use `.flat()` because that's not available in node.js v10
const result = [];
return result.concat(...values);
}
// Based on our logic it should be an array of strings by now...
// assuming the user's path terminated in strings
return values;
}
/**
* Gets all the values for the given keys in the given item and returns an array of those values
* @param item - the item from which the values will be retrieved
* @param keys - the keys to use to retrieve the values
* @return objects with {itemValue, attributes}
*/
function getAllValuesToRank(item, keys) {
const allValues = [];
for (let j = 0, J = keys.length; j < J; j++) {
const key = keys[j];
const attributes = getKeyAttributes(key);
const itemValues = getItemValues(item, key);
for (let i = 0, I = itemValues.length; i < I; i++) {
allValues.push({
itemValue: itemValues[i],
attributes
});
}
}
return allValues;
}
const defaultKeyAttributes = {
maxRanking: Infinity,
minRanking: -Infinity
};
/**
* Gets all the attributes for the given key
* @param key - the key from which the attributes will be retrieved
* @return object containing the key's attributes
*/
function getKeyAttributes(key) {
if (typeof key === 'string') {
return defaultKeyAttributes;
}
return {
...defaultKeyAttributes,
...key
};
}
/*
eslint
no-continue: "off",
*/
export { defaultBaseSortFn, matchSorter, rankings };