kelvinnnotkevinn-match-sorter
Version:
Simple, expected, and deterministic best-match sorting of an array in JavaScript
932 lines (902 loc) • 21.6 kB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.matchSorter = {}));
})(this, (function (exports) { 'use strict';
function getDefaultExportFromCjs (x) {
return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
}
var removeAccents$2 = {exports: {}};
var characterMap = {
"À": "A",
"Á": "A",
"Â": "A",
"Ã": "A",
"Ä": "A",
"Å": "A",
"Ấ": "A",
"Ắ": "A",
"Ẳ": "A",
"Ẵ": "A",
"Ặ": "A",
"Æ": "AE",
"Ầ": "A",
"Ằ": "A",
"Ȃ": "A",
"Ả": "A",
"Ạ": "A",
"Ẩ": "A",
"Ẫ": "A",
"Ậ": "A",
"Ç": "C",
"Ḉ": "C",
"È": "E",
"É": "E",
"Ê": "E",
"Ë": "E",
"Ế": "E",
"Ḗ": "E",
"Ề": "E",
"Ḕ": "E",
"Ḝ": "E",
"Ȇ": "E",
"Ẻ": "E",
"Ẽ": "E",
"Ẹ": "E",
"Ể": "E",
"Ễ": "E",
"Ệ": "E",
"Ì": "I",
"Í": "I",
"Î": "I",
"Ï": "I",
"Ḯ": "I",
"Ȋ": "I",
"Ỉ": "I",
"Ị": "I",
"Ð": "D",
"Ñ": "N",
"Ò": "O",
"Ó": "O",
"Ô": "O",
"Õ": "O",
"Ö": "O",
"Ø": "O",
"Ố": "O",
"Ṍ": "O",
"Ṓ": "O",
"Ȏ": "O",
"Ỏ": "O",
"Ọ": "O",
"Ổ": "O",
"Ỗ": "O",
"Ộ": "O",
"Ờ": "O",
"Ở": "O",
"Ỡ": "O",
"Ớ": "O",
"Ợ": "O",
"Ù": "U",
"Ú": "U",
"Û": "U",
"Ü": "U",
"Ủ": "U",
"Ụ": "U",
"Ử": "U",
"Ữ": "U",
"Ự": "U",
"Ý": "Y",
"à": "a",
"á": "a",
"â": "a",
"ã": "a",
"ä": "a",
"å": "a",
"ấ": "a",
"ắ": "a",
"ẳ": "a",
"ẵ": "a",
"ặ": "a",
"æ": "ae",
"ầ": "a",
"ằ": "a",
"ȃ": "a",
"ả": "a",
"ạ": "a",
"ẩ": "a",
"ẫ": "a",
"ậ": "a",
"ç": "c",
"ḉ": "c",
"è": "e",
"é": "e",
"ê": "e",
"ë": "e",
"ế": "e",
"ḗ": "e",
"ề": "e",
"ḕ": "e",
"ḝ": "e",
"ȇ": "e",
"ẻ": "e",
"ẽ": "e",
"ẹ": "e",
"ể": "e",
"ễ": "e",
"ệ": "e",
"ì": "i",
"í": "i",
"î": "i",
"ï": "i",
"ḯ": "i",
"ȋ": "i",
"ỉ": "i",
"ị": "i",
"ð": "d",
"ñ": "n",
"ò": "o",
"ó": "o",
"ô": "o",
"õ": "o",
"ö": "o",
"ø": "o",
"ố": "o",
"ṍ": "o",
"ṓ": "o",
"ȏ": "o",
"ỏ": "o",
"ọ": "o",
"ổ": "o",
"ỗ": "o",
"ộ": "o",
"ờ": "o",
"ở": "o",
"ỡ": "o",
"ớ": "o",
"ợ": "o",
"ù": "u",
"ú": "u",
"û": "u",
"ü": "u",
"ủ": "u",
"ụ": "u",
"ử": "u",
"ữ": "u",
"ự": "u",
"ý": "y",
"ÿ": "y",
"Ā": "A",
"ā": "a",
"Ă": "A",
"ă": "a",
"Ą": "A",
"ą": "a",
"Ć": "C",
"ć": "c",
"Ĉ": "C",
"ĉ": "c",
"Ċ": "C",
"ċ": "c",
"Č": "C",
"č": "c",
"C̆": "C",
"c̆": "c",
"Ď": "D",
"ď": "d",
"Đ": "D",
"đ": "d",
"Ē": "E",
"ē": "e",
"Ĕ": "E",
"ĕ": "e",
"Ė": "E",
"ė": "e",
"Ę": "E",
"ę": "e",
"Ě": "E",
"ě": "e",
"Ĝ": "G",
"Ǵ": "G",
"ĝ": "g",
"ǵ": "g",
"Ğ": "G",
"ğ": "g",
"Ġ": "G",
"ġ": "g",
"Ģ": "G",
"ģ": "g",
"Ĥ": "H",
"ĥ": "h",
"Ħ": "H",
"ħ": "h",
"Ḫ": "H",
"ḫ": "h",
"Ĩ": "I",
"ĩ": "i",
"Ī": "I",
"ī": "i",
"Ĭ": "I",
"ĭ": "i",
"Į": "I",
"į": "i",
"İ": "I",
"ı": "i",
"IJ": "IJ",
"ij": "ij",
"Ĵ": "J",
"ĵ": "j",
"Ķ": "K",
"ķ": "k",
"Ḱ": "K",
"ḱ": "k",
"K̆": "K",
"k̆": "k",
"Ĺ": "L",
"ĺ": "l",
"Ļ": "L",
"ļ": "l",
"Ľ": "L",
"ľ": "l",
"Ŀ": "L",
"ŀ": "l",
"Ł": "l",
"ł": "l",
"Ḿ": "M",
"ḿ": "m",
"M̆": "M",
"m̆": "m",
"Ń": "N",
"ń": "n",
"Ņ": "N",
"ņ": "n",
"Ň": "N",
"ň": "n",
"ʼn": "n",
"N̆": "N",
"n̆": "n",
"Ō": "O",
"ō": "o",
"Ŏ": "O",
"ŏ": "o",
"Ő": "O",
"ő": "o",
"Œ": "OE",
"œ": "oe",
"P̆": "P",
"p̆": "p",
"Ŕ": "R",
"ŕ": "r",
"Ŗ": "R",
"ŗ": "r",
"Ř": "R",
"ř": "r",
"R̆": "R",
"r̆": "r",
"Ȓ": "R",
"ȓ": "r",
"Ś": "S",
"ś": "s",
"Ŝ": "S",
"ŝ": "s",
"Ş": "S",
"Ș": "S",
"ș": "s",
"ş": "s",
"Š": "S",
"š": "s",
"Ţ": "T",
"ţ": "t",
"ț": "t",
"Ț": "T",
"Ť": "T",
"ť": "t",
"Ŧ": "T",
"ŧ": "t",
"T̆": "T",
"t̆": "t",
"Ũ": "U",
"ũ": "u",
"Ū": "U",
"ū": "u",
"Ŭ": "U",
"ŭ": "u",
"Ů": "U",
"ů": "u",
"Ű": "U",
"ű": "u",
"Ų": "U",
"ų": "u",
"Ȗ": "U",
"ȗ": "u",
"V̆": "V",
"v̆": "v",
"Ŵ": "W",
"ŵ": "w",
"Ẃ": "W",
"ẃ": "w",
"X̆": "X",
"x̆": "x",
"Ŷ": "Y",
"ŷ": "y",
"Ÿ": "Y",
"Y̆": "Y",
"y̆": "y",
"Ź": "Z",
"ź": "z",
"Ż": "Z",
"ż": "z",
"Ž": "Z",
"ž": "z",
"ſ": "s",
"ƒ": "f",
"Ơ": "O",
"ơ": "o",
"Ư": "U",
"ư": "u",
"Ǎ": "A",
"ǎ": "a",
"Ǐ": "I",
"ǐ": "i",
"Ǒ": "O",
"ǒ": "o",
"Ǔ": "U",
"ǔ": "u",
"Ǖ": "U",
"ǖ": "u",
"Ǘ": "U",
"ǘ": "u",
"Ǚ": "U",
"ǚ": "u",
"Ǜ": "U",
"ǜ": "u",
"Ứ": "U",
"ứ": "u",
"Ṹ": "U",
"ṹ": "u",
"Ǻ": "A",
"ǻ": "a",
"Ǽ": "AE",
"ǽ": "ae",
"Ǿ": "O",
"ǿ": "o",
"Þ": "TH",
"þ": "th",
"Ṕ": "P",
"ṕ": "p",
"Ṥ": "S",
"ṥ": "s",
"X́": "X",
"x́": "x",
"Ѓ": "Г",
"ѓ": "г",
"Ќ": "К",
"ќ": "к",
"A̋": "A",
"a̋": "a",
"E̋": "E",
"e̋": "e",
"I̋": "I",
"i̋": "i",
"Ǹ": "N",
"ǹ": "n",
"Ồ": "O",
"ồ": "o",
"Ṑ": "O",
"ṑ": "o",
"Ừ": "U",
"ừ": "u",
"Ẁ": "W",
"ẁ": "w",
"Ỳ": "Y",
"ỳ": "y",
"Ȁ": "A",
"ȁ": "a",
"Ȅ": "E",
"ȅ": "e",
"Ȉ": "I",
"ȉ": "i",
"Ȍ": "O",
"ȍ": "o",
"Ȑ": "R",
"ȑ": "r",
"Ȕ": "U",
"ȕ": "u",
"B̌": "B",
"b̌": "b",
"Č̣": "C",
"č̣": "c",
"Ê̌": "E",
"ê̌": "e",
"F̌": "F",
"f̌": "f",
"Ǧ": "G",
"ǧ": "g",
"Ȟ": "H",
"ȟ": "h",
"J̌": "J",
"ǰ": "j",
"Ǩ": "K",
"ǩ": "k",
"M̌": "M",
"m̌": "m",
"P̌": "P",
"p̌": "p",
"Q̌": "Q",
"q̌": "q",
"Ř̩": "R",
"ř̩": "r",
"Ṧ": "S",
"ṧ": "s",
"V̌": "V",
"v̌": "v",
"W̌": "W",
"w̌": "w",
"X̌": "X",
"x̌": "x",
"Y̌": "Y",
"y̌": "y",
"A̧": "A",
"a̧": "a",
"B̧": "B",
"b̧": "b",
"Ḑ": "D",
"ḑ": "d",
"Ȩ": "E",
"ȩ": "e",
"Ɛ̧": "E",
"ɛ̧": "e",
"Ḩ": "H",
"ḩ": "h",
"I̧": "I",
"i̧": "i",
"Ɨ̧": "I",
"ɨ̧": "i",
"M̧": "M",
"m̧": "m",
"O̧": "O",
"o̧": "o",
"Q̧": "Q",
"q̧": "q",
"U̧": "U",
"u̧": "u",
"X̧": "X",
"x̧": "x",
"Z̧": "Z",
"z̧": "z",
"й": "и",
"Й": "И",
"ё": "е",
"Ё": "Е"
};
var chars = Object.keys(characterMap).join('|');
var allAccents = new RegExp(chars, 'g');
var firstAccent = new RegExp(chars, '');
function matcher(match) {
return characterMap[match];
}
var removeAccents = function (string) {
return string.replace(allAccents, matcher);
};
var hasAccents = function (string) {
return !!string.match(firstAccent);
};
removeAccents$2.exports = removeAccents;
removeAccents$2.exports.has = hasAccents;
removeAccents$2.exports.remove = removeAccents;
var removeAccentsExports = removeAccents$2.exports;
var removeAccents$1 = /*@__PURE__*/getDefaultExportFromCjs(removeAccentsExports);
/**
* @name match-sorter
* @license MIT license.
* @copyright (c) 2020 Kent C. Dodds
* @author Kent C. Dodds <me@kentcdodds.com> (https://kentcdodds.com)
*/
const rankings = {
CASE_SENSITIVE_EQUAL: 7,
EQUAL: 6,
STARTS_WITH: 5,
WORD_STARTS_WITH: 4,
CONTAINS: 3,
ACRONYM: 2,
MATCHES: 1,
NO_MATCH: 0
};
const defaultBaseSortFn = (a, b) => String(a.rankedValue).localeCompare(String(b.rankedValue));
/**
* Takes an array of items and a value and returns a new array with the items that match the given value
* @param {Array} items - the items to sort
* @param {String} value - the value to use for ranking
* @param {Object} options - Some options to configure the sorter
* @return {Array} - the new sorted array
*/
function matchSorter(items, value, options = {}) {
const {
keys,
threshold = rankings.MATCHES,
baseSort = defaultBaseSortFn,
sorter = matchedItems => matchedItems.sort((a, b) => sortRankedValues(a, b, baseSort))
} = options;
const matchedItems = items.reduce(reduceItemsToRanked, []);
return sorter(matchedItems).map(({
item
}) => item);
function reduceItemsToRanked(matches, item, index) {
const rankingInfo = getHighestRanking(item, keys, value, options);
const {
rank,
keyThreshold = threshold
} = rankingInfo;
if (rank >= keyThreshold) {
matches.push({
...rankingInfo,
item,
index
});
}
return matches;
}
}
matchSorter.rankings = rankings;
/**
* Gets the highest ranking for value for the given item based on its values for the given keys
* @param {*} item - the item to rank
* @param {Array} keys - the keys to get values from the item for the ranking
* @param {String} value - the value to rank against
* @param {Object} options - options to control the ranking
* @return {{rank: Number, keyIndex: Number, keyThreshold: Number}} - the highest ranking
*/
function getHighestRanking(item, keys, value, options) {
if (!keys) {
// if keys is not specified, then we assume the item given is ready to be matched
const stringItem = item;
return {
// ends up being duplicate of 'item' in matches but consistent
rankedValue: stringItem,
rank: getMatchRanking(stringItem, value, options),
keyIndex: -1,
keyThreshold: options.threshold
};
}
const valuesToRank = getAllValuesToRank(item, keys);
return valuesToRank.reduce(({
rank,
rankedValue,
keyIndex,
keyThreshold
}, {
itemValue,
attributes
}, i) => {
let newRank = getMatchRanking(itemValue, value, options);
let newRankedValue = rankedValue;
const {
minRanking,
maxRanking,
threshold
} = attributes;
if (newRank < minRanking && newRank >= rankings.MATCHES) {
newRank = minRanking;
} else if (newRank > maxRanking) {
newRank = maxRanking;
}
if (newRank > rank) {
rank = newRank;
keyIndex = i;
keyThreshold = threshold;
newRankedValue = itemValue;
}
return {
rankedValue: newRankedValue,
rank,
keyIndex,
keyThreshold
};
}, {
rankedValue: item,
rank: rankings.NO_MATCH,
keyIndex: -1,
keyThreshold: options.threshold
});
}
function* indexesOf(testString, stringToRank) {
let index = -1;
while ((index = testString.indexOf(stringToRank, index + 1)) > -1) {
yield index;
}
return -1;
}
/**
* Gives a rankings score based on how well the two strings match.
* @param {String} testString - the string to test against
* @param {String} stringToRank - the string to rank
* @param {Object} options - options for the match (like keepDiacritics for comparison)
* @returns {Number} the ranking for how well stringToRank matches testString
*/
function getMatchRanking(testString, stringToRank, options) {
testString = prepareValueForComparison(testString, options);
stringToRank = prepareValueForComparison(stringToRank, options);
// too long
if (stringToRank.length > testString.length) {
return rankings.NO_MATCH;
}
// case sensitive equals
if (testString === stringToRank) {
return rankings.CASE_SENSITIVE_EQUAL;
}
// Lower casing before further comparison
testString = testString.toLowerCase();
stringToRank = stringToRank.toLowerCase();
// Use indexOf to check for equality/includes
const indexesOfStringToRankInTestString = indexesOf(testString, stringToRank);
const firstIndexOfStringToRankInTestStringResult = indexesOfStringToRankInTestString.next();
const indexOfStringToRankInTestString = firstIndexOfStringToRankInTestStringResult.value;
// case insensitive equals
if (testString.length === stringToRank.length && indexOfStringToRankInTestString === 0) {
return rankings.EQUAL;
}
// starts with
if (indexOfStringToRankInTestString === 0) {
return rankings.STARTS_WITH;
}
// word starts with
let indexOfStringToRankInTestStringResult = firstIndexOfStringToRankInTestStringResult;
while (!indexOfStringToRankInTestStringResult.done) {
if (indexOfStringToRankInTestStringResult.value > 0 && testString[indexOfStringToRankInTestStringResult.value - 1] === ' ') {
return rankings.WORD_STARTS_WITH;
}
indexOfStringToRankInTestStringResult = indexesOfStringToRankInTestString.next();
}
// contains
if (indexOfStringToRankInTestString > 0) {
return rankings.CONTAINS;
} else if (stringToRank.length === 1) {
// If the only character in the given stringToRank
// isn't even contained in the testString, then
// it's definitely not a match.
return rankings.NO_MATCH;
}
// acronym
if (getAcronym(testString).includes(stringToRank)) {
return rankings.ACRONYM;
}
// will return a number between rankings.MATCHES and
// rankings.MATCHES + 1 depending on how close of a match it is.
return getClosenessRanking(testString, stringToRank);
}
/**
* Generates an acronym for a string.
*
* Segment starts ︱ at the beginning of the phrase, after a **space**, or after a **hyphen**.
* We capture the first non-delimiter character of every segment and skip runs of delimiters.
*
* @example
* getAcronym('The Tail-spin Test') // → "TTsT"
* getAcronym('edge-case') // → "ec"
* getAcronym('multiple spaces') // → "ms"
*
* @param {String} string the string for which to produce the acronym
* @returns {String} the acronym
*/
function getAcronym(string) {
let acronym = '';
let prev = ' '; // virtual delimiter so the very first char qualifies
for (let i = 0; i < string.length; i++) {
const ch = string.charAt(i);
const prevWasDelimiter = prev === ' ' || prev === '-';
const currIsDelimiter = ch === ' ' || ch === '-';
if (prevWasDelimiter && !currIsDelimiter) {
acronym += ch;
}
prev = ch;
}
return acronym;
}
/**
* Returns a score based on how spread apart the
* characters from the stringToRank are within the testString.
* A number close to rankings.MATCHES represents a loose match. A number close
* to rankings.MATCHES + 1 represents a tighter match.
* @param {String} testString - the string to test against
* @param {String} stringToRank - the string to rank
* @returns {Number} the number between rankings.MATCHES and
* rankings.MATCHES + 1 for how well stringToRank matches testString
*/
function getClosenessRanking(testString, stringToRank) {
let matchingInOrderCharCount = 0;
let charNumber = 0;
function findMatchingCharacter(matchChar, string, index) {
for (let j = index, J = string.length; j < J; j++) {
const stringChar = string[j];
if (stringChar === matchChar) {
matchingInOrderCharCount += 1;
return j + 1;
}
}
return -1;
}
function getRanking(spread) {
const spreadPercentage = 1 / spread;
const inOrderPercentage = matchingInOrderCharCount / stringToRank.length;
const ranking = rankings.MATCHES + inOrderPercentage * spreadPercentage;
return ranking;
}
const firstIndex = findMatchingCharacter(stringToRank[0], testString, 0);
if (firstIndex < 0) {
return rankings.NO_MATCH;
}
charNumber = firstIndex;
for (let i = 1, I = stringToRank.length; i < I; i++) {
const matchChar = stringToRank[i];
charNumber = findMatchingCharacter(matchChar, testString, charNumber);
const found = charNumber > -1;
if (!found) {
return rankings.NO_MATCH;
}
}
const spread = charNumber - firstIndex;
return getRanking(spread);
}
/**
* Sorts items that have a rank, index, and keyIndex
* @param {Object} a - the first item to sort
* @param {Object} b - the second item to sort
* @return {Number} -1 if a should come first, 1 if b should come first, 0 if equal
*/
function sortRankedValues(a, b, baseSort) {
const aFirst = -1;
const bFirst = 1;
const {
rank: aRank,
keyIndex: aKeyIndex
} = a;
const {
rank: bRank,
keyIndex: bKeyIndex
} = b;
const same = aRank === bRank;
if (same) {
if (aKeyIndex === bKeyIndex) {
// use the base sort function as a tie-breaker
return baseSort(a, b);
} else {
return aKeyIndex < bKeyIndex ? aFirst : bFirst;
}
} else {
return aRank > bRank ? aFirst : bFirst;
}
}
/**
* Prepares value for comparison by stringifying it, removing diacritics (if specified)
* @param {String} value - the value to clean
* @param {Object} options - {keepDiacritics: whether to remove diacritics}
* @return {String} the prepared value
*/
function prepareValueForComparison(value, {
keepDiacritics
}) {
// value might not actually be a string at this point (we don't get to choose)
// so part of preparing the value for comparison is ensure that it is a string
value = `${value}`; // toString
if (!keepDiacritics) {
value = removeAccents$1(value);
}
return value;
}
/**
* Gets value for key in item at arbitrarily nested keypath
* @param {Object} item - the item
* @param {Object|Function} key - the potentially nested keypath or property callback
* @return {Array} - an array containing the value(s) at the nested keypath
*/
function getItemValues(item, key) {
if (typeof key === 'object') {
key = key.key;
}
let value;
if (typeof key === 'function') {
value = key(item);
} else if (item == null) {
value = null;
} else if (Object.hasOwnProperty.call(item, key)) {
value = item[key];
} else if (key.includes('.')) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-call
return getNestedValues(key, item);
} else {
value = null;
}
// because `value` can also be undefined
if (value == null) {
return [];
}
if (Array.isArray(value)) {
return value;
}
return [String(value)];
}
/**
* Given path: "foo.bar.baz"
* And item: {foo: {bar: {baz: 'buzz'}}}
* -> 'buzz'
* @param path a dot-separated set of keys
* @param item the item to get the value from
*/
function getNestedValues(path, item) {
const keys = path.split('.');
let values = [item];
for (let i = 0, I = keys.length; i < I; i++) {
const nestedKey = keys[i];
let nestedValues = [];
for (let j = 0, J = values.length; j < J; j++) {
const nestedItem = values[j];
if (nestedItem == null) continue;
if (Object.hasOwnProperty.call(nestedItem, nestedKey)) {
const nestedValue = nestedItem[nestedKey];
if (nestedValue != null) {
nestedValues.push(nestedValue);
}
} else if (nestedKey === '*') {
// ensure that values is an array
nestedValues = nestedValues.concat(nestedItem);
}
}
values = nestedValues;
}
if (Array.isArray(values[0])) {
// keep allowing the implicit wildcard for an array of strings at the end of
// the path; don't use `.flat()` because that's not available in node.js v10
const result = [];
return result.concat(...values);
}
// Based on our logic it should be an array of strings by now...
// assuming the user's path terminated in strings
return values;
}
/**
* Gets all the values for the given keys in the given item and returns an array of those values
* @param item - the item from which the values will be retrieved
* @param keys - the keys to use to retrieve the values
* @return objects with {itemValue, attributes}
*/
function getAllValuesToRank(item, keys) {
const allValues = [];
for (let j = 0, J = keys.length; j < J; j++) {
const key = keys[j];
const attributes = getKeyAttributes(key);
const itemValues = getItemValues(item, key);
for (let i = 0, I = itemValues.length; i < I; i++) {
allValues.push({
itemValue: itemValues[i],
attributes
});
}
}
return allValues;
}
const defaultKeyAttributes = {
maxRanking: Infinity,
minRanking: -Infinity
};
/**
* Gets all the attributes for the given key
* @param key - the key from which the attributes will be retrieved
* @return object containing the key's attributes
*/
function getKeyAttributes(key) {
if (typeof key === 'string') {
return defaultKeyAttributes;
}
return {
...defaultKeyAttributes,
...key
};
}
/*
eslint
no-continue: "off",
*/
exports.defaultBaseSortFn = defaultBaseSortFn;
exports.matchSorter = matchSorter;
exports.rankings = rankings;
}));
//# sourceMappingURL=2106348-match-sorter.umd.js.map