didyoumean2
Version:
A library for matching human-quality input to a list of potential matches using the Levenshtein distance algorithm.
275 lines (254 loc) • 8.88 kB
JavaScript
import { distance } from 'fastest-levenshtein';
import deburr from 'lodash.deburr';
/**
 * Supported `returnType` option values: they control which of the matched
 * items are returned and whether the result is a list or a single item.
 */
let ReturnTypeEnums = {
  ALL_CLOSEST_MATCHES: 'all-closest-matches',
  ALL_MATCHES: 'all-matches',
  ALL_SORTED_MATCHES: 'all-sorted-matches',
  FIRST_CLOSEST_MATCH: 'first-closest-match',
  FIRST_MATCH: 'first-match'
};
/**
 * Supported `thresholdType` option values: the metric used to score a
 * candidate against the input.
 */
let ThresholdTypeEnums = {
  EDIT_DISTANCE: 'edit-distance',
  SIMILARITY: 'similarity'
};
/**
 * Merge user-supplied options with the defaults and validate the enum fields.
 *
 * Entries whose value is `undefined` are ignored, so passing something like
 * `{ threshold: maybeUndefined }` falls back to the default instead of
 * shadowing it with `undefined`.
 *
 * @param {null | object | undefined} options - user options; `null`/`undefined` means "use every default"
 * @returns {object} - options with every field populated, including `threshold`
 *   (defaults to 20 for edit distance and 0.4 for similarity)
 * @throws {TypeError} when `returnType` or `thresholdType` is not a known enum value
 */
const fillDefaultOptions = options => {
  // Drop explicitly-undefined entries: spreading them would overwrite the defaults below
  const providedOptions = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  const optionsWithDefaultValues = {
    caseSensitive: false,
    deburr: true,
    matchPath: [],
    returnType: ReturnTypeEnums.FIRST_CLOSEST_MATCH,
    thresholdType: ThresholdTypeEnums.SIMILARITY,
    trimSpaces: true,
    ...providedOptions
  };
  if (!Object.values(ReturnTypeEnums).includes(optionsWithDefaultValues.returnType)) {
    throw new TypeError('unknown returnType');
  }
  if (!Object.values(ThresholdTypeEnums).includes(optionsWithDefaultValues.thresholdType)) {
    throw new TypeError('unknown thresholdType');
  }
  // The default threshold depends on the metric: a distance budget vs. a minimum ratio
  const defaultThresholds = {
    [ThresholdTypeEnums.EDIT_DISTANCE]: 20,
    [ThresholdTypeEnums.SIMILARITY]: 0.4
  };
  return {
    threshold: defaultThresholds[optionsWithDefaultValues.thresholdType],
    ...optionsWithDefaultValues
  };
};
/**
 * Turn the edit distance between two strings into a similarity ratio.
 *
 * The ratio is `(longest length - edit distance) / longest length`. An empty
 * (falsy) argument on either side yields 0; identical non-empty strings yield 1
 * without computing a distance.
 *
 * @param {string} a - `input`
 * @param {string} b - String from `matchList`
 * @returns {number} similarity between `a` and `b`
 */
const getSimilarity = (a, b) => {
  const eitherSideEmpty = !a || !b;
  if (eitherSideEmpty) {
    return 0;
  }
  if (a === b) {
    return 1;
  }
  const maxLength = a.length >= b.length ? a.length : b.length;
  return (maxLength - distance(a, b)) / maxLength;
};
/**
 * Apply the normalization steps enabled in `options` to a string.
 *
 * Steps, in order: trim and collapse whitespace runs to a single space
 * (`trimSpaces`), run `deburr` on the text (`deburr`), and lower-case it
 * unless `caseSensitive` is set.
 *
 * @param {string} str - any string
 * @param {object} options - options that allows you to modify the behavior
 * @returns {string} - normalized string
 */
const normalizeString = (str, options) => {
  const collapsed = options.trimSpaces ? str.trim().replace(/\s+/gu, ' ') : str;
  const unaccented = options.deburr ? deburr(collapsed) : collapsed;
  return options.caseSensitive ? unaccented : unaccented.toLowerCase();
};
/**
 * Resolve the string to compare for a match item.
 *
 * Follows `matchPath` property by property from `matchItem` (an empty path
 * means the item itself). Anything that does not resolve to a string —
 * including a path that runs through `null`/`undefined` — yields ''.
 *
 * @param {object | string} matchItem - Item for matching with `input`
 * @param {string[]} matchPath - property names leading to the string inside `matchItem`
 * @returns {string} - the resolved string, or '' when there is none
 */
const getMatchItemStr = (matchItem, matchPath) => {
  let resolved = matchItem;
  if (matchPath.length > 0) {
    for (const key of matchPath) {
      // Optional access keeps a missing intermediate level from throwing
      resolved = resolved?.[key];
    }
  }
  return typeof resolved === 'string' ? resolved : '';
};
/**
 * Prepare a match item for comparison: pull out its string via
 * `options.matchPath`, then normalize that string with the same options.
 *
 * @param {object | string} matchItem - Item for matching with `input`
 * @param {object} options - options that allows you to modify the behavior
 * @returns {string} - processed matchItem
 */
const matchItemProcessor = (matchItem, options) =>
  normalizeString(getMatchItemStr(matchItem, options.matchPath), options);
/**
 * Convert matched indexes into the value handed back to the caller.
 *
 * The three "all-*" return types produce an array of items in the order of
 * `matchedIndexes`; the two "first-*" return types produce the item at the
 * first matched index, or `null` when nothing matched.
 *
 * @param {object[] | string[]} matchList - List for matching with `input`
 * @param {number[]} matchedIndexes - indexes of matchList that need to be returned as result
 * @param {ReturnTypeEnums} returnType - how the result will response to user
 * @returns {Array | null | object | string} - matched result(s), return object if `match` is `{Object[]}`
 */
const resultProcessor = (matchList, matchedIndexes, returnType) => {
  const {
    ALL_CLOSEST_MATCHES,
    ALL_MATCHES,
    ALL_SORTED_MATCHES,
    FIRST_CLOSEST_MATCH,
    FIRST_MATCH
  } = ReturnTypeEnums;
  if ([ALL_CLOSEST_MATCHES, ALL_MATCHES, ALL_SORTED_MATCHES].includes(returnType)) {
    return matchedIndexes.map(index => matchList[index]);
  }
  if ([FIRST_CLOSEST_MATCH, FIRST_MATCH].includes(returnType)) {
    const [firstIndex] = matchedIndexes;
    return firstIndex === undefined ? null : matchList[firstIndex];
  }
  // Any other returnType falls through without a result
  return undefined;
};
/**
 * Find the entry (or entries) of `matchList` that correspond to `input`.
 *
 * Every candidate is scored against the normalized input — by edit distance
 * (lower is better) or by similarity (higher is better), depending on
 * `thresholdType` — and the indexes selected according to `returnType` are
 * passed to `resultProcessor` to build the return value.
 *
 * @param {string} input - string that you are not sure and want to match with `matchList`
 * @param {object[] | string[]} matchList - List for matching with `input`
 * @param {null | object | undefined} options - options that allows you to modify the behavior
 * @returns {Array | null | object | string} - matched result(s), return object if `match` is `{Object[]}`
 */
function didYouMean(input, matchList, options) {
  const settings = fillDefaultOptions(options);
  const { returnType, threshold, thresholdType } = settings;
  const needle = normalizeString(input, settings);

  // Everything that differs between the two metrics lives in one table entry
  const strategies = {
    [ThresholdTypeEnums.EDIT_DISTANCE]: {
      startingBest: Number.POSITIVE_INFINITY,
      isBetter: (candidate, best) => candidate < best,
      passesThreshold: score => score <= threshold,
      measure: matchItem => distance(needle, matchItemProcessor(matchItem, settings)),
      bestFirst: (a, b) => a.score - b.score
    },
    [ThresholdTypeEnums.SIMILARITY]: {
      startingBest: 0,
      isBetter: (candidate, best) => candidate > best,
      passesThreshold: score => score >= threshold,
      measure: matchItem => getSimilarity(needle, matchItemProcessor(matchItem, settings)),
      bestFirst: (a, b) => b.score - a.score
    }
  };
  const strategy = strategies[thresholdType];

  // Lazily yields `{ index, score }` for each item passing the threshold, in
  // list order; a consumer that stops early also stops the scoring.
  const passingItems = function* () {
    for (const [index, matchItem] of matchList.entries()) {
      const score = strategy.measure(matchItem);
      if (strategy.passesThreshold(score)) {
        yield { index, score };
      }
    }
  };

  const matchedIndexes = [];
  switch (returnType) {
    case ReturnTypeEnums.ALL_CLOSEST_MATCHES:
    case ReturnTypeEnums.FIRST_CLOSEST_MATCH: {
      // Score everything first; the best score is only known after a full pass
      const scores = [];
      for (const matchItem of matchList) {
        scores.push(strategy.measure(matchItem));
      }
      const bestScore = scores.reduce(
        (best, score) => (strategy.isBetter(score, best) ? score : best),
        strategy.startingBest
      );
      scores.forEach((score, index) => {
        if (strategy.passesThreshold(score) && score === bestScore) {
          matchedIndexes.push(index);
        }
      });
      break;
    }
    case ReturnTypeEnums.ALL_MATCHES:
      for (const { index } of passingItems()) {
        matchedIndexes.push(index);
      }
      break;
    case ReturnTypeEnums.ALL_SORTED_MATCHES: {
      const ranked = [...passingItems()];
      ranked.sort(strategy.bestFirst);
      for (const { index } of ranked) {
        matchedIndexes.push(index);
      }
      break;
    }
    case ReturnTypeEnums.FIRST_MATCH: {
      // Pull a single result so no item after the first hit is scored
      const firstHit = passingItems().next();
      if (!firstHit.done) {
        matchedIndexes.push(firstHit.value.index);
      }
      break;
    }
  }

  return resultProcessor(matchList, matchedIndexes, returnType);
}
export { ReturnTypeEnums, ThresholdTypeEnums, didYouMean as default };
//# sourceMappingURL=index.mjs.map