// seaq
// Version:
// ES6 Text Search.
// 240 lines (203 loc) • 7.28 kB
// JavaScript
/**
* string_score is an implementation of the string score algo developed by
* https://github.com/joshaven/string_score
*
* "hello world".score("axl") //=> 0
* "hello world".score("ow") //=> 0.35454545454545455
*
* // Single letter match
* "hello world".score("e") //=>0.1090909090909091
*
* // Single letter match plus bonuses for beginning of word and beginning of phrase
* "hello world".score("h") //=> ~0.5864
*
* "hello world".score("he") //=> ~0.6227
* "hello world".score("hel") //=> ~0.6591
* "hello world".score("hell") //=> ~0.6955
* "hello world".score("hello") //=> ~0.7318
* ...
* "hello world".score("hello worl") //=> ~0.9136
* "hello world".score("hello world") //=> 1
*
*
* // Using a "1" in place of an "l" is a mismatch unless the score is fuzzy
* "hello world".score("hello wor1") //=>0
* "hello world".score("hello wor1",0.5) //=>0.6081818181818182 (fuzzy)
*
* // Finding a match in a shorter string is more significant.
* 'Hello'.score('h') //=> ~0.57
* 'He'.score('h') //=> ~0.675
*
* // Same case matches better than wrong case
* 'Hello'.score('h') //=> ~0.57
* 'Hello'.score('H') //=> ~0.63
*
* // Acronyms are given a little more weight
* "Hillsdale Michigan".score("HiMi") > "Hillsdale Michigan".score("Hills")
* "Hillsdale Michigan".score("HiMi") < "Hillsdale Michigan".score("Hillsd")
*
* @export
* @param {string} target
* @param {string} query
* @param {number} [fuzziness]
* @returns {number}
*/
function string_score(target, query, fuzziness) {
  // Identical strings are a perfect match.
  if (target === query) {
    return 1;
  }
  // An empty query matches nothing, and an empty target cannot contain any
  // query character. Bailing out here for an empty target also prevents the
  // 0 / 0 division (NaN) the final formula would otherwise hit in fuzzy mode.
  if (query === '' || target === '') {
    return 0;
  }

  const lowerTarget = target.toLowerCase();
  const lowerQuery = query.toLowerCase();
  const targetLength = target.length;
  const queryLength = query.length;

  // Amount added to the divisor for every query character that is not found.
  // Stays 0 when fuzziness is off (undefined, 0, or any other falsy value).
  const missPenalty = fuzziness ? 1 - fuzziness : 0;

  let runningScore = 0;
  let fuzzies = 1; // divisor applied to the final score; grows with each miss
  let startAt = 0; // position in the target to resume searching from

  // Walk through the query and add up the per-character scores.
  for (let i = 0; i < queryLength; i += 1) {
    // Next case-insensitive occurrence of the current query character.
    const idxOf = lowerTarget.indexOf(lowerQuery[i], startAt);

    if (idxOf === -1) {
      // Without fuzziness a single missing character disqualifies the target.
      if (!fuzziness) {
        return 0;
      }
      fuzzies += missPenalty;
      continue;
    }

    let charScore;
    if (startAt === idxOf) {
      // Consecutive letter & start-of-string bonus.
      charScore = 0.7;
    } else {
      charScore = 0.1;
      // Acronym bonus: a character that directly follows a space counts as
      // if it had been preceded by perfect character matches.
      if (target[idxOf - 1] === ' ') {
        charScore += 0.8;
      }
    }

    // Same-case bonus.
    if (target[idxOf] === query[i]) {
      charScore += 0.1;
    }

    runningScore += charScore;
    startAt = idxOf + 1;
  }

  // Average the score relative to both lengths (reduces the penalty for long
  // targets), then scale it down by the accumulated miss penalty.
  let finalScore = 0.5 * (runningScore / targetLength + runningScore / queryLength) / fuzzies;

  // First-letter bonus, only applied while the score is still below 0.85.
  if (lowerQuery[0] === lowerTarget[0] && finalScore < 0.85) {
    finalScore += 0.15;
  }
  return finalScore;
}
/**
* Seaq is a Fuzzy searching utility function.
*/
/**
* Given an input list Array<T>, a set of object keys to search, and a search
* query, Seaq will return a new Array<T> containing the results ordered by
* their Score which is calculated using a variation of string_score algorithm.
*
* @export
* @template T generic
* @param {Array<T>} list list of objects or strings to search
* @param {string} query query string to match against keys in objects
* @param {(Array<Extract<keyof T, string>> | string[])} keys optional keys to search in the object
* @param {number} [fuzzy] optional fuzziness should be between 0 and 1. low fuzziness like 0.01 means a mismatch will drop the score more than a fuzziness of something like 0.9.
* @returns {Array<T>}
*/
function seaq(list, query, keys, fuzzy) {
  // Score every entry; getMetaDataList only hands back entries with score > 0.
  const scored = getMetaDataList(list, query, keys, fuzzy);

  // Highest score first. Sorting in place is fine: `scored` is a fresh array.
  scored.sort((left, right) => right.score - left.score);

  // Strip the scoring metadata and return the original items.
  return scored.map(({ item }) => item);
}
function getMetaDataList(list, query, keys, fuzzy) {
  // Text that stands in for one item when scoring: the values found under
  // every key joined with ' ', or the JSON form of the item when no keys
  // were supplied.
  const toSearchText = (item) => {
    if (!keys) {
      return JSON.stringify(item);
    }
    const parts = keys.map((key) => getProperty(item, key).join(' '));
    return parts.join(' ');
  };

  // Pair each original item with its match score.
  const toScoredEntry = (item) => ({
    item,
    score: string_score(toSearchText(item), query, fuzzy),
  });

  // Keep only the entries that actually matched (score > 0).
  const isMatch = (entry) => entry.score > 0;

  return list.map(toScoredEntry).filter(isMatch);
}
/**
 * Collects, as strings, the value(s) found at a dot-separated `path` inside
 * `obj`. Arrays met along the way are traversed element by element, so one
 * path may yield several entries.
 *
 * @param {*} obj value to read from
 * @param {?string} path dot-separated property path; a falsy path means
 *   "`obj` itself is the value", which is then pushed as JSON
 * @param {Array} [list] accumulator the results are pushed onto
 * @returns {Array} the accumulator `list`
 */
function getProperty(obj, path, list = []) {
  if (!path) {
    // No path left: we have reached the value we care about.
    list.push(JSON.stringify(obj));
    return list;
  }

  // A null/undefined holder has no properties to descend into. Without this
  // guard, `obj[firstSegment]` below throws a TypeError, e.g. for a null
  // element inside an array that is searched with a nested path.
  if (obj === null || obj === undefined) {
    return list;
  }

  // Split the path into its first segment and whatever follows the first dot.
  const dotIndex = path.indexOf('.');
  const hasDot = dotIndex !== -1;
  const firstSegment = hasDot ? path.slice(0, dotIndex) : path;
  const remaining = hasDot ? path.slice(dotIndex + 1) : null;

  const value = obj[firstSegment];
  if (value === null || value === undefined) {
    // Missing property: contributes nothing.
    return list;
  }

  const isLeafPrimitive = typeof value === 'string' || typeof value === 'number';
  if (!remaining && isLeafPrimitive) {
    // End of the path on a string/number: take it verbatim (no JSON quoting).
    list.push(value.toString());
  } else if (Array.isArray(value)) {
    // Search each item in the array with the rest of the path.
    for (const element of value) {
      getProperty(element, remaining, list);
    }
  } else {
    // Recurse with the rest of the path; once that is empty, the recursive
    // call pushes the JSON form of `value`.
    getProperty(value, remaining, list);
  }
  return list;
}
export { seaq };
//# sourceMappingURL=seaq.esm.js.map