cmpstr
Version:
lightweight npm package to calculate string similarity
63 lines (50 loc) • 1.28 kB
JavaScript
/**
* q-Gram Similarity
* CmpStr module
*
* Q-gram similarity is a string-matching algorithm that compares two
* strings by breaking them into substrings of length Q. It's used to
* determine how similar the two strings are.
*
* @author Paul Köhler (komed3)
* @license MIT
*/
;
/**
* private helper function
* convert string to array of substrings
* @private
*
* @param {String} str string
* @param {Int} q length of substrings
* @returns {String[]} array of substrings
*/
const _qGrams = ( str, q ) => {
let grams = [];
for ( let i = 0; i <= str.length - q; i++ ) {
grams.push( str.slice( i, i + q ) );
}
return grams;
};
/**
* module exports
* @public
*
* @param {String} a string a
* @param {String} b string b
* @param {Object} options having {
* @param {Int} [q=2] length of substrings
* }
* @returns {Number} similarity score (0..1)
*/
module.exports = ( a, b, { q = 2 } = {} ) => {
let setA = new Set ( _qGrams( a, q ) ),
setB = new Set ( _qGrams( b, q ) );
return (
new Set( [ ...setA ].filter(
test => setB.has( test )
) )
).size / Math.max(
setA.size, setB.size
);
};