compare-strings
Version:
Finds degree of similarity between two strings, based on Dice's Coefficient and Levenshtein Distance.
109 lines (94 loc) • 3.64 kB
JavaScript
/**
 * Scores how similar two strings are, ignoring letter case and all whitespace.
 *
 * Both inputs are lower-cased and stripped of whitespace before comparison.
 * The result is:
 *   - 1 when the normalized strings are both empty or are identical,
 *   - 0 when exactly one is empty, or when either has fewer than two
 *     characters (no bigram can be formed from it),
 *   - otherwise the arithmetic mean of coefficient(), distance() and
 *     stringSimilarity() applied to the normalized strings.
 *
 * Note: stringSimilarity() and coefficient() both measure bigram overlap, so
 * that measure effectively carries two thirds of the weight of the average.
 *
 * @param {string} first - First string to compare.
 * @param {string} second - Second string to compare.
 * @returns {number} Similarity score; 1 means equal after normalization.
 * @throws {TypeError} If either argument lacks string methods (e.g. null).
 */
function compareStrings(first, second) {
  // Normalize into new bindings instead of reassigning the parameters.
  const normalize = (text) => text.toLowerCase().replace(/\s+/g, '');
  const a = normalize(first);
  const b = normalize(second);

  if (a.length === 0 || b.length === 0) {
    // Two empty strings count as a perfect match; one empty string as none.
    return a.length === b.length ? 1 : 0;
  }
  if (a === b) {
    return 1;
  }
  // A 1-letter string yields no bigrams; this also covers the case where
  // both strings are a single (different) letter.
  if (a.length < 2 || b.length < 2) {
    return 0;
  }

  const diceScore = coefficient(a, b);
  const editScore = distance(a, b);
  const bigramScore = stringSimilarity(a, b);
  return (diceScore + editScore + bigramScore) / 3;
}
/**
 * Measures the overlap of two-character substrings (bigrams) between two
 * strings as 2 * matches / (bigrams in str1 + bigrams in str2).
 *
 * Bigrams are matched as a multiset: each occurrence in str1 can be matched
 * by at most one occurrence in str2.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} 0 when either string is shorter than two characters,
 *   otherwise the overlap ratio (1 when both have the same bigram multiset).
 */
function stringSimilarity(str1, str2) {
  const substringLength = 2;
  if (str1.length < substringLength || str2.length < substringLength) {
    return 0;
  }

  // Number of substrings of the given length each string contains.
  const gramCount1 = str1.length - (substringLength - 1);
  const gramCount2 = str2.length - (substringLength - 1);

  // Count how many times each substring occurs in str1.
  const available = new Map();
  for (let i = 0; i < gramCount1; i++) {
    // slice() replaces the deprecated substr(); same characters are taken.
    const gram = str1.slice(i, i + substringLength);
    available.set(gram, (available.get(gram) || 0) + 1);
  }

  // Consume one occurrence from the map for every substring of str2 found.
  let match = 0;
  for (let j = 0; j < gramCount2; j++) {
    const gram = str2.slice(j, j + substringLength);
    const count = available.get(gram) || 0;
    if (count > 0) {
      available.set(gram, count - 1);
      match++;
    }
  }

  return (match * 2) / (gramCount1 + gramCount2);
}
/**
 * Computes the Dice coefficient of two strings over their character bigrams:
 * 2 * |shared bigrams| / (|bigrams of first| + |bigrams of second|).
 *
 * Shared bigrams are counted as a multiset intersection, so a bigram that
 * occurs twice in one string needs two occurrences in the other to count
 * twice.
 *
 * @param {string} first - First string.
 * @param {string} second - Second string.
 * @returns {number} 0 when either string is shorter than two characters,
 *   otherwise the coefficient (1 when both have the same bigram multiset).
 */
function coefficient(first, second) {
  // A string shorter than two characters has no bigrams. Without this guard
  // the denominator below can be 0 and the result would be NaN (0 / 0).
  if (first.length < 2 || second.length < 2) {
    return 0;
  }

  // Occurrence count of every bigram in `first`.
  const firstBigrams = new Map();
  for (let i = 0; i < first.length - 1; i++) {
    const bigram = first.substring(i, i + 2);
    firstBigrams.set(bigram, (firstBigrams.get(bigram) || 0) + 1);
  }

  // Each bigram of `second` consumes one remaining occurrence from `first`.
  let intersectionSize = 0;
  for (let i = 0; i < second.length - 1; i++) {
    const bigram = second.substring(i, i + 2);
    const count = firstBigrams.get(bigram) || 0;
    if (count > 0) {
      firstBigrams.set(bigram, count - 1);
      intersectionSize++;
    }
  }

  // A string of length n has n - 1 bigrams, hence the "- 2" in the total.
  return (2.0 * intersectionSize) / (first.length + second.length - 2);
}
/**
 * Converts the edit distance between two strings into a similarity ratio:
 * (longerLength - editDistance) / longerLength, where longerLength is the
 * length of the longer of the two strings.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} 1 when both strings are empty; otherwise the ratio above
 *   (1 when editDistance reports 0 edits).
 */
function distance(s1, s2) {
  // On equal lengths s1 is treated as the longer string.
  const [longer, shorter] = s1.length < s2.length ? [s2, s1] : [s1, s2];
  const longerLength = longer.length;
  if (longerLength === 0) {
    // Both strings are empty: avoid dividing by zero and report a full match.
    return 1;
  }
  // longerLength is already a number, so no parseFloat() conversion is needed.
  return (longerLength - editDistance(longer, shorter)) / longerLength;
}
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * Only one row of the dynamic-programming table is kept at a time.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} The edit distance (0 when the strings are equal).
 */
function editDistance(s1, s2) {
  const width = s2.length;
  // row[j] holds the distance between the processed prefix of s1 and the
  // first j characters of s2; for the empty prefix that is simply j.
  const row = Array.from({ length: width + 1 }, (_, j) => j);

  for (let i = 1; i <= s1.length; i++) {
    // Value from the previous row one column to the left of the current cell.
    let diagonal = row[0];
    row[0] = i;
    for (let j = 1; j <= width; j++) {
      const above = row[j];
      const left = row[j - 1];
      if (s1.charAt(i - 1) === s2.charAt(j - 1)) {
        // Matching characters cost nothing extra.
        row[j] = diagonal;
      } else {
        row[j] = Math.min(diagonal, left, above) + 1;
      }
      diagonal = above;
    }
  }

  return row[width];
}
// Expose compareStrings as this module's single CommonJS export.
module.exports = compareStrings;