UNPKG

compare-strings

Version:

Finds the degree of similarity between two strings, based on Dice's Coefficient and Levenshtein Distance.

109 lines (94 loc) 3.64 kB
/**
 * Scores how similar two strings are, from 0 (nothing in common) to 1 (equal).
 *
 * Both inputs are lower-cased and stripped of all whitespace before they are
 * compared, so "Hello World" and "helloworld" count as identical.
 *
 * The score is the mean of three terms: coefficient(), distance() and
 * stringSimilarity(). coefficient() and stringSimilarity() both yield the
 * bigram Dice value, so that value carries two thirds of the weight and the
 * edit-distance term one third. The weighting is kept as-is so that existing
 * scores do not change.
 *
 * @param {string} first - First string to compare.
 * @param {string} second - Second string to compare.
 * @returns {number} 1 when the normalized strings are equal (including both
 *   empty), 0 when they differ and either has fewer than two characters,
 *   otherwise the averaged score.
 * @throws {TypeError} If either argument has no `toLowerCase` method
 *   (for example `null`, `undefined` or a number).
 */
function compareStrings(first, second) {
  const left = first.toLowerCase().replace(/\s+/g, '');
  const right = second.toLowerCase().replace(/\s+/g, '');

  // Covers "both empty" as well as "identical after normalization".
  if (left === right) return 1;

  // No bigram can be formed from an empty or 1-character string.
  if (left.length < 2 || right.length < 2) return 0;

  return (coefficient(left, right) + distance(left, right) + stringSimilarity(left, right)) / 3;
}

/**
 * Dice coefficient over overlapping 2-character substrings (bigrams).
 *
 * Repeated bigrams are matched one-for-one: a bigram that occurs once in
 * `str1` can be matched by at most one occurrence in `str2`.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} 2 * (matched bigrams) / (total bigrams in both strings),
 *   or 0 when either string is shorter than one bigram.
 */
function stringSimilarity(str1, str2) {
  const substringLength = 2;
  if (str1.length < substringLength || str2.length < substringLength) return 0;

  // Multiset of the bigrams of str1 that have not been matched yet.
  const unmatched = new Map();
  for (let i = 0; i + substringLength <= str1.length; i++) {
    const gram = str1.slice(i, i + substringLength);
    unmatched.set(gram, (unmatched.get(gram) || 0) + 1);
  }

  let match = 0;
  for (let j = 0; j + substringLength <= str2.length; j++) {
    const gram = str2.slice(j, j + substringLength);
    const available = unmatched.get(gram) || 0;
    if (available > 0) {
      // Consume one occurrence so it cannot be matched twice.
      unmatched.set(gram, available - 1);
      match++;
    }
  }

  // A string of length n has n - (substringLength - 1) substrings.
  return (match * 2) / (str1.length + str2.length - (substringLength - 1) * 2);
}

/**
 * Bigram Dice coefficient of two strings.
 *
 * This is the same measure as stringSimilarity(), so it delegates to it. It
 * stays a separate function because compareStrings() averages it as its own
 * term. Delegating also means short inputs yield 0 instead of dividing by
 * zero.
 *
 * @param {string} first - First string.
 * @param {string} second - Second string.
 * @returns {number} Dice coefficient, or 0 when either string has fewer than
 *   two characters.
 */
function coefficient(first, second) {
  return stringSimilarity(first, second);
}

/**
 * Edit-distance based similarity: the share of the longer string that does
 * not need to be edited to obtain the other string.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} (longerLength - editDistance) / longerLength, or 1 when
 *   both strings are empty.
 */
function distance(s1, s2) {
  const firstIsShorter = s1.length < s2.length;
  const longer = firstIsShorter ? s2 : s1;
  const shorter = firstIsShorter ? s1 : s2;

  if (longer.length === 0) return 1;

  return (longer.length - editDistance(longer, shorter)) / longer.length;
}

/**
 * Levenshtein distance: the minimum number of single-character insertions,
 * deletions and substitutions that turn `s1` into `s2`.
 *
 * Only one row of the dynamic-programming table is kept at a time.
 *
 * @param {string} s1 - Source string.
 * @param {string} s2 - Target string.
 * @returns {number} The edit distance (0 for equal strings).
 */
function editDistance(s1, s2) {
  // costs[j] holds the distance between the processed prefix of s1 and the
  // first j characters of s2. It starts as the row for the empty prefix.
  const costs = [];
  for (let j = 0; j <= s2.length; j++) {
    costs[j] = j;
  }

  for (let i = 1; i <= s1.length; i++) {
    // Value of the previous row at column j - 1 (the "diagonal" neighbour).
    let diagonal = costs[0];
    costs[0] = i;

    for (let j = 1; j <= s2.length; j++) {
      const above = costs[j];
      if (s1.charAt(i - 1) === s2.charAt(j - 1)) {
        costs[j] = diagonal;
      } else {
        // Substitution, insertion or deletion, whichever is cheapest.
        costs[j] = Math.min(diagonal, costs[j - 1], above) + 1;
      }
      diagonal = above;
    }
  }

  return costs[s2.length];
}

// CommonJS export. The guard keeps the file loadable where `module` is not
// defined (for example when evaluated as an ES module or a plain script).
if (typeof module !== 'undefined' && module.exports) {
  module.exports = compareStrings;
}