ksdc
Version:
Measure how similar two strings are using the Sørensen–Dice Coefficient
74 lines (72 loc) • 2.7 kB
JavaScript
// Local alias for Object.defineProperty, used by the module helpers below.
var __defineProperty = Object.defineProperty;
/**
 * Tags `target` as a transpiled ES module by defining a non-enumerable,
 * read-only `__esModule: true` property on it.
 *
 * @param {object} target - Object to tag (the module's export object).
 * @returns {object} The same `target` object.
 */
var __markAsModule = function (target) {
  var flagDescriptor = {value: true};
  return __defineProperty(target, "__esModule", flagDescriptor);
};
/**
 * Marks `target` as an ES module and exposes every enumerable key of `all`
 * on it as an enumerable getter-only property.
 *
 * @param {object} target - Object that receives the exports.
 * @param {Object<string, function(): *>} all - Export name to getter function.
 * @returns {undefined}
 */
var __export = function (target, all) {
  __markAsModule(target);
  for (const exportName in all) {
    const getter = all[exportName];
    __defineProperty(target, exportName, {enumerable: true, get: getter});
  }
};
// index.js
// Register the public API on the `exports` object as enumerable getters.
// The arrow functions defer the lookup until the property is read, so it is
// safe to reference the `const` bindings that are declared further down.
__export(exports, {
compareStrings: () => compareStrings,
findMatch: () => findMatch
});
// Symbol key under which createBigrams stores a bigram map's total bigram
// count; calculateScore reads it back to build the score's denominator.
const totalBigrams = Symbol("totalBigrams");
/**
 * Builds the bigram multiset of a string.
 *
 * Bigrams are overlapping slices of two UTF-16 code units
 * (`word.slice(i, i + 2)`).
 *
 * @param {string} word - Text to split into bigrams.
 * @returns {Map<string, number>} Map from each bigram to the number of times
 *   it occurs. The total number of bigrams is attached to the map under the
 *   `totalBigrams` symbol as a non-enumerable, read-only property.
 */
const createBigrams = (word) => {
  // A string shorter than two code units has no bigrams. Clamp so that the
  // empty string reports a total of 0 instead of -1.
  const total = Math.max(word.length - 1, 0);
  const bigrams = new Map();
  for (let index = 0; index < total; index += 1) {
    const bigram = word.slice(index, index + 2);
    bigrams.set(bigram, (bigrams.get(bigram) || 0) + 1);
  }
  return Object.defineProperty(bigrams, totalBigrams, {value: total});
};
/**
 * Scores the similarity of two strings from their precomputed bigram maps.
 *
 * When the shorter string has two or fewer characters, the bigram maps are
 * ignored and the result is 1 for identical strings and 0 otherwise.
 * Otherwise the result is `2 * overlaps / (referenceTotal + inputTotal)`,
 * where `overlaps` counts bigram occurrences shared by both maps and the
 * totals are read from each map's `totalBigrams` property.
 *
 * @param {Map<string, number>} reference - Bigram counts of `referenceString`.
 * @param {string} referenceString - The reference text.
 * @param {Map<string, number>} input - Bigram counts of `inputString`.
 * @param {string} inputString - The text compared against the reference.
 * @returns {number} The similarity score.
 */
const calculateScore = (reference, referenceString, input, inputString) => {
  if (Math.min(referenceString.length, inputString.length) <= 2) {
    return Number(Object.is(referenceString, inputString));
  }
  let overlaps = 0;
  for (const [bigram, timesFound] of input) {
    // Each distinct bigram is visited once, so the shared occurrences are the
    // smaller of the two counts. No need to copy and decrement `reference`.
    overlaps += Math.min(timesFound, reference.get(bigram) || 0);
  }
  return 2 * overlaps / (reference[totalBigrams] + input[totalBigrams]);
};
/**
 * Creates a matcher that scores an input string against a fixed reference list.
 *
 * @param {Array<Map<string, number>>} references - Bigram maps, one per
 *   reference string, in the same order as `referenceStrings`.
 * @param {string[]} referenceStrings - The reference texts.
 * @returns {function(string): {bestMatch: object, matches: object[]}}
 *   Function taking the input string. `matches` holds one
 *   `{score, reference}` entry per reference, in order. `bestMatch` is a
 *   separate `{score, reference, index}` object for the first reference with
 *   the strictly highest score, or `{score: -Infinity}` when there are no
 *   references.
 */
const findMatchCurried = (references, referenceStrings) => (inputString) => {
  const input = createBigrams(inputString);
  let bestMatch = {score: -Infinity};
  const matches = [];
  for (const [index, reference] of references.entries()) {
    const referenceString = referenceStrings[index];
    const match = {
      score: calculateScore(reference, referenceString, input, inputString),
      reference: referenceString,
    };
    matches.push(match);
    // Strict comparison: on a tie, the earliest reference stays the best match.
    if (match.score > bestMatch.score) {
      bestMatch = Object.assign({}, match, {index});
    }
  }
  return {bestMatch, matches};
};
/**
 * Finds the reference string that best matches an input string.
 *
 * The bigram maps of `references` are built once, up front.
 *
 * @param {string[]} references - Candidate strings to compare against.
 * @param {string} [input] - Text to look up. When it is `undefined`, a
 *   function is returned that accepts the input later and reuses the
 *   prepared references.
 * @returns {{bestMatch: object, matches: object[]}|function(string): object}
 *   The match result, or the partially applied matcher.
 */
const findMatch = (references, input) => {
  const matchAgainstReferences = findMatchCurried(references.map(createBigrams), references);
  return input === undefined ? matchAgainstReferences : matchAgainstReferences(input);
};
/**
 * Creates a scorer bound to one reference string.
 *
 * @param {Map<string, number>} reference - Bigram map of `referenceString`.
 * @param {string} referenceString - The reference text.
 * @returns {function(string): number} Function that builds the bigram map of
 *   its input string and returns its score against the reference.
 */
const compareStringsCurried = (reference, referenceString) => (inputString) => {
  const input = createBigrams(inputString);
  return calculateScore(reference, referenceString, input, inputString);
};
/**
 * Scores how similar two strings are.
 *
 * The bigram map of `reference` is built once, up front.
 *
 * @param {string} reference - The text to compare against.
 * @param {string} [input] - The text to score. When it is `undefined`, a
 *   function is returned that accepts the input later and reuses the
 *   prepared reference.
 * @returns {number|function(string): number} The score, or the partially
 *   applied scorer.
 */
const compareStrings = (reference, input) => {
  const scoreAgainstReference = compareStringsCurried(createBigrams(reference), reference);
  return input === undefined ? scoreAgainstReference : scoreAgainstReference(input);
};