UNPKG

svelte

Version:

Cybernetically enhanced web apps

281 lines (237 loc) • 6.79 kB
/**
 * Finds the candidate in `names` that most closely resembles `name`.
 *
 * An exact (case-insensitive) hit always wins. Otherwise candidates are
 * shortlisted by n-gram cosine similarity and then ranked by normalised
 * Levenshtein similarity; the best one is returned only if its similarity
 * is strictly greater than 0.7.
 *
 * @param {string} name the (possibly misspelled) string to look up
 * @param {string[]} names the known-good candidates
 * @returns {string | null} the best candidate in its original casing, or `null`
 */
export default function fuzzymatch(name, names) {
	if (names.length === 0) return null;

	const set = new FuzzySet(names);
	const matches = set.get(name);

	return matches && matches[0][0] > 0.7 ? matches[0][1] : null;
}

// adapted from https://github.com/Glench/fuzzyset.js/blob/master/lib/fuzzyset.js in 2016
// BSD Licensed (see https://github.com/Glench/fuzzyset.js/issues/10)

const GRAM_SIZE_LOWER = 2;
const GRAM_SIZE_UPPER = 3;

/**
 * Similarity of two strings derived from their edit distance: 1 means
 * identical, values near 0 mean almost every character has to change.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number}
 * @throws {Error} if both arguments are `null`
 */
function _distance(str1, str2) {
	if (str1 === null && str2 === null) {
		throw new Error('Trying to compare two null values');
	}
	if (str1 === null || str2 === null) return 0;

	str1 = String(str1);
	str2 = String(str2);

	const longest = Math.max(str1.length, str2.length);
	// Two empty strings are identical; without this guard the division
	// below would be 0 / 0 and yield NaN.
	if (longest === 0) return 1;

	return 1 - levenshtein(str1, str2) / longest;
}

// helper functions

/**
 * Levenshtein edit distance between two strings, computed with a single
 * rolling row.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number}
 */
function levenshtein(str1, str2) {
	/** @type {number[]} */
	const current = [];
	// `prev` carries the value that sat diagonally up-left of the cell
	// being computed (the row entry before it was overwritten).
	let prev = 0;
	let value = 0;

	for (let i = 0; i <= str2.length; i++) {
		for (let j = 0; j <= str1.length; j++) {
			if (i && j) {
				if (str1.charAt(j - 1) === str2.charAt(i - 1)) {
					value = prev;
				} else {
					value = Math.min(current[j], current[j - 1], prev) + 1;
				}
			} else {
				// first row / first column: distance to the empty prefix
				value = i + j;
			}
			prev = current[j];
			current[j] = value;
		}
	}

	return /** @type {number} */ (current.pop());
}

// NOTE: this regex is intentionally left non-global to keep scoring stable:
// `replace` therefore strips only the FIRST run of characters outside
// [A-Za-z0-9_, ] from a value.
const non_word_regex = /[^\w, ]+/;

/**
 * Splits a value into its overlapping grams of `gram_size` characters.
 * The value is lower-cased, passed through `non_word_regex` and wrapped in
 * `-` sentinels first. If the wrapped value is shorter than `gram_size`
 * no grams are produced.
 *
 * @param {string} value
 * @param {any} gram_size
 * @returns {string[]}
 */
function iterate_grams(value, gram_size = 2) {
	const simplified = '-' + value.toLowerCase().replace(non_word_regex, '') + '-';
	const results = [];

	for (let i = 0; i < simplified.length - gram_size + 1; ++i) {
		results.push(simplified.slice(i, i + gram_size));
	}

	return results;
}

/**
 * Counts how often each gram occurs in `value`.
 *
 * @param {string} value
 * @param {any} gram_size
 * @returns {Record<string, number>} map of gram -> number of occurrences
 */
function gram_counter(value, gram_size = 2) {
	// Null-prototype dictionary so a gram can never resolve to an inherited
	// Object.prototype member.
	/** @type {Record<string, number>} */
	const result = Object.create(null);

	for (const gram of iterate_grams(value, gram_size)) {
		result[gram] = (result[gram] ?? 0) + 1;
	}

	return result;
}

/**
 * Comparator ordering match tuples by score, highest first.
 *
 * @param {MatchTuple} a
 * @param {MatchTuple} b
 */
function sort_descending(a, b) {
	return b[0] - a[0];
}

/**
 * A set of strings indexed by their 2- and 3-grams for approximate lookup.
 */
class FuzzySet {
	// The dictionaries keyed by user-supplied text are created without a
	// prototype: with a plain `{}` a lookup for 'constructor' or '__proto__'
	// would hit inherited members of Object.prototype.

	/**
	 * lower-cased value -> value as originally added
	 * @type {Record<string, string>}
	 */
	exact_set = Object.create(null);

	/**
	 * gram -> list of [index into `items[gram.length]`, occurrences of the gram]
	 * @type {Record<string, [number, number][]>}
	 */
	match_dict = Object.create(null);

	/**
	 * gram size -> list of [gram-count vector length, lower-cased value]
	 * @type {Record<number, [number, string][]>}
	 */
	items = {};

	/** @param {string[]} arr initial members of the set */
	constructor(arr) {
		// initialisation
		for (let i = GRAM_SIZE_LOWER; i < GRAM_SIZE_UPPER + 1; ++i) {
			this.items[i] = [];
		}

		// add all the items to the set
		for (let i = 0; i < arr.length; ++i) {
			this.add(arr[i]);
		}
	}

	/**
	 * Adds a value unless an entry with the same lower-cased form exists.
	 *
	 * @param {string} value
	 * @returns {boolean} `true` if the value was added, `false` if it was already present
	 */
	add(value) {
		const normalized_value = value.toLowerCase();
		if (normalized_value in this.exact_set) {
			return false;
		}

		for (let i = GRAM_SIZE_LOWER; i < GRAM_SIZE_UPPER + 1; ++i) {
			this._add(value, i);
		}
		return true;
	}

	/**
	 * Indexes `value` under all of its grams of one particular size.
	 *
	 * @param {string} value
	 * @param {number} gram_size
	 */
	_add(value, gram_size) {
		const normalized_value = value.toLowerCase();
		const items = (this.items[gram_size] ??= []);
		// position this value is about to take in `items`
		const index = items.length;

		const gram_counts = gram_counter(normalized_value, gram_size);
		let sum_of_square_gram_counts = 0;

		for (const gram in gram_counts) {
			const gram_count = gram_counts[gram];
			sum_of_square_gram_counts += gram_count * gram_count;
			(this.match_dict[gram] ??= []).push([index, gram_count]);
		}

		// Euclidean length of the gram-count vector; __get divides by it.
		const vector_normal = Math.sqrt(sum_of_square_gram_counts);
		items.push([vector_normal, normalized_value]);
		this.exact_set[normalized_value] = value;
	}

	/**
	 * Looks up the closest members of the set.
	 *
	 * @param {string} value
	 * @returns {MatchTuple[] | null} the best matches (all sharing the top
	 *   score), or `null` when no gram size yields a match
	 */
	get(value) {
		const normalized_value = value.toLowerCase();

		// Membership test rather than a truthiness test, so that a stored
		// empty string still counts as an exact hit.
		if (normalized_value in this.exact_set) {
			return /** @type {MatchTuple[]} */ ([[1, this.exact_set[normalized_value]]]);
		}

		// start with high gram size and if there are no results, go to lower gram sizes
		for (let gram_size = GRAM_SIZE_UPPER; gram_size >= GRAM_SIZE_LOWER; --gram_size) {
			const results = this.__get(value, gram_size);
			if (results.length > 0) return results;
		}

		return null;
	}

	/**
	 * Fuzzy lookup for a single gram size.
	 *
	 * @param {string} value
	 * @param {number} gram_size
	 * @returns {MatchTuple[]} the members tied for the best similarity, possibly empty
	 */
	__get(value, gram_size) {
		const normalized_value = value.toLowerCase();
		const gram_counts = gram_counter(normalized_value, gram_size);
		const items = this.items[gram_size];

		// item index -> dot product of the query's and the item's gram counts.
		// Kept as an object keyed by index: integer-like keys enumerate in
		// ascending order, which fixes the order of equally scored candidates.
		/** @type {Record<string, number>} */
		const matches = {};
		let sum_of_square_gram_counts = 0;

		for (const gram in gram_counts) {
			const gram_count = gram_counts[gram];
			sum_of_square_gram_counts += gram_count * gram_count;

			if (gram in this.match_dict) {
				for (const [index, other_gram_count] of this.match_dict[gram]) {
					if (index in matches) {
						matches[index] += gram_count * other_gram_count;
					} else {
						matches[index] = gram_count * other_gram_count;
					}
				}
			}
		}

		const vector_normal = Math.sqrt(sum_of_square_gram_counts);

		// build a results list of [score, str]: the dot product divided by
		// both vector lengths
		/** @type {MatchTuple[]} */
		const by_gram_score = [];
		for (const match_index in matches) {
			const [item_normal, item_value] = items[Number(match_index)];
			by_gram_score.push([matches[match_index] / (vector_normal * item_normal), item_value]);
		}
		by_gram_score.sort(sort_descending);

		// truncate somewhat arbitrarily to 50, then re-score the shortlist
		// by edit-distance similarity
		/** @type {MatchTuple[]} */
		const by_distance = by_gram_score
			.slice(0, 50)
			.map(([, candidate]) => [_distance(candidate, normalized_value), candidate]);
		by_distance.sort(sort_descending);

		// keep every candidate tied with the best score, mapped back to the
		// casing it was added with
		/** @type {MatchTuple[]} */
		const best = [];
		for (const [score, candidate] of by_distance) {
			if (score === by_distance[0][0]) {
				best.push([score, this.exact_set[candidate]]);
			}
		}

		return best;
	}
}

/** @typedef {[score: number, match: string]} MatchTuple */