UNPKG

stringzy

Version:

A versatile string manipulation library providing a range of text utilities for JavaScript and Node.js applications.

146 lines (145 loc) 5.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.stringSimilarity = stringSimilarity; /** * Calculates the percentage similarity between two texts using the selected algorithm. * @param textA The first text to compare. * @param textB The second text to compare. * @param algorithm The algorithm to use: 'Levenshtein' or 'Damerau-Levenshtein'. Default is 'Levenshtein'. * @returns Similarity percentage (0-100). */ function stringSimilarity(textA, textB, algorithm = 'Levenshtein') { validateParams(textA, textB, algorithm); if (textA === textB) { return 100.0; } if (textA.length === 0 && textB.length != 0) { return 0.0; } let distance; if (algorithm === 'Levenshtein') { distance = calculateLevenshteinDistance(textA, textB); } else { distance = calculateDamerauLevenshteinDistance(textA, textB); } return calculateSimilarityScore(distance, textA, textB); } /** * Converts the edit distance to a percentage similarity score. * @param distance The edit distance between the texts. * @param textA The first text. * @param textB The second text. * @returns Similarity percentage (0-100). */ function calculateSimilarityScore(distance, textA, textB) { const similarityScore = 1 - distance / Math.max(textA.length, textB.length); return parseFloat((similarityScore * 100).toFixed(2)); } /** * Calculates the Levenshtein distance between two texts. * @param textA The first text. * @param textB The second text. * @returns The Levenshtein distance. */ function calculateLevenshteinDistance(textA, textB) { const lenA = textA.length; const lenB = textB.length; if (lenA === 0) { return lenB; } if (lenB === 0) { return lenA; } const distancesMatrix = prepareDistanceMatrix(lenA, lenB); for (let i = 1; i <= lenA; i++) { for (let j = 1; j <= lenB; j++) { distancesMatrix[i][j] = applyBasicEditOperations(i, j, textA, textB, distancesMatrix); } } return distancesMatrix[lenA][lenB]; } /** * Calculates the Damerau-Levenshtein distance between two texts. * @param textA The first text. * @param textB The second text. * @returns The Damerau-Levenshtein distance. */ function calculateDamerauLevenshteinDistance(textA, textB) { const lenA = textA.length; const lenB = textB.length; if (lenA === 0) { return lenB; } if (lenB === 0) { return lenA; } const distancesMatrix = prepareDistanceMatrix(lenA, lenB); for (let i = 1; i <= lenA; i++) { for (let j = 1; j <= lenB; j++) { distancesMatrix[i][j] = applyBasicEditOperations(i, j, textA, textB, distancesMatrix); if (i > 1 && j > 1 && textA[i - 1] === textB[j - 2] && textA[i - 2] === textB[j - 1]) { distancesMatrix[i][j] = Math.min(distancesMatrix[i][j], distancesMatrix[i - 2][j - 2] + 1); } } } return distancesMatrix[lenA][lenB]; } /** * Prepares a distance matrix for edit distance calculations. * @param lenA Length of the first text. * @param lenB Length of the second text. * @returns A 2D array representing the distance matrix. */ function prepareDistanceMatrix(lenA, lenB) { const distancesMatrix = Array.from({ length: lenA + 1 }, () => Array(lenB + 1).fill(0)); for (let i = 0; i <= lenA; i++) { distancesMatrix[i][0] = i; } for (let j = 0; j <= lenB; j++) { distancesMatrix[0][j] = j; } return distancesMatrix; } /** * Applies basic edit operations (deletion, insertion, substitution) for edit distance algorithms. * @param i Current row index in the matrix. * @param j Current column index in the matrix. * @param textA The first text. * @param textB The second text. * @param matrix The distance matrix. * @returns The minimum cost for the current cell. */ function applyBasicEditOperations(i, j, textA, textB, matrix) { const cost = textA[i - 1] === textB[j - 1] ? 0 : 1; return Math.min(matrix[i - 1][j] + 1, // Deletion matrix[i][j - 1] + 1, // Insertion matrix[i - 1][j - 1] + cost // Substitution ); } ///////////////////////////////////////// //// Validation Functions ///////////////////////////////////////// /** * Checks if a value is a string. * @param value The value to check. * @returns True if the value is a string, otherwise false. */ function isString(value) { return typeof value === 'string'; } /** * Validates the input parameters for the string similarity functions. * @param textA The first text. * @param textB The second text. * @param algorithm The algorithm to use. * @throws Error if parameters are invalid. */ function validateParams(textA, textB, algorithm) { if (!isString(textA) || !isString(textB)) { throw new Error('Both text arguments must be strings'); } if (algorithm !== 'Levenshtein' && algorithm !== 'Damerau-Levenshtein') { throw new Error("Invalid optional algorithm param. Should be 'Levenshtein' or 'Damerau-Levenshtein'"); } }