cmpstr
Version:
lightweight npm package to calculate string similarity
78 lines (60 loc) • 1.76 kB
JavaScript
/**
* Smith-Waterman Algorithm
* CmpStr module
*
* The Smith-Waterman algorithm performs local alignment, finding the
* best matching subsequence between two strings. It is commonly used
* in bioinformatics.
*
* @author Paul Köhler
* @license MIT
*/
;
/**
* module exports
* @public
*
* @param {String} a string a
* @param {String} b string b
* @param {Object} options having {
* @param {Number} [match=2] score for a match
* @param {Number} [mismatch=-1] penalty for a mismatch
* @param {Number} [gap=-1] penalty for a gap
* }
* @returns {Number} similarity score (0..1)
*/
module.exports = ( a, b, {
match = 2, mismatch = -1, gap = -1
} = {} ) => {
let rows = a.length + 1,
cols = b.length + 1;
/* step 1: initialize scoring matrix */
let matrix = Array.from(
{ length: rows },
() => Array( cols ).fill( 0 )
);
/* step 2: fill the scoring matrix */
let maxScore = 0;
for ( let i = 1; i < rows; i++ ) {
for ( let j = 1; j < cols; j++ ) {
let matchScore = a[ i - 1 ] === b[ j - 1 ] ? match : mismatch;
matrix[ i ][ j ] = Math.max(
0,
matrix[ i - 1 ][ j - 1 ] + matchScore,
matrix[ i - 1 ][ j ] + gap,
matrix[ i ][ j - 1 ] + gap
);
maxScore = Math.max(
maxScore,
matrix[ i ][ j ]
);
}
}
/* step 3: normalize the score to a value between 0..1 */
return Math.max( 0, Math.min( 1,
maxScore / Math.min(
a.length * match,
b.length * match
)
) );
};