UNPKG

@thi.ng/text-analysis

Version:

Text tokenization, transformation & analysis transducers, utilities, stop words, Porter stemming, vector encodings, similarities

76 lines (75 loc) 1.8 kB
import { distCosine } from "@thi.ng/vectors/dist-cosine";
import { jaccardSimilarity as $jaccard } from "@thi.ng/vectors/dist-jaccard";
import { distSq as $distSq } from "@thi.ng/vectors/distsq";
import { dot as $dot } from "@thi.ng/vectors/dot";

/*
 * Sparse vector convention used by the `*Sparse` functions below:
 * a vector is given as an array-like of component indices, and every listed
 * index counts as a component of value 1. The two-pointer merges only pair up
 * equal indices correctly when both inputs are in ascending order without
 * duplicates (assumed; the functions do not check or sort).
 */

/** Dot product of two dense vectors (alias of `dot` from @thi.ng/vectors). */
const dotProductDense = $dot;

/**
 * Dot product of two sparse vectors, i.e. the number of indices present in
 * both `a` and `b`.
 *
 * @param {ArrayLike<number>} a - indices of the first vector
 * @param {ArrayLike<number>} b - indices of the second vector
 * @returns {number} count of shared indices
 */
const dotProductSparse = (a, b) => {
  const na = a.length;
  const nb = b.length;
  let shared = 0;
  let i = 0;
  let j = 0;
  // Walk both index lists in lockstep, always advancing the side holding the
  // smaller index; equal indices are a shared component.
  while (i < na && j < nb) {
    const apos = a[i];
    const bpos = b[j];
    if (apos === bpos) {
      shared++;
      i++;
      j++;
    } else if (apos < bpos) {
      i++;
    } else {
      j++;
    }
  }
  return shared;
};

/** Cosine similarity of two dense vectors (alias of `distCosine` from @thi.ng/vectors). */
const cosineSimilarityDense = distCosine;

/**
 * Cosine similarity of two sparse vectors: shared index count divided by the
 * product of the square roots of both lengths.
 *
 * @param {ArrayLike<number>} a - indices of the first vector
 * @param {ArrayLike<number>} b - indices of the second vector
 * @returns {number} similarity, or 0 if the vectors share no index (this also
 *   avoids dividing by zero for empty inputs)
 */
const cosineSimilaritySparse = (a, b) => {
  const dot = dotProductSparse(a, b);
  return dot > 0 ? dot / (Math.sqrt(a.length) * Math.sqrt(b.length)) : 0;
};

/** Jaccard similarity of two dense vectors (alias of `jaccardSimilarity` from @thi.ng/vectors). */
const jaccardSimilarityDense = $jaccard;

/**
 * Jaccard similarity of two sparse vectors: size of the index intersection
 * divided by the size of the index union.
 *
 * @param {ArrayLike<number>} a - indices of the first vector
 * @param {ArrayLike<number>} b - indices of the second vector
 * @returns {number} similarity, or 0 if both inputs are empty
 */
const jaccardSimilaritySparse = (a, b) => {
  const numIsec = dotProductSparse(a, b);
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const numUnion = a.length + b.length - numIsec;
  return numUnion > 0 ? numIsec / numUnion : 0;
};

/** Squared distance of two dense vectors (alias of `distSq` from @thi.ng/vectors). */
const distSqDense = $distSq;

/**
 * Squared distance of two sparse vectors, i.e. the number of indices present
 * in exactly one of `a` and `b`.
 *
 * Computed as `|a| + |b| - 2 * shared` rather than by counting mismatches
 * inside the merge loop: the loop stops as soon as either list is exhausted,
 * so counting there alone drops the unmatched tail of the longer list
 * (e.g. `[0, 1]` vs `[5]` would yield 2 instead of 3).
 *
 * @param {ArrayLike<number>} a - indices of the first vector
 * @param {ArrayLike<number>} b - indices of the second vector
 * @returns {number} count of indices not shared by both vectors
 */
const distSqSparse = (a, b) =>
  a.length + b.length - 2 * dotProductSparse(a, b);

export {
  cosineSimilarityDense,
  cosineSimilaritySparse,
  distSqDense,
  distSqSparse,
  dotProductDense,
  dotProductSparse,
  jaccardSimilarityDense,
  jaccardSimilaritySparse,
};