UNPKG

@allemandi/embed-utils

Version:

Fast, type-safe utilities for comparing and searching vector embeddings.

1 lines 9.89 kB
{"version":3,"file":"index.cjs","sources":["../src/utils/similarity.js","../src/utils/neighbors.js"],"sourcesContent":["/**\n * Calculates the cosine similarity between two vectors.\n * Cosine similarity measures how similar two vectors are, ranging from -1 (opposite) to 1 (identical).\n * @public\n * @param {number[]} vecA - First vector.\n * @param {number[]} vecB - Second vector.\n * @returns {number} - Cosine similarity score between `vecA` and `vecB`.\n * @example\n * computeCosineSimilarity([1, 2, 3], [1, 2, 3]);\n * // => 1 (identical vectors)\n * computeCosineSimilarity([1, 0], [0, 1]);\n * // => 0 (orthogonal vectors)\n * computeCosineSimilarity([1, 2], [2, 3]);\n * // => 0.9922778767136677\n * computeCosineSimilarity([1, 0], [-1, 0]);\n * // => -1 (vectors diametrically opposed)\n * computeCosineSimilarity([0, 0], [1, 2]);\n * // => 0 (one vector has zero magnitude)\n */\nfunction computeCosineSimilarity(vecA, vecB) {\n let dot = 0;\n let magA = 0;\n let magB = 0;\n for (let i = 0; i < vecA.length; i++) {\n const a = vecA[i];\n const b = vecB[i];\n dot += a * b;\n magA += a * a;\n magB += b * b;\n }\n const denom = Math.sqrt(magA) * Math.sqrt(magB);\n return denom === 0 ? 0 : dot / denom;\n}\n\n/**\n * Normalizes a vector to unit length. If the vector has zero magnitude, returns the original vector.\n * @public\n * @param {number[]} vec - Input vector.\n * @returns {number[]} - A new vector scaled to unit length.\n * @example\n * normalizeVector([3, 4]);\n * // => [0.6, 0.8] (vector normalized to length 1)\n * normalizeVector([0, 0]);\n * // => [0, 0] (zero vector remains unchanged)\n * normalizeVector([1, 1, 1]);\n * // => [0.5773502691896258, 0.5773502691896258, 0.5773502691896258]\n */\nfunction normalizeVector(vec) {\n let sumSquares = 0;\n for (let i = 0; i < vec.length; i++) {\n sumSquares += vec[i] * vec[i];\n }\n const magnitude = Math.sqrt(sumSquares);\n if (magnitude === 0) return vec.slice();\n const result = new Array(vec.length);\n for (let i = 0; i < vec.length; i++) {\n result[i] = vec[i] / magnitude;\n }\n return result;\n}\n\n/**\n * Efficiently checks if a vector is L2-normalized (unit length).\n * @public\n * @param {number[]} vec - Input vector.\n * @param {number} [epsilon=1e-6] - Tolerance for floating-point comparison.\n * @returns {boolean} - True if the L2 norm is within epsilon of 1.\n * @example\n * isNormalized([1, 0]);\n * // => true (vector length is exactly 1)\n * isNormalized([0.6, 0.8]);\n * // => true (approximately unit length)\n * isNormalized([3, 4]);\n * // => false (length is 5)\n * isNormalized([0, 0]);\n * // => false (length is 0)\n */\nfunction isNormalized(vec, epsilon = 1e-6) {\n let sum = 0;\n for (let i = 0; i < vec.length; i++) {\n const x = vec[i];\n sum += x * x;\n }\n return Math.abs(sum - 1) <= epsilon;\n}\n\n/**\n * Computes the mean (centroid) vector from an array of vectors.\n * Assumes all vectors are of equal length.\n * @public\n * @param {number[][]} vectors - Array of input vectors.\n * @returns {number[]} - The mean vector.\n * @example\n * meanVector([[1, 2], [3, 4], [5, 6]]);\n * // => [3, 4]\n * meanVector([]);\n * // => []\n */\nfunction meanVector(vectors) {\n const numVectors = vectors.length;\n if (numVectors === 0) return [];\n const dim = vectors[0].length;\n const mean = new Array(dim).fill(0);\n for (let i = 0; i < numVectors; i++) {\n const vec = vectors[i];\n for (let j = 0; j < dim; j++) {\n mean[j] += vec[j];\n }\n }\n for (let j = 0; j < dim; j++) {\n mean[j] /= numVectors;\n }\n return mean;\n}\n\n\nexport {\n computeCosineSimilarity,\n normalizeVector,\n isNormalized,\n meanVector,\n};","import { computeCosineSimilarity } from './similarity.js';\n\n/**\n * Finds the nearest neighbors to a given query embedding from a list of samples\n * based on cosine similarity.\n * @public\n * @param {number[]} queryEmbedding - The embedding vector to compare against.\n * @param {{ embedding: number[], label: string }[]} samples - An array of samples, each with an `embedding` and a `label`.\n * @param {object} [options={}] - Optional settings.\n * @param {number} [options.topK=1] - Number of top results to return. Default is 1.\n * @param {number} [options.threshold=0] - Minimum similarity score threshold for results.\n * @returns {{ embedding: number[], label: string, similarityScore: number }[]} - An array of nearest neighbors with similarity scores.\n * @example\n * const samples = [\n * { embedding: [1, 0], label: 'A' },\n * { embedding: [0, 1], label: 'B' },\n * { embedding: [1, 1], label: 'C' },\n * ];\n *\n * findNearestNeighbors([1, 0], samples);\n * // => [{ embedding: [1, 0], label: 'A', similarityScore: 1 }]\n *\n * findNearestNeighbors([1, 1], samples, { topK: 2 });\n * // => [\n * // { embedding: [1, 1], label: 'C', similarityScore: 0.999... },\n * // { embedding: [1, 0], label: 'A', similarityScore: 0.707... }\n * // ]\n *\n * findNearestNeighbors([1, 0], samples, { threshold: 0.9 });\n * // => [{ embedding: [1, 0], label: 'A', similarityScore: 1 }]\n *\n * findNearestNeighbors([-1, 0], samples, { threshold: 1 });\n * // => []\n */\nfunction findNearestNeighbors(queryEmbedding, samples, options = {}) {\n const { topK = 1, threshold = 0 } = options;\n const scoredSamples = [];\n for (const sample of samples) {\n const similarityScore = computeCosineSimilarity(queryEmbedding, sample.embedding);\n\n if (similarityScore >= threshold) {\n scoredSamples.push({\n ...sample,\n similarityScore,\n });\n }\n }\n scoredSamples.sort((a, b) => b.similarityScore - a.similarityScore);\n return scoredSamples.slice(0, topK);\n}\n\n/**\n * Ranks all samples by cosine similarity to the query embedding.\n * Does NOT apply threshold or topK filtering.\n * @public\n * @param {number[]} queryEmbedding - The embedding vector to compare against.\n * @param {{ embedding: number[], label: string }[]} samples - Samples with embeddings and labels.\n * @returns {{ embedding: number[], label: string, similarityScore: number }[]} Sorted by descending similarity.\n * @example\n * const samples = [\n * { embedding: [1, 0], label: 'A' },\n * { embedding: [0, 1], label: 'B' },\n * { embedding: [1, 1], label: 'C' },\n * ];\n * rankBySimilarity([1, 0], samples);\n * // => [\n * // { embedding: [1, 0], label: 'A', similarityScore: 1 },\n * // { embedding: [1, 1], label: 'C', similarityScore: 0.707... },\n * // { embedding: [0, 1], label: 'B', similarityScore: 0 }\n * // ]\n *\n * rankBySimilarity([0, 1], samples);\n * // => [\n * // { embedding: [0, 1], label: 'B', similarityScore: 1 },\n * // { embedding: [1, 1], label: 'C', similarityScore: 0.707... },\n * // { embedding: [1, 0], label: 'A', similarityScore: 0 }\n * // ]\n */\nfunction rankBySimilarity(queryEmbedding, samples) {\n const results = new Array(samples.length);\n for (let i = 0; i < samples.length; i++) {\n results[i] = {\n ...samples[i],\n similarityScore: computeCosineSimilarity(queryEmbedding, samples[i].embedding),\n };\n }\n results.sort((a, b) => b.similarityScore - a.similarityScore);\n return results;\n}\n\nexport {\n findNearestNeighbors,\n rankBySimilarity,\n};"],"names":["computeCosineSimilarity","vecA","vecB","dot","magA","magB","i","length","a","b","denom","Math","sqrt","queryEmbedding","samples","options","_step","_options$topK","topK","_options$threshold","threshold","scoredSamples","_iterator","_createForOfIteratorHelperLoose","done","sample","value","similarityScore","embedding","push","_extends","sort","slice","vec","epsilon","sum","x","abs","vectors","numVectors","dim","mean","Array","fill","j","sumSquares","magnitude","result","results"],"mappings":"AAmBA,SAASA,EAAwBC,EAAMC,GAInC,IAHA,IAAIC,EAAM,EACNC,EAAO,EACPC,EAAO,EACFC,EAAI,EAAGA,EAAIL,EAAKM,OAAQD,IAAK,CAClC,IAAME,EAAIP,EAAKK,GACTG,EAAIP,EAAKI,GACfH,GAAOK,EAAIC,EACXL,GAAQI,EAAIA,EACZH,GAAQI,EAAIA,CAChB,CACA,IAAMC,EAAQC,KAAKC,KAAKR,GAAQO,KAAKC,KAAKP,GAC1C,OAAiB,IAAVK,EAAc,EAAIP,EAAMO,CACnC,8XCEA,SAA8BG,EAAgBC,EAASC,QAAAA,IAAAA,IAAAA,EAAU,CAAA,GAG7D,IAFA,IAE4BC,EAFeC,EAAPF,EAA5BG,KAAAA,OAAI,IAAAD,EAAG,EAACA,EAAAE,EAAoBJ,EAAlBK,UAAAA,OAAS,IAAAD,EAAG,EAACA,EACzBE,EAAgB,GACtBC,2pBAAAC,CAAqBT,KAAOE,EAAAM,KAAAE,MAAE,CAAA,IAAnBC,EAAMT,EAAAU,MACPC,EAAkB3B,EAAwBa,EAAgBY,EAAOG,WAEnED,GAAmBP,GACnBC,EAAcQ,KAAIC,EACXL,CAAAA,EAAAA,EACHE,CAAAA,gBAAAA,IAGZ,CAEA,OADAN,EAAcU,KAAK,SAACvB,EAAGC,GAAC,OAAKA,EAAEkB,gBAAkBnB,EAAEmB,eAAe,GAC3DN,EAAcW,MAAM,EAAGd,EAClC,uBD4BA,SAAsBe,EAAKC,QAAO,IAAPA,IAAAA,EAAU,MAEjC,IADA,IAAIC,EAAM,EACD7B,EAAI,EAAGA,EAAI2B,EAAI1B,OAAQD,IAAK,CACjC,IAAM8B,EAAIH,EAAI3B,GACd6B,GAAOC,EAAIA,CACf,CACA,OAAOzB,KAAK0B,IAAIF,EAAM,IAAMD,CAChC,qBAcA,SAAoBI,GAChB,IAAMC,EAAaD,EAAQ/B,OAC3B,GAAmB,IAAfgC,EAAkB,MAAO,GAG7B,IAFA,IAAMC,EAAMF,EAAQ,GAAG/B,OACjBkC,EAAO,IAAIC,MAAMF,GAAKG,KAAK,GACxBrC,EAAI,EAAGA,EAAIiC,EAAYjC,IAE5B,IADA,IAAM2B,EAAMK,EAAQhC,GACXsC,EAAI,EAAGA,EAAIJ,EAAKI,IACrBH,EAAKG,IAAMX,EAAIW,GAGvB,IAAK,IAAIA,EAAI,EAAGA,EAAIJ,EAAKI,IACrBH,EAAKG,IAAML,EAEf,OAAOE,CACX,0BAlEA,SAAyBR,GAErB,IADA,IAAIY,EAAa,EACRvC,EAAI,EAAGA,EAAI2B,EAAI1B,OAAQD,IAC5BuC,GAAcZ,EAAI3B,GAAK2B,EAAI3B,GAE/B,IAAMwC,EAAYnC,KAAKC,KAAKiC,GAC5B,GAAkB,IAAdC,EAAiB,OAAOb,EAAID,QAEhC,IADA,IAAMe,EAAS,IAAIL,MAAMT,EAAI1B,QACpBD,EAAI,EAAGA,EAAI2B,EAAI1B,OAAQD,IAC5ByC,EAAOzC,GAAK2B,EAAI3B,GAAKwC,EAEzB,OAAOC,CACX,2BCmBA,SAA0BlC,EAAgBC,GAEtC,IADA,IAAMkC,EAAU,IAAIN,MAAM5B,EAAQP,QACzBD,EAAI,EAAGA,EAAIQ,EAAQP,OAAQD,IAChC0C,EAAQ1C,GAAEwB,EAAA,CAAA,EACHhB,EAAQR,GAAE,CACbqB,gBAAiB3B,EAAwBa,EAAgBC,EAAQR,GAAGsB,aAI5E,OADAoB,EAAQjB,KAAK,SAACvB,EAAGC,GAAM,OAAAA,EAAEkB,gBAAkBnB,EAAEmB,eAAe,GACrDqB,CACX"}