UNPKG

@allemandi/embed-utils

Version:

Fast, type-safe utilities for vector embedding comparison and search.

219 lines (215 loc) 8.37 kB
/**
 * Calculates cosine similarity between two vectors.
 * Measures how similar their directions are, ignoring magnitude.
 * Use for comparing semantic or normalized vectors (e.g., text embeddings).
 *
 * NOTE(review): this declaration does not state what happens when `vecA` and
 * `vecB` have different lengths; confirm against the implementation.
 *
 * @public
 * @param {number[]} vecA - First vector.
 * @param {number[]} vecB - Second vector.
 * @returns {number} - Cosine similarity score between `vecA` and `vecB`.
 *   Per the examples below, the score is 0 when either vector has zero magnitude.
 * @example
 * computeCosineSimilarity([1, 2, 3], [1, 2, 3]);
 * // => 1 (identical vectors)
 * computeCosineSimilarity([1, 0], [0, 1]);
 * // => 0 (orthogonal vectors)
 * computeCosineSimilarity([1, 2], [2, 3]);
 * // => 0.992...
 * computeCosineSimilarity([1, 0], [-1, 0]);
 * // => -1 (vectors diametrically opposed)
 * computeCosineSimilarity([0, 0], [1, 2]);
 * // => 0 (one vector has zero magnitude)
 */
declare function computeCosineSimilarity(vecA: number[], vecB: number[]): number;
/**
 * Calculates Euclidean distance between two vectors.
 * Measures straight-line distance considering both magnitude and direction.
 * Use for raw numeric data or spatial coordinates.
 *
 * NOTE(review): this declaration does not state what happens when `vecA` and
 * `vecB` have different lengths; confirm against the implementation.
 *
 * @public
 * @param {number[]} vecA - First vector.
 * @param {number[]} vecB - Second vector.
 * @returns {number} - Euclidean distance between `vecA` and `vecB`.
 * @example
 * computeEuclideanDistance([1, 2], [4, 6]);
 * // => 5 (distance between (1,2) and (4,6))
 * computeEuclideanDistance([0, 0], [0, 0]);
 * // => 0 (identical vectors)
 * computeEuclideanDistance([1, 0], [0, 1]);
 * // => 1.414...
 * computeEuclideanDistance([1, 2, 3], [4, 5, 6]);
 * // => 5.196...
 */
declare function computeEuclideanDistance(vecA: number[], vecB: number[]): number;
/**
 * Calculates Manhattan distance between two vectors.
 * Measures the sum of absolute differences between corresponding components.
 * Use for grid-like data or when less sensitivity to large differences is wanted.
 *
 * NOTE(review): this declaration does not state what happens when `vecA` and
 * `vecB` have different lengths; confirm against the implementation.
 *
 * @public
 * @param {number[]} vecA - First vector.
 * @param {number[]} vecB - Second vector.
 * @returns {number} - Manhattan distance between `vecA` and `vecB`.
 * @example
 * computeManhattanDistance([1, 2, 3], [4, 5, 6]);
 * // => 9
 * computeManhattanDistance([1, 0], [0, 1]);
 * // => 2
 * computeManhattanDistance([1, 2], [1, 2]);
 * // => 0 (identical vectors)
 * computeManhattanDistance([1, -1], [-1, 1]);
 * // => 4
 */
declare function computeManhattanDistance(vecA: number[], vecB: number[]): number;
/**
 * Normalizes a vector to unit length.
 * If the vector has zero magnitude, the vector is returned unchanged.
 *
 * NOTE(review): for a zero-magnitude input it is not clear from this
 * declaration whether the same array instance or a copy is returned; confirm
 * against the implementation before relying on identity.
 *
 * @public
 * @param {number[]} vec - Input vector.
 * @returns {number[]} - A new vector scaled to unit length.
 * @example
 * normalizeVector([3, 4]);
 * // => [0.6, 0.8] (vector normalized to length 1)
 * normalizeVector([0, 0]);
 * // => [0, 0] (zero vector remains unchanged)
 * normalizeVector([1, 1, 1]);
 * // => [0.5773502691896258, 0.5773502691896258, 0.5773502691896258]
 */
declare function normalizeVector(vec: number[]): number[];
/**
 * Efficiently checks if a vector is L2-normalized (unit length).
 *
 * @public
 * @param {number[]} vec - Input vector.
 * @param {number} [epsilon=1e-6] - Tolerance for floating-point comparison.
 * @returns {boolean} - True if the L2 norm is within epsilon of 1.
 * @example
 * isNormalized([1, 0]);
 * // => true (vector length is exactly 1)
 * isNormalized([0.6, 0.8]);
 * // => true (approximately unit length)
 * isNormalized([3, 4]);
 * // => false (length is 5)
 * isNormalized([0, 0]);
 * // => false (length is 0)
 */
declare function isNormalized(vec: number[], epsilon?: number): boolean;
/**
 * Computes the mean (centroid) vector from an array of vectors.
 * Assumes all vectors are of equal length.
 *
 * @public
 * @param {number[][]} vectors - Array of input vectors.
 * @returns {number[]} - The mean vector; an empty array when `vectors` is empty.
 * @example
 * meanVector([[1, 2], [3, 4], [5, 6]]);
 * // => [3, 4]
 * meanVector([]);
 * // => []
 */
declare function meanVector(vectors: number[][]): number[];
/**
 * Finds the nearest neighbors to a given query embedding from a list of samples
 * based on the specified distance/similarity method.
 *
 * Supported methods:
 * * `'cosine'`: Cosine similarity (higher = more similar, range: [-1, 1]).
 * * `'euclidean'`: Euclidean distance (lower = closer, ≥ 0).
 * * `'manhattan'`: Manhattan distance (lower = closer, ≥ 0).
 *
 * As the examples show, results carry `similarityScore` for `'cosine'` and
 * `distance` for `'euclidean'` / `'manhattan'`.
 *
 * @public
 * @param {number[]} queryEmbedding - The embedding vector to compare against.
 * @param {{ embedding: number[], label: string }[]} samples - An array of samples, each with an `embedding` and a `label`.
 * @param {object} [options={}] - Optional settings.
 * @param {number} [options.topK=1] - Number of top results to return. Default is 1.
 * @param {number} [options.threshold] - Minimum similarity score threshold for results (cosine) or maximum distance threshold (euclidean/manhattan).
 * @param {'cosine' | 'euclidean' | 'manhattan'} [options.method='cosine'] - The metric to use; one of the methods listed above. Default is 'cosine'.
 * @returns {{ embedding: number[], label: string, similarityScore?: number, distance?: number }[]} - An array of nearest neighbors with scores/distances.
 * @example
 * const samples = [
 *   { embedding: [1, 0], label: 'A' },
 *   { embedding: [0, 1], label: 'B' },
 *   { embedding: [1, 1], label: 'C' },
 * ];
 *
 * // Default cosine similarity
 * findNearestNeighbors([1, 0], samples);
 * // => [{ embedding: [1, 0], label: 'A', similarityScore: 1 }]
 *
 * // Euclidean distance
 * findNearestNeighbors([1, 0], samples, { method: 'euclidean', topK: 2 });
 * // => [
 * //   { embedding: [1, 0], label: 'A', distance: 0 },
 * //   { embedding: [1, 1], label: 'C', distance: 1 }
 * // ]
 *
 * // Manhattan distance with threshold (topK raised so both matches within the threshold are returned)
 * findNearestNeighbors([1, 0], samples, { method: 'manhattan', topK: 2, threshold: 1.5 });
 * // => [{ embedding: [1, 0], label: 'A', distance: 0 }, { embedding: [1, 1], label: 'C', distance: 1 }]
 *
 * // Cosine with threshold
 * findNearestNeighbors([1, 0], samples, { threshold: 0.9 });
 * // => [{ embedding: [1, 0], label: 'A', similarityScore: 1 }]
 */
declare function findNearestNeighbors(queryEmbedding: number[], samples: {
    embedding: number[];
    label: string;
}[], options?: {
    topK?: number | undefined;
    threshold?: number | undefined;
    method?: "cosine" | "euclidean" | "manhattan" | undefined;
}): {
    embedding: number[];
    label: string;
    similarityScore?: number;
    distance?: number;
}[];
/**
 * Ranks all samples by similarity/distance to the query embedding.
 * Does NOT apply threshold or topK filtering.
 *
 * As the examples show, results carry `similarityScore` for `'cosine'` and
 * `distance` for `'euclidean'` / `'manhattan'`.
 *
 * @public
 * @param {number[]} queryEmbedding - The embedding vector to compare against.
 * @param {{ embedding: number[], label: string }[]} samples - Samples with embeddings and labels.
 * @param {object} [options={}] - Optional settings.
 * @param {'cosine' | 'euclidean' | 'manhattan'} [options.method='cosine'] - Distance/similarity method to use. Default is 'cosine'.
 * @returns {{ embedding: number[], label: string, similarityScore?: number, distance?: number }[]} Sorted by best match first.
 * @example
 * const samples = [
 *   { embedding: [1, 0], label: 'A' },
 *   { embedding: [0, 1], label: 'B' },
 *   { embedding: [1, 1], label: 'C' },
 * ];
 *
 * // Default cosine similarity
 * rankBySimilarity([1, 0], samples);
 * // => [
 * //   { embedding: [1, 0], label: 'A', similarityScore: 1 },
 * //   { embedding: [1, 1], label: 'C', similarityScore: 0.707... },
 * //   { embedding: [0, 1], label: 'B', similarityScore: 0 }
 * // ]
 *
 * // Euclidean distance
 * rankBySimilarity([1, 0], samples, { method: 'euclidean' });
 * // => [
 * //   { embedding: [1, 0], label: 'A', distance: 0 },
 * //   { embedding: [1, 1], label: 'C', distance: 1 },
 * //   { embedding: [0, 1], label: 'B', distance: 1.414... }
 * // ]
 *
 * // Manhattan distance
 * rankBySimilarity([0, 1], samples, { method: 'manhattan' });
 * // => [
 * //   { embedding: [0, 1], label: 'B', distance: 0 },
 * //   { embedding: [1, 1], label: 'C', distance: 1 },
 * //   { embedding: [1, 0], label: 'A', distance: 2 }
 * // ]
 */
declare function rankBySimilarity(queryEmbedding: number[], samples: {
    embedding: number[];
    label: string;
}[], options?: {
    method?: "cosine" | "euclidean" | "manhattan" | undefined;
}): {
    embedding: number[];
    label: string;
    similarityScore?: number;
    distance?: number;
}[];

export { computeCosineSimilarity, computeEuclideanDistance, computeManhattanDistance, findNearestNeighbors, isNormalized, meanVector, normalizeVector, rankBySimilarity };