// usearch
// Version:
// Smaller & Faster Single-File Vector Search Engine from Unum
// 504 lines (503 loc) • 22.5 kB
// JavaScript
import build from "node-gyp-build";
import * as path from "path";
import { existsSync } from "fs";
import { getFileName, getRoot } from "bindings";
// Native binding used by everything below (`compiled.CompiledIndex`, `compiled.exactSearch`).
// `getDirName()` supplies the starting directory and `getBuildDir()` walks upward from it
// until it finds a directory containing `build` or `prebuilds`. Both helpers are function
// declarations defined further down in this file, so they are hoisted and callable here.
// `build` is the default export of `node-gyp-build`; presumably it resolves and loads the
// compiled addon from the directory it is given (confirm against that package's docs).
const compiled = build(getBuildDir(getDirName()));
/**
 * Distance metrics that can be used to compare vectors in the index.
 * Every member maps a symbolic name to the lowercase string identifier of the metric.
 * @enum {string}
 * @readonly
 */
export var MetricKind;
MetricKind = Object.assign(MetricKind || {}, {
    Unknown: "unknown",
    Cos: "cos",
    IP: "ip",
    L2sq: "l2sq",
    Haversine: "haversine",
    Divergence: "divergence",
    Pearson: "pearson",
    Jaccard: "jaccard",
    Hamming: "hamming",
    Tanimoto: "tanimoto",
    Sorensen: "sorensen",
});
/**
 * Scalar kinds that define the type of the individual values stored in vectors.
 * Every member maps a symbolic name to the lowercase string identifier of the scalar type.
 * @enum {string}
 * @readonly
 */
export var ScalarKind;
ScalarKind = Object.assign(ScalarKind || {}, {
    Unknown: "unknown",
    F32: "f32",
    F64: "f64",
    F16: "f16",
    BF16: "bf16",
    I8: "i8",
    B1: "b1",
});
/**
 * Search results for a single query: parallel arrays of keys and distances.
 */
export class Matches {
    keys;
    distances;
    /**
     * Creates a result set from two parallel arrays.
     *
     * @param {BigUint64Array} keys - The keys of the nearest neighbors found.
     * @param {Float32Array} distances - The distances of the nearest neighbors found.
     */
    constructor(keys, distances) {
        Object.assign(this, { keys, distances });
    }
}
/**
 * Search results for a batch of queries, stored as flat arrays.
 * Query `i` owns the region starting at `i * k`; `counts[i]` tells how many
 * entries of that region are actual results.
 */
export class BatchMatches {
    keys;
    distances;
    counts;
    k;
    /**
     * Creates a batched result set.
     *
     * @param {BigUint64Array} keys - The keys of the nearest neighbors found in the batch.
     * @param {Float32Array} distances - The distances of the nearest neighbors found in the batch.
     * @param {BigUint64Array} counts - The number of neighbors found for each query in the batch.
     * @param {number} k - The limit for search results per query in the batch.
     */
    constructor(keys, distances, counts, k) {
        Object.assign(this, { keys, distances, counts, k });
    }
    /**
     * Extracts the results of one query as a standalone `Matches` object.
     * The returned arrays are copies (`slice`), trimmed to the number of results found.
     *
     * @param {number} i - The position of the query within the batch.
     * @returns {Matches} - The search results of the query at position `i`.
     */
    get(i) {
        const begin = Number(i) * Number(this.k);
        const end = begin + Number(this.counts[i]);
        return new Matches(this.keys.slice(begin, end), this.distances.slice(begin, end));
    }
}
/**
 * Tells whether `keys` is one scalar key rather than a collection of keys.
 *
 * @param {*} keys - The value to inspect.
 * @returns {boolean} `true` for a bigint or a non-NaN number, `false` otherwise.
 */
function isOneKey(keys) {
    if (typeof keys === "bigint") {
        return true;
    }
    return typeof keys === "number" && !Number.isNaN(keys);
}
/**
 * Converts any supported key input into a `BigUint64Array`.
 *
 * A single numeric key is validated with the same rule as array elements, so a
 * negative or fractional number is rejected instead of silently wrapping
 * around (`-1` would become 2^64 - 1) or surfacing a raw `RangeError` from `BigInt()`.
 *
 * @param {number|bigint|Array<number|bigint>|BigUint64Array} keys - One key or a collection of keys.
 * @returns {BigUint64Array} The keys as unsigned 64-bit integers. A `BigUint64Array` input is returned as is.
 * @throws {Error} If a numeric key is not a non-negative integer, or if `keys` has an unsupported type.
 */
function normalizeKeys(keys) {
    if (keys instanceof BigUint64Array) {
        return keys;
    }
    // NOTE: bigint values are passed through unchanged; `BigUint64Array` stores
    // them modulo 2^64, so out-of-range bigints still wrap.
    const toBigInt = (key) => {
        if (typeof key === "bigint") {
            return key;
        }
        if (typeof key === "number" && Number.isInteger(key) && key >= 0) {
            return BigInt(key);
        }
        throw new Error("All keys must be positive integers or bigints.");
    };
    // NaN is deliberately excluded here so that it falls through to the
    // "unsupported type" error below.
    const isSingleKey = typeof keys === "bigint" ||
        (typeof keys === "number" && !Number.isNaN(keys));
    if (isSingleKey) {
        return BigUint64Array.of(toBigInt(keys));
    }
    if (Array.isArray(keys)) {
        return BigUint64Array.from(keys.map((key) => toBigInt(key)));
    }
    throw new Error("Keys must be a number, bigint, an array of numbers or bigints, or a BigUint64Array.");
}
/**
 * Tells whether `vectors` is one of the typed arrays accepted as vector storage.
 *
 * @param {*} vectors - The value to inspect.
 * @returns {boolean} `true` for a `Float32Array`, `Float64Array` or `Int8Array`.
 */
function isVector(vectors) {
    const acceptedTypes = [Float32Array, Float64Array, Int8Array];
    return acceptedTypes.some((type) => vectors instanceof type);
}
/**
 * Flattens the supported vector inputs into one typed array of `targetType`.
 *
 * Accepted inputs:
 *  - a `Float32Array`, `Float64Array` or `Int8Array` (returned as is when it
 *    already has the target type, converted otherwise);
 *  - an array of rows, each row being an array or typed array of numbers;
 *  - a flat plain array of numbers, treated like a flat typed array.
 *
 * A flat plain array used to be flattened to an empty result, because its
 * numeric elements have no `length`. It is now converted directly, and a row
 * without a usable `length` raises an error instead of being dropped.
 *
 * @param {Float32Array|Float64Array|Int8Array|Array<number>|Array<ArrayLike<number>>} vectors - The vectors to flatten.
 * @param {number} dimensions - The dimensionality every vector must have.
 * @param {Function} [targetType=Float32Array] - Typed array constructor of the result.
 * @returns {Float32Array|Float64Array|Int8Array} The flattened vectors.
 * @throws {Error} If `vectors` has an unsupported shape, or if the flattened
 * length is not a multiple of `dimensions`.
 */
function normalizeVectors(vectors, dimensions, targetType = Float32Array) {
    const hasLength = (row) => row != null && Number.isInteger(row.length) && row.length >= 0;
    let flattenedVectors;
    if (isVector(vectors)) {
        flattenedVectors =
            vectors.constructor === targetType
                ? vectors
                : new targetType(vectors);
    }
    else if (Array.isArray(vectors) && vectors.every((value) => typeof value === "number")) {
        // Flat list of numbers (or an empty array): already a flattened matrix.
        flattenedVectors = targetType.from(vectors);
    }
    else if (Array.isArray(vectors)) {
        let totalLength = 0;
        for (const row of vectors) {
            if (!hasLength(row)) {
                throw new Error("Vectors must be a TypedArray or an array of arrays.");
            }
            totalLength += row.length;
        }
        flattenedVectors = new targetType(totalLength);
        let offset = 0;
        for (const row of vectors) {
            flattenedVectors.set(row, offset);
            offset += row.length;
        }
    }
    else {
        throw new Error("Vectors must be a TypedArray or an array of arrays.");
    }
    if (flattenedVectors.length % dimensions !== 0) {
        throw new Error("The size of the flattened vectors must be a multiple of the dimension of the vectors.");
    }
    return flattenedVectors;
}
/**
 * Vector index backed by the native `CompiledIndex`.
 *
 * The class validates and normalizes JavaScript inputs (keys, vectors,
 * configuration) and delegates the actual work to the native object it wraps.
 */
export class Index {
    // Handle of the native index; every method delegates to it.
    #compiledIndex;
    /**
     * Constructs a new index.
     *
     * @param {(number | {dimensions: number, metric: MetricKind = MetricKind.Cos, quantization: ScalarKind = ScalarKind.F32, connectivity: number = 0, expansion_add: number = 0, expansion_search: number = 0, multi: boolean = false})} dimensionsOrConfigs
     * Either the dimensionality (number or bigint), followed by the remaining
     * positional arguments, or a single configuration object. When an object
     * is given, the positional arguments are ignored.
     * @param {MetricKind} [metric=MetricKind.Cos] - Optional, default is 'cos'.
     * @param {ScalarKind} [quantization=ScalarKind.F32] - Optional, default is 'f32'.
     * @param {number} [connectivity=0] - Optional, default is 0.
     * @param {number} [expansion_add=0] - Optional, default is 0.
     * @param {number} [expansion_search=0] - Optional, default is 0.
     * @param {boolean} [multi=false] - Optional, default is false.
     * @throws Will throw an error if any of the parameters are of incorrect type or invalid value.
     */
    constructor(dimensionsOrConfigs, metric = MetricKind.Cos, quantization = ScalarKind.F32, connectivity = 0, expansion_add = 0, expansion_search = 0, multi = false) {
        let dimensions;
        const isScalarDimensions = typeof dimensionsOrConfigs === "bigint" ||
            (typeof dimensionsOrConfigs === "number" &&
                !Number.isNaN(dimensionsOrConfigs));
        if (isScalarDimensions) {
            // Parameters are provided as individual arguments
            dimensions = dimensionsOrConfigs;
        }
        else if (typeof dimensionsOrConfigs === "object" &&
            dimensionsOrConfigs !== null) {
            // Parameters are provided as an object
            ({
                dimensions,
                metric = MetricKind.Cos,
                quantization = ScalarKind.F32,
                connectivity = 0,
                expansion_add = 0,
                expansion_search = 0,
                multi = false,
            } = dimensionsOrConfigs);
        }
        else {
            throw new Error("Invalid arguments. Expected either individual arguments or a single object argument.");
        }
        // Bigints are checked for sign as well: previously any bigint, including
        // negative ones and a zero `dimensions`, bypassed validation entirely.
        const isNonNegativeInteger = (value) => typeof value === "bigint"
            ? value >= 0n
            : Number.isInteger(value) && value >= 0;
        const sizesAreValid = isNonNegativeInteger(dimensions) &&
            dimensions > 0 &&
            isNonNegativeInteger(connectivity) &&
            isNonNegativeInteger(expansion_add) &&
            isNonNegativeInteger(expansion_search);
        if (!sizesAreValid) {
            throw new Error("`dimensions`, `connectivity`, `expansion_add`, and `expansion_search` must be non-negative integers, with `dimensions` being positive.");
        }
        if (typeof multi !== "boolean") {
            throw new Error("`multi` must be a boolean value.");
        }
        if (!Object.values(MetricKind).includes(metric)) {
            throw new Error(`Invalid metric: ${metric}. It must be one of: ${Object.values(MetricKind).join(", ")}`);
        }
        if (!Object.values(ScalarKind).includes(quantization)) {
            throw new Error(`Invalid quantization: ${quantization}. It must be one of: ${Object.values(ScalarKind).join(", ")}`);
        }
        // @ts-expect-error
        this.#compiledIndex = new compiled.CompiledIndex(dimensions, metric, quantization, connectivity, expansion_add, expansion_search, multi);
    }
    /**
     * Add vectors to the index.
     *
     * This method accepts vectors and their corresponding keys for indexing.
     * Each key should correspond to a vector. If a single key is provided,
     * it is broadcasted to match the number of provided vectors.
     *
     * Vectors should be provided as a flat typed array representing a matrix
     * where each row is a vector to be indexed. The matrix should have a size
     * of n * d, where n is the number of vectors, and d is the dimensionality
     * of the vectors.
     *
     * @param {number|bigint|Array<number|bigint>|BigUint64Array} keys - Input identifiers for every vector.
     * If a single key is provided, it is associated with all provided vectors.
     * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} vectors - Input matrix representing vectors,
     * matrix of size n * d, where n is the number of vectors, and d is their dimensionality.
     * @throws Will throw an error if the length of keys doesn't match the number of vectors
     * or if it's not a single key.
     */
    add(keys, vectors) {
        let normalizedKeys = normalizeKeys(keys);
        // `Number(...)` mirrors what `search` does with the native value.
        const dimensions = Number(this.#compiledIndex.dimensions());
        const normalizedVectors = normalizeVectors(vectors, dimensions);
        const countVectors = normalizedVectors.length / dimensions;
        // If a single key is provided but there are multiple vectors,
        // broadcast the single key value to match the number of vectors
        if (normalizedKeys.length === 1 && countVectors > 1) {
            normalizedKeys = new BigUint64Array(countVectors).fill(normalizedKeys[0]);
        }
        else if (normalizedKeys.length !== countVectors) {
            throw new Error(`The length of keys (${normalizedKeys.length}) must match the number of vectors (${countVectors}) or be a single key.`);
        }
        this.#compiledIndex.add(normalizedKeys, normalizedVectors);
    }
    /**
     * Perform a k-nearest neighbor search on the index.
     *
     * This method accepts a matrix of query vectors and returns the closest vectors
     * from the index for each query.
     *
     * Vectors should be provided as a flat typed array representing a matrix where
     * each row is a vector. The matrix should be of size n * d, where n is the
     * number of query vectors, and d is their dimensionality.
     *
     * The parameter `k` specifies the number of nearest neighbors to return for each
     * query vector. A single query yields a `Matches` object trimmed to the number of
     * results found; several queries yield a `BatchMatches` object whose `counts`
     * tell how many entries are valid for each query.
     *
     * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} vectors - Input matrix representing query vectors, can be a TypedArray or an array of TypedArray.
     * @param {number} k - The number of nearest neighbors to search for each query vector.
     * @return {Matches|BatchMatches} - Search results for one or more queries, containing keys, distances, and counts of the matches found.
     * @throws Will throw an error if `k` is not a positive integer or if the size of the vectors is not a multiple of dimensions.
     * @throws Will throw an error if `vectors` is not a valid input type (TypedArray or an array of TypedArray) or if its flattened size is not a multiple of dimensions.
     */
    search(vectors, k) {
        // `Number.isInteger` rejects NaN, Infinity, fractions and every
        // non-number type, all of which must not reach the native call.
        if (!Number.isInteger(k) || k <= 0) {
            throw new Error("`k` must be a positive integer representing the number of nearest neighbors to search for.");
        }
        const dimensions = Number(this.#compiledIndex.dimensions());
        const normalizedVectors = normalizeVectors(vectors, dimensions);
        // Call the compiled method and create Matches or BatchMatches object with the result
        const result = this.#compiledIndex.search(normalizedVectors, k);
        const countInQueries = normalizedVectors.length / dimensions;
        const batchMatches = new BatchMatches(...result, k);
        return countInQueries === 1 ? batchMatches.get(0) : batchMatches;
    }
    /**
     * Verifies the presence of one or more keys in the index.
     *
     * This method accepts one or multiple keys as input and returns a boolean or
     * an array of booleans indicating whether each key is present in the index.
     *
     * @param {number|bigint|Array<number|bigint>|BigUint64Array} keys - The identifier(s) of the vector(s) to be checked for presence in the index.
     * @return {boolean|boolean[]} - Returns true if a single key is contained in the index, false otherwise. Returns an array of booleans corresponding to the presence of each key in the index when multiple keys are provided.
     * @throws Will throw an error if keys are not integers.
     */
    contains(keys) {
        const results = this.#compiledIndex.contains(normalizeKeys(keys));
        // A scalar key gets a scalar answer; collections get the full result.
        return isOneKey(keys) ? results[0] : results;
    }
    /**
     * Counts the number of times keys shows up in the index.
     *
     * @param {number|bigint|Array<number|bigint>|BigUint64Array} keys - The identifier(s) of the vector(s) to be enumerated.
     * @return {number|number[]} - Returns the number of vectors found when a single key is provided. Returns one count per key when multiple keys are provided.
     * @throws Will throw an error if keys are not integers.
     */
    count(keys) {
        const results = this.#compiledIndex.count(normalizeKeys(keys));
        return isOneKey(keys) ? results[0] : results;
    }
    /**
     * Removes one or multiple vectors from the index.
     *
     * This method accepts one or multiple keys as input and removes the corresponding vectors from the index.
     * It returns the number of vectors actually removed for each key provided.
     *
     * @param {number|bigint|Array<number|bigint>|BigUint64Array} keys - The identifier(s) of the vector(s) to be removed.
     * @return {number|number[]} - Returns the number of vectors deleted when a single key is provided. Returns one count per key when multiple keys are provided.
     * @throws Will throw an error if keys are not integers.
     */
    remove(keys) {
        const results = this.#compiledIndex.remove(normalizeKeys(keys));
        return isOneKey(keys) ? results[0] : results;
    }
    /**
     * Returns the dimensionality of vectors.
     * @return {number} The dimensionality of vectors.
     */
    dimensions() {
        return this.#compiledIndex.dimensions();
    }
    /**
     * Returns connectivity.
     * @return {number} The connectivity of index.
     */
    connectivity() {
        return this.#compiledIndex.connectivity();
    }
    /**
     * Returns the number of vectors currently indexed.
     * @return {number} The number of vectors currently indexed.
     */
    size() {
        return this.#compiledIndex.size();
    }
    /**
     * Returns index capacity.
     * @return {number} The capacity of index.
     */
    capacity() {
        return this.#compiledIndex.capacity();
    }
    /**
     * Write index to a file.
     * @param {string} path File path to write.
     * @throws Will throw an error if `path` is not a string.
     */
    save(path) {
        if (typeof path !== "string") {
            throw new Error("`path` must be a string representing the file path to write.");
        }
        this.#compiledIndex.save(path);
    }
    /**
     * Load index from a file.
     * @param {string} path File path to read.
     * @throws Will throw an error if `path` is not a string.
     */
    load(path) {
        if (typeof path !== "string") {
            throw new Error("`path` must be a string representing the file path to read.");
        }
        this.#compiledIndex.load(path);
    }
    /**
     * View index from a file, without loading into RAM.
     * @param {string} path File path to read.
     * @throws Will throw an error if `path` is not a string.
     */
    view(path) {
        if (typeof path !== "string") {
            throw new Error("`path` must be a string representing the file path to read.");
        }
        this.#compiledIndex.view(path);
    }
}
/**
 * Performs an exact search on the given dataset to find the best matching vectors for each query.
 *
 * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} dataset - The dataset containing vectors to be searched. It can be a TypedArray or an array of arrays.
 * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} queries - The queries containing vectors to search for in the dataset. It can be a TypedArray or an array of arrays.
 * @param {number} dimensions - The dimensionality of the vectors in both the dataset and the queries. It defines the number of elements in each vector.
 * @param {number} count - The number of nearest neighbors to return for each query.
 * @param {MetricKind} metric - The distance metric to be used for the search.
 * @return {Matches|BatchMatches} - Returns a `Matches` object for a single query, a `BatchMatches` object otherwise.
 * @throws Will throw an error if `dimensions` and `count` are not positive integers.
 * @throws Will throw an error if `metric` is not a valid MetricKind.
 * @throws Will throw an error if `dataset` and `queries` are not valid input types (TypedArray or an array of arrays).
 * @throws Will throw an error if the sizes of the flattened `dataset` and `queries` are not multiples of `dimensions`.
 * @throws Will throw an error if `count` is greater than the number of vectors in the `dataset`.
 *
 * @example
 * const dataset = [[1.0, 2.0], [3.0, 4.0]]; // Two vectors: [1.0, 2.0] and [3.0, 4.0]
 * const queries = [[1.5, 2.5]]; // One vector: [1.5, 2.5]
 * const dimensions = 2; // The number of elements in each vector.
 * const count = 1; // The number of nearest neighbors to return for each query.
 * const metric = MetricKind.IP; // Using the Inner Product distance metric.
 *
 * const result = exactSearch(dataset, queries, dimensions, count, metric);
 * // result might be:
 * // {
 * // keys: BigUint64Array [ 1n ],
 * // distances: Float32Array [ some_value ],
 * // }
 */
function exactSearch(dataset, queries, dimensions, count, metric) {
    // Coerce first so numeric strings and bigints keep working, then insist on
    // positive integers: a plain `<= 0` test lets NaN and fractions through.
    const vectorDimensions = Number(dimensions);
    const neighborCount = Number(count);
    const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
    if (!isPositiveInteger(vectorDimensions) || !isPositiveInteger(neighborCount)) {
        throw new Error("Dimensions and count must be positive integers.");
    }
    // Validate metric
    if (!Object.values(MetricKind).includes(metric)) {
        throw new Error(`Invalid metric: ${metric}. It must be one of: ${Object.values(MetricKind).join(", ")}`);
    }
    // The dataset's element type decides the type both inputs are converted to;
    // anything that is not a Float64Array or Int8Array falls back to Float32Array.
    let targetType = Float32Array;
    if (dataset instanceof Float64Array) {
        targetType = Float64Array;
    }
    else if (dataset instanceof Int8Array) {
        targetType = Int8Array;
    }
    const flatDataset = normalizeVectors(dataset, vectorDimensions, targetType);
    const flatQueries = normalizeVectors(queries, vectorDimensions, targetType);
    const countInDataset = flatDataset.length / vectorDimensions;
    const countInQueries = flatQueries.length / vectorDimensions;
    if (neighborCount > countInDataset) {
        throw new Error("Count must be equal or smaller than the number of vectors in the dataset.");
    }
    // Call the compiled function with the normalized input
    const result = compiled.exactSearch(flatDataset, flatQueries, vectorDimensions, neighborCount, metric);
    // A single query is unwrapped into a plain Matches object
    if (countInQueries === 1) {
        return new Matches(result[0], result[1]);
    }
    return new BatchMatches(...result, neighborCount);
}
// Aggregate of the public API, exposed as the module's default export.
// In the visible source `exactSearch` is declared without a named `export`,
// so this object is the only way importers reach it; the classes and enums
// are additionally available as named exports.
const usearch = {
Index,
MetricKind,
ScalarKind,
Matches,
BatchMatches,
exactSearch,
};
export default usearch;
// utility functions to help find native builds
/**
 * Finds the directory that holds the native build, starting at `dir` and
 * walking up through its ancestors.
 *
 * A directory qualifies when it contains a `build` or a `prebuilds` entry.
 * The walk stops at the filesystem root, detected by the parent resolving to
 * the same location as the current directory. Comparing against the literal
 * "/" alone never terminates for a Windows root such as `C:\` or for a
 * relative starting directory.
 *
 * @param {string} dir - Directory to start the lookup from.
 * @returns {string} The first directory on the way up that contains `build` or `prebuilds`.
 * @throws {Error} If the filesystem root is reached without finding a native build.
 */
function getBuildDir(dir) {
    let current = dir;
    for (;;) {
        if (existsSync(path.join(current, "build")) ||
            existsSync(path.join(current, "prebuilds"))) {
            return current;
        }
        if (path.basename(current) === ".next") {
            // special case for next.js on custom node (not vercel)
            const sideways = path.join(current, "..", "node_modules", "usearch");
            if (existsSync(sideways)) {
                return getBuildDir(sideways);
            }
        }
        const parent = path.join(current, "..");
        // At the root, going up leads nowhere new; resolving both sides makes
        // the check work for relative paths and drive roots as well.
        if (path.resolve(parent) === path.resolve(current)) {
            throw new Error("Could not find native build for usearch");
        }
        current = parent;
    }
}
/**
 * Returns the directory from which the native-build lookup starts.
 *
 * `__dirname` is used when it is defined and non-empty. Otherwise (for example
 * when the identifier does not exist at all) the directory is derived from
 * the `bindings` helpers `getRoot` and `getFileName`.
 *
 * @returns {string} The starting directory.
 */
function getDirName() {
    // `typeof` is safe on an undeclared identifier, so no try/catch is needed.
    const hasDirName = typeof __dirname !== "undefined" && Boolean(__dirname);
    return hasDirName ? __dirname : getRoot(getFileName());
}
// Dummy code for ncc to include the native module.
// `process.uptime()` is never negative, so this branch never executes at
// runtime; the `require` calls exist only so that the literal `.node` paths
// appear in the source. Presumably ncc's static asset tracing picks them up
// and ships the prebuilt binaries (confirm against ncc's documentation).
// Keep the statements in exactly this form: rewriting the path expressions
// may stop a static analyzer from recognizing them.
if (process.uptime() < 0) {
require(__dirname + "/../../../prebuilds/darwin-arm64+x64/usearch.node");
require(__dirname + "/../../../prebuilds/linux-arm64/usearch.node");
require(__dirname + "/../../../prebuilds/linux-x64/usearch.node");
require(__dirname + "/../../../prebuilds/win32-ia32/usearch.node");
require(__dirname + "/../../../prebuilds/win32-x64/usearch.node");
require(__dirname + "/../../../build/Release/usearch.node");
}