@lancedb/lancedb
Version:
LanceDB: A serverless, low-latency vector database for AI applications
157 lines (156 loc) • 6.91 kB
JavaScript
;
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
Object.defineProperty(exports, "__esModule", { value: true });
exports.Index = void 0;
const native_1 = require("./native");
class Index {
inner;
constructor(inner) {
this.inner = inner;
}
/**
* Create an IvfPq index
*
* This index stores a compressed (quantized) copy of every vector. These vectors
* are grouped into partitions of similar vectors. Each partition keeps track of
* a centroid which is the average value of all vectors in the group.
*
* During a query the centroids are compared with the query vector to find the closest
* partitions. The compressed vectors in these partitions are then searched to find
* the closest vectors.
*
* The compression scheme is called product quantization. Each vector is divided into
* subvectors and then each subvector is quantized into a small number of bits. the
* parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
* between index size (and thus search speed) and index accuracy.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF PQ index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfPq(options) {
return new Index(native_1.Index.ivfPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.numBits, options?.maxIterations, options?.sampleRate));
}
/**
* Create an IvfRq index
*
* IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
* and organizes them into IVF partitions.
*
* The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
* The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
* between index size (and thus search speed) and index accuracy.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF RQ index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfRq(options) {
return new Index(native_1.Index.ivfRq(options?.distanceType, options?.numPartitions, options?.numBits, options?.maxIterations, options?.sampleRate));
}
/**
* Create an IvfFlat index
*
* This index groups vectors into partitions of similar vectors. Each partition keeps track of
* a centroid which is the average value of all vectors in the group.
*
* During a query the centroids are compared with the query vector to find the closest
* partitions. The vectors in these partitions are then searched to find
* the closest vectors.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF FLAT index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfFlat(options) {
return new Index(native_1.Index.ivfFlat(options?.distanceType, options?.numPartitions, options?.maxIterations, options?.sampleRate));
}
/**
* Create a btree index
*
* A btree index is an index on a scalar columns. The index stores a copy of the column
* in sorted order. A header entry is created for each block of rows (currently the
* block size is fixed at 4096). These header entries are stored in a separate
* cacheable structure (a btree). To search for data the header is used to determine
* which blocks need to be read from disk.
*
* For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
* bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
* the correct row ids.
*
* This index is good for scalar columns with mostly distinct values and does best when
* the query is highly selective.
*
* The btree index does not currently have any parameters though parameters such as the
* block size may be added in the future.
*/
static btree() {
return new Index(native_1.Index.btree());
}
/**
* Create a bitmap index.
*
* A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
*
* This index works best for low-cardinality columns, where the number of unique values
* is small (i.e., less than a few hundreds).
*/
static bitmap() {
return new Index(native_1.Index.bitmap());
}
/**
* Create a label list index.
*
* LabelList index is a scalar index that can be used on `List<T>` columns to
* support queries with `array_contains_all` and `array_contains_any`
* using an underlying bitmap index.
*/
static labelList() {
return new Index(native_1.Index.labelList());
}
/**
* Create a full text search index
*
* A full text search index is an index on a string column, so that you can conduct full
* text searches on the column.
*
* The results of a full text search are ordered by relevance measured by BM25.
*
* You can combine filters with full text search.
*/
static fts(options) {
return new Index(native_1.Index.fts(options?.withPosition, options?.baseTokenizer, options?.language, options?.maxTokenLength, options?.lowercase, options?.stem, options?.removeStopWords, options?.asciiFolding, options?.ngramMinLength, options?.ngramMaxLength, options?.prefixOnly));
}
/**
*
* Create a hnswPq index
*
* HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
* It is a variant of the HNSW algorithm that uses product quantization to compress
* the vectors.
*
*/
static hnswPq(options) {
return new Index(native_1.Index.hnswPq(options?.distanceType, options?.numPartitions, options?.numSubVectors, options?.maxIterations, options?.sampleRate, options?.m, options?.efConstruction));
}
/**
*
* Create a hnswSq index
*
* HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
* It is a variant of the HNSW algorithm that uses scalar quantization to compress
* the vectors.
*
*/
static hnswSq(options) {
return new Index(native_1.Index.hnswSq(options?.distanceType, options?.numPartitions, options?.maxIterations, options?.sampleRate, options?.m, options?.efConstruction));
}
}
exports.Index = Index;