@lancedb/lancedb
Version:
LanceDB: A serverless, low-latency vector database for AI applications
586 lines (583 loc) • 24 kB
TypeScript
/* tslint:disable */
/* eslint-disable */
/* auto-generated by NAPI-RS */
/** Options accepted by `PermutationBuilder.splitRandom`. Every field is optional. */
export interface SplitRandomOptions {
  ratios?: number[]
  counts?: number[]
  fixed?: number
  seed?: number
  splitNames?: string[]
}
/**
 * Options accepted by `PermutationBuilder.splitHash`.
 *
 * `columns` and `splitWeights` are required; the remaining fields are optional.
 */
export interface SplitHashOptions {
  columns: string[]
  splitWeights: number[]
  discardWeight?: number
  splitNames?: string[]
}
/** Options accepted by `PermutationBuilder.splitSequential`. Every field is optional. */
export interface SplitSequentialOptions {
  ratios?: number[]
  counts?: number[]
  fixed?: number
  splitNames?: string[]
}
/**
 * Options accepted by `PermutationBuilder.splitCalculated`.
 *
 * `calculation` is required; `splitNames` is optional.
 */
export interface SplitCalculatedOptions {
  calculation: string
  splitNames?: string[]
}
/** Options accepted by `PermutationBuilder.shuffle`. Both fields are optional. */
export interface ShuffleOptions {
  seed?: number
  clumpSize?: number
}
/**
 * Create a permutation builder for the given table.
 *
 * @param table - The table the builder is created for.
 * @returns A `PermutationBuilder` for `table`.
 */
export declare function permutationBuilder(table: Table): PermutationBuilder
/** Timeout settings for the remote HTTP client. All values are in seconds. */
export interface TimeoutConfig {
  /**
   * Overall timeout for an entire request, covering connection, send, and
   * read time. A request that does not complete within this time fails.
   * By default there is no overall timeout.
   *
   * May also be set through the `LANCE_CLIENT_TIMEOUT` environment variable,
   * as an integer number of seconds.
   */
  timeout?: number
  /**
   * Timeout for establishing a connection. Defaults to 120 seconds
   * (2 minutes).
   *
   * May also be set through the `LANCE_CLIENT_CONNECT_TIMEOUT` environment
   * variable, as an integer number of seconds.
   */
  connectTimeout?: number
  /**
   * Timeout for reading data from the server. Defaults to 300 seconds
   * (5 minutes).
   *
   * May also be set through the `LANCE_CLIENT_READ_TIMEOUT` environment
   * variable, as an integer number of seconds.
   */
  readTimeout?: number
  /**
   * Timeout for keeping idle connections in the connection pool. Defaults
   * to 300 seconds (5 minutes).
   *
   * May also be set through the `LANCE_CLIENT_CONNECTION_TIMEOUT`
   * environment variable, as an integer number of seconds.
   */
  poolIdleTimeout?: number
}
/** Retry settings for the remote HTTP client. */
export interface RetryConfig {
  /**
   * Maximum number of retries for a request. Defaults to 3.
   *
   * May also be set through the `LANCE_CLIENT_MAX_RETRIES` environment
   * variable.
   */
  retries?: number
  /**
   * Maximum number of retries for connection errors. Defaults to 3.
   *
   * May also be set through the `LANCE_CLIENT_CONNECT_RETRIES` environment
   * variable.
   */
  connectRetries?: number
  /**
   * Maximum number of retries for read errors. Defaults to 3.
   *
   * May also be set through the `LANCE_CLIENT_READ_RETRIES` environment
   * variable.
   */
  readRetries?: number
  /**
   * Backoff factor applied between retries. Defaults to 0.25.
   *
   * Between retries the client waits, in seconds,
   * `{backoff factor} * (2 ** ({number of previous retries}))`. With the
   * default of 0.25 the first retry waits 0.25 seconds, the second 0.5
   * seconds, the third 1 second, and so on.
   *
   * May also be set through the `LANCE_CLIENT_RETRY_BACKOFF_FACTOR`
   * environment variable.
   */
  backoffFactor?: number
  /**
   * Jitter, in seconds, applied on top of the backoff. Defaults to 0.25.
   *
   * A random value between 0 and `backoffJitter` seconds is added to the
   * sleep between each retry; with the default of 0.25 that is between 0
   * and 250 milliseconds.
   *
   * May also be set through the `LANCE_CLIENT_RETRY_BACKOFF_JITTER`
   * environment variable.
   */
  backoffJitter?: number
  /**
   * HTTP status codes for which the request is retried. Defaults to
   * [429, 500, 502, 503].
   *
   * May also be set through the `LANCE_CLIENT_RETRY_STATUSES` environment
   * variable, as a comma-separated list of integers.
   */
  statuses?: number[]
}
/** TLS / mutual-TLS settings for the remote HTTP client. */
export interface TlsConfig {
  /** Path to the PEM-format client certificate file used for mTLS authentication. */
  certFile?: string
  /** Path to the PEM-format client private key file used for mTLS authentication. */
  keyFile?: string
  /** Path to the PEM-format CA certificate file used for server verification. */
  sslCaCert?: string
  /** Whether the hostname in the server's certificate is verified. */
  assertHostname?: boolean
}
/**
 * Configuration for the remote HTTP client, supplied through
 * `ConnectionOptions.clientConfig`. Every field is optional.
 */
export interface ClientConfig {
  userAgent?: string
  /** Retry settings; see `RetryConfig`. */
  retryConfig?: RetryConfig
  /** Timeout settings; see `TimeoutConfig`. */
  timeoutConfig?: TimeoutConfig
  extraHeaders?: Record<string, string>
  idDelimiter?: string
  /** TLS/mTLS settings; see `TlsConfig`. */
  tlsConfig?: TlsConfig
}
/**
 * Callback bundle accepted by `Reranker.new` and `VectorQuery.rerank`.
 *
 * The callback is loosely typed (`any` arguments and result) by the generator.
 */
export interface RerankerCallbacks {
  rerankHybrid: (...args: any[]) => any
}
/**
 * Argument shape for a hybrid rerank callback.
 *
 * NOTE(review): presumably the payload handed to
 * `RerankerCallbacks.rerankHybrid`; no declaration in this file references
 * it directly, so confirm against the native binding.
 */
export interface RerankHybridCallbackArgs {
  query: string
  vecResults: number[]
  ftsResults: number[]
}
/**
 * Describes an index currently configured on a column.
 * `Table.listIndices` resolves to an array of these.
 */
export interface IndexConfig {
  /** Name of the index. */
  name: string
  /** Type of the index. */
  indexType: string
  /**
   * Columns covered by the index.
   *
   * At present this is always an array of size 1; more columns may appear
   * in the future to represent composite indices.
   */
  columns: string[]
}
/** Counters describing the outcome of a compaction operation. */
export interface CompactionStats {
  /** Number of fragments removed. */
  fragmentsRemoved: number
  /** Number of new, compacted fragments added. */
  fragmentsAdded: number
  /** Number of data files removed. */
  filesRemoved: number
  /** Number of new, compacted data files added. */
  filesAdded: number
}
/** Counters describing the outcome of a cleanup operation. */
export interface RemovalStats {
  /** Number of bytes removed. */
  bytesRemoved: number
  /** Number of old versions removed. */
  oldVersionsRemoved: number
}
/**
 * Statistics about an optimize operation; `Table.optimize` resolves to this.
 */
export interface OptimizeStats {
  /** Statistics for the compaction part of the operation. */
  compaction: CompactionStats
  /** Statistics for the removal part of the operation. */
  prune: RemovalStats
}
/**
 * Definition of a column alteration, as passed to `Table.alterColumns`.
 *
 * The alteration changes the column at `path` to have the new name `rename`,
 * to be nullable if `nullable` is true, and to have the data type `dataType`.
 * At least one of `rename` or `nullable` must be provided.
 *
 * NOTE(review): `dataType` is also an alteration field; confirm whether
 * supplying only `dataType` satisfies the "at least one" requirement.
 */
export interface ColumnAlteration {
  /**
   * Dot-separated path to the column to alter.
   *
   * For a top-level column this is just the column name. For a nested column
   * it is the full path, e.g. "a.b.c" for a column `c` nested inside a column
   * `b` nested inside a column `a`.
   */
  path: string
  /**
   * New name for the column; when omitted the name is left unchanged.
   * Must be distinct from the names of all other columns in the table.
   */
  rename?: string
  /**
   * New data type for the column; when omitted the data type is left
   * unchanged. Changing data types is limited to casting within the same
   * general type. For example, these changes are valid:
   * * `int32` -> `int64` (integers)
   * * `double` -> `float` (floats)
   * * `string` -> `large_string` (strings)
   * But these changes are not:
   * * `int32` -> `double` (mix integers and floats)
   * * `string` -> `int32` (mix strings and integers)
   */
  dataType?: string
  /** New nullability. Note that a nullable column cannot be made non-nullable. */
  nullable?: boolean
}
/** Definition of a new column to add to a table, as passed to `Table.addColumns`. */
export interface AddColumnsSql {
  /** Name of the new column. */
  name: string
  /**
   * SQL expression producing the values that populate the new column.
   * The expression may reference other columns in the table.
   */
  valueSql: string
}
/** Statistics about a single index; `Table.indexStats` resolves to this or `null`. */
export interface IndexStatistics {
  /** Number of rows indexed by the index. */
  numIndexedRows: number
  /** Number of rows not indexed. */
  numUnindexedRows: number
  /** Type of the index. */
  indexType: string
  /**
   * Type of the distance function used by the index.
   *
   * Only present for vector indices; scalar and full text search indices
   * have no distance function.
   */
  distanceType?: string
  /** Number of parts this index is split into. */
  numIndices?: number
  /** KMeans loss value of the index; only present for vector indices. */
  loss?: number
}
/** Table-level statistics; `Table.stats` resolves to this. */
export interface TableStatistics {
  /** Total number of bytes in the table. */
  totalBytes: number
  /** Number of rows in the table. */
  numRows: number
  /** Number of indices in the table. */
  numIndices: number
  /** Statistics on the table's fragments. */
  fragmentStats: FragmentStatistics
}
/** Fragment-level statistics, exposed as `TableStatistics.fragmentStats`. */
export interface FragmentStatistics {
  /** Number of fragments in the table. */
  numFragments: number
  /** Number of uncompacted fragments in the table. */
  numSmallFragments: number
  /** Summary of the number of rows per table fragment. */
  lengths: FragmentSummaryStats
}
/** Summary of per-fragment row counts, exposed as `FragmentStatistics.lengths`. */
export interface FragmentSummaryStats {
  /** Row count of the fragment with the fewest rows. */
  min: number
  /** Row count of the fragment with the most rows. */
  max: number
  /** Mean number of rows in the fragments. */
  mean: number
  /** 25th percentile of the number of rows in the fragments. */
  p25: number
  /** 50th percentile of the number of rows in the fragments. */
  p50: number
  /** 75th percentile of the number of rows in the fragments. */
  p75: number
  /** 99th percentile of the number of rows in the fragments. */
  p99: number
}
/** One entry of the array that `Table.listVersions` resolves to. */
export interface Version {
  version: number
  timestamp: number
  metadata: Record<string, string>
}
/** Value that `Table.update` resolves to. */
export interface UpdateResult {
  rowsUpdated: number
  version: number
}
/** Value that `Table.add` resolves to. */
export interface AddResult {
  version: number
}
/** Value that `Table.delete` resolves to. */
export interface DeleteResult {
  version: number
}
/** Value that `NativeMergeInsertBuilder.execute` resolves to. */
export interface MergeResult {
  version: number
  numInsertedRows: number
  numUpdatedRows: number
  numDeletedRows: number
  numAttempts: number
}
/** Value that `Table.addColumns` resolves to. */
export interface AddColumnsResult {
  version: number
}
/** Value that `Table.alterColumns` resolves to. */
export interface AlterColumnsResult {
  version: number
}
/** Value that `Table.dropColumns` resolves to. */
export interface DropColumnsResult {
  version: number
}
/** Options passed to `Connection.new`. Every field is optional. */
export interface ConnectionOptions {
  /**
   * (For LanceDB OSS only): interval, in seconds, at which to check for
   * updates to the table from other processes.
   *
   * When unset, consistency is not checked; for performance reasons this is
   * the default. For strong consistency set this to zero seconds, so that
   * every read checks for updates from other processes. As a compromise, a
   * non-zero value gives eventual consistency: the table is checked for
   * updates once more than that interval has passed since the last check.
   *
   * Note: this consistency only applies to read operations. Write operations
   * are always consistent.
   */
  readConsistencyInterval?: number
  /**
   * (For LanceDB OSS only): configuration for object storage.
   *
   * The available options are described at https://lancedb.com/docs/storage/
   */
  storageOptions?: Record<string, string>
  /**
   * (For LanceDB OSS only): the session to use for this connection. Holds
   * shared caches and other session-specific state.
   */
  session?: Session
  /** (For LanceDB cloud only): configuration for the remote HTTP client. */
  clientConfig?: ClientConfig
  /**
   * (For LanceDB cloud only): the API key to use with LanceDB Cloud.
   *
   * Can also be set via the environment variable `LANCEDB_API_KEY`.
   */
  apiKey?: string
  /**
   * (For LanceDB cloud only): the region to use for LanceDB cloud.
   * Defaults to 'us-east-1'.
   */
  region?: string
  /**
   * (For LanceDB cloud only): the host to use for LanceDB cloud. Used
   * for testing purposes.
   */
  hostOverride?: string
}
/**
 * Options bag carrying optional storage options.
 *
 * NOTE(review): presumably used when opening a table; no declaration in this
 * file references it, so confirm against the callers.
 */
export interface OpenTableOptions {
  storageOptions?: Record<string, string>
}
/**
 * Database connection exposed by the native (NAPI-RS) binding.
 * Instances are obtained asynchronously through `Connection.new`.
 */
export class Connection {
  /** Create a new Connection instance from the given URI. */
  static new(
    uri: string,
    options: ConnectionOptions,
    headerProvider?: JsHeaderProvider | null
  ): Promise<Connection>
  display(): string
  isOpen(): boolean
  close(): void
  /** List all tables in the dataset. */
  tableNames(
    namespace: string[],
    startAfter?: string | null,
    limit?: number | null
  ): Promise<string[]>
  /**
   * Create a table from an Apache Arrow IPC (file) buffer.
   *
   * @param name - The name of the table.
   * @param buf - The buffer containing the IPC file.
   */
  createTable(
    name: string,
    buf: Buffer,
    mode: string,
    namespace: string[],
    storageOptions?: Record<string, string> | null
  ): Promise<Table>
  createEmptyTable(
    name: string,
    schemaBuf: Buffer,
    mode: string,
    namespace: string[],
    storageOptions?: Record<string, string> | null
  ): Promise<Table>
  openTable(
    name: string,
    namespace: string[],
    storageOptions?: Record<string, string> | null,
    indexCacheSize?: number | null
  ): Promise<Table>
  cloneTable(
    targetTableName: string,
    sourceUri: string,
    targetNamespace: string[],
    sourceVersion: number | undefined | null,
    sourceTag: string | undefined | null,
    isShallow: boolean
  ): Promise<Table>
  /** Drop the table with the given name, or raise an error if it does not exist. */
  dropTable(name: string, namespace: string[]): Promise<void>
  dropAllTables(namespace: string[]): Promise<void>
}
/**
 * HeaderProvider implementation that wraps a JavaScript callback.
 *
 * This is the only native header provider: every header provider
 * implementation should supply a JavaScript function that returns headers.
 */
export class JsHeaderProvider {
  /**
   * Create a new JsHeaderProvider from a JavaScript callback.
   *
   * @param getHeadersCallback - The JavaScript function that returns headers.
   */
  constructor(getHeadersCallback: (...args: any[]) => any)
}
/**
 * Index configuration value, built through the static factory methods below
 * (one per index kind). `Table.createIndex` accepts the resulting value.
 */
export class Index {
  static ivfPq(
    distanceType?: string | null,
    numPartitions?: number | null,
    numSubVectors?: number | null,
    numBits?: number | null,
    maxIterations?: number | null,
    sampleRate?: number | null
  ): Index
  static ivfRq(
    distanceType?: string | null,
    numPartitions?: number | null,
    numBits?: number | null,
    maxIterations?: number | null,
    sampleRate?: number | null
  ): Index
  static ivfFlat(
    distanceType?: string | null,
    numPartitions?: number | null,
    maxIterations?: number | null,
    sampleRate?: number | null
  ): Index
  static btree(): Index
  static bitmap(): Index
  static labelList(): Index
  static fts(
    withPosition?: boolean | null,
    baseTokenizer?: string | null,
    language?: string | null,
    maxTokenLength?: number | null,
    lowerCase?: boolean | null,
    stem?: boolean | null,
    removeStopWords?: boolean | null,
    asciiFolding?: boolean | null,
    ngramMinLength?: number | null,
    ngramMaxLength?: number | null,
    prefixOnly?: boolean | null
  ): Index
  static hnswPq(
    distanceType?: string | null,
    numPartitions?: number | null,
    numSubVectors?: number | null,
    maxIterations?: number | null,
    sampleRate?: number | null,
    m?: number | null,
    efConstruction?: number | null
  ): Index
  static hnswSq(
    distanceType?: string | null,
    numPartitions?: number | null,
    maxIterations?: number | null,
    sampleRate?: number | null,
    m?: number | null,
    efConstruction?: number | null
  ): Index
}
/**
 * TypeScript-style async iterator over RecordBatches.
 *
 * The `execute` methods of `Query`, `VectorQuery` and `TakeQuery` resolve to
 * an instance of this class.
 */
export class RecordBatchIterator {
  /**
   * Resolves to a `Buffer` or to `null`.
   *
   * NOTE(review): presumably `null` signals that the iterator is exhausted
   * — confirm against the native implementation.
   */
  next(): Promise<Buffer | null>
}
/**
 * Builder used to create and run a merge insert operation.
 * Obtained from `Table.mergeInsert`.
 */
export class NativeMergeInsertBuilder {
  whenMatchedUpdateAll(condition?: string | null): NativeMergeInsertBuilder
  whenNotMatchedInsertAll(): NativeMergeInsertBuilder
  whenNotMatchedBySourceDelete(filter?: string | null): NativeMergeInsertBuilder
  /** Unlike the other configuration methods, this one returns nothing rather than a builder. */
  setTimeout(timeout: number): void
  useIndex(useIndex: boolean): NativeMergeInsertBuilder
  /** Run the operation with the given buffer; resolves to a `MergeResult`. */
  execute(buf: Buffer): Promise<MergeResult>
}
/**
 * Builder for a permutation, created by `permutationBuilder(table)`.
 * Each configuration method returns a `PermutationBuilder`; `execute`
 * resolves to the resulting `Table`.
 */
export class PermutationBuilder {
  persist(connection: Connection, tableName: string): PermutationBuilder
  /** Configure random splits. */
  splitRandom(options: SplitRandomOptions): PermutationBuilder
  /** Configure hash-based splits. */
  splitHash(options: SplitHashOptions): PermutationBuilder
  /** Configure sequential splits. */
  splitSequential(options: SplitSequentialOptions): PermutationBuilder
  /** Configure calculated splits. */
  splitCalculated(options: SplitCalculatedOptions): PermutationBuilder
  /** Configure shuffling. */
  shuffle(options: ShuffleOptions): PermutationBuilder
  /** Configure filtering. */
  filter(filter: string): PermutationBuilder
  /** Execute the permutation builder and create the table. */
  execute(): Promise<Table>
}
/**
 * Query object returned by `Table.query()`.
 *
 * The configuration methods return `void`; `nearestTo` returns a
 * `VectorQuery`, and `execute` resolves to a `RecordBatchIterator`.
 */
export class Query {
  onlyIf(predicate: string): void
  fullTextSearch(query: object): void
  select(columns: [string, string][]): void
  selectColumns(columns: string[]): void
  limit(limit: number): void
  offset(offset: number): void
  nearestTo(vector: Float32Array): VectorQuery
  fastSearch(): void
  withRowId(): void
  outputSchema(): Promise<Buffer>
  execute(
    maxBatchLength?: number | null,
    timeoutMs?: number | null
  ): Promise<RecordBatchIterator>
  explainPlan(verbose: boolean): Promise<string>
  analyzePlan(): Promise<string>
}
/**
 * Vector query object, returned by `Table.vectorSearch` and
 * `Query.nearestTo`.
 *
 * The configuration methods return `void`; `execute` resolves to a
 * `RecordBatchIterator`.
 */
export class VectorQuery {
  column(column: string): void
  addQueryVector(vector: Float32Array): void
  distanceType(distanceType: string): void
  postfilter(): void
  refineFactor(refineFactor: number): void
  nprobes(nprobe: number): void
  minimumNprobes(minimumNprobe: number): void
  maximumNprobes(maximumNprobes: number): void
  distanceRange(lowerBound?: number | null, upperBound?: number | null): void
  ef(ef: number): void
  bypassVectorIndex(): void
  onlyIf(predicate: string): void
  fullTextSearch(query: object): void
  select(columns: [string, string][]): void
  selectColumns(columns: string[]): void
  limit(limit: number): void
  offset(offset: number): void
  fastSearch(): void
  withRowId(): void
  rerank(callbacks: RerankerCallbacks): void
  outputSchema(): Promise<Buffer>
  execute(
    maxBatchLength?: number | null,
    timeoutMs?: number | null
  ): Promise<RecordBatchIterator>
  explainPlan(verbose: boolean): Promise<string>
  analyzePlan(): Promise<string>
}
/**
 * Query object returned by `Table.takeOffsets` and `Table.takeRowIds`.
 *
 * The configuration methods return `void`; `execute` resolves to a
 * `RecordBatchIterator`.
 */
export class TakeQuery {
  select(columns: [string, string][]): void
  selectColumns(columns: string[]): void
  withRowId(): void
  outputSchema(): Promise<Buffer>
  execute(
    maxBatchLength?: number | null,
    timeoutMs?: number | null
  ): Promise<RecordBatchIterator>
  explainPlan(verbose: boolean): Promise<string>
  analyzePlan(): Promise<string>
}
/**
 * Full text query value, built through the static factory methods below.
 * `boostQuery` and `booleanQuery` compose other `JsFullTextQuery` values.
 */
export class JsFullTextQuery {
  static matchQuery(
    query: string,
    column: string,
    boost: number,
    fuzziness: number | undefined | null,
    maxExpansions: number,
    operator: string,
    prefixLength: number
  ): JsFullTextQuery
  static phraseQuery(query: string, column: string, slop: number): JsFullTextQuery
  static boostQuery(
    positive: JsFullTextQuery,
    negative: JsFullTextQuery,
    negativeBoost?: number | null
  ): JsFullTextQuery
  static multiMatchQuery(
    query: string,
    columns: string[],
    boosts: number[] | undefined | null,
    operator: string
  ): JsFullTextQuery
  static booleanQuery(queries: [string, JsFullTextQuery][]): JsFullTextQuery
  /** Read-only string identifying the type of this query. */
  get queryType(): string
}
/**
 * Reranker that "wraps" a NodeJS Reranker implementation.
 *
 * It holds references to the callbacks used to invoke the reranking methods
 * on the NodeJS implementation, and handles serializing the record batches
 * to Arrow IPC buffers.
 */
export class Reranker {
  /** Build a `Reranker` from the given callbacks. */
  static new(callbacks: RerankerCallbacks): Reranker
}
/** Alias exposing `RrfReranker` under the upper-case `RRF` spelling. */
export type RRFReranker = RrfReranker
/** Wrapper around the Rust RRFReranker. */
export class RrfReranker {
  /** Asynchronously construct an `RrfReranker` from `k`. */
  static tryNew(k: Float32Array): Promise<RrfReranker>
  /**
   * Rerank using the query string and two result buffers.
   *
   * NOTE(review): the buffers are presumably Arrow IPC-encoded record
   * batches — confirm against the native implementation.
   */
  rerankHybrid(
    query: string,
    vecResults: Buffer,
    ftsResults: Buffer
  ): Promise<Buffer>
}
/**
 * A session for managing caches and object stores across LanceDB operations.
 *
 * A session lets you configure the sizes of the index and metadata caches,
 * which can significantly impact memory use and performance. It can also be
 * re-used across multiple connections so they share the same cache state.
 */
export class Session {
  /**
   * Create a new session with custom cache sizes.
   *
   * @param indexCacheSizeBytes - Size of the index cache in bytes. Index data
   *   is stored in memory in this cache to speed up queries. Defaults to 6GB
   *   if not specified.
   * @param metadataCacheSizeBytes - Size of the metadata cache in bytes. The
   *   metadata cache stores file metadata and schema information in memory,
   *   which improves scan and write performance. Defaults to 1GB if not
   *   specified.
   */
  constructor(
    indexCacheSizeBytes?: bigint | null,
    metadataCacheSizeBytes?: bigint | null
  )
  /**
   * Create a session with default cache sizes.
   *
   * Equivalent to creating a session with a 6GB index cache and a 1GB
   * metadata cache.
   */
  static default(): Session
  /** Get the current size of the session caches in bytes. */
  sizeBytes(): bigint
  /** Get the approximate number of items cached in the session. */
  approxNumItems(): number
}
/**
 * Table handle exposed by the native (NAPI-RS) binding.
 *
 * `Connection.createTable`, `createEmptyTable`, `openTable` and `cloneTable`,
 * as well as `PermutationBuilder.execute`, resolve to instances of this class.
 */
export class Table {
  name: string
  display(): string
  isOpen(): boolean
  close(): void
  /** Return Schema as empty Arrow IPC file. */
  schema(): Promise<Buffer>
  add(buf: Buffer, mode: string): Promise<AddResult>
  countRows(filter?: string | null): Promise<number>
  delete(predicate: string): Promise<DeleteResult>
  createIndex(
    index: Index | undefined | null,
    column: string,
    replace?: boolean | null,
    waitTimeoutS?: number | null,
    name?: string | null,
    train?: boolean | null
  ): Promise<void>
  dropIndex(indexName: string): Promise<void>
  prewarmIndex(indexName: string): Promise<void>
  waitForIndex(indexNames: string[], timeoutS: number): Promise<void>
  stats(): Promise<TableStatistics>
  initialStorageOptions(): Promise<Record<string, string> | null>
  latestStorageOptions(): Promise<Record<string, string> | null>
  update(
    onlyIf: string | undefined | null,
    columns: [string, string][]
  ): Promise<UpdateResult>
  query(): Query
  takeOffsets(offsets: number[]): TakeQuery
  takeRowIds(rowIds: bigint[]): TakeQuery
  vectorSearch(vector: Float32Array): VectorQuery
  addColumns(transforms: AddColumnsSql[]): Promise<AddColumnsResult>
  alterColumns(alterations: ColumnAlteration[]): Promise<AlterColumnsResult>
  dropColumns(columns: string[]): Promise<DropColumnsResult>
  version(): Promise<number>
  checkout(version: number): Promise<void>
  checkoutTag(tag: string): Promise<void>
  checkoutLatest(): Promise<void>
  listVersions(): Promise<Version[]>
  restore(): Promise<void>
  tags(): Promise<Tags>
  optimize(
    olderThanMs?: number | null,
    deleteUnverified?: boolean | null
  ): Promise<OptimizeStats>
  listIndices(): Promise<IndexConfig[]>
  indexStats(indexName: string): Promise<IndexStatistics | null>
  mergeInsert(on: string[]): NativeMergeInsertBuilder
  usesV2ManifestPaths(): Promise<boolean>
  migrateManifestPathsV2(): Promise<void>
}
/** Value type of the record that `Tags.list` resolves to (keyed by string). */
export class TagContents {
  version: number
  manifestSize: number
}
/**
 * Tag operations for a table; `Table.tags` resolves to an instance.
 * Every method is asynchronous.
 */
export class Tags {
  /** Resolves to a record mapping strings to `TagContents`. */
  list(): Promise<Record<string, TagContents>>
  /** Resolves to the version number for `tag`. */
  getVersion(tag: string): Promise<number>
  create(tag: string, version: number): Promise<void>
  delete(tag: string): Promise<void>
  update(tag: string, version: number): Promise<void>
}