UNPKG

@lancedb/lancedb

Version:

LanceDB: A serverless, low-latency vector database for AI applications

586 lines (583 loc) 24 kB
/* tslint:disable */
/* eslint-disable */

/* auto-generated by NAPI-RS */

/** Options accepted by `PermutationBuilder.splitRandom`. */
export interface SplitRandomOptions {
  ratios?: number[]
  counts?: number[]
  fixed?: number
  seed?: number
  splitNames?: string[]
}

/** Options accepted by `PermutationBuilder.splitHash`. */
export interface SplitHashOptions {
  columns: string[]
  splitWeights: number[]
  discardWeight?: number
  splitNames?: string[]
}

/** Options accepted by `PermutationBuilder.splitSequential`. */
export interface SplitSequentialOptions {
  ratios?: number[]
  counts?: number[]
  fixed?: number
  splitNames?: string[]
}

/** Options accepted by `PermutationBuilder.splitCalculated`. */
export interface SplitCalculatedOptions {
  calculation: string
  splitNames?: string[]
}

/** Options accepted by `PermutationBuilder.shuffle`. */
export interface ShuffleOptions {
  seed?: number
  clumpSize?: number
}

/** Create a permutation builder for the given table */
export declare function permutationBuilder(table: Table): PermutationBuilder

/** Timeout configuration for remote HTTP client. */
export interface TimeoutConfig {
  /**
   * The overall timeout for the entire request in seconds. This includes
   * connection, send, and read time. If the entire request doesn't complete
   * within this time, it will fail. Default is None (no overall timeout).
   * This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
   * as an integer number of seconds.
   */
  timeout?: number
  /**
   * The timeout for establishing a connection in seconds. Default is 120
   * seconds (2 minutes). This can also be set via the environment variable
   * `LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds.
   */
  connectTimeout?: number
  /**
   * The timeout for reading data from the server in seconds. Default is 300
   * seconds (5 minutes). This can also be set via the environment variable
   * `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
   */
  readTimeout?: number
  /**
   * The timeout for keeping idle connections in the connection pool in seconds.
   * Default is 300 seconds (5 minutes). This can also be set via the
   * environment variable `LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer
   * number of seconds.
   */
  poolIdleTimeout?: number
}

/** Retry configuration for the remote HTTP client. */
export interface RetryConfig {
  /**
   * The maximum number of retries for a request. Default is 3. You can also
   * set this via the environment variable `LANCE_CLIENT_MAX_RETRIES`.
   */
  retries?: number
  /**
   * The maximum number of retries for connection errors. Default is 3. You
   * can also set this via the environment variable `LANCE_CLIENT_CONNECT_RETRIES`.
   */
  connectRetries?: number
  /**
   * The maximum number of retries for read errors. Default is 3. You can also
   * set this via the environment variable `LANCE_CLIENT_READ_RETRIES`.
   */
  readRetries?: number
  /**
   * The backoff factor to apply between retries. Default is 0.25. Between each retry
   * the client will wait for the amount of seconds:
   * `{backoff factor} * (2 ** ({number of previous retries}))`. So for the default
   * of 0.25, the first retry will wait 0.25 seconds, the second retry will wait 0.5
   * seconds, the third retry will wait 1 second, etc.
   *
   * You can also set this via the environment variable
   * `LANCE_CLIENT_RETRY_BACKOFF_FACTOR`.
   */
  backoffFactor?: number
  /**
   * The jitter to apply to the backoff factor, in seconds. Default is 0.25.
   *
   * A random value between 0 and `backoffJitter` will be added to the backoff
   * factor in seconds. So for the default of 0.25 seconds, between 0 and 250
   * milliseconds will be added to the sleep between each retry.
   *
   * You can also set this via the environment variable
   * `LANCE_CLIENT_RETRY_BACKOFF_JITTER`.
   */
  backoffJitter?: number
  /**
   * The HTTP status codes for which to retry the request. Default is
   * [429, 500, 502, 503].
   *
   * You can also set this via the environment variable
   * `LANCE_CLIENT_RETRY_STATUSES`. Use a comma-separated list of integers.
   */
  statuses?: number[]
}

/** TLS/mTLS configuration for the remote HTTP client. */
export interface TlsConfig {
  /** Path to the client certificate file (PEM format) for mTLS authentication. */
  certFile?: string
  /** Path to the client private key file (PEM format) for mTLS authentication. */
  keyFile?: string
  /** Path to the CA certificate file (PEM format) for server verification. */
  sslCaCert?: string
  /** Whether to verify the hostname in the server's certificate. */
  assertHostname?: boolean
}

/** Remote HTTP client settings; supplied through `ConnectionOptions.clientConfig`. */
export interface ClientConfig {
  userAgent?: string
  retryConfig?: RetryConfig
  timeoutConfig?: TimeoutConfig
  extraHeaders?: Record<string, string>
  idDelimiter?: string
  tlsConfig?: TlsConfig
}

/** Callback bundle passed to `Reranker.new` and `VectorQuery.rerank`. */
export interface RerankerCallbacks {
  rerankHybrid: (...args: any[]) => any
}

export interface RerankHybridCallbackArgs {
  query: string
  vecResults: number[]
  ftsResults: number[]
}

/** A description of an index currently configured on a column */
export interface IndexConfig {
  /** The name of the index */
  name: string
  /** The type of the index */
  indexType: string
  /**
   * The columns in the index
   *
   * Currently this is always an array of size 1. In the future there may
   * be more columns to represent composite indices.
   */
  columns: string[]
}

/** Statistics about a compaction operation. */
export interface CompactionStats {
  /** The number of fragments removed */
  fragmentsRemoved: number
  /** The number of new, compacted fragments added */
  fragmentsAdded: number
  /** The number of data files removed */
  filesRemoved: number
  /** The number of new, compacted data files added */
  filesAdded: number
}

/** Statistics about a cleanup operation */
export interface RemovalStats {
  /** The number of bytes removed */
  bytesRemoved: number
  /** The number of old versions removed */
  oldVersionsRemoved: number
}

/** Statistics about an optimize operation */
export interface OptimizeStats {
  /** Statistics about the compaction operation */
  compaction: CompactionStats
  /** Statistics about the removal operation */
  prune: RemovalStats
}

/**
 * A definition of a column alteration. The alteration changes the column at
 * `path` to have the new name `rename`, to be nullable if `nullable` is true,
 * and to have the data type `dataType`. At least one of `rename` or `nullable`
 * must be provided.
 */
export interface ColumnAlteration {
  /**
   * The path to the column to alter. This is a dot-separated path to the column.
   * If it is a top-level column then it is just the name of the column. If it is
   * a nested column then it is the path to the column, e.g. "a.b.c" for a column
   * `c` nested inside a column `b` nested inside a column `a`.
   */
  path: string
  /**
   * The new name of the column. If not provided then the name will not be changed.
   * This must be distinct from the names of all other columns in the table.
   */
  rename?: string
  /**
   * A new data type for the column. If not provided then the data type will not be changed.
   * Changing data types is limited to casting to the same general type. For example, these
   * changes are valid:
   * - `int32` -> `int64` (integers)
   * - `double` -> `float` (floats)
   * - `string` -> `large_string` (strings)
   *
   * But these changes are not:
   * - `int32` -> `double` (mix integers and floats)
   * - `string` -> `int32` (mix strings and integers)
   */
  dataType?: string
  /** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
  nullable?: boolean
}

/** A definition of a new column to add to a table. */
export interface AddColumnsSql {
  /** The name of the new column. */
  name: string
  /**
   * The values to populate the new column with, as a SQL expression.
   * The expression can reference other columns in the table.
   */
  valueSql: string
}

/** Statistics for a single index; resolved by `Table.indexStats`. */
export interface IndexStatistics {
  /** The number of rows indexed by the index */
  numIndexedRows: number
  /** The number of rows not indexed */
  numUnindexedRows: number
  /** The type of the index */
  indexType: string
  /**
   * The type of the distance function used by the index. This is only
   * present for vector indices. Scalar and full text search indices do
   * not have a distance function.
   */
  distanceType?: string
  /** The number of parts this index is split into. */
  numIndices?: number
  /**
   * The KMeans loss value of the index,
   * it is only present for vector indices.
   */
  loss?: number
}

/** Table-level statistics; resolved by `Table.stats`. */
export interface TableStatistics {
  /** The total number of bytes in the table */
  totalBytes: number
  /** The number of rows in the table */
  numRows: number
  /** The number of indices in the table */
  numIndices: number
  /** Statistics on table fragments */
  fragmentStats: FragmentStatistics
}

export interface FragmentStatistics {
  /** The number of fragments in the table */
  numFragments: number
  /** The number of uncompacted fragments in the table */
  numSmallFragments: number
  /** Statistics on the number of rows in the table fragments */
  lengths: FragmentSummaryStats
}

export interface FragmentSummaryStats {
  /** The number of rows in the fragment with the fewest rows */
  min: number
  /** The number of rows in the fragment with the most rows */
  max: number
  /** The mean number of rows in the fragments */
  mean: number
  /** The 25th percentile of number of rows in the fragments */
  p25: number
  /** The 50th percentile of number of rows in the fragments */
  p50: number
  /** The 75th percentile of number of rows in the fragments */
  p75: number
  /** The 99th percentile of number of rows in the fragments */
  p99: number
}

/** Element type of the list resolved by `Table.listVersions`. */
export interface Version {
  version: number
  timestamp: number
  metadata: Record<string, string>
}

/** Resolved by `Table.update`. */
export interface UpdateResult {
  rowsUpdated: number
  version: number
}

/** Resolved by `Table.add`. */
export interface AddResult {
  version: number
}

/** Resolved by `Table.delete`. */
export interface DeleteResult {
  version: number
}

/** Resolved by `NativeMergeInsertBuilder.execute`. */
export interface MergeResult {
  version: number
  numInsertedRows: number
  numUpdatedRows: number
  numDeletedRows: number
  numAttempts: number
}

/** Resolved by `Table.addColumns`. */
export interface AddColumnsResult {
  version: number
}

/** Resolved by `Table.alterColumns`. */
export interface AlterColumnsResult {
  version: number
}

/** Resolved by `Table.dropColumns`. */
export interface DropColumnsResult {
  version: number
}

/** Options accepted by `Connection.new`. */
export interface ConnectionOptions {
  /**
   * (For LanceDB OSS only): The interval, in seconds, at which to check for
   * updates to the table from other processes. If None, then consistency is not
   * checked. For performance reasons, this is the default. For strong
   * consistency, set this to zero seconds. Then every read will check for
   * updates from other processes. As a compromise, you can set this to a
   * non-zero value for eventual consistency. If more than that interval
   * has passed since the last check, then the table will be checked for updates.
   * Note: this consistency only applies to read operations. Write operations are
   * always consistent.
   */
  readConsistencyInterval?: number
  /**
   * (For LanceDB OSS only): configuration for object storage.
   *
   * The available options are described at https://lancedb.com/docs/storage/
   */
  storageOptions?: Record<string, string>
  /**
   * (For LanceDB OSS only): the session to use for this connection. Holds
   * shared caches and other session-specific state.
   */
  session?: Session
  /** (For LanceDB cloud only): configuration for the remote HTTP client. */
  clientConfig?: ClientConfig
  /**
   * (For LanceDB cloud only): the API key to use with LanceDB Cloud.
   *
   * Can also be set via the environment variable `LANCEDB_API_KEY`.
   */
  apiKey?: string
  /**
   * (For LanceDB cloud only): the region to use for LanceDB cloud.
   * Defaults to 'us-east-1'.
   */
  region?: string
  /**
   * (For LanceDB cloud only): the host to use for LanceDB cloud. Used
   * for testing purposes.
   */
  hostOverride?: string
}

export interface OpenTableOptions {
  storageOptions?: Record<string, string>
}

export class Connection {
  /** Create a new Connection instance from the given URI. */
  static new(
    uri: string,
    options: ConnectionOptions,
    headerProvider?: JsHeaderProvider | undefined | null
  ): Promise<Connection>
  display(): string
  isOpen(): boolean
  close(): void
  /** List all tables in the dataset. */
  tableNames(
    namespace: string[],
    startAfter?: string | undefined | null,
    limit?: number | undefined | null
  ): Promise<string[]>
  /**
   * Create table from an Apache Arrow IPC (file) buffer.
   *
   * Parameters:
   * - name: The name of the table.
   * - buf: The buffer containing the IPC file.
   */
  createTable(
    name: string,
    buf: Buffer,
    mode: string,
    namespace: string[],
    storageOptions?: Record<string, string> | undefined | null
  ): Promise<Table>
  createEmptyTable(
    name: string,
    schemaBuf: Buffer,
    mode: string,
    namespace: string[],
    storageOptions?: Record<string, string> | undefined | null
  ): Promise<Table>
  openTable(
    name: string,
    namespace: string[],
    storageOptions?: Record<string, string> | undefined | null,
    indexCacheSize?: number | undefined | null
  ): Promise<Table>
  cloneTable(
    targetTableName: string,
    sourceUri: string,
    targetNamespace: string[],
    sourceVersion: number | undefined | null,
    sourceTag: string | undefined | null,
    isShallow: boolean
  ): Promise<Table>
  /** Drop table with the name. Or raise an error if the table does not exist. */
  dropTable(name: string, namespace: string[]): Promise<void>
  dropAllTables(namespace: string[]): Promise<void>
}

/**
 * JavaScript HeaderProvider implementation that wraps a JavaScript callback.
 * This is the only native header provider - all header provider implementations
 * should provide a JavaScript function that returns headers.
 */
export class JsHeaderProvider {
  /** Create a new JsHeaderProvider from a JavaScript callback */
  constructor(getHeadersCallback: (...args: any[]) => any)
}

/** Static factories, one per index kind; the result is passed to `Table.createIndex`. */
export class Index {
  static ivfPq(
    distanceType?: string | undefined | null,
    numPartitions?: number | undefined | null,
    numSubVectors?: number | undefined | null,
    numBits?: number | undefined | null,
    maxIterations?: number | undefined | null,
    sampleRate?: number | undefined | null
  ): Index
  static ivfRq(
    distanceType?: string | undefined | null,
    numPartitions?: number | undefined | null,
    numBits?: number | undefined | null,
    maxIterations?: number | undefined | null,
    sampleRate?: number | undefined | null
  ): Index
  static ivfFlat(
    distanceType?: string | undefined | null,
    numPartitions?: number | undefined | null,
    maxIterations?: number | undefined | null,
    sampleRate?: number | undefined | null
  ): Index
  static btree(): Index
  static bitmap(): Index
  static labelList(): Index
  static fts(
    withPosition?: boolean | undefined | null,
    baseTokenizer?: string | undefined | null,
    language?: string | undefined | null,
    maxTokenLength?: number | undefined | null,
    lowerCase?: boolean | undefined | null,
    stem?: boolean | undefined | null,
    removeStopWords?: boolean | undefined | null,
    asciiFolding?: boolean | undefined | null,
    ngramMinLength?: number | undefined | null,
    ngramMaxLength?: number | undefined | null,
    prefixOnly?: boolean | undefined | null
  ): Index
  static hnswPq(
    distanceType?: string | undefined | null,
    numPartitions?: number | undefined | null,
    numSubVectors?: number | undefined | null,
    maxIterations?: number | undefined | null,
    sampleRate?: number | undefined | null,
    m?: number | undefined | null,
    efConstruction?: number | undefined | null
  ): Index
  static hnswSq(
    distanceType?: string | undefined | null,
    numPartitions?: number | undefined | null,
    maxIterations?: number | undefined | null,
    sampleRate?: number | undefined | null,
    m?: number | undefined | null,
    efConstruction?: number | undefined | null
  ): Index
}

/** TypeScript-style Async Iterator over RecordBatches */
export class RecordBatchIterator {
  next(): Promise<Buffer | null>
}

/** A builder used to create and run a merge insert operation */
export class NativeMergeInsertBuilder {
  whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
  whenNotMatchedInsertAll(): NativeMergeInsertBuilder
  whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
  setTimeout(timeout: number): void
  useIndex(useIndex: boolean): NativeMergeInsertBuilder
  execute(buf: Buffer): Promise<MergeResult>
}

/** Builder obtained from `permutationBuilder`; `execute` resolves to a `Table`. */
export class PermutationBuilder {
  persist(connection: Connection, tableName: string): PermutationBuilder
  /** Configure random splits */
  splitRandom(options: SplitRandomOptions): PermutationBuilder
  /** Configure hash-based splits */
  splitHash(options: SplitHashOptions): PermutationBuilder
  /** Configure sequential splits */
  splitSequential(options: SplitSequentialOptions): PermutationBuilder
  /** Configure calculated splits */
  splitCalculated(options: SplitCalculatedOptions): PermutationBuilder
  /** Configure shuffling */
  shuffle(options: ShuffleOptions): PermutationBuilder
  /** Configure filtering */
  filter(filter: string): PermutationBuilder
  /** Execute the permutation builder and create the table */
  execute(): Promise<Table>
}

/** Native query handle returned by `Table.query`. */
export class Query {
  onlyIf(predicate: string): void
  fullTextSearch(query: object): void
  select(columns: [string, string][]): void
  selectColumns(columns: string[]): void
  limit(limit: number): void
  offset(offset: number): void
  nearestTo(vector: Float32Array): VectorQuery
  fastSearch(): void
  withRowId(): void
  outputSchema(): Promise<Buffer>
  execute(
    maxBatchLength?: number | undefined | null,
    timeoutMs?: number | undefined | null
  ): Promise<RecordBatchIterator>
  explainPlan(verbose: boolean): Promise<string>
  analyzePlan(): Promise<string>
}

/** Native vector query handle returned by `Table.vectorSearch` and `Query.nearestTo`. */
export class VectorQuery {
  column(column: string): void
  addQueryVector(vector: Float32Array): void
  distanceType(distanceType: string): void
  postfilter(): void
  refineFactor(refineFactor: number): void
  nprobes(nprobe: number): void
  minimumNprobes(minimumNprobe: number): void
  maximumNprobes(maximumNprobes: number): void
  distanceRange(
    lowerBound?: number | undefined | null,
    upperBound?: number | undefined | null
  ): void
  ef(ef: number): void
  bypassVectorIndex(): void
  onlyIf(predicate: string): void
  fullTextSearch(query: object): void
  select(columns: [string, string][]): void
  selectColumns(columns: string[]): void
  limit(limit: number): void
  offset(offset: number): void
  fastSearch(): void
  withRowId(): void
  rerank(callbacks: RerankerCallbacks): void
  outputSchema(): Promise<Buffer>
  execute(
    maxBatchLength?: number | undefined | null,
    timeoutMs?: number | undefined | null
  ): Promise<RecordBatchIterator>
  explainPlan(verbose: boolean): Promise<string>
  analyzePlan(): Promise<string>
}

/** Native query handle returned by `Table.takeOffsets` and `Table.takeRowIds`. */
export class TakeQuery {
  select(columns: [string, string][]): void
  selectColumns(columns: string[]): void
  withRowId(): void
  outputSchema(): Promise<Buffer>
  execute(
    maxBatchLength?: number | undefined | null,
    timeoutMs?: number | undefined | null
  ): Promise<RecordBatchIterator>
  explainPlan(verbose: boolean): Promise<string>
  analyzePlan(): Promise<string>
}

/** Full text query value built through the static factories below. */
export class JsFullTextQuery {
  static matchQuery(
    query: string,
    column: string,
    boost: number,
    fuzziness: number | undefined | null,
    maxExpansions: number,
    operator: string,
    prefixLength: number
  ): JsFullTextQuery
  static phraseQuery(query: string, column: string, slop: number): JsFullTextQuery
  static boostQuery(
    positive: JsFullTextQuery,
    negative: JsFullTextQuery,
    negativeBoost?: number | undefined | null
  ): JsFullTextQuery
  static multiMatchQuery(
    query: string,
    columns: string[],
    boosts: number[] | undefined | null,
    operator: string
  ): JsFullTextQuery
  static booleanQuery(queries: [string, JsFullTextQuery][]): JsFullTextQuery
  get queryType(): string
}

/**
 * Reranker implementation that "wraps" a NodeJS Reranker implementation.
 * This contains references to the callbacks that can be used to invoke the
 * reranking methods on the NodeJS implementation and handles serializing the
 * record batches to Arrow IPC buffers.
 */
export class Reranker {
  static new(callbacks: RerankerCallbacks): Reranker
}

/** Alias exposing `RrfReranker` under the name `RRFReranker`. */
export type RRFReranker = RrfReranker

/** Wrapper around rust RRFReranker */
export class RrfReranker {
  static tryNew(k: Float32Array): Promise<RrfReranker>
  rerankHybrid(query: string, vecResults: Buffer, ftsResults: Buffer): Promise<Buffer>
}

/**
 * A session for managing caches and object stores across LanceDB operations.
 *
 * Sessions allow you to configure cache sizes for index and metadata caches,
 * which can significantly impact memory use and performance. They can
 * also be re-used across multiple connections to share the same cache state.
 */
export class Session {
  /**
   * Create a new session with custom cache sizes.
   *
   * @param indexCacheSizeBytes - The size of the index cache in bytes.
   *   Index data is stored in memory in this cache to speed up queries.
   *   Defaults to 6GB if not specified.
   * @param metadataCacheSizeBytes - The size of the metadata cache in bytes.
   *   The metadata cache stores file metadata and schema information in memory.
   *   This cache improves scan and write performance.
   *   Defaults to 1GB if not specified.
   */
  constructor(
    indexCacheSizeBytes?: bigint | undefined | null,
    metadataCacheSizeBytes?: bigint | undefined | null
  )
  /**
   * Create a session with default cache sizes.
   *
   * This is equivalent to creating a session with 6GB index cache
   * and 1GB metadata cache.
   */
  static default(): Session
  /** Get the current size of the session caches in bytes. */
  sizeBytes(): bigint
  /** Get the approximate number of items cached in the session. */
  approxNumItems(): number
}

/** Native table handle, as resolved by the `Connection` create/open/clone methods. */
export class Table {
  name: string
  display(): string
  isOpen(): boolean
  close(): void
  /** Return Schema as empty Arrow IPC file. */
  schema(): Promise<Buffer>
  add(buf: Buffer, mode: string): Promise<AddResult>
  countRows(filter?: string | undefined | null): Promise<number>
  delete(predicate: string): Promise<DeleteResult>
  createIndex(
    index: Index | undefined | null,
    column: string,
    replace?: boolean | undefined | null,
    waitTimeoutS?: number | undefined | null,
    name?: string | undefined | null,
    train?: boolean | undefined | null
  ): Promise<void>
  dropIndex(indexName: string): Promise<void>
  prewarmIndex(indexName: string): Promise<void>
  waitForIndex(indexNames: string[], timeoutS: number): Promise<void>
  stats(): Promise<TableStatistics>
  initialStorageOptions(): Promise<Record<string, string> | null>
  latestStorageOptions(): Promise<Record<string, string> | null>
  update(onlyIf: string | undefined | null, columns: [string, string][]): Promise<UpdateResult>
  query(): Query
  takeOffsets(offsets: number[]): TakeQuery
  takeRowIds(rowIds: bigint[]): TakeQuery
  vectorSearch(vector: Float32Array): VectorQuery
  addColumns(transforms: AddColumnsSql[]): Promise<AddColumnsResult>
  alterColumns(alterations: ColumnAlteration[]): Promise<AlterColumnsResult>
  dropColumns(columns: string[]): Promise<DropColumnsResult>
  version(): Promise<number>
  checkout(version: number): Promise<void>
  checkoutTag(tag: string): Promise<void>
  checkoutLatest(): Promise<void>
  listVersions(): Promise<Version[]>
  restore(): Promise<void>
  tags(): Promise<Tags>
  optimize(
    olderThanMs?: number | undefined | null,
    deleteUnverified?: boolean | undefined | null
  ): Promise<OptimizeStats>
  listIndices(): Promise<IndexConfig[]>
  indexStats(indexName: string): Promise<IndexStatistics | null>
  mergeInsert(on: string[]): NativeMergeInsertBuilder
  usesV2ManifestPaths(): Promise<boolean>
  migrateManifestPathsV2(): Promise<void>
}

/** Value type of the record resolved by `Tags.list`. */
export class TagContents {
  version: number
  manifestSize: number
}

/** Tag management handle resolved by `Table.tags`. */
export class Tags {
  list(): Promise<Record<string, TagContents>>
  getVersion(tag: string): Promise<number>
  create(tag: string, version: number): Promise<void>
  delete(tag: string): Promise<void>
  update(tag: string, version: number): Promise<void>
}