@lancedb/lancedb
Version:
LanceDB: A serverless, low-latency vector database for AI applications
322 lines (321 loc) • 11.7 kB
JavaScript
"use strict";
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
Object.defineProperty(exports, "__esModule", { value: true });
exports.LocalTable = exports.Table = void 0;
const arrow_1 = require("./arrow");
const registry_1 = require("./embedding/registry");
const merge_1 = require("./merge");
const query_1 = require("./query");
const sanitize_1 = require("./sanitize");
const util_1 = require("./util");
/**
* A Table is a collection of Records in a LanceDB Database.
*
* A Table object is expected to be long lived and reused for multiple operations.
* Table objects will cache a certain amount of index data in memory. This cache
* will be freed when the Table is garbage collected. To eagerly free the cache you
* can call the `close` method. Once the Table is closed, it cannot be used for any
* further operations.
*
* Tables are created using the methods {@link Connection#createTable}
* and {@link Connection#createEmptyTable}. Existing tables are opened
* using {@link Connection#openTable}.
*
* Closing a table is optional. If not closed, it will be closed when it is garbage
* collected.
*
* @hideconstructor
*/
class Table {
    /**
     * Custom inspection hook, registered under the well-known
     * `nodejs.util.inspect.custom` symbol so that Node.js `util.inspect`
     * (and therefore `console.log`) prints the table's own description.
     *
     * Subclasses are expected to provide `display()`; this base class does
     * not define it.
     *
     * @returns whatever `this.display()` returns
     */
    [Symbol.for("nodejs.util.inspect.custom")]() {
        const rendered = this.display();
        return rendered;
    }
}
exports.Table = Table;
/**
 * A {@link Table} backed by a native table handle.
 *
 * Every method forwards to the wrapped `inner` handle, converting arguments
 * and results between their JavaScript form and the form the handle expects.
 */
class LocalTable extends Table {
    /** Native table handle that all operations are delegated to. */
    inner;
    /**
     * @param inner - The native table handle to wrap.
     */
    constructor(inner) {
        super();
        this.inner = inner;
    }
    /** Name of the table, as reported by the native handle. */
    get name() {
        return this.inner.name;
    }
    /** Whether the native handle reports the table as open. */
    isOpen() {
        return this.inner.isOpen();
    }
    /** Close the native handle. */
    close() {
        this.inner.close();
    }
    /** Description of the table, as produced by the native handle. */
    display() {
        return this.inner.display();
    }
    /**
     * Resolve the embedding functions described by the table's schema metadata.
     *
     * @returns the result of the embedding registry's `parseFunctions` applied
     *   to the schema metadata
     */
    async getEmbeddingFunctions() {
        const schema = await this.schema();
        const registry = (0, registry_1.getRegistry)();
        return registry.parseFunctions(schema.metadata);
    }
    /** Get the schema of the table. */
    async schema() {
        // The native handle returns an Arrow IPC buffer; decode it and keep
        // only the schema of the resulting table.
        const schemaBuf = await this.inner.schema();
        const tbl = (0, arrow_1.tableFromIPC)(schemaBuf);
        return tbl.schema;
    }
    /**
     * Insert records into the table.
     *
     * @param data - Records to insert; serialized against the table's current schema.
     * @param options - Optional settings; `options.mode` defaults to `"append"`.
     * @returns the result of the native `add` call
     */
    async add(data, options) {
        const mode = options?.mode ?? "append";
        const schema = await this.schema();
        const buffer = await (0, arrow_1.fromDataToBuffer)(data, undefined, schema);
        return await this.inner.add(buffer, mode);
    }
    /**
     * Update rows in the table.
     *
     * Accepted call shapes:
     * - `update(Map | object, { where })` - column name to SQL expression.
     * - `update({ values, where })` - `values` (Map or object) holds literal
     *   values, which are converted to SQL with `toSQL`.
     * - `update({ valuesSql, where })` - `valuesSql` (Map or object) holds SQL
     *   expressions, which are passed through unchanged.
     *
     * A `values` / `valuesSql` property whose value is a string is treated as an
     * ordinary column update (a column that happens to have that name).
     *
     * @param optsOrUpdates - One of the shapes described above.
     * @param options - Only consulted for `where` when the first argument is the
     *   column map / object itself.
     * @returns the result of the native `update` call
     */
    async update(optsOrUpdates, options) {
        // Normalize a Map or a plain object to an array of [key, value] pairs.
        const entriesOf = (source) => source instanceof Map
            ? Array.from(source.entries())
            : Object.entries(source);
        let predicate;
        let columns;
        if (optsOrUpdates instanceof Map) {
            columns = entriesOf(optsOrUpdates);
            predicate = options?.where;
        }
        else if ("values" in optsOrUpdates &&
            typeof optsOrUpdates.values !== "string") {
            columns = entriesOf(optsOrUpdates.values).map(([k, v]) => [
                k,
                (0, util_1.toSQL)(v),
            ]);
            predicate = optsOrUpdates.where;
        }
        else if ("valuesSql" in optsOrUpdates &&
            typeof optsOrUpdates.valuesSql !== "string") {
            columns = entriesOf(optsOrUpdates.valuesSql);
            predicate = optsOrUpdates.where;
        }
        else {
            columns = Object.entries(optsOrUpdates);
            predicate = options?.where;
        }
        return await this.inner.update(predicate, columns);
    }
    /**
     * Count the rows in the table.
     *
     * @param filter - Optional filter forwarded to the native handle.
     */
    async countRows(filter) {
        return await this.inner.countRows(filter);
    }
    /**
     * Delete the rows matching `predicate`.
     *
     * @param predicate - Filter forwarded to the native handle.
     */
    async delete(predicate) {
        return await this.inner.delete(predicate);
    }
    /**
     * Create an index on `column`.
     *
     * @param column - Column to index.
     * @param options - Optional settings; `config.inner`, `replace`,
     *   `waitTimeoutSeconds`, `name` and `train` are forwarded to the native handle.
     */
    async createIndex(column, options) {
        // Bit of a hack to get around the fact that TS has no package-scope.
        // biome-ignore lint/suspicious/noExplicitAny: skip
        const nativeIndex = options?.config?.inner;
        await this.inner.createIndex(nativeIndex, column, options?.replace, options?.waitTimeoutSeconds, options?.name, options?.train);
    }
    /** Drop the index called `name`. */
    async dropIndex(name) {
        await this.inner.dropIndex(name);
    }
    /** Ask the native handle to prewarm the index called `name`. */
    async prewarmIndex(name) {
        await this.inner.prewarmIndex(name);
    }
    /**
     * Wait for the given indices via the native handle.
     *
     * @param indexNames - Names of the indices to wait for.
     * @param timeoutSeconds - Timeout forwarded to the native handle.
     */
    async waitForIndex(indexNames, timeoutSeconds) {
        await this.inner.waitForIndex(indexNames, timeoutSeconds);
    }
    /**
     * Build a query that takes rows by offset.
     *
     * @param offsets - Row offsets, forwarded unchanged.
     * @returns a `TakeQuery` wrapping the native query
     */
    takeOffsets(offsets) {
        return new query_1.TakeQuery(this.inner.takeOffsets(offsets));
    }
    /**
     * Build a query that takes rows by row id.
     *
     * @param rowIds - Row ids as `bigint` or `number`; numbers are converted to `bigint`.
     * @returns a `TakeQuery` wrapping the native query
     * @throws {Error} if an id is negative, or is a number that is not an
     *   integer or not a safe integer
     */
    takeRowIds(rowIds) {
        const ids = rowIds.map((id) => {
            if (typeof id === "bigint") {
                // Apply the same sign check to both accepted id types.
                if (id < 0n) {
                    throw new Error("Row id cannot be negative");
                }
                return id;
            }
            if (!Number.isInteger(id)) {
                throw new Error("Row id must be an integer (or bigint)");
            }
            if (id < 0) {
                throw new Error("Row id cannot be negative");
            }
            if (!Number.isSafeInteger(id)) {
                throw new Error("Row id is too large for number; use bigint instead");
            }
            return BigInt(id);
        });
        return new query_1.TakeQuery(this.inner.takeRowIds(ids));
    }
    /** Create a new `Query` over this table. */
    query() {
        return new query_1.Query(this.inner);
    }
    /**
     * Start a search, choosing between vector search and full text search.
     *
     * - A query that is neither a string nor a full text query object is used as
     *   a vector (and is rejected when `queryType` is `"fts"`).
     * - With `queryType === "fts"` a full text search is run.
     * - With `queryType === "auto"`, a full text search is run when the embedding
     *   registry is empty or the query is a full text query object.
     * - Otherwise the query is embedded with the first embedding function of the
     *   table and used for a nearest-neighbour search.
     *
     * @param query - Vector, string, or full text query object.
     * @param queryType - `"auto"` (default), `"fts"`, or another value selecting vector search.
     * @param ftsColumns - Columns passed to the full text search.
     * @throws {Error} if `queryType` is `"fts"` but the query is a vector
     */
    search(query, queryType = "auto", ftsColumns) {
        if (typeof query !== "string" && !(0, query_1.instanceOfFullTextQuery)(query)) {
            if (queryType === "fts") {
                throw new Error("Cannot perform full text search on a vector query");
            }
            return this.vectorSearch(query);
        }
        // If the query is a string, we need to determine if it is a vector query or a full text search query
        if (queryType === "fts") {
            return this.query().fullTextSearch(query, {
                columns: ftsColumns,
            });
        }
        // The query type is auto or vector
        // fall back to full text search if no embedding functions are defined and the query is a string
        if (queryType === "auto" &&
            ((0, registry_1.getRegistry)().length() === 0 || (0, query_1.instanceOfFullTextQuery)(query))) {
            return this.query().fullTextSearch(query, {
                columns: ftsColumns,
            });
        }
        // The embedding is computed asynchronously; the query accepts the
        // promise so that this method itself can stay synchronous.
        const queryPromise = this.getEmbeddingFunctions().then(async (functions) => {
            // TODO: Support multiple embedding functions
            const embeddingFunc = functions.values().next().value;
            if (!embeddingFunc) {
                throw new Error("No embedding functions are defined in the table");
            }
            return await embeddingFunc.function.computeQueryEmbeddings(query);
        });
        return this.query().nearestTo(queryPromise);
    }
    /**
     * Start a nearest-neighbour search for `vector`.
     *
     * @param vector - A single vector, or a multi-vector (as detected by
     *   `isMultiVector`) whose remaining vectors are added as extra query vectors.
     */
    vectorSearch(vector) {
        if ((0, arrow_1.isMultiVector)(vector)) {
            const [first, ...rest] = vector;
            const query = this.query().nearestTo(first);
            for (const v of rest) {
                query.addQueryVector(v);
            }
            return query;
        }
        return this.query().nearestTo(vector);
    }
    // TODO: Support BatchUDF
    /**
     * Add new columns to the table.
     *
     * @param newColumnTransforms - Column definitions forwarded to the native handle.
     */
    async addColumns(newColumnTransforms) {
        return await this.inner.addColumns(newColumnTransforms);
    }
    /**
     * Alter existing columns.
     *
     * Each alteration's `dataType` is normalized to a JSON string before it is
     * passed on: a string becomes `{"type": <string>}`, `undefined` stays
     * `undefined`, and anything else is sanitized and converted with
     * `dataTypeToJson`.
     *
     * @param columnAlterations - Alterations to apply.
     */
    async alterColumns(columnAlterations) {
        const processedAlterations = columnAlterations.map((alteration) => {
            if (typeof alteration.dataType === "string") {
                return {
                    ...alteration,
                    dataType: JSON.stringify({ type: alteration.dataType }),
                };
            }
            if (alteration.dataType === undefined) {
                return {
                    ...alteration,
                    dataType: undefined,
                };
            }
            const dataType = (0, sanitize_1.sanitizeType)(alteration.dataType);
            return {
                ...alteration,
                dataType: JSON.stringify((0, arrow_1.dataTypeToJson)(dataType)),
            };
        });
        return await this.inner.alterColumns(processedAlterations);
    }
    /**
     * Drop the named columns.
     *
     * @param columnNames - Names forwarded to the native handle.
     */
    async dropColumns(columnNames) {
        return await this.inner.dropColumns(columnNames);
    }
    /** Current version of the table, as reported by the native handle. */
    async version() {
        return await this.inner.version();
    }
    /**
     * Check out a specific version of the table.
     *
     * @param version - A string is treated as a tag name; anything else is
     *   passed to the native `checkout` as a version.
     */
    async checkout(version) {
        if (typeof version === "string") {
            return this.inner.checkoutTag(version);
        }
        return this.inner.checkout(version);
    }
    /** Check out the latest version of the table. */
    async checkoutLatest() {
        await this.inner.checkoutLatest();
    }
    /**
     * List the versions of the table.
     *
     * @returns one `{ version, timestamp, metadata }` object per native entry,
     *   with `timestamp` converted to a `Date`
     */
    async listVersions() {
        const nativeVersions = await this.inner.listVersions();
        return nativeVersions.map((version) => ({
            version: version.version,
            // The native timestamp is divided by 1000 to obtain the milliseconds
            // that `Date` expects (presumably it is in microseconds - confirm
            // against the native binding).
            timestamp: new Date(version.timestamp / 1000),
            metadata: version.metadata,
        }));
    }
    /** Ask the native handle to restore the table. */
    async restore() {
        await this.inner.restore();
    }
    /** Tags of the table, as returned by the native handle. */
    async tags() {
        return await this.inner.tags();
    }
    /**
     * Optimize the table.
     *
     * @param options - Optional settings. `cleanupOlderThan` (a `Date`) is
     *   converted to an age in milliseconds relative to now; `deleteUnverified`
     *   is forwarded unchanged.
     * @returns the result of the native `optimize` call
     */
    async optimize(options) {
        let cleanupOlderThanMs;
        if (options?.cleanupOlderThan !== undefined &&
            options?.cleanupOlderThan !== null) {
            cleanupOlderThanMs = Date.now() - options.cleanupOlderThan.getTime();
        }
        return await this.inner.optimize(cleanupOlderThanMs, options?.deleteUnverified);
    }
    /** List the indices of the table. */
    async listIndices() {
        return await this.inner.listIndices();
    }
    /** Run an unfiltered query and return its result as Arrow. */
    async toArrow() {
        return await this.query().toArrow();
    }
    /**
     * Statistics for the index called `name`.
     *
     * @returns the native statistics, or `undefined` when the native handle returns `null`
     */
    async indexStats(name) {
        const stats = await this.inner.indexStats(name);
        if (stats === null) {
            return undefined;
        }
        return stats;
    }
    /** Table statistics, as returned by the native handle. */
    async stats() {
        return await this.inner.stats();
    }
    /** Initial storage options, as returned by the native handle. */
    async initialStorageOptions() {
        return await this.inner.initialStorageOptions();
    }
    /** Latest storage options, as returned by the native handle. */
    async latestStorageOptions() {
        return await this.inner.latestStorageOptions();
    }
    /**
     * Start building a merge-insert operation.
     *
     * @param on - Column name, or array of column names, to join on.
     * @returns a `MergeInsertBuilder` given the native builder and a promise of
     *   the table schema
     */
    mergeInsert(on) {
        // Normalize without reassigning the caller-visible parameter.
        const onColumns = Array.isArray(on) ? on : [on];
        return new merge_1.MergeInsertBuilder(this.inner.mergeInsert(onColumns), this.schema());
    }
    /**
     * Check if the table uses the new manifest path scheme.
     *
     * This function will return true if the table uses the V2 manifest
     * path scheme.
     */
    async usesV2ManifestPaths() {
        return await this.inner.usesV2ManifestPaths();
    }
    /**
     * Migrate the table to use the new manifest path scheme.
     *
     * This function will rename all V1 manifests to V2 manifest paths.
     * These paths provide more efficient opening of datasets with many versions
     * on object stores.
     *
     * This function is idempotent, and can be run multiple times without
     * changing the state of the object store.
     *
     * However, it should not be run while other concurrent operations are happening.
     * And it should also run until completion before resuming other operations.
     */
    async migrateManifestPathsV2() {
        await this.inner.migrateManifestPathsV2();
    }
}
exports.LocalTable = LocalTable;