UNPKG

@lancedb/lancedb

Version:

LanceDB: A serverless, low-latency vector database for AI applications

166 lines (165 loc) 5.64 kB
"use strict"; // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors Object.defineProperty(exports, "__esModule", { value: true }); exports.EmbeddingFunctionRegistry = void 0; exports.register = register; exports.getRegistry = getRegistry; require("reflect-metadata"); /** * This is a singleton class used to register embedding functions * and fetch them by name. It also handles serializing and deserializing. * You can implement your own embedding function by subclassing EmbeddingFunction * or TextEmbeddingFunction and registering it with the registry */ class EmbeddingFunctionRegistry { #functions = new Map(); #variables = new Map(); /** * Get the number of registered functions */ length() { return this.#functions.size; } /** * Register an embedding function * @throws Error if the function is already registered */ register(alias) { const self = this; return function (ctor) { if (!alias) { alias = ctor.name; } if (self.#functions.has(alias)) { throw new Error(`Embedding function with alias "${alias}" already exists`); } self.#functions.set(alias, ctor); Reflect.defineMetadata("lancedb::embedding::name", alias, ctor); return ctor; }; } /** * Fetch an embedding function by name * @param name The name of the function */ get(name) { const factory = this.#functions.get(name); if (!factory) { // biome-ignore lint/suspicious/noExplicitAny: <explanation> return undefined; } // biome-ignore lint/suspicious/noExplicitAny: <explanation> let create; if (factory.prototype.init) { // biome-ignore lint/suspicious/noExplicitAny: <explanation> create = async function (options) { const instance = new factory(options); await instance.init(); return instance; }; } else { // biome-ignore lint/suspicious/noExplicitAny: <explanation> create = (options) => new factory(options); } return { create, }; } /** * reset the registry to the initial state */ reset() { this.#functions.clear(); } /** * @ignore */ async parseFunctions(metadata) { if (!metadata.has("embedding_functions")) { return new Map(); } else { const functions = (JSON.parse(metadata.get("embedding_functions"))); const items = await Promise.all(functions.map(async (f) => { const fn = this.get(f.name); if (!fn) { throw new Error(`Function "${f.name}" not found in registry`); } const func = await this.get(f.name).create(f.model); return [ f.name, { sourceColumn: f.sourceColumn, vectorColumn: f.vectorColumn, function: func, }, ]; })); return new Map(items); } } // biome-ignore lint/suspicious/noExplicitAny: <explanation> functionToMetadata(conf) { // biome-ignore lint/suspicious/noExplicitAny: <explanation> const metadata = {}; const name = Reflect.getMetadata("lancedb::embedding::name", conf.function.constructor); metadata["sourceColumn"] = conf.sourceColumn; metadata["vectorColumn"] = conf.vectorColumn ?? "vector"; metadata["name"] = name ?? conf.function.constructor.name; metadata["model"] = conf.function.toJSON(); return metadata; } getTableMetadata(functions) { const metadata = new Map(); const jsonData = functions.map((conf) => this.functionToMetadata(conf)); metadata.set("embedding_functions", JSON.stringify(jsonData)); return metadata; } /** * Set a variable. These can be accessed in the embedding function * configuration using the syntax `$var:variable_name`. If they are not * set, an error will be thrown letting you know which key is unset. If you * want to supply a default value, you can add an additional part in the * configuration like so: `$var:variable_name:default_value`. Default values * can be used for runtime configurations that are not sensitive, such as * whether to use a GPU for inference. * * The name must not contain colons. The default value can contain colons. * * @param name * @param value */ setVar(name, value) { if (name.includes(":")) { throw new Error("Variable names cannot contain colons"); } this.#variables.set(name, value); } /** * Get a variable. * @param name * @returns * @see {@link setVar} */ getVar(name) { return this.#variables.get(name); } } exports.EmbeddingFunctionRegistry = EmbeddingFunctionRegistry; const _REGISTRY = new EmbeddingFunctionRegistry(); function register(name) { return _REGISTRY.register(name); } /** * Utility function to get the global instance of the registry * @returns `EmbeddingFunctionRegistry` The global instance of the registry * @example * ```ts * const registry = getRegistry(); * const openai = registry.get("openai").create(); */ function getRegistry() { return _REGISTRY; }