@lancedb/lancedb

Version:

LanceDB: A serverless, low-latency vector database for AI applications

37 lines (36 loc) • 1.37 kB

TypeScript

import { Float } from "../arrow";
import { EmbeddingFunction } from "./embedding_function";

/**
 * Configuration accepted by {@link TransformersEmbeddingFunction}.
 */
export type XenovaTransformerOptions = {
  /** Name of the wasm-compatible model to use. */
  model: string;

  /**
   * Name of the wasm-compatible tokenizer to use.
   *
   * When omitted, the default tokenizer for the model is used.
   */
  tokenizer?: string;

  /**
   * Dimensionality of the produced embeddings.
   *
   * When omitted, an attempt is made to infer it from the model config.
   * There is no standard way to read this value from a model, so set it
   * explicitly for models whose config has no 'hidden_size' entry.
   */
  ndims?: number;

  /**
   * Options for the tokenizer.
   *
   * NOTE(review): the individual fields carry no documentation in this
   * declaration; their exact meaning is presumably defined by the underlying
   * tokenizer — confirm against the implementation.
   */
  tokenizerOptions?: {
    textPair?: string | string[];
    padding?: boolean | "max_length";
    addSpecialTokens?: boolean;
    truncation?: boolean;
    maxLength?: number;
  };
};

/**
 * Ambient declaration of an embedding function over `string` inputs,
 * configured with a partial {@link XenovaTransformerOptions}.
 *
 * Only the signatures are declared here; no implementation is part of this
 * declaration.
 */
export declare class TransformersEmbeddingFunction extends EmbeddingFunction<
  string,
  Partial<XenovaTransformerOptions>
> {
  #private;

  /**
   * @param optionsRaw - Optional configuration; every field may be omitted.
   */
  constructor(optionsRaw?: Partial<XenovaTransformerOptions>);

  /**
   * Asynchronous initialization step.
   *
   * NOTE(review): presumably loads the model and tokenizer and should be
   * awaited before computing embeddings — confirm against the implementation.
   *
   * @returns A promise that resolves with no value.
   */
  init(): Promise<void>;

  /**
   * @returns A number; presumably the embedding dimensionality described by
   * the `ndims` option — confirm against the implementation.
   */
  ndims(): number;

  /**
   * @returns The `Float` type (from `../arrow`) describing the embedding
   * element data type.
   */
  embeddingDataType(): Float;

  /**
   * Computes embeddings for a batch of source strings.
   *
   * @param data - The strings to embed.
   * @returns A promise of an array of numeric vectors (presumably one per
   * input string, in input order — confirm).
   */
  computeSourceEmbeddings(data: string[]): Promise<number[][]>;

  /**
   * Computes the embedding for a single query string.
   *
   * @param data - The query string to embed.
   * @returns A promise of a single numeric vector.
   */
  computeQueryEmbeddings(data: string): Promise<number[]>;
}