chromadb
Version:
A JavaScript interface for chroma
444 lines (400 loc) • 14.3 kB
text/typescript
import { EmbeddingFunctionConfiguration, SparseVector } from "./api";
import { ChromaValueError } from "./errors";
import { DefaultEmbeddingFunction } from "@chroma-core/default-embed";
import { ChromaClient } from "./chroma-client";
/**
* Supported vector space types.
*/
export type EmbeddingFunctionSpace = "cosine" | "l2" | "ip";
/**
* Interface for embedding functions.
* Embedding functions transform text documents into numerical representations
* that can be used for similarity search and other vector operations.
*/
export interface EmbeddingFunction {
/**
* Generates embeddings for the given texts.
* @param texts - Array of text strings to embed
* @returns Promise resolving to array of embedding vectors
*/
generate(texts: string[]): Promise<number[][]>;
/**
* Generates embeddings specifically for query texts.
* The client will fall back to using the implementation of `generate`
* if this function is not provided.
* @param texts - Array of query text strings to embed
* @returns Promise resolving to array of embedding vectors
*/
generateForQueries?(texts: string[]): Promise<number[][]>;
/** Optional name identifier for the embedding function */
name?: string;
/** Returns the default vector space for this embedding function */
defaultSpace?(): EmbeddingFunctionSpace;
/** Returns all supported vector spaces for this embedding function */
supportedSpaces?(): EmbeddingFunctionSpace[];
/** Creates an instance from configuration object */
buildFromConfig?(
config: Record<string, any>,
client?: ChromaClient,
): EmbeddingFunction;
/** Returns the current configuration as an object */
getConfig?(): Record<string, any>;
/**
* Validates that a configuration update is allowed.
* @param newConfig - New configuration to validate
*/
validateConfigUpdate?(newConfig: Record<string, any>): void;
/**
* Validates that a configuration object is valid.
* @param config - Configuration to validate
*/
validateConfig?(config: Record<string, any>): void;
}
/**
* Interface for sparse embedding functions.
* Sparse embedding functions transform text documents into sparse numerical representations
* where only non-zero values are stored, making them efficient for high-dimensional spaces.
*/
export interface SparseEmbeddingFunction {
/**
* Generates sparse embeddings for the given texts.
* @param texts - Array of text strings to embed
* @returns Promise resolving to array of sparse vectors
*/
generate(texts: string[]): Promise<SparseVector[]>;
/**
* Generates sparse embeddings specifically for query texts.
* The client will fall back to using the implementation of `generate`
* if this function is not provided.
* @param texts - Array of query text strings to embed
* @returns Promise resolving to array of sparse vectors
*/
generateForQueries?(texts: string[]): Promise<SparseVector[]>;
/** Optional name identifier for the embedding function */
name?: string;
/** Creates an instance from configuration object */
buildFromConfig?(
config: Record<string, any>,
client?: ChromaClient,
): SparseEmbeddingFunction;
/** Returns the current configuration as an object */
getConfig?(): Record<string, any>;
/**
* Validates that a configuration update is allowed.
* @param newConfig - New configuration to validate
*/
validateConfigUpdate?(newConfig: Record<string, any>): void;
/**
* Validates that a configuration object is valid.
* @param config - Configuration to validate
*/
validateConfig?(config: Record<string, any>): void;
}
/**
* Interface for embedding function constructor classes.
* Used for registering and instantiating embedding functions.
*/
export interface EmbeddingFunctionClass {
/** Constructor for creating new instances */
new (...args: any[]): EmbeddingFunction;
/** Name identifier for the embedding function */
name: string;
/** Static method to build instance from configuration */
buildFromConfig(
config: Record<string, any>,
client?: ChromaClient,
): EmbeddingFunction;
}
/**
* Interface for sparse embedding function constructor classes.
* Used for registering and instantiating sparse embedding functions.
*/
export interface SparseEmbeddingFunctionClass {
/** Constructor for creating new instances */
new (...args: any[]): SparseEmbeddingFunction;
/** Name identifier for the embedding function */
name: string;
/** Static method to build instance from configuration */
buildFromConfig(
config: Record<string, any>,
client?: ChromaClient,
): SparseEmbeddingFunction;
}
/**
* Registry of available embedding functions.
* Maps function names to their constructor classes.
*/
export const knownEmbeddingFunctions = new Map<
string,
EmbeddingFunctionClass
>();
const pythonEmbeddingFunctions: Record<string, string> = {
onnx_mini_lm_l6_v2: "default-embed",
default: "default-embed",
together_ai: "together-ai",
sentence_transformer: "sentence-transformer",
};
const unsupportedEmbeddingFunctions: Set<string> = new Set([
"amazon_bedrock",
"baseten",
"langchain",
"google_palm",
"huggingface",
"instructor",
"open_clip",
"roboflow",
"text2vec",
]);
const chromaCloudEmbeddingFunctions: Set<string> = new Set([
"chroma-cloud-splade",
"chroma-cloud-qwen",
]);
/**
* Registry of available sparse embedding functions.
* Maps function names to their constructor classes.
*/
export const knownSparseEmbeddingFunctions = new Map<
string,
SparseEmbeddingFunctionClass
>();
const pythonSparseEmbeddingFunctions: Record<string, string> = {
chroma_bm25: "chroma-bm25",
};
const unsupportedSparseEmbeddingFunctions: Set<string> = new Set([
"bm25",
"fastembed_sparse",
"huggingface_sparse",
]);
/**
* Union type covering both dense and sparse embedding functions.
*/
export type AnyEmbeddingFunction = EmbeddingFunction | SparseEmbeddingFunction;
/**
* Registers an embedding function in the global registry.
* @param name - Unique name for the embedding function
* @param fn - Embedding function class to register
* @throws ChromaValueError if name is already registered
*/
export const registerEmbeddingFunction = (
name: string,
fn: EmbeddingFunctionClass,
) => {
if (knownEmbeddingFunctions.has(name)) {
throw new ChromaValueError(
`Embedding function with name ${name} is already registered.`,
);
}
knownEmbeddingFunctions.set(name, fn);
};
/**
* Registers a sparse embedding function in the global registry.
* @param name - Unique name for the sparse embedding function
* @param fn - Sparse embedding function class to register
* @throws ChromaValueError if name is already registered
*/
export const registerSparseEmbeddingFunction = (
name: string,
fn: SparseEmbeddingFunctionClass,
) => {
if (knownSparseEmbeddingFunctions.has(name)) {
throw new ChromaValueError(
`Sparse embedding function with name ${name} is already registered.`,
);
}
knownSparseEmbeddingFunctions.set(name, fn);
};
/**
* Retrieves and instantiates an embedding function from configuration.
* @returns EmbeddingFunction instance or undefined if it cannot be constructed
*/
export const getEmbeddingFunction = async (args: {
collectionName: string;
client: ChromaClient;
efConfig?: EmbeddingFunctionConfiguration;
}) => {
const { collectionName, client, efConfig } = args;
if (!efConfig) {
console.warn(
`No embedding function configuration found for collection ${collectionName}. 'add' and 'query' will fail unless you provide them embeddings directly.`,
);
return undefined;
}
if (efConfig.type === "legacy") {
console.warn(
`No embedding function configuration found for collection ${collectionName}. 'add' and 'query' will fail unless you provide them embeddings directly.`,
);
return undefined;
}
if (efConfig.type === "unknown") {
console.warn(
`Unknown embedding function configuration for collection ${collectionName}. 'add' and 'query' will fail unless you provide them embeddings directly.`,
);
return undefined;
}
if (efConfig.type !== "known") {
return undefined;
}
if (unsupportedEmbeddingFunctions.has(efConfig.name)) {
console.warn(
`Embedding function ${efConfig.name} is not supported in the JS/TS SDK. 'add' and 'query' will fail unless you provide them embeddings directly.`,
);
return undefined;
}
const packageName = pythonEmbeddingFunctions[efConfig.name] || efConfig.name;
if (packageName === "default-embed") {
await getDefaultEFConfig();
}
let embeddingFunction = knownEmbeddingFunctions.get(packageName);
if (!embeddingFunction) {
try {
const fullPackageName = `@chroma-core/${packageName}`;
await import(fullPackageName);
embeddingFunction = knownEmbeddingFunctions.get(packageName);
} catch (error) {
// Dynamic loading failed, proceed with warning
}
if (!embeddingFunction) {
console.warn(
`Collection ${collectionName} was created with the ${packageName} embedding function. However, the @chroma-core/${packageName} package is not installed. 'add' and 'query' will fail unless you provide them embeddings directly, or install the @chroma-core/${packageName} package.`,
);
return undefined;
}
}
let constructorConfig: Record<string, any> =
efConfig.type === "known" ? (efConfig.config as Record<string, any>) : {};
try {
if (embeddingFunction.buildFromConfig) {
return embeddingFunction.buildFromConfig(constructorConfig, client);
}
console.warn(
`Embedding function ${packageName} does not define a 'buildFromConfig' function. 'add' and 'query' will fail unless you provide them embeddings directly.`,
);
return undefined;
} catch (e) {
console.warn(
`Embedding function ${packageName} failed to build with config: ${constructorConfig}. 'add' and 'query' will fail unless you provide them embeddings directly. Error: ${e}`,
);
return undefined;
}
};
/**
* Retrieves and instantiates a sparse embedding function from configuration.
* @returns SparseEmbeddingFunction instance or undefined if it cannot be constructed
*/
export const getSparseEmbeddingFunction = async (
collectionName: string,
client: ChromaClient,
efConfig?: EmbeddingFunctionConfiguration,
) => {
if (!efConfig) {
return undefined;
}
if (efConfig.type === "legacy") {
return undefined;
}
if (efConfig.type !== "known") {
return undefined;
}
if (unsupportedSparseEmbeddingFunctions.has(efConfig.name)) {
console.warn(
"Embedding function ${efConfig.name} is not supported in the JS/TS SDK. 'add' and 'query' will fail unless you provide them embeddings directly.",
);
return undefined;
}
const packageName =
pythonSparseEmbeddingFunctions[efConfig.name] || efConfig.name;
let sparseEmbeddingFunction = knownSparseEmbeddingFunctions.get(packageName);
if (!sparseEmbeddingFunction) {
try {
const fullPackageName = `@chroma-core/${packageName}`;
await import(fullPackageName);
sparseEmbeddingFunction = knownSparseEmbeddingFunctions.get(packageName);
} catch (error) {
// Dynamic loading failed, proceed with warning
}
if (!sparseEmbeddingFunction) {
console.warn(
`Collection ${collectionName} was created with the ${packageName} sparse embedding function. However, the @chroma-core/${packageName} package is not installed.`,
);
return undefined;
}
}
let constructorConfig: Record<string, any> =
efConfig.type === "known" ? (efConfig.config as Record<string, any>) : {};
try {
if (sparseEmbeddingFunction.buildFromConfig) {
return sparseEmbeddingFunction.buildFromConfig(constructorConfig, client);
}
console.warn(
`Sparse embedding function ${packageName} does not define a 'buildFromConfig' function.`,
);
return undefined;
} catch (e) {
console.warn(
`Sparse embedding function ${packageName} failed to build with config: ${constructorConfig}. Error: ${e}`,
);
return undefined;
}
};
/**
* Serializes an embedding function to configuration format.
* @param embeddingFunction - User provided embedding function
* @param configEmbeddingFunction - Collection config embedding function
* @returns Configuration object that can recreate the function
*/
export const serializeEmbeddingFunction = ({
embeddingFunction,
configEmbeddingFunction,
}: {
embeddingFunction?: EmbeddingFunction;
configEmbeddingFunction?: EmbeddingFunction;
}): EmbeddingFunctionConfiguration | undefined => {
if (embeddingFunction && configEmbeddingFunction) {
throw new ChromaValueError(
"Embedding function provided when already defined in the collection configuration",
);
}
if (!embeddingFunction && !configEmbeddingFunction) {
return undefined;
}
const ef = embeddingFunction || configEmbeddingFunction!;
if (
!ef.getConfig ||
!ef.name ||
!(ef.constructor as EmbeddingFunctionClass).buildFromConfig
) {
return { type: "legacy" };
}
if (ef.validateConfig) ef.validateConfig(ef.getConfig());
return {
name: ef.name,
type: "known",
config: ef.getConfig(),
};
};
/**
* Gets the configuration for the default embedding function.
* Dynamically imports and registers the default embedding function if needed.
* @returns Promise resolving to default embedding function configuration
* @throws Error if default embedding function cannot be loaded
*/
export const getDefaultEFConfig =
async (): Promise<EmbeddingFunctionConfiguration> => {
try {
const { DefaultEmbeddingFunction } = await import(
"@chroma-core/default-embed"
);
if (!knownEmbeddingFunctions.has("default-embed")) {
registerEmbeddingFunction("default-embed", DefaultEmbeddingFunction);
}
} catch (e) {
console.warn(
"Cannot instantiate a collection with the DefaultEmbeddingFunction. Please install @chroma-core/default-embed, or provide a different embedding function",
);
}
return {
name: "default",
type: "known",
config: {},
};
};