UNPKG

chromadb

Version:

A JavaScript interface for chroma

581 lines (519 loc) 15.8 kB
import { AdminClientArgs } from "./admin-client";
import { ChromaClientArgs } from "./chroma-client";
import {
  BaseRecordSet,
  IncludeEnum,
  Metadata,
  RecordSet,
  recordSetFields,
  Where,
  WhereDocument,
} from "./types";
import { Include } from "./api";
import { ChromaValueError } from "./errors";

/** Default tenant name used when none is specified */
export const DEFAULT_TENANT = "default_tenant";

/** Default database name used when none is specified */
export const DEFAULT_DATABASE = "default_database";

/** Default configuration for AdminClient connections */
export const defaultAdminClientArgs: AdminClientArgs = {
  host: "localhost",
  port: 8000,
  ssl: false,
};

/** Default configuration for ChromaClient connections */
export const defaultChromaClientArgs: ChromaClientArgs = {
  ...defaultAdminClientArgs,
  tenant: DEFAULT_TENANT,
  database: DEFAULT_DATABASE,
};

/**
 * Supported HTTP methods for API requests.
 */
export type HttpMethod =
  | "GET"
  | "POST"
  | "PUT"
  | "DELETE"
  | "HEAD"
  | "CONNECT"
  | "OPTIONS"
  | "PATCH"
  | "TRACE"
  | undefined;

/**
 * Normalizes HTTP method strings to standard uppercase format.
 * Matching is case-insensitive; a missing, empty, or unrecognized method
 * yields `undefined`.
 * @param method - HTTP method string to normalize
 * @returns Normalized HttpMethod or undefined if invalid
 */
export const normalizeMethod = (method?: string): HttpMethod => {
  if (!method) {
    return undefined;
  }

  switch (method.toUpperCase()) {
    case "GET":
      return "GET";
    case "POST":
      return "POST";
    case "PUT":
      return "PUT";
    case "DELETE":
      return "DELETE";
    case "HEAD":
      return "HEAD";
    case "CONNECT":
      return "CONNECT";
    case "OPTIONS":
      return "OPTIONS";
    case "PATCH":
      return "PATCH";
    case "TRACE":
      return "TRACE";
    default:
      return undefined;
  }
};

/**
 * Validates that all arrays in a RecordSet have consistent lengths.
 * Only keys listed in `recordSetFields` whose value is not `undefined`
 * take part in the comparison.
 * @param recordSet - The record set to validate
 * @throws ChromaValueError if no field is provided, if any provided field is
 * empty, or if the provided fields have differing lengths
 */
export const validateRecordSetLengthConsistency = (recordSet: RecordSet) => {
  const lengths: [string, number][] = Object.entries(recordSet)
    .filter(
      ([field, value]) =>
        recordSetFields.includes(field) && value !== undefined,
    )
    .map(([field, value]) => [field, value.length]);

  if (lengths.length === 0) {
    throw new ChromaValueError(
      `At least one of ${recordSetFields.join(", ")} must be provided`,
    );
  }

  const zeroLength = lengths
    .filter(([_, length]) => length === 0)
    .map(([field, _]) => field);
  if (zeroLength.length > 0) {
    throw new ChromaValueError(
      `Non-empty lists are required for ${zeroLength.join(", ")}`,
    );
  }

  if (new Set(lengths.map(([_, length]) => length)).size > 1) {
    throw new ChromaValueError(
      `Unequal lengths for fields ${lengths
        .map(([field, _]) => field)
        .join(", ")}`,
    );
  }
};

/**
 * Validates a list of embeddings: it must be a non-empty array whose items
 * are non-empty arrays of numbers.
 * @param options.embeddings - The embeddings to validate
 * @param options.fieldName - Field name used in the "not an array" message
 * @throws ChromaValueError if validation fails
 */
const validateEmbeddings = ({
  embeddings,
  fieldName = "embeddings",
}: {
  embeddings: number[][];
  fieldName?: string;
}) => {
  if (!Array.isArray(embeddings)) {
    throw new ChromaValueError(
      `Expected '${fieldName}' to be an array, but got ${typeof embeddings}`,
    );
  }

  if (embeddings.length === 0) {
    throw new ChromaValueError(
      "Expected embeddings to be an array with at least one item",
    );
  }

  // `every` (not `filter`): a filtered array is always truthy, so the
  // previous `!embeddings.filter(...)` form could never reject anything.
  const allNumeric = embeddings.every(
    (embedding: unknown) =>
      Array.isArray(embedding) &&
      embedding.every((n: unknown) => typeof n === "number"),
  );
  if (!allNumeric) {
    throw new ChromaValueError(
      "Expected each embedding to be an array of numbers",
    );
  }

  embeddings.forEach((embedding, i) => {
    if (embedding.length === 0) {
      throw new ChromaValueError(
        `Expected each embedding to be a non-empty array of numbers, but got an empty array at index ${i}`,
      );
    }
  });
};

/**
 * Validates a list of documents: it must be a non-empty array of strings.
 * When `nullable` is true, `null` and `undefined` entries are also accepted.
 * @param options.documents - The documents to validate
 * @param options.nullable - Whether null/undefined entries are allowed
 * @param options.fieldName - Field name used in error messages
 * @throws ChromaValueError if validation fails
 */
const validateDocuments = ({
  documents,
  nullable = false,
  fieldName = "documents",
}: {
  documents: (string | null | undefined)[];
  fieldName?: string;
  nullable?: boolean;
}) => {
  if (!Array.isArray(documents)) {
    throw new ChromaValueError(
      `Expected '${fieldName}' to be an array, but got ${typeof documents}`,
    );
  }

  if (documents.length === 0) {
    throw new ChromaValueError(
      `Expected '${fieldName}' to be a non-empty list`,
    );
  }

  documents.forEach((document: unknown) => {
    const isAllowedNullish =
      nullable && (document === null || document === undefined);
    // Reject every non-string entry (not only falsy ones), so values such as
    // numbers or objects no longer slip through.
    if (typeof document !== "string" && !isAllowedNullish) {
      throw new ChromaValueError(
        `Expected each document to be a string, but got ${typeof document}`,
      );
    }
  });
};

/**
 * Validates an array of IDs for type correctness and uniqueness.
 * @param ids - Array of ID strings to validate
 * @throws ChromaValueError if `ids` is not an array, is empty, contains
 * non-string entries, or contains duplicates
 */
export const validateIDs = (ids: string[]) => {
  if (!Array.isArray(ids)) {
    throw new ChromaValueError(
      `Expected 'ids' to be an array, but got ${typeof ids}`,
    );
  }

  if (ids.length === 0) {
    throw new ChromaValueError("Expected 'ids' to be a non-empty list");
  }

  const nonStrings = ids
    .map((id, i) => [id, i] as [unknown, number])
    .filter(([id, _]) => typeof id !== "string")
    .map(([_, i]) => i);
  if (nonStrings.length > 0) {
    throw new ChromaValueError(
      `Found non-string IDs at ${nonStrings.join(", ")}`,
    );
  }

  const seen = new Set<string>();
  const duplicates = ids.filter((id) => {
    if (seen.has(id)) {
      // Return an explicit boolean: returning the id itself would drop a
      // duplicated empty-string id because "" is falsy.
      return true;
    }
    seen.add(id);
    return false;
  });

  const message = "Expected IDs to be unique, but found duplicates of";
  if (duplicates.length > 0 && duplicates.length <= 5) {
    throw new ChromaValueError(`${message} ${duplicates.join(", ")}`);
  }
  if (duplicates.length > 0) {
    // Long lists are abbreviated to the first and last five duplicates.
    throw new ChromaValueError(
      `${message} ${duplicates.slice(0, 5).join(", ")}, ..., ${duplicates
        .slice(duplicates.length - 5)
        .join(", ")}`,
    );
  }
};

/**
 * Validates metadata object for correct types and non-emptiness.
 * A missing (falsy) metadata value is accepted without further checks.
 * @param metadata - Metadata object to validate
 * @throws ChromaValueError if metadata has no keys or holds a value that is
 * not a string, number, boolean, null, or undefined
 */
export const validateMetadata = (metadata?: Metadata) => {
  if (!metadata) {
    return;
  }

  if (Object.keys(metadata).length === 0) {
    throw new ChromaValueError("Expected metadata to be non-empty");
  }

  const allValuesSupported = Object.values(metadata).every(
    (v: unknown) =>
      v === null ||
      v === undefined ||
      typeof v === "string" ||
      typeof v === "number" ||
      typeof v === "boolean",
  );
  if (!allValuesSupported) {
    throw new ChromaValueError(
      "Expected metadata to be a string, number, boolean, or nullable",
    );
  }
};

/**
 * Validates every entry of a metadata list with `validateMetadata`.
 * @param metadatas - Metadata objects to validate
 * @throws ChromaValueError if `metadatas` is not an array or any entry is
 * invalid
 */
const validateMetadatas = (metadatas: Metadata[]) => {
  if (!Array.isArray(metadatas)) {
    throw new ChromaValueError(
      `Expected metadatas to be an array, but got ${typeof metadatas}`,
    );
  }

  metadatas.forEach((metadata) => validateMetadata(metadata));
};

/**
 * Validates a base record set for required fields and data consistency.
 * @param options - Validation options
 * @param options.recordSet - The record set to validate
 * @param options.update - Whether this is for an update operation (relaxes requirements)
 * @param options.embeddingsField - Name of the embeddings field for error messages
 * @param options.documentsField - Name of the documents field for error messages
 * @throws ChromaValueError if validation fails
 */
export const validateBaseRecordSet = ({
  recordSet,
  update = false,
  embeddingsField = "embeddings",
  documentsField = "documents",
}: {
  recordSet: BaseRecordSet;
  update?: boolean;
  embeddingsField?: string;
  documentsField?: string;
}) => {
  // Outside of updates, a record needs embeddings or documents.
  if (!recordSet.embeddings && !recordSet.documents && !update) {
    throw new ChromaValueError(
      `At least one of '${embeddingsField}' and '${documentsField}' must be provided`,
    );
  }

  if (recordSet.embeddings) {
    validateEmbeddings({
      embeddings: recordSet.embeddings,
      fieldName: embeddingsField,
    });
  }

  if (recordSet.documents) {
    validateDocuments({
      documents: recordSet.documents,
      fieldName: documentsField,
    });
  }

  if (recordSet.metadatas) {
    validateMetadatas(recordSet.metadatas);
  }
};
/**
 * Validates that a record set does not exceed the maximum batch size.
 * @param recordSetLength - Number of records about to be sent
 * @param maxBatchSize - Largest number of records allowed in one batch
 * @throws ChromaValueError if `recordSetLength` is greater than `maxBatchSize`
 */
export const validateMaxBatchSize = (
  recordSetLength: number,
  maxBatchSize: number,
) => {
  if (recordSetLength > maxBatchSize) {
    throw new ChromaValueError(
      `Record set length ${recordSetLength} exceeds max batch size ${maxBatchSize}`,
    );
  }
};

/**
 * Validates a where clause for metadata filtering.
 * The clause must be an object with exactly one key: either `$and`/`$or`
 * holding a list of at least two nested where clauses, or a field name
 * mapped to a literal or to a single-operator expression.
 * @param where - Where clause object to validate
 * @throws ChromaValueError if the where clause is malformed
 */
export const validateWhere = (where: Where) => {
  // `typeof null` is "object", so null has to be rejected explicitly before
  // Object.entries is called on it.
  if (typeof where !== "object" || where === null) {
    throw new ChromaValueError("Expected where to be a non-empty object");
  }

  const entries = Object.entries(where);
  if (entries.length !== 1) {
    throw new ChromaValueError(
      `Expected 'where' to have exactly one operator, but got ${entries.length}`,
    );
  }

  for (const [key, value] of entries) {
    if (
      key !== "$and" &&
      key !== "$or" &&
      key !== "$in" &&
      key !== "$nin" &&
      !["string", "number", "boolean", "object"].includes(typeof value)
    ) {
      throw new ChromaValueError(
        `Expected 'where' value to be a string, number, boolean, or an operator expression, but got ${value}`,
      );
    }

    if (key === "$and" || key === "$or") {
      // Require a real array so malformed input raises a ChromaValueError
      // and not a TypeError from `.forEach`.
      if (!Array.isArray(value) || value.length <= 1) {
        throw new ChromaValueError(
          `Expected 'where' value for $and or $or to be a list of 'where' expressions, but got ${value}`,
        );
      }
      value.forEach((w: Where) => validateWhere(w));
      continue;
    }

    if (typeof value === "object") {
      if (value === null) {
        throw new ChromaValueError(
          `Expected 'where' value to be a string, number, boolean, or an operator expression, but got ${value}`,
        );
      }

      if (Object.keys(value).length !== 1) {
        throw new ChromaValueError(
          `Expected operator expression to have one operator, but got ${value}`,
        );
      }

      const [operator, operand] = Object.entries(value)[0];

      if (
        ["$gt", "$gte", "$lt", "$lte"].includes(operator) &&
        typeof operand !== "number"
      ) {
        throw new ChromaValueError(
          `Expected operand value to be a number for ${operator}, but got ${typeof operand}`,
        );
      }

      if (["$in", "$nin"].includes(operator) && !Array.isArray(operand)) {
        throw new ChromaValueError(
          `Expected operand value to be an array for ${operator}, but got ${operand}`,
        );
      }

      if (
        !["$gt", "$gte", "$lt", "$lte", "$ne", "$eq", "$in", "$nin"].includes(
          operator,
        )
      ) {
        throw new ChromaValueError(
          `Expected operator to be one of $gt, $gte, $lt, $lte, $ne, $eq, $in, $nin, but got ${operator}`,
        );
      }

      if (
        !["string", "number", "boolean"].includes(typeof operand) &&
        !Array.isArray(operand)
      ) {
        throw new ChromaValueError(
          "Expected operand value to be a string, number, boolean, or a list of those types",
        );
      }

      if (
        Array.isArray(operand) &&
        (operand.length === 0 ||
          !operand.every((item) => typeof item === typeof operand[0]))
      ) {
        throw new ChromaValueError(
          "Expected 'where' operand value to be a non-empty list and all values to be of the same type",
        );
      }
    }
  }
};

/**
 * Validates a where document clause for document content filtering.
 * The clause must be an object with exactly one supported operator.
 * `$and`/`$or` take a list of at least two nested clauses; `$contains`,
 * `$not_contains`, `$regex` and `$not_regex` take a non-empty string.
 * @param whereDocument - Where document clause to validate
 * @throws ChromaValueError if the clause is malformed
 */
export const validateWhereDocument = (whereDocument: WhereDocument) => {
  // `typeof null` is "object", so null has to be rejected explicitly.
  if (typeof whereDocument !== "object" || whereDocument === null) {
    throw new ChromaValueError(
      "Expected 'whereDocument' to be a non-empty object",
    );
  }

  if (Object.keys(whereDocument).length !== 1) {
    throw new ChromaValueError(
      `Expected 'whereDocument' to have exactly one operator, but got ${whereDocument}`,
    );
  }

  const [operator, operand] = Object.entries(whereDocument)[0];

  if (
    ![
      "$contains",
      "$not_contains",
      "$matches",
      "$not_matches",
      "$regex",
      "$not_regex",
      "$and",
      "$or",
    ].includes(operator)
  ) {
    throw new ChromaValueError(
      `Expected 'whereDocument' operator to be one of $contains, $not_contains, $matches, $not_matches, $regex, $not_regex, $and, or $or, but got ${operator}`,
    );
  }

  if (operator === "$and" || operator === "$or") {
    if (!Array.isArray(operand)) {
      throw new ChromaValueError(
        `Expected operand for ${operator} to be a list of 'whereDocument' expressions, but got ${operand}`,
      );
    }

    if (operand.length <= 1) {
      throw new ChromaValueError(
        `Expected 'whereDocument' operand for ${operator} to be a list with at least two 'whereDocument' expressions`,
      );
    }

    operand.forEach((item) => validateWhereDocument(item));
    return;
  }

  // The operator selects the rule and the operand is the value under test.
  // The earlier code had the two swapped, so this check could never fire.
  const needsNonEmptyString = [
    "$contains",
    "$not_contains",
    "$regex",
    "$not_regex",
  ].includes(operator);
  if (
    needsNonEmptyString &&
    (typeof operand !== "string" || operand.length === 0)
  ) {
    throw new ChromaValueError(
      `Expected operand for ${operator} to be a non empty string, but got ${operand}`,
    );
  }
};

/**
 * Validates include fields for query operations.
 * @param options - Validation options
 * @param options.include - Array of fields to include in results
 * @param options.exclude - Optional array of fields that should not be included
 * @throws ChromaValueError if `include` is not an array, or an item is not a
 * string, is not a key of `IncludeEnum`, or appears in `exclude`
 */
export const validateInclude = ({
  include,
  exclude,
}: {
  include: Include[];
  exclude?: Include[];
}) => {
  if (!Array.isArray(include)) {
    throw new ChromaValueError("Expected 'include' to be a non-empty array");
  }

  const validValues = Object.keys(IncludeEnum);
  include.forEach((item) => {
    if (typeof (item as unknown) !== "string") {
      throw new ChromaValueError("Expected 'include' items to be strings");
    }

    if (!validValues.includes(item)) {
      throw new ChromaValueError(
        `Expected 'include' items to be one of ${validValues.join(
          ", ",
        )}, but got ${item}`,
      );
    }

    if (exclude?.includes(item)) {
      throw new ChromaValueError(`${item} is not allowed for this operation`);
    }
  });
};

/**
 * Validates the number of results parameter for queries.
 * @param nResults - Number of results to validate
 * @throws ChromaValueError if nResults is not a number, is NaN, or is not
 * greater than zero
 */
export const validateNResults = (nResults: number) => {
  if (typeof (nResults as unknown) !== "number") {
    throw new ChromaValueError(
      `Expected 'nResults' to be a number, but got ${typeof nResults}`,
    );
  }

  // NaN compares false against everything, so `nResults <= 0` alone would
  // let it through.
  if (Number.isNaN(nResults) || nResults <= 0) {
    throw new ChromaValueError(
      "Number of requested results has to be positive",
    );
  }
};

/**
 * Splits a connection URL into the pieces needed to build client arguments.
 *
 * NOTE(review): `URL.port` is an empty string when the URL has no explicit
 * port or when the port equals the scheme default, so `port` is `0` in
 * those cases (`Number("")`). Callers may need to substitute their own
 * default — confirm against call sites.
 * @param path - Absolute URL of the server
 * @returns `ssl` (true for `https:`), `host` (the URL hostname) and the
 * numeric `port`
 * @throws ChromaValueError if `path` cannot be parsed as a URL
 */
export const parseConnectionPath = (path: string) => {
  let url: URL;
  try {
    url = new URL(path);
  } catch {
    throw new ChromaValueError(`Invalid URL: ${path}`);
  }

  return {
    ssl: url.protocol === "https:",
    host: url.hostname,
    port: Number(url.port),
  };
};

/**
 * Packs an embedding into a buffer of 32-bit floats (4 bytes per value).
 * Values are written through a `Float32Array`, so the byte order is
 * whatever the platform uses for typed arrays.
 * @param embedding - Values to pack
 * @returns Buffer holding the packed values
 */
const packEmbedding = (embedding: number[]): ArrayBuffer => {
  const buffer = new ArrayBuffer(embedding.length * 4);
  new Float32Array(buffer).set(embedding);
  return buffer;
};

/**
 * Encodes each embedding as a base64 string of its packed 32-bit floats.
 * @param embeddings - Embeddings to encode
 * @returns One base64 string per embedding, in input order
 */
export const embeddingsToBase64Bytes = (embeddings: number[][]) => {
  return embeddings.map((embedding) => {
    const bytes = new Uint8Array(packEmbedding(embedding));
    // btoa expects a "binary string": one character per byte.
    const binaryString = Array.from(bytes, (byte) =>
      String.fromCharCode(byte),
    ).join("");
    return btoa(binaryString);
  });
};