UNPKG

@just-every/ensemble

Version:

LLM provider abstraction layer with unified streaming interface

109 lines 4.2 kB
import { getModelProvider } from '../model_providers/model_provider.js'; import { findModel } from '../data/model_data.js'; import { createTraceContext } from '../utils/trace_context.js'; import { randomUUID } from 'crypto';

// Cached embeddings expire after one hour.
const EMBEDDING_TTL_MS = 1000 * 60 * 60;
// FIFO cap on the number of cached embeddings held in-process.
const EMBEDDING_CACHE_MAX = 1000;
// cacheKey -> { embedding: number[], timestamp: Date }
const embeddingCache = new Map();

/**
 * Create an embedding vector for `text` using the agent's model (falling back
 * to 'text-embedding-3-small'), with an in-process TTL cache and automatic
 * chunk-and-average handling for inputs that exceed the model's token limit.
 *
 * Trace turn/request events are emitted around the provider call and are
 * always closed in the `finally` block, even on failure.
 *
 * @param {string} text - Text to embed.
 * @param {object} agent - Agent descriptor; reads `agent_id`, `model`, `modelClass`.
 * @param {object} [options] - Provider options; `options.dimensions` defaults to 1536.
 * @returns {Promise<number[]>} The embedding vector.
 * @throws {Error} If the resolved provider does not implement `createEmbedding`,
 *   or if the provider call itself throws.
 */
export async function ensembleEmbed(text, agent, options) {
    const trace = createTraceContext(agent, 'embedding');
    const requestId = randomUUID();
    let requestStarted = false;
    let turnStatus = 'completed';
    let requestStatus = 'completed';
    let requestError;
    let requestMetadata = {};
    await trace.emitTurnStart({
        input_text: text,
        options,
    });
    await trace.emitRequestStart(requestId, {
        agent_id: agent.agent_id,
        provider: agent.model ? findModel(agent.model)?.provider : undefined,
        model: agent.model || 'text-embedding-3-small',
        payload: {
            text,
            options,
        },
    });
    requestStarted = true;
    try {
        const dimensions = options?.dimensions || 1536;
        // FIX: key the cache on the model that will actually be used.
        // The old key used `agent.model || agent.modelClass`, but modelClass
        // never influences model selection below, so keying on it only caused
        // spurious cache misses for agents that set modelClass without model.
        const model = agent.model || 'text-embedding-3-small';
        const cacheKey = `${model}:${text}:${dimensions}`;
        const cached = embeddingCache.get(cacheKey);
        if (cached) {
            if (Date.now() - cached.timestamp.getTime() < EMBEDDING_TTL_MS) {
                requestMetadata = {
                    ...requestMetadata,
                    from_cache: true,
                    dimensions: cached.embedding.length,
                };
                return cached.embedding;
            }
            // Expired entry: drop it and fall through to a fresh request.
            embeddingCache.delete(cacheKey);
        }
        const provider = getModelProvider(model);
        if (!provider.createEmbedding) {
            throw new Error(`Provider for model ${model} does not support embeddings`);
        }
        const modelInfo = findModel(model);
        const inputTokenLimit = modelInfo?.features?.input_token_limit;
        // Rough chars-per-token heuristic (~4 chars/token) with a 10% margin.
        // FIX: floor to an integer so the chunk loop below doesn't iterate on
        // fractional offsets and drift its slice boundaries.
        const MAX_CHARS_PER_CHUNK = inputTokenLimit
            ? Math.floor(inputTokenLimit * 4 * 0.9)
            : Infinity;
        // FIX: coerce to a real boolean here instead of patching with `!!` later.
        const needsChunking = Boolean(inputTokenLimit) && text.length > MAX_CHARS_PER_CHUNK;
        let embedding;
        if (needsChunking) {
            const chunks = [];
            for (let i = 0; i < text.length; i += MAX_CHARS_PER_CHUNK) {
                chunks.push(text.slice(i, i + MAX_CHARS_PER_CHUNK));
            }
            const embeddings = await provider.createEmbedding(chunks, model, agent, { ...options, dimensions });
            // Merge chunk vectors by unweighted element-wise mean.
            embedding = new Array(dimensions).fill(0);
            for (const vec of embeddings) {
                for (let i = 0; i < dimensions; i++) {
                    embedding[i] += vec[i] / embeddings.length;
                }
            }
        }
        else {
            const result = await provider.createEmbedding(text, model, agent, { ...options, dimensions });
            // Some providers return [vector] even for a single input; unwrap it.
            embedding = Array.isArray(result[0]) ? result[0] : result;
        }
        // FIFO eviction: Map preserves insertion order, so the first key is oldest.
        if (embeddingCache.size >= EMBEDDING_CACHE_MAX) {
            const oldestKey = embeddingCache.keys().next().value;
            if (oldestKey) embeddingCache.delete(oldestKey);
        }
        embeddingCache.set(cacheKey, {
            embedding,
            timestamp: new Date(),
        });
        requestMetadata = {
            ...requestMetadata,
            from_cache: false,
            dimensions: embedding.length,
            chunked: needsChunking,
        };
        return embedding;
    }
    catch (error) {
        turnStatus = 'error';
        requestStatus = 'error';
        requestError = error instanceof Error ? error.message : String(error);
        throw error;
    }
    finally {
        // Always close the trace spans, even when the provider call throws.
        if (requestStarted) {
            await trace.emitRequestEnd(requestId, {
                status: requestStatus,
                error: requestError,
                ...requestMetadata,
            });
        }
        await trace.emitTurnEnd(turnStatus, turnStatus === 'completed' ? 'completed' : 'exception', {
            error: requestError,
        });
    }
}
//# sourceMappingURL=ensemble_embed.js.map