@just-every/ensemble

LLM provider abstraction layer with unified streaming interface

ensemble_embed.js (112 lines, 4.42 kB)
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ensembleEmbed = ensembleEmbed; const model_provider_js_1 = require("../model_providers/model_provider.cjs"); const model_data_js_1 = require("../data/model_data.cjs"); const trace_context_js_1 = require("../utils/trace_context.cjs"); const crypto_1 = require("crypto"); const EMBEDDING_TTL_MS = 1000 * 60 * 60; const EMBEDDING_CACHE_MAX = 1000; const embeddingCache = new Map(); async function ensembleEmbed(text, agent, options) { const trace = (0, trace_context_js_1.createTraceContext)(agent, 'embedding'); const requestId = (0, crypto_1.randomUUID)(); let requestStarted = false; let turnStatus = 'completed'; let requestStatus = 'completed'; let requestError; let requestMetadata = {}; await trace.emitTurnStart({ input_text: text, options, }); await trace.emitRequestStart(requestId, { agent_id: agent.agent_id, provider: agent.model ? (0, model_data_js_1.findModel)(agent.model)?.provider : undefined, model: agent.model || 'text-embedding-3-small', payload: { text, options, }, }); requestStarted = true; try { const dimensions = options?.dimensions || 1536; const cacheKey = `${agent.model || agent.modelClass}:${text}:${dimensions}`; const cached = embeddingCache.get(cacheKey); if (cached) { if (Date.now() - cached.timestamp.getTime() < EMBEDDING_TTL_MS) { requestMetadata = { ...requestMetadata, from_cache: true, dimensions: cached.embedding.length, }; return cached.embedding; } embeddingCache.delete(cacheKey); } const model = agent.model || 'text-embedding-3-small'; const provider = (0, model_provider_js_1.getModelProvider)(model); if (!provider.createEmbedding) { throw new Error(`Provider for model ${model} does not support embeddings`); } const modelInfo = (0, model_data_js_1.findModel)(model); const inputTokenLimit = modelInfo?.features?.input_token_limit; const MAX_CHARS_PER_CHUNK = inputTokenLimit ? inputTokenLimit * 4 * 0.9 : Infinity; const needsChunking = inputTokenLimit && text.length > MAX_CHARS_PER_CHUNK; let embedding; if (needsChunking) { const chunks = []; for (let i = 0; i < text.length; i += MAX_CHARS_PER_CHUNK) { chunks.push(text.slice(i, i + MAX_CHARS_PER_CHUNK)); } const result = await provider.createEmbedding(chunks, model, agent, { ...options, dimensions }); const embeddings = result; embedding = new Array(dimensions).fill(0); for (const vec of embeddings) { for (let i = 0; i < dimensions; i++) { embedding[i] += vec[i] / embeddings.length; } } } else { const result = await provider.createEmbedding(text, model, agent, { ...options, dimensions }); embedding = Array.isArray(result[0]) ? result[0] : result; } if (embeddingCache.size >= EMBEDDING_CACHE_MAX) { const oldestKey = embeddingCache.keys().next().value; if (oldestKey) embeddingCache.delete(oldestKey); } embeddingCache.set(cacheKey, { embedding, timestamp: new Date(), }); requestMetadata = { ...requestMetadata, from_cache: false, dimensions: embedding.length, chunked: !!needsChunking, }; return embedding; } catch (error) { turnStatus = 'error'; requestStatus = 'error'; requestError = error instanceof Error ? error.message : String(error); throw error; } finally { if (requestStarted) { await trace.emitRequestEnd(requestId, { status: requestStatus, error: requestError, ...requestMetadata, }); } await trace.emitTurnEnd(turnStatus, turnStatus === 'completed' ? 'completed' : 'exception', { error: requestError, }); } } //# sourceMappingURL=ensemble_embed.js.map