UNPKG

openlit

OpenTelemetry-native auto-instrumentation library for monitoring LLM applications, facilitating the integration of observability into your GenAI-driven projects.
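Typical setup in an application is sketched below. This is a minimal sketch, assuming the package's default export exposes an init() entry point and that applicationName, environment, and traceContent correspond to the config fields read by the wrapper source; the otlpEndpoint option name is an assumption to verify against the openlit README.

// Minimal initialization sketch (assumed API shape): call init() once at
// startup so supported clients, including the Ollama wrapper in this file,
// are instrumented automatically.
const Openlit = require('openlit').default;

Openlit.init({
  applicationName: 'my-genai-app',        // recorded as GEN_AI_APPLICATION_NAME
  environment: 'production',              // recorded as GEN_AI_ENVIRONMENT
  otlpEndpoint: 'http://127.0.0.1:4318',  // assumed option: OTLP/HTTP collector endpoint
  traceContent: true,                     // also record prompts and completions on spans
});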

210 lines 9.69 kB

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const api_1 = require("@opentelemetry/api");
const config_1 = __importDefault(require("../../config"));
const helpers_1 = __importDefault(require("../../helpers"));
const semantic_convention_1 = __importDefault(require("../../semantic-convention"));
const constant_1 = require("../../constant");

// Instruments the Ollama chat API with OpenTelemetry spans that carry
// OpenLIT's GenAI semantic-convention attributes.
class OllamaWrapper {
    // Attributes common to every span produced by this wrapper.
    static setBaseSpanAttributes(span, { genAIEndpoint, model, user, cost, environment, applicationName }) {
        span.setAttribute(constant_1.TELEMETRY_SDK_NAME, constant_1.SDK_NAME);
        // Assumption: GEN_AI_SYSTEM_OLLAMA is defined in ../../semantic-convention;
        // the published file set GEN_AI_SYSTEM_ANTHROPIC here.
        span.setAttribute(semantic_convention_1.default.GEN_AI_SYSTEM, semantic_convention_1.default.GEN_AI_SYSTEM_OLLAMA);
        span.setAttribute(semantic_convention_1.default.GEN_AI_ENDPOINT, genAIEndpoint);
        span.setAttribute(semantic_convention_1.default.GEN_AI_ENVIRONMENT, environment);
        span.setAttribute(semantic_convention_1.default.GEN_AI_APPLICATION_NAME, applicationName);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_MODEL, model);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_USER, user);
        if (cost !== undefined)
            span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_COST, cost);
        span.setStatus({ code: api_1.SpanStatusCode.OK });
    }
    // Returns a patcher for the client's chat method: the replacement runs the
    // original call inside a CLIENT span and records attributes once the
    // response (or the proxied stream) completes.
    static _patchChat(tracer) {
        const genAIEndpoint = 'ollama.chat';
        return (originalMethod) => {
            return async function (...args) {
                const span = tracer.startSpan(genAIEndpoint, { kind: api_1.SpanKind.CLIENT });
                const { stream = false } = args[0];
                return api_1.context
                    .with(api_1.trace.setSpan(api_1.context.active(), span), async () => {
                        return originalMethod.apply(this, args);
                    })
                    .then((response) => {
                        if (stream) {
                            // Wrap the async iterator so attributes are recorded when the stream ends.
                            return helpers_1.default.createStreamProxy(response, OllamaWrapper._chatGenerator({ args, genAIEndpoint, response, span }));
                        }
                        return OllamaWrapper._chat({ args, genAIEndpoint, response, span });
                    })
                    .catch((e) => {
                        helpers_1.default.handleException(span, e);
                        span.end();
                        throw e;
                    });
            };
        };
    }
    // Non-streaming path: record attributes from the complete response, then end the span.
    static async _chat({ args, genAIEndpoint, response, span }) {
        try {
            await OllamaWrapper._chatCommonSetter({ args, genAIEndpoint, result: response, span });
            return response;
        }
        catch (e) {
            helpers_1.default.handleException(span, e);
        }
        finally {
            span.end();
        }
    }
    // Streaming path: re-yield every chunk while accumulating a result object,
    // then record attributes once the stream is exhausted. The chunk handling
    // mirrors Anthropic-style stream events (message_start, content_block_delta, ...).
    static async *_chatGenerator({ args, genAIEndpoint, response, span }) {
        try {
            const result = {
                id: '0',
                model: '',
                stop_reason: '',
                content: [
                    {
                        text: '',
                        role: '',
                    },
                ],
                usage: {
                    input_tokens: 0,
                    output_tokens: 0,
                    total_tokens: 0,
                },
            };
            for await (const chunk of response) {
                switch (chunk.type) {
                    case 'content_block_delta':
                        result.content[0].text += chunk.delta?.text ?? '';
                        break;
                    case 'message_stop':
                        break;
                    case 'content_block_stop':
                        break;
                    case 'message_start':
                        if (chunk.message) {
                            result.id = chunk.message.id;
                            result.model = chunk.message.model;
                            result.content[0].role = chunk.message.role;
                            // Default inside the coercion: Number(undefined) would be NaN.
                            result.usage.input_tokens += Number(chunk.message.usage?.input_tokens ?? 0);
                            result.usage.output_tokens += Number(chunk.message.usage?.output_tokens ?? 0);
                            result.stop_reason = chunk.message?.stop_reason ?? '';
                        }
                        break;
                    case 'content_block_start':
                        result.content[0].text = chunk.content_block?.text ?? '';
                        break;
                    case 'message_delta':
                        result.stop_reason = chunk.delta?.stop_reason ?? '';
                        result.usage.output_tokens += Number(chunk.usage?.output_tokens ?? 0);
                        break;
                }
                yield chunk;
            }
            result.usage.total_tokens = result.usage.output_tokens + result.usage.input_tokens;
            await OllamaWrapper._chatCommonSetter({ args, genAIEndpoint, result, span });
            return response;
        }
        catch (e) {
            helpers_1.default.handleException(span, e);
        }
        finally {
            span.end();
        }
    }
    // Derives and sets all request/response attributes (prompt, model, token usage, cost).
    static async _chatCommonSetter({ args, genAIEndpoint, result, span }) {
        const applicationName = config_1.default.applicationName;
        const environment = config_1.default.environment;
        const traceContent = config_1.default.traceContent;
        const { messages, max_tokens = null, seed = null, temperature = 1, top_p, top_k, user, stream = false, stop_reason } = args[0];
        // Format 'messages' into a single prompt string.
        const messagePrompt = messages || '';
        const formattedMessages = [];
        for (const message of messagePrompt) {
            const role = message.role;
            const content = message.content;
            if (Array.isArray(content)) {
                const contentStr = content
                    .map((item) => {
                        if ('type' in item) {
                            return `${item.type}: ${item.text ? item.text : item.image_url}`;
                        }
                        else {
                            return `text: ${item.text}`;
                        }
                    })
                    .join(', ');
                formattedMessages.push(`${role}: ${contentStr}`);
            }
            else {
                formattedMessages.push(`${role}: ${content}`);
            }
        }
        const prompt = formattedMessages.join('\n');
        span.setAttribute(semantic_convention_1.default.GEN_AI_OPERATION, semantic_convention_1.default.GEN_AI_OPERATION_TYPE_CHAT);
        span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_ID, result.id);
        const model = result.model || args[0].model;
        const pricingInfo = await config_1.default.updatePricingJson(config_1.default.pricing_json);
        // Non-streaming responses report prompt_eval_count/eval_count; the
        // accumulated streaming result carries usage.* counters instead.
        const promptTokens = result.prompt_eval_count ?? result.usage?.input_tokens ?? 0;
        const completionTokens = result.eval_count ?? result.usage?.output_tokens ?? 0;
        const totalTokens = promptTokens + completionTokens;
        // Calculate the cost of the operation.
        const cost = helpers_1.default.getChatModelCost(model, pricingInfo, promptTokens, completionTokens);
        OllamaWrapper.setBaseSpanAttributes(span, {
            genAIEndpoint,
            model,
            user,
            cost,
            applicationName,
            environment,
        });
        // Request parameter attributes: start.
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_P, top_p);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_K, top_k);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_MAX_TOKENS, max_tokens);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TEMPERATURE, temperature);
        span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON, stop_reason);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_IS_STREAM, stream);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_SEED, seed);
        if (traceContent) {
            span.setAttribute(semantic_convention_1.default.GEN_AI_CONTENT_PROMPT, prompt);
        }
        // Request parameter attributes: end.
        span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS, promptTokens);
        span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_OUTPUT_TOKENS, completionTokens);
        span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_TOTAL_TOKENS, totalTokens);
        if (result.done_reason) {
            span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON, result.done_reason);
        }
        if (traceContent) {
            // Record the assistant message as the completion content.
            const { message = {} } = result;
            const messageString = `${message.role}: ${message.content}`;
            span.setAttribute(semantic_convention_1.default.GEN_AI_CONTENT_COMPLETION, messageString);
        }
    }
}
exports.default = OllamaWrapper;
//# sourceMappingURL=wrapper.js.map
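For illustration only, the sketch below shows one way the patcher above could be wired onto an Ollama client by hand; the SDK normally performs this patching itself during initialization. The tracer name, the ollama import, and the request payload are assumptions for the example, not part of this file.

// Hypothetical wiring sketch: obtain a tracer and swap ollama.chat for the
// instrumented replacement produced by OllamaWrapper._patchChat(tracer).
const { trace } = require('@opentelemetry/api');
const { Ollama } = require('ollama');
const OllamaWrapper = require('./wrapper').default;

const tracer = trace.getTracer('openlit-ollama-example');
const ollama = new Ollama({ host: 'http://127.0.0.1:11434' });

// _patchChat(tracer) is a higher-order function: pass it the original method
// and it returns an async replacement that runs the call inside a CLIENT span,
// then records attributes (or proxies the stream) before ending the span.
ollama.chat = OllamaWrapper._patchChat(tracer)(ollama.chat.bind(ollama));

async function main() {
  const reply = await ollama.chat({
    model: 'llama3',
    messages: [{ role: 'user', content: 'Say hello.' }],
  });
  console.log(reply.message.content);
}

main();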