openlit
Version:
OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
563 lines • 27.6 kB
JavaScript
;
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const api_1 = require("@opentelemetry/api");
const config_1 = __importDefault(require("../../config"));
const helpers_1 = __importStar(require("../../helpers"));
const semantic_convention_1 = __importDefault(require("../../semantic-convention"));
const base_wrapper_1 = __importDefault(require("../base-wrapper"));
function spanCreationAttrs(operationName, requestModel) {
return {
[semantic_convention_1.default.GEN_AI_OPERATION]: operationName,
[semantic_convention_1.default.GEN_AI_PROVIDER_NAME_OTEL]: OllamaWrapper.aiSystem,
[semantic_convention_1.default.GEN_AI_REQUEST_MODEL]: requestModel,
[semantic_convention_1.default.SERVER_ADDRESS]: OllamaWrapper.serverAddress,
[semantic_convention_1.default.SERVER_PORT]: OllamaWrapper.serverPort,
};
}
class OllamaWrapper extends base_wrapper_1.default {
// ──────────────────── Chat ────────────────────
static _patchChat(tracer) {
const genAIEndpoint = 'ollama.chat';
return (originalMethod) => {
return async function (...args) {
if ((0, helpers_1.isFrameworkLlmActive)())
return originalMethod.apply(this, args);
const requestModel = args[0]?.model || 'llama3';
const spanName = `${semantic_convention_1.default.GEN_AI_OPERATION_TYPE_CHAT} ${requestModel}`;
const effectiveCtx = (0, helpers_1.getFrameworkParentContext)() ?? api_1.context.active();
const span = tracer.startSpan(spanName, {
kind: api_1.SpanKind.CLIENT,
attributes: spanCreationAttrs(semantic_convention_1.default.GEN_AI_OPERATION_TYPE_CHAT, requestModel),
}, effectiveCtx);
return api_1.context
.with(api_1.trace.setSpan(effectiveCtx, span), async () => {
return originalMethod.apply(this, args);
})
.then((response) => {
const { stream = false } = args[0];
if (stream) {
return helpers_1.default.createStreamProxy(response, OllamaWrapper._chatGenerator({ args, genAIEndpoint, response, span }));
}
return OllamaWrapper._chat({ args, genAIEndpoint, response, span });
})
.catch((e) => {
helpers_1.default.handleException(span, e);
base_wrapper_1.default.recordMetrics(span, {
genAIEndpoint,
model: requestModel,
aiSystem: OllamaWrapper.aiSystem,
serverAddress: OllamaWrapper.serverAddress,
serverPort: OllamaWrapper.serverPort,
errorType: e?.constructor?.name || '_OTHER',
});
span.end();
throw e;
});
};
};
}
static async _chat({ args, genAIEndpoint, response, span, }) {
let metricParams;
try {
metricParams = await OllamaWrapper._chatCommonSetter({
args,
genAIEndpoint,
result: response,
span,
});
return response;
}
catch (e) {
helpers_1.default.handleException(span, e);
throw e;
}
finally {
span.end();
if (metricParams) {
base_wrapper_1.default.recordMetrics(span, metricParams);
}
}
}
static async *_chatGenerator({ args, genAIEndpoint, response, span, }) {
let metricParams;
const timestamps = [];
const startTime = Date.now();
try {
const result = {
model: '',
message: { role: 'assistant', content: '' },
done_reason: '',
prompt_eval_count: 0,
eval_count: 0,
};
let toolCalls = [];
for await (const chunk of response) {
timestamps.push(Date.now());
result.model = chunk.model || result.model;
if (chunk.message?.content) {
result.message.content += chunk.message.content;
result.message.role = chunk.message.role || result.message.role;
}
if (chunk.message?.tool_calls) {
toolCalls = chunk.message.tool_calls;
}
if (chunk.done) {
result.done_reason = chunk.done_reason || '';
result.prompt_eval_count = chunk.prompt_eval_count || 0;
result.eval_count = chunk.eval_count || 0;
}
yield chunk;
}
if (toolCalls.length > 0) {
result.message.tool_calls = toolCalls;
}
const ttft = timestamps.length > 0 ? (timestamps[0] - startTime) / 1000 : 0;
let tbt = 0;
if (timestamps.length > 1) {
const timeDiffs = timestamps.slice(1).map((t, i) => t - timestamps[i]);
tbt = timeDiffs.reduce((a, b) => a + b, 0) / timeDiffs.length / 1000;
}
metricParams = await OllamaWrapper._chatCommonSetter({
args,
genAIEndpoint,
result,
span,
ttft,
tbt,
});
return result;
}
catch (e) {
helpers_1.default.handleException(span, e);
throw e;
}
finally {
span.end();
if (metricParams) {
base_wrapper_1.default.recordMetrics(span, metricParams);
}
}
}
static async _chatCommonSetter({ args, genAIEndpoint, result, span, ttft = 0, tbt = 0, }) {
const captureContent = config_1.default.captureMessageContent;
const requestModel = args[0]?.model || 'llama3';
const { messages, stream = false } = args[0];
const options = args[0]?.options || {};
if (options.temperature != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TEMPERATURE, options.temperature);
}
if (options.top_p != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_P, options.top_p);
}
if (options.top_k != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_K, options.top_k);
}
if (options.max_tokens != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_MAX_TOKENS, options.max_tokens);
}
if (options.repeat_penalty) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_FREQUENCY_PENALTY, options.repeat_penalty);
}
if (options.seed != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_SEED, Number(options.seed));
}
if (options.stop) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_STOP_SEQUENCES, Array.isArray(options.stop) ? options.stop : [options.stop]);
}
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_IS_STREAM, stream);
if (captureContent) {
span.setAttribute(semantic_convention_1.default.GEN_AI_INPUT_MESSAGES, helpers_1.default.buildInputMessages(messages || []));
}
const responseModel = result.model || requestModel;
const pricingInfo = config_1.default.pricingInfo || {};
const inputTokens = result.prompt_eval_count || 0;
const outputTokens = result.eval_count || 0;
const cost = helpers_1.default.getChatModelCost(requestModel, pricingInfo, inputTokens, outputTokens);
OllamaWrapper.setBaseSpanAttributes(span, {
genAIEndpoint,
model: requestModel,
cost,
aiSystem: OllamaWrapper.aiSystem,
serverAddress: OllamaWrapper.serverAddress,
serverPort: OllamaWrapper.serverPort,
});
span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_MODEL, responseModel);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS, inputTokens);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_OUTPUT_TOKENS, outputTokens);
if (ttft > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_SERVER_TTFT, ttft);
}
if (tbt > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_SERVER_TBT, tbt);
}
if (result.done_reason) {
span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON, [result.done_reason]);
}
const outputType = typeof result.message?.content === 'string'
? semantic_convention_1.default.GEN_AI_OUTPUT_TYPE_TEXT
: semantic_convention_1.default.GEN_AI_OUTPUT_TYPE_JSON;
span.setAttribute(semantic_convention_1.default.GEN_AI_OUTPUT_TYPE, outputType);
if (result.message?.tool_calls) {
const resultToolCalls = result.message.tool_calls;
const toolNames = resultToolCalls.map((t) => t.function?.name || '').filter(Boolean);
const toolIds = resultToolCalls.map((t) => String(t.id || '')).filter(Boolean);
const toolArgs = resultToolCalls
.map((t) => String(t.function?.arguments || ''))
.filter(Boolean);
if (toolNames.length > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_TOOL_NAME, toolNames.join(', '));
}
if (toolIds.length > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_TOOL_CALL_ID, toolIds.join(', '));
}
if (toolArgs.length > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_TOOL_ARGS, toolArgs.join(', '));
}
}
let inputMessagesJson;
let outputMessagesJson;
if (captureContent) {
const toolCalls = result.message?.tool_calls;
outputMessagesJson = helpers_1.default.buildOutputMessages(result.message?.content || '', result.done_reason || 'stop', toolCalls);
span.setAttribute(semantic_convention_1.default.GEN_AI_OUTPUT_MESSAGES, outputMessagesJson);
inputMessagesJson = helpers_1.default.buildInputMessages(messages || []);
}
if (!config_1.default.disableEvents) {
const eventAttrs = {
[semantic_convention_1.default.GEN_AI_OPERATION]: semantic_convention_1.default.GEN_AI_OPERATION_TYPE_CHAT,
[semantic_convention_1.default.GEN_AI_REQUEST_MODEL]: requestModel,
[semantic_convention_1.default.GEN_AI_RESPONSE_MODEL]: responseModel,
[semantic_convention_1.default.SERVER_ADDRESS]: OllamaWrapper.serverAddress,
[semantic_convention_1.default.SERVER_PORT]: OllamaWrapper.serverPort,
[semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON]: [result.done_reason || 'stop'],
[semantic_convention_1.default.GEN_AI_OUTPUT_TYPE]: outputType,
[semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS]: inputTokens,
[semantic_convention_1.default.GEN_AI_USAGE_OUTPUT_TOKENS]: outputTokens,
};
if (captureContent) {
if (inputMessagesJson)
eventAttrs[semantic_convention_1.default.GEN_AI_INPUT_MESSAGES] = inputMessagesJson;
if (outputMessagesJson)
eventAttrs[semantic_convention_1.default.GEN_AI_OUTPUT_MESSAGES] = outputMessagesJson;
}
helpers_1.default.emitInferenceEvent(span, eventAttrs);
}
return {
genAIEndpoint,
model: requestModel,
cost,
aiSystem: OllamaWrapper.aiSystem,
};
}
// ──────────────────── Generate (text_completion) ────────────────────
static _patchGenerate(tracer) {
const genAIEndpoint = 'ollama.generate';
return (originalMethod) => {
return async function (...args) {
if ((0, helpers_1.isFrameworkLlmActive)())
return originalMethod.apply(this, args);
const requestModel = args[0]?.model || 'llama3';
const spanName = `${semantic_convention_1.default.GEN_AI_OPERATION_TYPE_TEXT_COMPLETION} ${requestModel}`;
const effectiveCtx = (0, helpers_1.getFrameworkParentContext)() ?? api_1.context.active();
const span = tracer.startSpan(spanName, {
kind: api_1.SpanKind.CLIENT,
attributes: spanCreationAttrs(semantic_convention_1.default.GEN_AI_OPERATION_TYPE_TEXT_COMPLETION, requestModel),
}, effectiveCtx);
return api_1.context
.with(api_1.trace.setSpan(effectiveCtx, span), async () => {
return originalMethod.apply(this, args);
})
.then((response) => {
const { stream = false } = args[0];
if (stream) {
return helpers_1.default.createStreamProxy(response, OllamaWrapper._generateGenerator({ args, genAIEndpoint, response, span }));
}
return OllamaWrapper._generate({ args, genAIEndpoint, response, span });
})
.catch((e) => {
helpers_1.default.handleException(span, e);
base_wrapper_1.default.recordMetrics(span, {
genAIEndpoint,
model: requestModel,
aiSystem: OllamaWrapper.aiSystem,
serverAddress: OllamaWrapper.serverAddress,
serverPort: OllamaWrapper.serverPort,
errorType: e?.constructor?.name || '_OTHER',
});
span.end();
throw e;
});
};
};
}
static async _generate({ args, genAIEndpoint, response, span, }) {
let metricParams;
try {
metricParams = await OllamaWrapper._generateCommonSetter({
args,
genAIEndpoint,
result: response,
span,
});
return response;
}
catch (e) {
helpers_1.default.handleException(span, e);
throw e;
}
finally {
span.end();
if (metricParams) {
base_wrapper_1.default.recordMetrics(span, metricParams);
}
}
}
static async *_generateGenerator({ args, genAIEndpoint, response, span, }) {
let metricParams;
const timestamps = [];
const startTime = Date.now();
try {
const result = {
model: '',
response: '',
done_reason: '',
prompt_eval_count: 0,
eval_count: 0,
};
for await (const chunk of response) {
timestamps.push(Date.now());
result.model = chunk.model || result.model;
if (chunk.response) {
result.response += chunk.response;
}
if (chunk.done) {
result.done_reason = chunk.done_reason || '';
result.prompt_eval_count = chunk.prompt_eval_count || 0;
result.eval_count = chunk.eval_count || 0;
}
yield chunk;
}
const ttft = timestamps.length > 0 ? (timestamps[0] - startTime) / 1000 : 0;
let tbt = 0;
if (timestamps.length > 1) {
const timeDiffs = timestamps.slice(1).map((t, i) => t - timestamps[i]);
tbt = timeDiffs.reduce((a, b) => a + b, 0) / timeDiffs.length / 1000;
}
metricParams = await OllamaWrapper._generateCommonSetter({
args,
genAIEndpoint,
result,
span,
ttft,
tbt,
});
return result;
}
catch (e) {
helpers_1.default.handleException(span, e);
throw e;
}
finally {
span.end();
if (metricParams) {
base_wrapper_1.default.recordMetrics(span, metricParams);
}
}
}
static async _generateCommonSetter({ args, genAIEndpoint, result, span, ttft = 0, tbt = 0, }) {
const captureContent = config_1.default.captureMessageContent;
const requestModel = args[0]?.model || 'llama3';
const { prompt, stream = false } = args[0];
const options = args[0]?.options || {};
if (options.temperature != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TEMPERATURE, options.temperature);
}
if (options.top_p != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_P, options.top_p);
}
if (options.top_k != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_K, options.top_k);
}
if (options.max_tokens != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_MAX_TOKENS, options.max_tokens);
}
if (options.repeat_penalty) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_FREQUENCY_PENALTY, options.repeat_penalty);
}
if (options.seed != null) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_SEED, Number(options.seed));
}
if (options.stop) {
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_STOP_SEQUENCES, Array.isArray(options.stop) ? options.stop : [options.stop]);
}
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_IS_STREAM, stream);
if (captureContent) {
const inputMessages = [{ role: 'user', content: prompt || '' }];
span.setAttribute(semantic_convention_1.default.GEN_AI_INPUT_MESSAGES, helpers_1.default.buildInputMessages(inputMessages));
}
const responseModel = result.model || requestModel;
const pricingInfo = config_1.default.pricingInfo || {};
const inputTokens = result.prompt_eval_count || 0;
const outputTokens = result.eval_count || 0;
const cost = helpers_1.default.getChatModelCost(requestModel, pricingInfo, inputTokens, outputTokens);
OllamaWrapper.setBaseSpanAttributes(span, {
genAIEndpoint,
model: requestModel,
cost,
aiSystem: OllamaWrapper.aiSystem,
serverAddress: OllamaWrapper.serverAddress,
serverPort: OllamaWrapper.serverPort,
});
span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_MODEL, responseModel);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS, inputTokens);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_OUTPUT_TOKENS, outputTokens);
if (ttft > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_SERVER_TTFT, ttft);
}
if (tbt > 0) {
span.setAttribute(semantic_convention_1.default.GEN_AI_SERVER_TBT, tbt);
}
if (result.done_reason) {
span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON, [result.done_reason]);
}
const llmResponse = result.response || '';
const outputType = typeof llmResponse === 'string'
? semantic_convention_1.default.GEN_AI_OUTPUT_TYPE_TEXT
: semantic_convention_1.default.GEN_AI_OUTPUT_TYPE_JSON;
span.setAttribute(semantic_convention_1.default.GEN_AI_OUTPUT_TYPE, outputType);
let inputMessagesJson;
let outputMessagesJson;
if (captureContent) {
const inputMessages = [{ role: 'user', content: prompt || '' }];
outputMessagesJson = helpers_1.default.buildOutputMessages(llmResponse, result.done_reason || 'stop');
span.setAttribute(semantic_convention_1.default.GEN_AI_OUTPUT_MESSAGES, outputMessagesJson);
inputMessagesJson = helpers_1.default.buildInputMessages(inputMessages);
}
if (!config_1.default.disableEvents) {
const eventAttrs = {
[semantic_convention_1.default.GEN_AI_OPERATION]: semantic_convention_1.default.GEN_AI_OPERATION_TYPE_TEXT_COMPLETION,
[semantic_convention_1.default.GEN_AI_REQUEST_MODEL]: requestModel,
[semantic_convention_1.default.GEN_AI_RESPONSE_MODEL]: responseModel,
[semantic_convention_1.default.SERVER_ADDRESS]: OllamaWrapper.serverAddress,
[semantic_convention_1.default.SERVER_PORT]: OllamaWrapper.serverPort,
[semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON]: [result.done_reason || 'stop'],
[semantic_convention_1.default.GEN_AI_OUTPUT_TYPE]: outputType,
[semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS]: inputTokens,
[semantic_convention_1.default.GEN_AI_USAGE_OUTPUT_TOKENS]: outputTokens,
};
if (captureContent) {
if (inputMessagesJson)
eventAttrs[semantic_convention_1.default.GEN_AI_INPUT_MESSAGES] = inputMessagesJson;
if (outputMessagesJson)
eventAttrs[semantic_convention_1.default.GEN_AI_OUTPUT_MESSAGES] = outputMessagesJson;
}
helpers_1.default.emitInferenceEvent(span, eventAttrs);
}
return {
genAIEndpoint,
model: requestModel,
cost,
aiSystem: OllamaWrapper.aiSystem,
};
}
// ──────────────────── Embeddings ────────────────────
static _patchEmbeddings(tracer) {
const genAIEndpoint = 'ollama.embeddings';
return (originalMethod) => {
return async function (...args) {
if ((0, helpers_1.isFrameworkLlmActive)())
return originalMethod.apply(this, args);
const requestModel = args[0]?.model || 'llama3';
const spanName = `${semantic_convention_1.default.GEN_AI_OPERATION_TYPE_EMBEDDING} ${requestModel}`;
const effectiveCtx = (0, helpers_1.getFrameworkParentContext)() ?? api_1.context.active();
const span = tracer.startSpan(spanName, {
kind: api_1.SpanKind.CLIENT,
attributes: spanCreationAttrs(semantic_convention_1.default.GEN_AI_OPERATION_TYPE_EMBEDDING, requestModel),
}, effectiveCtx);
return api_1.context.with(api_1.trace.setSpan(effectiveCtx, span), async () => {
const captureContent = config_1.default.captureMessageContent;
let metricParams;
try {
const response = await originalMethod.apply(this, args);
const promptVal = args[0]?.input || args[0]?.prompt || '';
const promptText = typeof promptVal === 'string' ? promptVal : JSON.stringify(promptVal);
const inputTokens = helpers_1.default.generalTokens(promptText);
const pricingInfo = config_1.default.pricingInfo || {};
const cost = helpers_1.default.getEmbedModelCost(requestModel, pricingInfo, inputTokens);
OllamaWrapper.setBaseSpanAttributes(span, {
genAIEndpoint,
model: requestModel,
cost,
aiSystem: OllamaWrapper.aiSystem,
serverAddress: OllamaWrapper.serverAddress,
serverPort: OllamaWrapper.serverPort,
});
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_IS_STREAM, false);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS, inputTokens);
if (captureContent) {
span.setAttribute(semantic_convention_1.default.GEN_AI_INPUT_MESSAGES, promptText);
}
metricParams = {
genAIEndpoint,
model: requestModel,
cost,
aiSystem: OllamaWrapper.aiSystem,
};
return response;
}
catch (e) {
helpers_1.default.handleException(span, e);
throw e;
}
finally {
span.end();
if (metricParams) {
base_wrapper_1.default.recordMetrics(span, metricParams);
}
}
});
};
};
}
}
OllamaWrapper.aiSystem = semantic_convention_1.default.GEN_AI_SYSTEM_OLLAMA;
OllamaWrapper.serverAddress = '127.0.0.1';
OllamaWrapper.serverPort = 11434;
exports.default = OllamaWrapper;
//# sourceMappingURL=wrapper.js.map