openlit
OpenTelemetry-native auto-instrumentation library for monitoring LLM applications, making it straightforward to add observability to your GenAI-driven projects
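A minimal usage sketch (option values are hypothetical; assumes the package's default export exposes the `init` entry point described in the openlit docs):

const openlit = require('openlit').default;

// Initialize once at startup; supported clients such as Ollama are then
// auto-instrumented by wrappers like the one below.
openlit.init({
    applicationName: 'my-app', // recorded as GEN_AI_APPLICATION_NAME
    environment: 'production', // recorded as GEN_AI_ENVIRONMENT
    traceContent: true,        // also record prompt and completion text
});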
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const api_1 = require("@opentelemetry/api");
const config_1 = __importDefault(require("../../config"));
const helpers_1 = __importDefault(require("../../helpers"));
const semantic_convention_1 = __importDefault(require("../../semantic-convention"));
const constant_1 = require("../../constant");
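// OllamaWrapper instruments the Ollama client's chat() method with
// OpenTelemetry: each call runs inside a CLIENT span that records request
// parameters, token usage, estimated cost and (when traceContent is enabled)
// the prompt and completion text.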
class OllamaWrapper {
static setBaseSpanAttributes(span, { genAIEndpoint, model, user, cost, environment, applicationName }) {
        span.setAttribute(constant_1.TELEMETRY_SDK_NAME, constant_1.SDK_NAME);
        // This is the Ollama wrapper; the original tagged the span with the
        // Anthropic system constant (copy-paste residue from the Anthropic
        // wrapper). Assumes the semantic-convention module defines
        // GEN_AI_SYSTEM_OLLAMA.
        span.setAttribute(semantic_convention_1.default.GEN_AI_SYSTEM, semantic_convention_1.default.GEN_AI_SYSTEM_OLLAMA);
span.setAttribute(semantic_convention_1.default.GEN_AI_ENDPOINT, genAIEndpoint);
span.setAttribute(semantic_convention_1.default.GEN_AI_ENVIRONMENT, environment);
span.setAttribute(semantic_convention_1.default.GEN_AI_APPLICATION_NAME, applicationName);
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_MODEL, model);
span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_USER, user);
if (cost !== undefined)
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_COST, cost);
span.setStatus({ code: api_1.SpanStatusCode.OK });
}
    // Returns a patcher for the client's chat() method: each call is wrapped
    // in a CLIENT span, with streaming responses proxied through _chatGenerator.
    static _patchChat(tracer) {
const genAIEndpoint = 'ollama.chat';
return (originalMethod) => {
return async function (...args) {
const span = tracer.startSpan(genAIEndpoint, { kind: api_1.SpanKind.CLIENT });
const { stream = false } = args[0];
return api_1.context
.with(api_1.trace.setSpan(api_1.context.active(), span), async () => {
return originalMethod.apply(this, args);
})
.then((response) => {
                if (stream) {
return helpers_1.default.createStreamProxy(response, OllamaWrapper._chatGenerator({
args,
genAIEndpoint,
response,
span,
}));
}
return OllamaWrapper._chat({ args, genAIEndpoint, response, span });
})
.catch((e) => {
helpers_1.default.handleException(span, e);
span.end();
throw e;
});
};
};
}
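    // Illustrative application of the patch (hypothetical `ollama` client
    // object; the instrumentation normally installs this automatically):
    //   const patch = OllamaWrapper._patchChat(tracer);
    //   ollama.chat = patch(ollama.chat.bind(ollama));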
    // Non-streaming path: record attributes from the complete response, then
    // end the span and hand the response back unchanged.
    static async _chat({ args, genAIEndpoint, response, span, }) {
try {
await OllamaWrapper._chatCommonSetter({
args,
genAIEndpoint,
result: response,
span,
});
return response;
}
catch (e) {
helpers_1.default.handleException(span, e);
}
finally {
span.end();
}
}
    // Streaming path: yield each chunk through to the caller while
    // accumulating an Ollama-style result, then record attributes once the
    // stream completes.
    static async *_chatGenerator({ args, genAIEndpoint, response, span, }) {
        try {
            // The original accumulator handled Anthropic stream events
            // (message_start, content_block_delta, ...), which Ollama never
            // emits, and left a stray console.log on every chunk. Ollama
            // streams chunks shaped like { model, message: { role, content },
            // done }, where the final chunk (done: true) carries
            // prompt_eval_count, eval_count and done_reason -- the fields
            // _chatCommonSetter reads.
            const result = {
                model: '',
                message: { role: '', content: '' },
                done_reason: '',
                prompt_eval_count: 0,
                eval_count: 0,
            };
            for await (const chunk of response) {
                result.model = chunk.model ?? result.model;
                if (chunk.message) {
                    result.message.role = chunk.message.role ?? result.message.role;
                    result.message.content += chunk.message.content ?? '';
                }
                if (chunk.done) {
                    result.done_reason = chunk.done_reason ?? '';
                    result.prompt_eval_count = Number(chunk.prompt_eval_count ?? 0);
                    result.eval_count = Number(chunk.eval_count ?? 0);
                }
                yield chunk;
            }
            await OllamaWrapper._chatCommonSetter({
                args,
                genAIEndpoint,
                result,
                span,
            });
            return response;
        }
        catch (e) {
            helpers_1.default.handleException(span, e);
        }
        finally {
            span.end();
        }
    }
    // Shared attribute setter for the streaming and non-streaming paths.
    static async _chatCommonSetter({ args, genAIEndpoint, result, span, }) {
        const applicationName = config_1.default.applicationName;
        const environment = config_1.default.environment;
        const traceContent = config_1.default.traceContent;
        const { messages, user, stream = false } = args[0];
        // Ollama nests sampling parameters under `options`; the original code
        // destructured them from the top level (alongside an Anthropic-only
        // `stop_reason`), where they are normally undefined. Assumes Ollama's
        // `num_predict` option as the max-tokens equivalent.
        const { temperature = 1, top_p, top_k, seed = null, num_predict: max_tokens = null, } = args[0].options ?? {};
        // Format 'messages' into a single string
        const messagePrompt = messages || [];
        const formattedMessages = [];
        for (const message of messagePrompt) {
const role = message.role;
const content = message.content;
if (Array.isArray(content)) {
const contentStr = content
.map((item) => {
if ('type' in item) {
return `${item.type}: ${item.text ? item.text : item.image_url}`;
}
else {
return `text: ${item.text}`;
}
})
.join(', ');
formattedMessages.push(`${role}: ${contentStr}`);
}
else {
formattedMessages.push(`${role}: ${content}`);
}
}
const prompt = formattedMessages.join('\n');
span.setAttribute(semantic_convention_1.default.GEN_AI_OPERATION, semantic_convention_1.default.GEN_AI_OPERATION_TYPE_CHAT);
        // Ollama responses carry no id field; only set the attribute when present.
        if (result.id)
            span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_ID, result.id);
        const model = result.model || args[0].model;
        const pricingInfo = await config_1.default.updatePricingJson(config_1.default.pricing_json);
        const promptTokens = result.prompt_eval_count ?? 0;
        const completionTokens = result.eval_count ?? 0;
        const totalTokens = promptTokens + completionTokens;
// Calculate cost of the operation
const cost = helpers_1.default.getChatModelCost(model, pricingInfo, promptTokens, completionTokens);
OllamaWrapper.setBaseSpanAttributes(span, {
genAIEndpoint,
model,
user,
cost,
applicationName,
environment,
});
        // Request parameter attributes: start. Optional parameters are only
        // set when present, since undefined attribute values are invalid in
        // OpenTelemetry. The finish reason is a response attribute and is set
        // from result.done_reason below, not from the request arguments.
        if (top_p !== undefined)
            span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_P, top_p);
        if (top_k !== undefined)
            span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TOP_K, top_k);
        if (max_tokens !== null)
            span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_MAX_TOKENS, max_tokens);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_TEMPERATURE, temperature);
        span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_IS_STREAM, stream);
        if (seed !== null)
            span.setAttribute(semantic_convention_1.default.GEN_AI_REQUEST_SEED, seed);
if (traceContent) {
span.setAttribute(semantic_convention_1.default.GEN_AI_CONTENT_PROMPT, prompt);
}
        // Request parameter attributes: end
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_INPUT_TOKENS, promptTokens);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_OUTPUT_TOKENS, completionTokens);
span.setAttribute(semantic_convention_1.default.GEN_AI_USAGE_TOTAL_TOKENS, totalTokens);
if (result.done_reason) {
span.setAttribute(semantic_convention_1.default.GEN_AI_RESPONSE_FINISH_REASON, result.done_reason);
}
if (traceContent) {
            // Record the completion message as a single 'role: content' string.
const { message = {} } = result;
const messageString = `${message.role}: ${message.content}`;
span.setAttribute(semantic_convention_1.default.GEN_AI_CONTENT_COMPLETION, messageString);
}
}
}
exports.default = OllamaWrapper;
//# sourceMappingURL=wrapper.js.map