UNPKG

@lewist9x/distil-beta1

Version:

An opinionated library for managing LLM pipelines. Define, track, rate, and curate prompt–completion pairs for fine-tuning.

105 lines (104 loc) • 4.53 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.InferenceEngine = void 0; // src/inference.ts const axios_1 = __importDefault(require("axios")); const elasticsearch_1 = require("@elastic/elasticsearch"); const config_1 = require("./config"); const utils_1 = require("./utils"); const logger_1 = require("./logger"); class InferenceEngine { constructor(logLevel) { this.esClient = new elasticsearch_1.Client({ node: config_1.config.elastic.host, auth: { username: config_1.config.elastic.user, password: config_1.config.elastic.password } }); this.logger = new logger_1.Logger(logLevel || "DEBUG"); } async callInference(input) { var _a; // Determine if using finetuned model const isFineTuned = ((_a = input.parameters) === null || _a === void 0 ? void 0 : _a.useFinetuned) === true; // Construct messages array with appropriate system message const systemMessage = isFineTuned ? `You are an AI assistant trained to help with ${input.pipelineName.toLowerCase()} tasks.` : input.systemPrompt; const messages = [ { role: "system", content: systemMessage }, { role: "user", content: isFineTuned ? JSON.stringify(JSON.stringify(input.parameters)) : input.userPrompt } ]; // Prepare API request with all relevant parameters const requestBody = { model: input.modelName, messages, max_tokens: 4000, temperature: 1, ...(input.parameters || {}) // Include any custom parameters }; // Choose API endpoint based on whether using finetuned model const apiConfig = isFineTuned ? { baseUrl: config_1.config.openai.baseUrl, endpoint: config_1.config.openai.finetune.endpoint, apiKey: config_1.config.openai.apiKey } : { baseUrl: config_1.config.openLLM.baseUrl, endpoint: "/chat/completions", apiKey: config_1.config.openLLM.apiKey }; await this.logger.debug("Request body:" + `${apiConfig.baseUrl}${apiConfig.endpoint}`); await this.logger.debug("Request body:" + JSON.stringify(requestBody)); const response = await (0, utils_1.retry)(() => axios_1.default.post(`${apiConfig.baseUrl}${apiConfig.endpoint}`, requestBody, { headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiConfig.apiKey}` } })); // Extract completion const rawOutput = response.data.choices[0].message.content; const processedOutput = isFineTuned ? rawOutput : // If finetuned, skip post-processing (input.postprocessFn ? await input.postprocessFn(rawOutput, input.extraData) : rawOutput); await this.logger.debug("Processed output:" + processedOutput); const cost = (0, utils_1.calculateCost)(JSON.stringify(messages), rawOutput); // Store completion data const indexResponse = await this.esClient.index({ index: input.pipelineName.toLowerCase(), body: { timestamp: new Date().toISOString(), pipelineName: input.pipelineName, pipelineHash: input.templateHash, input: { raw: JSON.stringify(input.originalInput), preprocessed: { systemPrompt: systemMessage, userPrompt: isFineTuned ? JSON.stringify(input.parameters) : input.userPrompt, parameters: JSON.stringify(input.parameters) } }, rawOutput, output: JSON.stringify(processedOutput), cost, model: input.modelName, metadata: input.extraData, isFineTuned } }); // Return exactly what InferenceResult expects return { detail: indexResponse._id, rawInput: input.originalInput, preprocessedInput: input, rawOutput, processedOutput, cost, retryCount: 0 }; } } exports.InferenceEngine = InferenceEngine;