UNPKG

@lewist9x/distil

Version:

An opinionated library for managing LLM pipelines. Define, track, rate, and curate prompt–completion pairs for fine-tuning.

117 lines (116 loc) • 5.26 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.InferenceEngine = void 0; // src/inference.ts const openai_1 = require("openai"); const elasticsearch_1 = require("@elastic/elasticsearch"); const config_1 = require("./config"); const utils_1 = require("./utils"); const logger_1 = require("./logger"); class InferenceEngine { constructor(logLevel) { this.esClient = new elasticsearch_1.Client({ node: config_1.config.elastic.host, auth: { username: config_1.config.elastic.user, password: config_1.config.elastic.password } }); this.logger = new logger_1.Logger(logLevel || "DEBUG"); // Initialize OpenAI clients this.openaiClient = new openai_1.OpenAI({ apiKey: config_1.config.openai.apiKey, baseURL: config_1.config.openai.baseUrl, defaultHeaders: { 'HTTP-Referer': 'https://www.theclarityproject.net/', 'X-Title': 'Unbrowse Agentic Launcher' } }); this.openLLMClient = new openai_1.OpenAI({ apiKey: config_1.config.openLLM.apiKey, baseURL: config_1.config.openLLM.baseUrl, defaultHeaders: { 'HTTP-Referer': 'https://www.theclarityproject.net/', 'X-Title': 'Unbrowse Agentic Launcher' } }); } async callInference(input) { var _a, _b; // Determine if using finetuned model const isFineTuned = ((_a = input.parameters) === null || _a === void 0 ? void 0 : _a.useFinetuned) === true; // Construct messages array with appropriate system message const systemMessage = isFineTuned ? `You are an AI assistant trained to help with ${input.pipelineName.toLowerCase()} tasks.` : input.systemPrompt; (_b = input.parameters) === null || _b === void 0 ? true : delete _b.useFinetuned; const promptFinetuned = input.parameters; promptFinetuned === null || promptFinetuned === void 0 ? true : delete promptFinetuned.temperature; promptFinetuned === null || promptFinetuned === void 0 ? true : delete promptFinetuned.top_p; promptFinetuned === null || promptFinetuned === void 0 ? true : delete promptFinetuned.max_tokens; const messages = [ { role: "system", content: systemMessage }, { role: "user", content: isFineTuned ? JSON.stringify(promptFinetuned) : input.userPrompt } ]; // Prepare API request with all relevant parameters const requestParams = { model: input.modelName, messages, max_tokens: 4000, temperature: 0.5, stream: false, }; await this.logger.debug("Request params:" + JSON.stringify(requestParams)); // Choose client based on whether using finetuned model const client = isFineTuned ? this.openaiClient : this.openLLMClient; try { const completion = await client.chat.completions.create(requestParams); // Extract completion const rawOutput = completion.choices[0].message.content || ""; const processedOutput = isFineTuned ? JSON.parse(rawOutput) : // If finetuned, skip post-processing (input.postprocessFn ? await input.postprocessFn(rawOutput, input.extraData) : rawOutput); await this.logger.debug("Processed output:" + processedOutput); const cost = (0, utils_1.calculateCost)(JSON.stringify(messages), rawOutput); // Store completion data const indexResponse = await this.esClient.index({ index: input.pipelineName.toLowerCase(), body: { timestamp: new Date().toISOString(), pipelineName: input.pipelineName, pipelineHash: input.templateHash, input: { raw: JSON.stringify(input.originalInput), preprocessed: { systemPrompt: systemMessage, userPrompt: isFineTuned ? JSON.stringify(input.parameters) : input.userPrompt, parameters: JSON.stringify(input.parameters) } }, rawOutput, output: JSON.stringify(processedOutput), cost, model: input.modelName, metadata: input.extraData, isFineTuned } }); // Return exactly what InferenceResult expects return { detail: indexResponse._id, rawInput: input.originalInput, preprocessedInput: input, rawOutput, processedOutput, cost, retryCount: 0 }; } catch (error) { // Log the error and rethrow await this.logger.error("OpenAI API error: " + JSON.stringify(error)); throw error; } } } exports.InferenceEngine = InferenceEngine;