/**
 * @lewist9x/distil — compiled output of src/inference.ts
 *
 * An opinionated library for managing LLM pipelines: define, track, rate,
 * and curate prompt–completion pairs for fine-tuning.
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.InferenceEngine = void 0;
// src/inference.ts
const openai_1 = require("openai");
const elasticsearch_1 = require("@elastic/elasticsearch");
const config_1 = require("./config");
const utils_1 = require("./utils");
const logger_1 = require("./logger");
class InferenceEngine {
    /**
     * Orchestrates LLM chat-completion calls and archives every
     * prompt/completion pair (plus cost metadata) in Elasticsearch.
     *
     * @param {string} [logLevel] - Logger verbosity; defaults to "DEBUG".
     */
    constructor(logLevel) {
        // Elasticsearch client used to persist one record per inference call.
        this.esClient = new elasticsearch_1.Client({
            node: config_1.config.elastic.host,
            auth: {
                username: config_1.config.elastic.user,
                password: config_1.config.elastic.password
            }
        });
        this.logger = new logger_1.Logger(logLevel || "DEBUG");
        // Both clients share the same attribution headers; build them once.
        const defaultHeaders = {
            'HTTP-Referer': 'https://www.theclarityproject.net/',
            'X-Title': 'Unbrowse Agentic Launcher'
        };
        // Two OpenAI-compatible endpoints: `openaiClient` serves fine-tuned
        // models, `openLLMClient` serves the open/self-hosted models.
        this.openaiClient = new openai_1.OpenAI({
            apiKey: config_1.config.openai.apiKey,
            baseURL: config_1.config.openai.baseUrl,
            defaultHeaders
        });
        this.openLLMClient = new openai_1.OpenAI({
            apiKey: config_1.config.openLLM.apiKey,
            baseURL: config_1.config.openLLM.baseUrl,
            defaultHeaders
        });
    }
    /**
     * Runs a single chat completion, post-processes the output, indexes the
     * full record in Elasticsearch, and returns an InferenceResult.
     *
     * @param {Object} input - Pipeline request: pipelineName, modelName,
     *   systemPrompt, userPrompt, parameters, templateHash, originalInput,
     *   extraData, and an optional async postprocessFn(raw, extraData).
     *   `parameters.useFinetuned === true` routes to the fine-tuned client.
     * @returns {Promise<Object>} { detail, rawInput, preprocessedInput,
     *   rawOutput, processedOutput, cost, retryCount }
     * @throws Rethrows any API/indexing error after logging it.
     */
    async callInference(input) {
        const params = input.parameters;
        // Route to the fine-tuned client only on an explicit `true`.
        const isFineTuned = Boolean(params && params.useFinetuned === true);
        // Fine-tuned models get a generic task-scoped system prompt;
        // everything else uses the caller-supplied one.
        const systemMessage = isFineTuned
            ? `You are an AI assistant trained to help with ${input.pipelineName.toLowerCase()} tasks.`
            : input.systemPrompt;
        // BUGFIX: the original deleted keys directly off the caller-owned
        // `input.parameters` object. Work on a shallow copy instead, stripping
        // the control fields so only task payload reaches a fine-tuned model.
        let cleanedParams = params;
        if (params) {
            cleanedParams = Object.assign({}, params);
            delete cleanedParams.useFinetuned;
            delete cleanedParams.temperature;
            delete cleanedParams.top_p;
            delete cleanedParams.max_tokens;
        }
        const messages = [
            { role: "system", content: systemMessage },
            // Fine-tuned models consume the structured parameters as JSON;
            // other models consume the rendered user prompt.
            { role: "user", content: isFineTuned ? JSON.stringify(cleanedParams) : input.userPrompt }
        ];
        // Fixed sampling settings; caller-supplied temperature/top_p/max_tokens
        // are intentionally stripped above and not forwarded.
        const requestParams = {
            model: input.modelName,
            messages,
            max_tokens: 4000,
            temperature: 0.5,
            stream: false,
        };
        await this.logger.debug("Request params:" + JSON.stringify(requestParams));
        const client = isFineTuned ? this.openaiClient : this.openLLMClient;
        try {
            const completion = await client.chat.completions.create(requestParams);
            if (!completion.choices || completion.choices.length === 0) {
                // Defensive: some gateways return an empty choices array;
                // fail with a descriptive error rather than a TypeError.
                throw new Error("Completion response contained no choices");
            }
            const rawOutput = completion.choices[0].message.content || "";
            // Fine-tuned output is expected to be JSON (may throw SyntaxError on
            // malformed output, caught below); otherwise apply the optional
            // caller-supplied post-processor.
            const processedOutput = isFineTuned
                ? JSON.parse(rawOutput)
                : (input.postprocessFn ? await input.postprocessFn(rawOutput, input.extraData) : rawOutput);
            await this.logger.debug("Processed output:" + processedOutput);
            const cost = (0, utils_1.calculateCost)(JSON.stringify(messages), rawOutput);
            // Persist the full prompt/completion record for later rating/curation.
            const indexResponse = await this.esClient.index({
                index: input.pipelineName.toLowerCase(),
                body: {
                    timestamp: new Date().toISOString(),
                    pipelineName: input.pipelineName,
                    pipelineHash: input.templateHash,
                    input: {
                        raw: JSON.stringify(input.originalInput),
                        preprocessed: {
                            systemPrompt: systemMessage,
                            userPrompt: isFineTuned ? JSON.stringify(cleanedParams) : input.userPrompt,
                            parameters: JSON.stringify(cleanedParams)
                        }
                    },
                    rawOutput,
                    output: JSON.stringify(processedOutput),
                    cost,
                    model: input.modelName,
                    metadata: input.extraData,
                    isFineTuned
                }
            });
            // Shape matches what InferenceResult expects.
            return {
                detail: indexResponse._id,
                rawInput: input.originalInput,
                preprocessedInput: input,
                rawOutput,
                processedOutput,
                cost,
                retryCount: 0
            };
        }
        catch (error) {
            // BUGFIX: JSON.stringify(new Error(...)) serializes to "{}" because
            // message/stack are non-enumerable — log the stack/message instead.
            const detail = error instanceof Error
                ? (error.stack || error.message)
                : JSON.stringify(error);
            await this.logger.error("OpenAI API error: " + detail);
            throw error;
        }
    }
}
exports.InferenceEngine = InferenceEngine;