@lewist9x/distil-beta1
Version:
An opinionated library for managing LLM pipelines. Define, track, rate, and curate prompt–completion pairs for fine-tuning.
105 lines (104 loc) • 4.53 kB
JavaScript
;
// Default-import interop helper (TypeScript emit). Reuses an implementation
// already attached to `this` when one exists; otherwise defines a local one.
// A value flagged with `__esModule` is returned untouched; anything else
// (including null/undefined) is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (imported) {
    if (imported && imported.__esModule) {
        return imported;
    }
    return { "default": imported };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.InferenceEngine = void 0;
// src/inference.ts
const axios_1 = __importDefault(require("axios"));
const elasticsearch_1 = require("@elastic/elasticsearch");
const config_1 = require("./config");
const utils_1 = require("./utils");
const logger_1 = require("./logger");
/**
 * Reads the assistant text from a chat-completions style HTTP response.
 *
 * @param {object} response - HTTP response whose `data.choices[0].message.content`
 *   holds the completion.
 * @returns {*} The `content` of the first choice's message, returned as-is.
 * @throws {Error} When `data.choices[0].message` is missing, so callers get a
 *   descriptive failure instead of an opaque property-access TypeError.
 */
function extractCompletionText(response) {
    const payload = response == null ? undefined : response.data;
    const choices = payload == null ? undefined : payload.choices;
    const firstChoice = Array.isArray(choices) ? choices[0] : undefined;
    if (firstChoice == null || firstChoice.message == null) {
        throw new Error("Inference response did not contain a completion (expected data.choices[0].message)");
    }
    return firstChoice.message.content;
}
/**
 * Sends a prompt to a chat-completions endpoint and records the resulting
 * prompt/completion pair in Elasticsearch.
 */
class InferenceEngine {
    /**
     * @param {string} [logLevel] - Level handed to the Logger; any falsy value
     *   falls back to "DEBUG".
     */
    constructor(logLevel) {
        this.esClient = new elasticsearch_1.Client({
            node: config_1.config.elastic.host,
            auth: {
                username: config_1.config.elastic.user,
                password: config_1.config.elastic.password
            }
        });
        this.logger = new logger_1.Logger(logLevel || "DEBUG");
    }
    /**
     * Runs one inference call and indexes the result.
     *
     * Fields read from `input`: `parameters`, `pipelineName`, `systemPrompt`,
     * `userPrompt`, `modelName`, `postprocessFn`, `extraData`, `templateHash`
     * and `originalInput`.
     *
     * When `input.parameters.useFinetuned === true` the request goes to the
     * configured OpenAI fine-tune endpoint, a generic system message is used,
     * the user message is the JSON-serialized `parameters`, and
     * `postprocessFn` is skipped. Otherwise the request goes to the openLLM
     * "/chat/completions" endpoint with the supplied prompts.
     *
     * @param {object} input - Preprocessed pipeline input (fields listed above).
     * @returns {Promise<object>} `{ detail, rawInput, preprocessedInput,
     *   rawOutput, processedOutput, cost, retryCount }`, where `detail` is the
     *   `_id` of the indexed document and `retryCount` is always 0.
     * @throws {Error} When the response carries no completion; also propagates
     *   failures from the HTTP call, `postprocessFn` and the index request.
     */
    async callInference(input) {
        const parameters = input.parameters;
        // Strict `=== true`: truthy non-boolean flags do not enable fine-tuned mode.
        const isFineTuned = parameters != null && parameters.useFinetuned === true;
        const systemMessage = isFineTuned
            ? `You are an AI assistant trained to help with ${input.pipelineName.toLowerCase()} tasks.`
            : input.systemPrompt;
        // Serialize the parameters exactly once, and reuse the same string for
        // the request and the stored record so the indexed `userPrompt`
        // always matches what the model actually received.
        const userMessage = isFineTuned ? JSON.stringify(parameters) : input.userPrompt;
        const messages = [
            { role: "system", content: systemMessage },
            { role: "user", content: userMessage }
        ];
        // NOTE(review): the spread comes last, so keys in `parameters` override
        // the defaults above it (including `model` and `messages`), and
        // `useFinetuned` itself is forwarded to the API — confirm this is intended.
        const requestBody = {
            model: input.modelName,
            messages,
            max_tokens: 4000,
            temperature: 1,
            ...(parameters || {})
        };
        const apiConfig = isFineTuned ? {
            baseUrl: config_1.config.openai.baseUrl,
            endpoint: config_1.config.openai.finetune.endpoint,
            apiKey: config_1.config.openai.apiKey
        } : {
            baseUrl: config_1.config.openLLM.baseUrl,
            endpoint: "/chat/completions",
            apiKey: config_1.config.openLLM.apiKey
        };
        const requestUrl = `${apiConfig.baseUrl}${apiConfig.endpoint}`;
        await this.logger.debug("Request URL:" + requestUrl);
        await this.logger.debug("Request body:" + JSON.stringify(requestBody));
        // Only the HTTP call is wrapped in retry; parsing and indexing are not.
        const response = await (0, utils_1.retry)(() => axios_1.default.post(requestUrl, requestBody, {
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${apiConfig.apiKey}`
            }
        }));
        const rawOutput = extractCompletionText(response);
        // Fine-tuned output is used as-is; otherwise the optional hook runs.
        let processedOutput = rawOutput;
        if (!isFineTuned && input.postprocessFn) {
            processedOutput = await input.postprocessFn(rawOutput, input.extraData);
        }
        await this.logger.debug(`Processed output:${processedOutput}`);
        // Cost is derived from the locally built messages and the raw output.
        const cost = (0, utils_1.calculateCost)(JSON.stringify(messages), rawOutput);
        // One index per pipeline, named after the lower-cased pipeline name.
        const indexResponse = await this.esClient.index({
            index: input.pipelineName.toLowerCase(),
            body: {
                timestamp: new Date().toISOString(),
                pipelineName: input.pipelineName,
                pipelineHash: input.templateHash,
                input: {
                    raw: JSON.stringify(input.originalInput),
                    preprocessed: {
                        systemPrompt: systemMessage,
                        userPrompt: userMessage,
                        parameters: JSON.stringify(parameters)
                    }
                },
                rawOutput,
                output: JSON.stringify(processedOutput),
                cost,
                model: input.modelName,
                metadata: input.extraData,
                isFineTuned
            }
        });
        return {
            detail: indexResponse._id,
            rawInput: input.originalInput,
            preprocessedInput: input,
            rawOutput,
            processedOutput,
            cost,
            retryCount: 0
        };
    }
}
exports.InferenceEngine = InferenceEngine;