aaab
Version:
Advanced AI Agent Builder - A comprehensive framework for building AI agents with TypeScript support
195 lines (163 loc) • 5.47 kB
JavaScript
const logger = require('../diagnostics/logger');
// const mask = require('../diagnostics/mask'); // Unused for now
/**
 * Llama Provider
 * Entry point for sending a prompt to a local or remote Llama instance.
 *
 * Resolves the server URL (context secret, then the LLAMA_URL environment
 * variable, then http://127.0.0.1:11434), runs a health check, and then
 * dispatches to the streaming or single-shot handler based on `context.stream`.
 *
 * @param {string} modelName - Model identifier forwarded to the server.
 * @param {string} prompt - Prompt text to send.
 * @param {object} [context={}] - Per-call settings. `secrets.LLAMA_URL` and
 *   `stream` are read here; the object is passed through to the handlers.
 * @returns {Promise<object>} Whatever the selected handler resolves with.
 * @throws {Error} A "Llama is not running" error when the connection was
 *   refused, a "Model ... not found" error when the failure names a missing
 *   model, otherwise the original error. Translated errors keep the original
 *   on `cause`.
 */
async function run(modelName, prompt, context = {}) {
  logger.debug(`Llama provider called with model: ${modelName}`);

  // `||` (not `??`) on purpose: an empty-string URL should fall through to the next source.
  const configuredUrl =
    context.secrets?.LLAMA_URL || process.env.LLAMA_URL || 'http://127.0.0.1:11434';
  // Strip trailing slashes so `${baseUrl}/api/...` never produces `//api/...`.
  const baseUrl = configuredUrl.replace(/\/+$/, '');
  logger.debug(`Using Llama at: ${baseUrl}`);

  try {
    // Fail fast with a clear error if the server is unreachable.
    await checkLlamaHealth(baseUrl);
    logger.debug(`Sending request to Llama: ${modelName}, ${prompt.length} chars`);

    if (context.stream) {
      return await handleStreamingResponse(baseUrl, modelName, prompt, context);
    }
    return await handleSingleResponse(baseUrl, modelName, prompt, context);
  } catch (error) {
    logger.error(`Llama provider error: ${error.message}`);

    if (isConnectionRefused(error)) {
      throw new Error('Llama is not running. Please start Llama with: llama serve', {
        cause: error,
      });
    }
    // Matches the literal "model not found" as well as messages that name the
    // model in between, e.g. "model 'x' not found".
    if (/model\b.*\bnot found/i.test(String(error.message))) {
      throw new Error(`Model '${modelName}' not found. Pull it with: llama pull ${modelName}`, {
        cause: error,
      });
    }
    throw error;
  }
}

/**
 * Report whether an error, or any error on its `cause` chain, is a refused connection.
 * Checks both the `code` property and the message text, because a fetch failure
 * may carry the socket error code on `cause` rather than in its own message.
 */
function isConnectionRefused(error) {
  const visited = new Set();
  for (let current = error; current != null && !visited.has(current); current = current.cause) {
    visited.add(current); // guards against a cyclic cause chain
    if (current.code === 'ECONNREFUSED') return true;
    if (String(current.message ?? '').includes('ECONNREFUSED')) return true;
  }
  return false;
}
/**
 * Check if Llama is running and healthy.
 *
 * Issues a GET to `${baseUrl}/api/tags` and treats any non-OK status or
 * network failure as unhealthy.
 *
 * @param {string} baseUrl - Server base URL (no trailing slash expected).
 * @returns {Promise<void>} Resolves when the server answered with an OK status.
 * @throws {Error} "Cannot connect to Llama at ..." wrapping the underlying
 *   failure. The original error is kept on `cause`, and its cause's code or
 *   message (e.g. ECONNREFUSED) is appended to the message when present.
 */
async function checkLlamaHealth(baseUrl) {
  try {
    const response = await fetch(`${baseUrl}/api/tags`);
    if (!response.ok) {
      throw new Error(`Llama health check failed: ${response.status}`);
    }
  } catch (error) {
    // A fetch failure can have a generic message while the real reason sits on
    // `error.cause`; surface it so callers inspecting the message can see it.
    const causeDetail = String(error.cause?.code ?? error.cause?.message ?? '');
    const detail =
      causeDetail && !error.message.includes(causeDetail)
        ? `${error.message} (${causeDetail})`
        : error.message;
    throw new Error(`Cannot connect to Llama at ${baseUrl}: ${detail}`, { cause: error });
  }
}
/**
 * Handle single-shot response from Llama.
 *
 * POSTs a non-streaming generate request to `${baseUrl}/api/generate` and maps
 * the JSON reply to the provider result shape.
 *
 * @param {string} baseUrl - Server base URL.
 * @param {string} modelName - Model identifier sent as `model`.
 * @param {string} prompt - Prompt text.
 * @param {object} context - Optional sampling settings: `temperature`, `topP`,
 *   `topK`, `maxTokens`, `repeatPenalty`, `stopSequences`. Defaults apply only
 *   when a setting is null or undefined, so explicit values such as
 *   `temperature: 0` are sent as given.
 * @returns {Promise<{content: string, usage: {promptTokens: number,
 *   completionTokens: number, totalTokens: number}, model: string,
 *   finishReason: string}>}
 * @throws {Error} On a non-OK HTTP status ("Llama API error: ...") or when the
 *   reply has no `response` text.
 */
async function handleSingleResponse(baseUrl, modelName, prompt, context) {
  const requestBody = {
    model: modelName,
    prompt,
    stream: false,
    options: {
      // `??` rather than `||`: 0 is a meaningful value for these settings.
      temperature: context.temperature ?? 0.7,
      top_p: context.topP ?? 0.9,
      top_k: context.topK ?? 40,
      num_predict: context.maxTokens ?? 1000,
      repeat_penalty: context.repeatPenalty ?? 1.1,
      stop: context.stopSequences ?? [],
    },
  };

  const response = await fetch(`${baseUrl}/api/generate`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Llama API error: ${response.status} - ${errorText}`);
  }

  const data = await response.json();
  if (!data.response) {
    throw new Error('No response received from Llama');
  }
  logger.debug(`Llama response received: ${data.response.length} chars`);

  const promptTokens = data.prompt_eval_count || 0;
  const completionTokens = data.eval_count || 0;
  return {
    content: data.response,
    usage: {
      promptTokens,
      completionTokens,
      totalTokens: promptTokens + completionTokens,
    },
    model: modelName,
    // `done` false means the server did not mark the generation as finished.
    finishReason: data.done ? 'stop' : 'length',
  };
}
/**
 * Handle streaming response from Llama.
 *
 * POSTs a streaming generate request to `${baseUrl}/api/generate`, reads the
 * body incrementally, and concatenates every `response` fragment until a
 * record with `done` set arrives.
 *
 * Records are newline-delimited. Both `data: {...}` lines and bare JSON object
 * lines are accepted; any other line is ignored.
 * NOTE(review): servers exposing /api/generate commonly stream bare
 * newline-delimited JSON — confirm against the deployed server.
 *
 * @param {string} baseUrl - Server base URL.
 * @param {string} modelName - Model identifier sent as `model`.
 * @param {string} prompt - Prompt text.
 * @param {object} context - Optional sampling settings: `temperature`, `topP`,
 *   `topK`, `maxTokens`, `repeatPenalty`, `stopSequences`. Defaults apply only
 *   when a setting is null or undefined.
 * @returns {Promise<{content: string, usage: {promptTokens: number,
 *   completionTokens: number, totalTokens: number}, model: string,
 *   finishReason: string}>} `finishReason` is 'stop' when a `done` record was
 *   seen and 'length' when the stream ended without one.
 * @throws {Error} On a non-OK HTTP status; `SyntaxError` when a complete
 *   record is not valid JSON.
 */
async function handleStreamingResponse(baseUrl, modelName, prompt, context) {
  const requestBody = {
    model: modelName,
    prompt,
    stream: true,
    options: {
      // `??` rather than `||`: 0 is a meaningful value for these settings.
      temperature: context.temperature ?? 0.7,
      top_p: context.topP ?? 0.9,
      top_k: context.topK ?? 40,
      num_predict: context.maxTokens ?? 1000,
      repeat_penalty: context.repeatPenalty ?? 1.1,
      stop: context.stopSequences ?? [],
    },
  };

  const response = await fetch(`${baseUrl}/api/generate`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Llama API error: ${response.status} - ${errorText}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  const usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  let fullContent = '';
  // Text received so far that does not yet end in a newline.
  let pending = '';

  const buildResult = (finishReason) => {
    usage.totalTokens = usage.promptTokens + usage.completionTokens;
    return { content: fullContent, usage, model: modelName, finishReason };
  };

  // Applies one record to the accumulated state; returns true on the final record.
  const consumeLine = (rawLine) => {
    const line = rawLine.trim();
    let payload;
    if (line.startsWith('data:')) {
      payload = line.slice('data:'.length).trim();
    } else if (line.startsWith('{')) {
      payload = line;
    } else {
      return false;
    }
    if (!payload) return false;

    const data = JSON.parse(payload);
    if (data.response) fullContent += data.response;
    // Token counts may arrive on a record whose `response` is empty (such as
    // the final one), so read them independently of the text.
    if (data.prompt_eval_count) usage.promptTokens = data.prompt_eval_count;
    if (data.eval_count) usage.completionTokens = data.eval_count;
    return Boolean(data.done);
  };

  try {
    // eslint-disable-next-line no-constant-condition
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // `stream: true` keeps a multi-byte character split across reads intact.
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split('\n');
      // The last piece is an incomplete line (or '') — hold it for the next read.
      pending = lines.pop();
      for (const line of lines) {
        if (consumeLine(line)) return buildResult('stop');
      }
    }

    // Flush the decoder and handle a final record that had no trailing newline.
    pending += decoder.decode();
    try {
      if (consumeLine(pending)) return buildResult('stop');
    } catch (error) {
      // A truncated trailing fragment means the stream was cut short; drop it
      // and report the partial result below. Anything else is a real failure.
      if (!(error instanceof SyntaxError)) throw error;
    }
  } finally {
    reader.releaseLock();
  }

  // Fallback if stream ends unexpectedly
  return buildResult('length');
}
// The provider entry point `run` is the module's only export.
module.exports = run;