cmte
Version:
Design by Committee™ except it's just you and LLMs
238 lines (226 loc) • 8.15 kB
JavaScript
import { BaseLLMClient } from "./base-llm-client.js";
import { logger } from '../../utils/logger.js';
/**
 * LLM client adapter for a locally hosted server reachable over HTTP through
 * `/v1/models` and `/v1/chat/completions`. The request and response shapes
 * used here follow the OpenAI chat-completions format.
 *
 * Several members used below are not defined in this class and are presumably
 * inherited from BaseLLMClient: `this.config`, `withExponentialBackoff()` and
 * `enqueueRequest()` - confirm against the base class.
 */
export class LocalLLMAdapter extends BaseLLMClient {
  /**
   * @param {object} [config] Adapter options. Recognised here: `model`,
   *   `apiDryRun`, `localLLMUrl`; everything else is forwarded to the base
   *   class untouched. May be omitted entirely.
   * @throws {Error} When the resolved server URL is empty outside of tests.
   */
  constructor(config = {}) {
    // Tolerate both `undefined` (default parameter) and an explicit `null`.
    const options = config ?? {};

    super({
      ...options,
      model: options.model || process.env.LOCAL_LLM_MODEL || 'test-model',
      apiDryRun: options.apiDryRun || false,
    });

    const configuredUrl =
      options.localLLMUrl || process.env.LOCAL_LLM_URL || 'http://localhost:1234';
    // Endpoints are appended as `/v1/...`, so drop trailing slashes to avoid
    // producing `//v1/...` paths.
    this.localLLMUrl = String(configuredUrl).replace(/\/+$/, '');

    if (!this.localLLMUrl && process.env.NODE_ENV !== 'test') {
      throw new Error('LLM URL (localLLMUrl or LOCAL_LLM_URL) is not set');
    }

    // Read an integer environment variable, falling back when it is unset,
    // empty, or not a number (parseInt would otherwise yield NaN).
    const envInt = (name, fallback) => {
      const parsed = Number.parseInt(process.env[name] || '', 10);
      return Number.isNaN(parsed) ? fallback : parsed;
    };

    // NOTE(review): nothing in this class reads retryConfig; presumably the
    // base class's backoff logic does - confirm.
    this.retryConfig = {
      maxRetries: envInt('LOCAL_LLM_MAX_RETRIES', 3),
      initialDelayMs: envInt('LOCAL_LLM_INITIAL_DELAY_MS', 1000),
      maxDelayMs: envInt('LOCAL_LLM_MAX_DELAY_MS', 5000),
    };

    logger.debug('Initialized local LLM adapter', {
      localLLMUrl: this.localLLMUrl,
      retryConfig: this.retryConfig,
      model: this.config.model,
      apiDryRun: this.config.apiDryRun,
    });
  }

  /**
   * Check that the server answers `/v1/models` and lists the configured model.
   * Never throws: every failure is logged as a warning and reported as `false`.
   * @returns {Promise<boolean>} `true` only when the configured model is listed.
   */
  async healthCheck() {
    try {
      logger.debug('Starting local LLM health check', {
        url: this.localLLMUrl,
        model: this.config.model,
      });

      const response = await fetch(`${this.localLLMUrl}/v1/models`);
      if (!response.ok) {
        logger.warn('Local LLM health check failed - server responded with error', {
          status: response.status,
          statusText: response.statusText,
        });
        return false;
      }

      const payload = await response.json();
      // Anything other than an array of models counts as "model not found".
      const models = Array.isArray(payload?.data) ? payload.data : null;
      const modelExists =
        models !== null && models.some((model) => model?.id === this.config.model);

      if (!modelExists) {
        logger.warn('Local LLM health check failed - model not found', {
          model: this.config.model,
          availableModels: models?.map((m) => m?.id),
        });
        return false;
      }

      logger.debug('Local LLM health check succeeded', {
        model: this.config.model,
      });
      return true;
    } catch (error) {
      // Reached for fetch rejections and for unparseable response bodies.
      logger.warn('Local LLM health check failed - network error', {
        error: error.message,
        url: this.localLLMUrl,
      });
      return false;
    }
  }

  /**
   * Complete a conversation with the local LLM
   * @param messages Array of message objects with role and content
   * @param config Optional configuration overrides
   * @returns Promise resolving to LLM's response text (or, in dry-run mode,
   *   to a compressed preview of the prompt without contacting the server)
   */
  async completeMessages(messages, config = {}) {
    const mergedConfig = this.getMergedConfig(config);

    // Dry run: describe what would be sent instead of sending it.
    if (mergedConfig.apiDryRun) {
      const preview = this.createCompressedPrompt(messages);
      return `# Compressed Prompt for API Dry Run\n\n${preview}`;
    }

    const performRequest = async () => {
      const response = await this.withExponentialBackoff(async () => {
        const httpResponse = await fetch(`${this.localLLMUrl}/v1/chat/completions`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            model: mergedConfig.model,
            messages,
            max_tokens: mergedConfig.maxTokens,
            temperature: mergedConfig.temperature,
            stream: !mergedConfig.noStreaming,
          }),
        });

        if (!httpResponse.ok) {
          // The raw Response is thrown on purpose and left unwrapped so that
          // whatever withExponentialBackoff does with it keeps working.
          // NOTE(review): presumably the base class inspects its status.
          throw httpResponse;
        }
        return httpResponse;
      });

      if (!mergedConfig.noStreaming) {
        return this.handleStreamingResponse(response);
      }

      const data = await response.json();
      const message = data?.choices?.[0]?.message;
      if (!message) {
        throw new Error('Local LLM response did not contain a message');
      }
      return message.content;
    };

    // Enqueue the request to be processed when a slot is available
    return this.enqueueRequest(performRequest);
  }

  /**
   * Send a single user prompt; shorthand for completeMessages with one message.
   * @param prompt Text sent as the content of a `user` message
   * @param config Optional configuration overrides
   * @returns Promise resolving to LLM's response
   */
  async sendMessage(prompt, config) {
    const messages = [{ role: 'user', content: prompt }];
    return this.completeMessages(messages, config);
  }

  /**
   * Build the short preview used by dry-run mode: for each message, its role,
   * the first line of its content, and a `language: lineCount` summary of any
   * fenced code blocks it contains.
   * @param messages Array of message objects with role and content
   * @returns {string} The preview text
   */
  createCompressedPrompt(messages) {
    // Content is normally a string, but may be missing or an array of parts;
    // reduce all of these to text so the string operations below cannot throw.
    const toText = (content) => {
      if (typeof content === 'string') return content;
      if (content == null) return '';
      if (Array.isArray(content)) {
        return content
          .map((part) => {
            if (typeof part === 'string') return part;
            return typeof part?.text === 'string' ? part.text : '';
          })
          .join('');
      }
      return String(content);
    };

    let preview = '';
    for (const message of messages) {
      const content = toText(message.content);
      const codeBlocks = content.match(/```[\s\S]*?```/g) || [];

      const codeBlockSummaries = codeBlocks.map((block) => {
        const blockLines = block.split('\n');
        // A fence opened and closed on one line has no language tag.
        const language =
          blockLines.length > 1 ? blockLines[0].replace('```', '').trim() : '';
        // Exclude the opening and closing fence lines; never go negative.
        const lineCount = Math.max(0, blockLines.length - 2);
        return `${language}: ${lineCount}`;
      });

      preview += `[${message.role}] ${content.split('\n')[0]}...\n`;
      if (codeBlockSummaries.length > 0) {
        preview += `Code blocks: ${codeBlockSummaries.join(', ')}\n`;
      }
      preview += '\n';
    }
    return preview;
  }

  /**
   * Read a streamed chat-completion response made of `data:` lines and
   * concatenate the content carried by each event.
   *
   * A complete `data:` line that is not valid JSON is appended verbatim. A
   * trailing line with no newline is processed if it parses and dropped if it
   * does not, since it may be a truncated event.
   * @param response Fetch response whose body is a readable byte stream
   * @returns {Promise<string>} The accumulated text (empty if none arrived)
   * @throws {Error} When the response has no body to read
   */
  async handleStreamingResponse(response) {
    if (!response?.body) {
      throw new Error('Local LLM streaming response has no body');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let result = '';
    let buffer = '';

    // Handle one line of the stream, appending any content to `result`.
    const consumeLine = (line, allowRawFallback = true) => {
      const trimmed = line.trim();
      if (!trimmed) {
        logger.debug('Skipping empty line');
        return;
      }
      // The space after the colon is optional, so match on `data:` only.
      if (!trimmed.startsWith('data:')) {
        logger.debug('Skipping non-data line:', { line: trimmed });
        return;
      }

      const data = trimmed.slice(5).trim();
      logger.debug('Processing data line:', { data });
      if (data === '[DONE]') {
        logger.debug('Received [DONE] message');
        return;
      }

      let parsed;
      try {
        parsed = JSON.parse(data);
      } catch (e) {
        if (!allowRawFallback) {
          logger.debug('Discarding unparseable trailing data:', { data, error: e });
          return;
        }
        // If not valid JSON and not a control message, treat as raw content
        logger.debug('Failed to parse JSON:', { data, error: e });
        result += data;
        logger.debug('Added raw data to result:', { data, result });
        return;
      }

      logger.debug('Successfully parsed JSON:', { parsed });
      // `parsed` can be any JSON value, including null, hence `parsed?.`.
      const content =
        parsed?.choices?.[0]?.delta?.content || parsed?.choices?.[0]?.message?.content;
      if (content) {
        result += content;
        logger.debug('Added content to result:', { content, result });
      } else {
        logger.debug('No content found in parsed data:', { parsed });
      }
    };

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // `stream: true` keeps a multi-byte character that straddles two
        // chunks intact instead of decoding each half as garbage.
        const chunk = decoder.decode(value, { stream: true });
        logger.debug('Raw chunk:', { chunk });
        buffer += chunk;

        const lines = buffer.split('\n');
        // Keep the last partial line in the buffer
        buffer = lines.pop() ?? '';
        for (const line of lines) {
          consumeLine(line);
        }
      }

      // Flush the decoder, then handle a final event that arrived without a
      // terminating newline.
      buffer += decoder.decode();
      if (buffer) {
        consumeLine(buffer, false);
      }
    } finally {
      reader.releaseLock();
    }

    if (!result) {
      logger.debug('No content received from streaming response');
    } else {
      logger.debug('Final result:', { result });
    }
    return result;
  }

  /**
   * POST a chat-completion request and return the raw fetch response.
   *
   * Unlike completeMessages, this always asks for a streamed reply and does
   * not check `response.ok`. It is not called from within this class.
   * @param messages Array of message objects; only role and content are sent
   * @param config Object supplying `model`, `temperature` and `maxTokens`
   * @returns Promise resolving to the fetch response
   */
  async makeRequest(messages, config) {
    const requestBody = {
      model: config.model,
      messages: messages.map(({ role, content }) => ({ role, content })),
      temperature: config.temperature,
      max_tokens: config.maxTokens,
      stream: true,
    };

    // Log the request body being sent
    logger.debug(`Sending request to local LLM: ${this.localLLMUrl}/v1/chat/completions`);
    logger.debug('Request Body:', JSON.stringify(requestBody, null, 2));

    return await this.withExponentialBackoff(() =>
      fetch(`${this.localLLMUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(requestBody),
      })
    );
  }

  /**
   * Combine the instance configuration with per-call overrides.
   *
   * Overrides win for ordinary keys. `model`, `apiDryRun` and `noStreaming`
   * are resolved with `||`, so a falsy override falls back to the instance
   * value: a per-call `false` cannot switch off a flag that is `true` on the
   * instance.
   * @param config Optional per-call overrides (may be null/undefined)
   * @returns {object} The merged configuration
   */
  getMergedConfig(config) {
    return {
      ...this.config,
      ...config,
      model: config?.model || this.config.model,
      apiDryRun: config?.apiDryRun || this.config.apiDryRun || false,
      noStreaming: config?.noStreaming || this.config.noStreaming || false,
    };
  }
}
/**
 * Factory for the local LLM adapter.
 * @param options Configuration object handed unchanged to the
 *   LocalLLMAdapter constructor
 * @returns {LocalLLMAdapter} A newly constructed adapter
 */
export default function getLocalLLMClient(options) {
  const adapter = new LocalLLMAdapter(options);
  return adapter;
}