termcode

Superior terminal AI coding agent with enterprise-grade security, intelligent error recovery, performance monitoring, and a plugin system - an advanced Claude Code alternative

427 lines (405 loc) 15.1 kB
import { promises as fs } from "node:fs";
import path from "node:path";
import os from "node:os";
import { runShell } from "../tools/shell.js";
import { log } from "../util/logging.js";

const fineTuneDir = path.join(os.homedir(), ".termcode", "fine-tuning");
const jobsFile = path.join(fineTuneDir, "jobs.json");

// Ensure the fine-tuning directory exists
async function ensureFineTuneDir() {
  await fs.mkdir(fineTuneDir, { recursive: true });
}

// Load existing fine-tune jobs
export async function loadFineTuneJobs() {
  try {
    await ensureFineTuneDir();
    const content = await fs.readFile(jobsFile, "utf8");
    return JSON.parse(content);
  } catch (error) {
    // Missing or unreadable jobs file; start fresh
    return [];
  }
}

// Save fine-tune jobs
export async function saveFineTuneJobs(jobs) {
  try {
    await ensureFineTuneDir();
    await fs.writeFile(jobsFile, JSON.stringify(jobs, null, 2), "utf8");
  } catch (error) {
    log.error("Failed to save fine-tune jobs:", error);
  }
}

// Generate training data from session history
export async function generateTrainingData(repoPath, outputPath) {
  const { loadSession } = await import("../state/session.js");
  const session = await loadSession(repoPath);
  if (!session || session.recentTasks.length === 0) {
    throw new Error("No session data found for training");
  }

  const trainingData = [];
  const sessionLogPath = path.join(repoPath, ".termcode", "session.log");

  try {
    // Try to read detailed session logs
    const logContent = await fs.readFile(sessionLogPath, "utf8");
    const logEntries = logContent.split("\n").filter(line => line.trim());
    for (const line of logEntries) {
      try {
        const entry = JSON.parse(line);
        if (entry.task && entry.diff) {
          trainingData.push({
            input: `Task: ${entry.task}\n\nContext: Repository code analysis needed for implementation.`,
            output: entry.diff,
            system: "You are an expert software developer. Generate precise code changes in unified diff format based on the given task and repository context.",
            metadata: {
              timestamp: entry.timestamp,
              branchName: entry.branchName,
              filesChanged: entry.applied?.length || 0
            }
          });
        }
      } catch {
        // Skip invalid log entries
        continue;
      }
    }
  } catch (error) {
    // Fallback to session tasks only
    log.warn("No detailed logs found, using basic task data");
    for (const task of session.recentTasks) {
      trainingData.push({
        input: `Task: ${task}`,
        output: "# Implementation would go here based on repository context",
        system: "You are a software developer assistant. Implement the requested changes.",
        metadata: { source: "session_tasks" }
      });
    }
  }

  if (trainingData.length === 0) {
    throw new Error("No suitable training data found");
  }

  // Save training data as JSONL chat transcripts
  const dataPath = outputPath || path.join(fineTuneDir, `training-data-${Date.now()}.jsonl`);
  const jsonlContent = trainingData.map(entry => JSON.stringify({
    messages: [
      { role: "system", content: entry.system || "You are a helpful coding assistant." },
      { role: "user", content: entry.input },
      { role: "assistant", content: entry.output }
    ],
    metadata: entry.metadata
  })).join("\n");
  await fs.writeFile(dataPath, jsonlContent, "utf8");

  log.success(`Generated training data: ${trainingData.length} examples → ${dataPath}`);
  return { dataPath, entries: trainingData.length };
}

// Start fine-tuning job with Ollama
async function startOllamaFineTuning(config) {
  log.info("Starting Ollama fine-tuning...");

  // Check if Ollama is running
  try {
    const result = await runShell(['ollama', 'list'], process.cwd());
    if (!result.ok) {
      throw new Error("Ollama is not running. Start with: ollama serve");
    }
  } catch (error) {
    throw new Error("Ollama is not available. Install from https://ollama.ai");
  }

  // Create Modelfile for fine-tuning
  const modelfilePath = path.join(config.outputDir, "Modelfile");
  const modelfileContent = `FROM ${config.baseModel}

# Fine-tuned model for TermCode
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Response }}<|eot_id|>"""

PARAMETER temperature 0.2
PARAMETER top_p 0.9
PARAMETER stop <|eot_id|>

SYSTEM """You are an expert software developer assistant trained on TermCode usage patterns. You generate precise, production-ready code changes based on natural language tasks and repository context."""
`;
  await fs.writeFile(modelfilePath, modelfileContent, "utf8");

  // Create the model
  const createResult = await runShell([
    'ollama', 'create', config.name, '-f', modelfilePath
  ], config.outputDir);
  if (!createResult.ok) {
    throw new Error(`Failed to create Ollama model: ${'error' in createResult ? createResult.error : 'Unknown error'}`);
  }

  log.success(`Created Ollama model: ${config.name}`);
  return config.name;
}

// Start fine-tuning job with HuggingFace
async function startHuggingFaceFineTuning(config) {
  log.info("Starting HuggingFace fine-tuning...");

  // Check if required tools are available
  try {
    const result = await runShell(['python', '--version'], process.cwd());
    if (!result.ok) {
      throw new Error("Python is required for HuggingFace fine-tuning");
    }
  } catch (error) {
    throw new Error("Python is not available");
  }

  // Create training script
  const trainingScript = `
import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from datasets import Dataset
import logging

logging.basicConfig(level=logging.INFO)

def load_dataset(file_path):
    with open(file_path, 'r') as f:
        data = [json.loads(line) for line in f]
    inputs = []
    for item in data:
        messages = item['messages']
        conversation = ""
        for msg in messages:
            conversation += f"<|{msg['role']}|>\\n{msg['content']}\\n"
        inputs.append(conversation)
    return Dataset.from_dict({"text": inputs})

def main():
    # Load model and tokenizer
    model_name = "${config.baseModel}"
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right")
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )

    # Load and prepare dataset
    dataset = load_dataset("${config.datasetPath}")

    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=${config.maxSeqLength},
            padding=True
        )

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    # Training arguments
    training_args = TrainingArguments(
        output_dir="${config.outputDir}",
        num_train_epochs=${config.epochs},
        per_device_train_batch_size=${config.batchSize},
        learning_rate=${config.learningRate},
        save_steps=500,
        save_total_limit=2,
        prediction_loss_only=True,
        logging_steps=100,
        logging_dir="${config.outputDir}/logs",
    )

    # Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    # Start training
    print("Starting fine-tuning...")
    trainer.train()

    # Save model
    model.save_pretrained("${config.outputDir}/final_model")
    tokenizer.save_pretrained("${config.outputDir}/final_model")
    print(f"Model saved to ${config.outputDir}/final_model")

if __name__ == "__main__":
    main()
`;
  const scriptPath = path.join(config.outputDir, "train.py");
  await fs.writeFile(scriptPath, trainingScript, "utf8");

  // Install dependencies
  log.step("Installing dependencies", "pip install transformers datasets torch");
  const installResult = await runShell([
    'pip', 'install', 'transformers', 'datasets', 'torch', 'accelerate'
  ], config.outputDir);
  if (!installResult.ok) {
    log.warn("Failed to install dependencies, continuing anyway");
  }

  // Run the training script (awaited, so this blocks until the python process exits)
  log.step("Starting training", "This may take a while...");
  const trainResult = await runShell([
    'python', scriptPath
  ], config.outputDir);
  if (!trainResult.ok) {
    throw new Error(`Training failed: ${'error' in trainResult ? trainResult.error : 'Unknown error'}`);
  }

  return path.join(config.outputDir, "final_model");
}

// Start a fine-tuning job
export async function startFineTuning(config) {
  await ensureFineTuneDir();

  // Create job
  const job = {
    id: `ft-${Date.now()}`,
    name: config.name,
    status: "preparing",
    config,
    startTime: new Date().toISOString(),
    progress: 0,
    logs: []
  };

  // Create output directory
  await fs.mkdir(config.outputDir, { recursive: true });

  // Save job
  const jobs = await loadFineTuneJobs();
  jobs.push(job);
  await saveFineTuneJobs(jobs);

  // Run training (awaited; status is tracked on the job record)
  try {
    job.status = "training";
    job.logs.push(`Starting ${config.provider} fine-tuning...`);
    let modelPath;
    switch (config.provider) {
      case "ollama":
        modelPath = await startOllamaFineTuning(config);
        break;
      case "huggingface":
        modelPath = await startHuggingFaceFineTuning(config);
        break;
      default:
        throw new Error(`Unsupported provider: ${config.provider}`);
    }
    job.status = "completed";
    job.endTime = new Date().toISOString();
    job.progress = 100;
    job.logs.push(`Training completed. Model saved to: ${modelPath}`);
    log.success(`Fine-tuning completed: ${config.name}`);
  } catch (error) {
    job.status = "failed";
    job.endTime = new Date().toISOString();
    job.logs.push(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
    log.error("Fine-tuning failed:", error);
  }

  // Update job
  await saveFineTuneJobs(jobs);
  return job;
}

// List available fine-tuned models
export async function listFineTunedModels() {
  const jobs = await loadFineTuneJobs();
  const models = [];

  for (const job of jobs.filter(j => j.status === "completed")) {
    models.push({
      name: job.name,
      provider: job.config.provider,
      baseModel: job.config.baseModel,
      status: job.status,
      createdAt: job.startTime,
    });
  }

  // Also check Ollama for locally created models
  try {
    const result = await runShell(['ollama', 'list'], process.cwd());
    if (result.ok) {
      const lines = result.data.stdout.split('\n').slice(1); // Skip header
      for (const line of lines) {
        if (line.trim()) {
          const parts = line.trim().split(/\s+/);
          if (parts.length >= 2 && parts[0].startsWith('termcode-')) {
            models.push({
              name: parts[0],
              provider: 'ollama',
              baseModel: 'unknown',
              status: 'completed',
              createdAt: 'unknown',
              size: parts[1] || 'unknown'
            });
          }
        }
      }
    }
  } catch (error) {
    // Ollama not available
  }

  return models;
}

// Delete a fine-tuned model
export async function deleteFineTunedModel(name, provider) {
  try {
    if (provider === 'ollama') {
      const result = await runShell(['ollama', 'rm', name], process.cwd());
      return result.ok;
    } else if (provider === 'huggingface') {
      // Remove from jobs list and delete directory
      const jobs = await loadFineTuneJobs();
      const job = jobs.find(j => j.name === name);
      if (job) {
        try {
          await fs.rm(job.config.outputDir, { recursive: true, force: true });
        } catch {
          // Ignore failures when removing the output directory
        }
        // Remove from jobs
        const updatedJobs = jobs.filter(j => j.name !== name);
        await saveFineTuneJobs(updatedJobs);
        return true;
      }
    }
    return false;
  } catch (error) {
    log.error(`Failed to delete model ${name}:`, error);
    return false;
  }
}

// Get training recommendations based on usage patterns
export async function getTrainingRecommendations(repoPath) {
  const { loadSession } = await import("../state/session.js");
  const session = await loadSession(repoPath);

  if (!session) {
    return {
      recommended: false,
      reasoning: "No session data available for analysis",
      estimatedData: 0,
      requirements: []
    };
  }

  const taskCount = session.recentTasks.length;
  if (taskCount < 10) {
    return {
      recommended: false,
      reasoning: `Only ${taskCount} tasks recorded. Fine-tuning requires at least 10-20 examples for meaningful improvement.`,
      estimatedData: taskCount,
      requirements: [
        "Use TermCode for more tasks to build training data",
        "Minimum 10-20 diverse coding tasks recommended"
      ]
    };
  }

  return {
    recommended: true,
    reasoning: `${taskCount} tasks recorded. This provides good training data for a custom model tailored to your coding patterns.`,
    estimatedData: taskCount,
    requirements: [
      "Ollama installed for local fine-tuning",
      "Or Python + transformers for HuggingFace training",
      "Sufficient disk space for model storage (2-8GB)",
      "Time for training process (30min - 2hours)"
    ]
  };
}
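
For orientation, here is a minimal sketch of how a caller might drive this module end to end. It is not part of the published file: the import path ./finetune.js, the model name termcode-custom, and all config values are illustrative, though the field names (name, provider, baseModel, datasetPath, outputDir, epochs, batchSize, learningRate, maxSeqLength) are exactly the properties the module reads.

// Hypothetical usage sketch; values are illustrative, not defaults shipped by termcode.
import {
  generateTrainingData,
  startFineTuning,
  getTrainingRecommendations,
  listFineTunedModels
} from "./finetune.js"; // assumed path to this module

const repoPath = process.cwd();

// 1. Check whether enough session history exists to justify training.
const rec = await getTrainingRecommendations(repoPath);
if (!rec.recommended) {
  console.log(rec.reasoning);
  process.exit(0);
}

// 2. Export session history as a JSONL file of chat transcripts.
const { dataPath, entries } = await generateTrainingData(repoPath);
console.log(`${entries} examples written to ${dataPath}`);

// 3. Start a local Ollama job; each field below is read by startFineTuning
//    or interpolated into the generated Modelfile / train.py.
const job = await startFineTuning({
  name: "termcode-custom",       // illustrative model name
  provider: "ollama",            // or "huggingface"
  baseModel: "llama3",           // illustrative base model tag
  datasetPath: dataPath,         // used by the huggingface path
  outputDir: "/tmp/termcode-ft", // illustrative output directory
  epochs: 3,
  batchSize: 2,
  learningRate: 2e-5,
  maxSeqLength: 2048
});
console.log(`Job ${job.id} finished with status: ${job.status}`);

// 4. Enumerate completed models (jobs.json entries plus any termcode-* Ollama models).
console.log(await listFineTunedModels());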