termcode
Terminal AI coding agent with enterprise-grade security, intelligent error recovery, performance monitoring, and a plugin system - an advanced Claude Code alternative
import { promises as fs } from "node:fs";
import path from "node:path";
import os from "node:os";
import { runShell } from "../tools/shell.js";
import { log } from "../util/logging.js";
const fineTuneDir = path.join(os.homedir(), ".termcode", "fine-tuning");
const jobsFile = path.join(fineTuneDir, "jobs.json");
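// With the paths above, job metadata is persisted to
// ~/.termcode/fine-tuning/jobs.json in the user's home directory.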
// Ensure fine-tuning directory exists
async function ensureFineTuneDir() {
await fs.mkdir(fineTuneDir, { recursive: true });
}
// Load existing fine-tune jobs
export async function loadFineTuneJobs() {
try {
await ensureFineTuneDir();
const content = await fs.readFile(jobsFile, "utf8");
return JSON.parse(content);
}
    catch {
        // Missing or unparsable jobs file: start fresh with an empty list
        return [];
}
}
// Save fine-tune jobs
export async function saveFineTuneJobs(jobs) {
try {
await ensureFineTuneDir();
await fs.writeFile(jobsFile, JSON.stringify(jobs, null, 2), "utf8");
}
catch (error) {
log.error("Failed to save fine-tune jobs:", error);
}
}
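// Example (illustrative sketch, not part of the module): round-trip the jobs
// file. The import path is an assumption.
//
//   import { loadFineTuneJobs, saveFineTuneJobs } from "./fine-tuning.js";
//   const jobs = await loadFineTuneJobs();   // [] on first run or parse error
//   jobs.push({ id: `ft-${Date.now()}`, name: "demo", status: "preparing" });
//   await saveFineTuneJobs(jobs);            // pretty-printed JSON on disk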
// Generate training data from session history
export async function generateTrainingData(repoPath, outputPath) {
const { loadSession } = await import("../state/session.js");
const session = await loadSession(repoPath);
    if (!session || !session.recentTasks?.length) {
throw new Error("No session data found for training");
}
const trainingData = [];
const sessionLogPath = path.join(repoPath, ".termcode", "session.log");
try {
// Try to read detailed session logs
const logContent = await fs.readFile(sessionLogPath, "utf8");
const logEntries = logContent.split("\n").filter(line => line.trim());
for (const line of logEntries) {
try {
const entry = JSON.parse(line);
if (entry.task && entry.diff) {
trainingData.push({
input: `Task: ${entry.task}\n\nContext: Repository code analysis needed for implementation.`,
output: entry.diff,
system: "You are an expert software developer. Generate precise code changes in unified diff format based on the given task and repository context.",
metadata: {
timestamp: entry.timestamp,
branchName: entry.branchName,
filesChanged: entry.applied?.length || 0
}
});
}
}
catch {
// Skip invalid log entries
continue;
}
}
}
catch (error) {
        // Fall back to session task titles only; these entries use placeholder
        // outputs and carry far less training signal than real diffs
log.warn("No detailed logs found, using basic task data");
for (const task of session.recentTasks) {
trainingData.push({
input: `Task: ${task}`,
output: "# Implementation would go here based on repository context",
system: "You are a software developer assistant. Implement the requested changes.",
metadata: {
source: "session_tasks"
}
});
}
}
if (trainingData.length === 0) {
throw new Error("No suitable training data found");
}
// Save training data
const dataPath = outputPath || path.join(fineTuneDir, `training-data-${Date.now()}.jsonl`);
const jsonlContent = trainingData.map(entry => JSON.stringify({
messages: [
{ role: "system", content: entry.system || "You are a helpful coding assistant." },
{ role: "user", content: entry.input },
{ role: "assistant", content: entry.output }
],
metadata: entry.metadata
})).join("\n");
await fs.writeFile(dataPath, jsonlContent, "utf8");
log.success(`Generated training data: ${trainingData.length} examples → ${dataPath}`);
return {
dataPath,
entries: trainingData.length
};
}
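// Example (illustrative): each line of the emitted JSONL file is a chat-style
// record shaped like the following (values abbreviated and hypothetical):
//
//   {"messages":[
//     {"role":"system","content":"You are an expert software developer. ..."},
//     {"role":"user","content":"Task: add input validation\n\nContext: ..."},
//     {"role":"assistant","content":"--- a/src/form.js\n+++ b/src/form.js\n..."}
//   ],"metadata":{"timestamp":"...","branchName":"...","filesChanged":2}}
//
//   const { dataPath, entries } = await generateTrainingData(process.cwd());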
// Start an Ollama "fine-tuning" job (Modelfile-based customization; see note below)
async function startOllamaFineTuning(config) {
log.info("Starting Ollama fine-tuning...");
    // Check that Ollama is installed and running. The availability check is done
    // outside the try block so the specific "not running" error is not swallowed
    // by the catch and replaced with the generic "not available" one.
    let ollamaRunning = false;
    try {
        const result = await runShell(['ollama', 'list'], process.cwd());
        ollamaRunning = result.ok;
    }
    catch {
        throw new Error("Ollama is not available. Install from https://ollama.ai");
    }
    if (!ollamaRunning) {
        throw new Error("Ollama is not running. Start with: ollama serve");
    }
    // Create a Modelfile for the customized model. Note: `ollama create` layers
    // a prompt template, parameters, and system prompt on top of the base model;
    // it does not train weights on the generated dataset. The template below
    // uses Llama 3-style header tokens, so it assumes a Llama 3-family base model.
const modelfilePath = path.join(config.outputDir, "Modelfile");
const modelfileContent = `FROM ${config.baseModel}
# Fine-tuned model for TermCode
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
{{ .Response }}<|eot_id|>"""
PARAMETER temperature 0.2
PARAMETER top_p 0.9
PARAMETER stop "<|eot_id|>"
SYSTEM """You are an expert software developer assistant trained on TermCode usage patterns. You generate precise, production-ready code changes based on natural language tasks and repository context."""
`;
await fs.writeFile(modelfilePath, modelfileContent, "utf8");
// Create the model
const createResult = await runShell([
'ollama', 'create', config.name, '-f', modelfilePath
], config.outputDir);
if (!createResult.ok) {
throw new Error(`Failed to create Ollama model: ${'error' in createResult ? createResult.error : 'Unknown error'}`);
}
log.success(`Created Ollama model: ${config.name}`);
return config.name;
}
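// Example (illustrative): once the model is created it shows up in
// `ollama list` and can be used directly from the shell. The model name
// "termcode-custom" is hypothetical; it is whatever config.name was.
//
//   ollama run termcode-custom "Task: add a retry wrapper around fetch"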
// Start fine-tuning job with HuggingFace
async function startHuggingFaceFineTuning(config) {
log.info("Starting HuggingFace fine-tuning...");
    // Check that Python is available. As with the Ollama check above, test the
    // result outside the try block so the specific error is not swallowed.
    let pythonAvailable = false;
    try {
        const result = await runShell(['python', '--version'], process.cwd());
        pythonAvailable = result.ok;
    }
    catch {
        throw new Error("Python is not available");
    }
    if (!pythonAvailable) {
        throw new Error("Python is required for HuggingFace fine-tuning");
    }
// Create training script
const trainingScript = `
import json
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TrainingArguments,
Trainer,
DataCollatorForLanguageModeling
)
from datasets import Dataset
import logging
logging.basicConfig(level=logging.INFO)
def load_dataset(file_path):
with open(file_path, 'r') as f:
data = [json.loads(line) for line in f]
inputs = []
for item in data:
messages = item['messages']
conversation = ""
for msg in messages:
conversation += f"<|{msg['role']}|>\\n{msg['content']}\\n"
inputs.append(conversation)
return Dataset.from_dict({"text": inputs})
def main():
# Load model and tokenizer
model_name = "${config.baseModel}"
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map="auto"
)
# Load and prepare dataset
dataset = load_dataset("${config.datasetPath}")
def tokenize_function(examples):
return tokenizer(
examples["text"],
truncation=True,
max_length=${config.maxSeqLength},
padding=True
)
tokenized_dataset = dataset.map(tokenize_function, batched=True)
# Training arguments
training_args = TrainingArguments(
output_dir="${config.outputDir}",
num_train_epochs=${config.epochs},
per_device_train_batch_size=${config.batchSize},
learning_rate=${config.learningRate},
save_steps=500,
save_total_limit=2,
prediction_loss_only=True,
logging_steps=100,
logging_dir="${config.outputDir}/logs",
)
# Data collator
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm=False
)
# Trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized_dataset,
data_collator=data_collator,
)
# Start training
print("Starting fine-tuning...")
trainer.train()
# Save model
model.save_pretrained("${config.outputDir}/final_model")
tokenizer.save_pretrained("${config.outputDir}/final_model")
print(f"Model saved to ${config.outputDir}/final_model")
if __name__ == "__main__":
main()
`;
const scriptPath = path.join(config.outputDir, "train.py");
await fs.writeFile(scriptPath, trainingScript, "utf8");
// Install dependencies
log.step("Installing dependencies", "pip install transformers datasets torch");
const installResult = await runShell([
'pip', 'install', 'transformers', 'datasets', 'torch', 'accelerate'
], config.outputDir);
if (!installResult.ok) {
log.warn("Failed to install dependencies, continuing anyway");
}
    // Start training. Note this awaits the Python process, so the call blocks
    // until training finishes rather than running in the background.
log.step("Starting training", "This may take a while...");
const trainResult = await runShell([
'python', scriptPath
], config.outputDir);
if (!trainResult.ok) {
throw new Error(`Training failed: ${'error' in trainResult ? trainResult.error : 'Unknown error'}`);
}
return path.join(config.outputDir, "final_model");
}
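// Example (illustrative sketch): a config shape accepted by
// startHuggingFaceFineTuning, inferred from the fields interpolated above.
// All values are placeholders; "gpt2" is just a small causal LM that exists
// on the Hub (its context window is 1024 tokens, hence maxSeqLength below).
//
//   const config = {
//       name: "termcode-hf-demo",
//       baseModel: "gpt2",
//       datasetPath: "/path/to/training-data.jsonl",
//       outputDir: "/path/to/output",
//       maxSeqLength: 1024,
//       epochs: 3,
//       batchSize: 2,
//       learningRate: 2e-5
//   };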
// Start a fine-tuning job
export async function startFineTuning(config) {
await ensureFineTuneDir();
// Create job
const job = {
id: `ft-${Date.now()}`,
name: config.name,
status: "preparing",
config,
startTime: new Date().toISOString(),
progress: 0,
logs: []
};
// Create output directory
await fs.mkdir(config.outputDir, { recursive: true });
// Save job
const jobs = await loadFineTuneJobs();
jobs.push(job);
await saveFineTuneJobs(jobs);
    // Run training for the selected provider (awaited to completion, not
    // actually backgrounded; the job record is persisted before and after)
try {
job.status = "training";
job.logs.push(`Starting ${config.provider} fine-tuning...`);
let modelPath;
switch (config.provider) {
case "ollama":
modelPath = await startOllamaFineTuning(config);
break;
case "huggingface":
modelPath = await startHuggingFaceFineTuning(config);
break;
default:
throw new Error(`Unsupported provider: ${config.provider}`);
}
job.status = "completed";
job.endTime = new Date().toISOString();
job.progress = 100;
job.logs.push(`Training completed. Model saved to: ${modelPath}`);
log.success(`Fine-tuning completed: ${config.name}`);
}
catch (error) {
job.status = "failed";
job.endTime = new Date().toISOString();
job.logs.push(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
log.error("Fine-tuning failed:", error);
}
// Update job
await saveFineTuneJobs(jobs);
return job;
}
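// Example (illustrative sketch): end-to-end kickoff combining the helpers
// above. Paths and names are placeholders; datasetPath is only consumed by
// the "huggingface" path, not by "ollama".
//
//   const { dataPath } = await generateTrainingData(process.cwd());
//   const job = await startFineTuning({
//       provider: "ollama",                // or "huggingface"
//       name: "termcode-custom",
//       baseModel: "llama3",
//       datasetPath: dataPath,
//       outputDir: path.join(os.homedir(), ".termcode", "fine-tuning", "out")
//   });
//   console.log(job.status, job.logs.at(-1));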
// List available fine-tuned models
export async function listFineTunedModels() {
const jobs = await loadFineTuneJobs();
const models = [];
for (const job of jobs.filter(j => j.status === "completed")) {
models.push({
name: job.name,
provider: job.config.provider,
baseModel: job.config.baseModel,
status: job.status,
createdAt: job.startTime,
});
}
// Also check Ollama for locally created models
try {
const result = await runShell(['ollama', 'list'], process.cwd());
if (result.ok) {
const lines = result.data.stdout.split('\n').slice(1); // Skip header
for (const line of lines) {
if (line.trim()) {
const parts = line.trim().split(/\s+/);
if (parts.length >= 2 && parts[0].startsWith('termcode-')) {
models.push({
name: parts[0],
provider: 'ollama',
baseModel: 'unknown',
status: 'completed',
createdAt: 'unknown',
size: parts[1] || 'unknown'
});
}
}
}
}
}
catch (error) {
// Ollama not available
}
return models;
}
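// Example (illustrative): the listing combines completed jobs from jobs.json
// with any local Ollama models whose names start with "termcode-":
//
//   const models = await listFineTunedModels();
//   for (const m of models) {
//       console.log(`${m.name} (${m.provider}, base: ${m.baseModel})`);
//   }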
// Delete a fine-tuned model
export async function deleteFineTunedModel(name, provider) {
try {
if (provider === 'ollama') {
const result = await runShell(['ollama', 'rm', name], process.cwd());
return result.ok;
}
else if (provider === 'huggingface') {
// Remove from jobs list and delete directory
const jobs = await loadFineTuneJobs();
const job = jobs.find(j => j.name === name);
if (job) {
try {
await fs.rm(job.config.outputDir, { recursive: true, force: true });
}
catch { }
// Remove from jobs
const updatedJobs = jobs.filter(j => j.name !== name);
await saveFineTuneJobs(updatedJobs);
return true;
}
}
return false;
}
catch (error) {
log.error(`Failed to delete model ${name}:`, error);
return false;
}
}
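// Example (illustrative): delete by name and provider. Note that for Ollama
// models this removes the model via `ollama rm` but leaves any matching entry
// in jobs.json untouched.
//
//   const ok = await deleteFineTunedModel("termcode-custom", "ollama");
//   if (!ok) log.warn("Model not found or deletion failed");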
// Get training recommendations based on usage patterns
export async function getTrainingRecommendations(repoPath) {
const { loadSession } = await import("../state/session.js");
const session = await loadSession(repoPath);
if (!session) {
return {
recommended: false,
reasoning: "No session data available for analysis",
estimatedData: 0,
requirements: []
};
}
const taskCount = session.recentTasks.length;
if (taskCount < 10) {
return {
recommended: false,
reasoning: `Only ${taskCount} tasks recorded. Fine-tuning requires at least 10-20 examples for meaningful improvement.`,
estimatedData: taskCount,
requirements: [
"Use TermCode for more tasks to build training data",
"Minimum 10-20 diverse coding tasks recommended"
]
};
}
return {
recommended: true,
reasoning: `${taskCount} tasks recorded. This provides good training data for a custom model tailored to your coding patterns.`,
estimatedData: taskCount,
requirements: [
"Ollama installed for local fine-tuning",
"Or Python + transformers for HuggingFace training",
"Sufficient disk space for model storage (2-8GB)",
"Time for training process (30min - 2hours)"
]
};
}
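// Example (illustrative): gate the fine-tuning flow on the recommendation.
//
//   const rec = await getTrainingRecommendations(process.cwd());
//   if (rec.recommended) {
//       await generateTrainingData(process.cwd());
//   } else {
//       log.info(rec.reasoning);
//       rec.requirements.forEach(r => log.info(`- ${r}`));
//   }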