devibe
Version:
Intelligent repository cleanup with auto mode, AI learning, markdown consolidation, auto-consolidate workflow, context-aware classification, and cost optimization
500 lines (481 loc) • 18.6 kB
JavaScript
import * as path from 'path';
/**
 * File classifier that sends prompts to the Anthropic Messages endpoint
 * configured in `apiUrl` and parses the JSON the model embeds in its reply.
 */
export class AnthropicClassifier {
  apiKey;
  name = 'anthropic';
  apiUrl = 'https://api.anthropic.com/v1/messages';
  modelId;

  /**
   * @param {string} apiKey - Value sent in the `x-api-key` header.
   * @param {string} [modelId] - Model identifier; falls back to the
   *   `AI_MODEL_ID` environment variable, then to the Haiku default.
   */
  constructor(apiKey, modelId) {
    this.apiKey = apiKey;
    // Default to Haiku (cheapest) if not specified
    this.modelId = modelId || process.env.AI_MODEL_ID || 'claude-3-haiku-20240307';
  }

  /**
   * Sends a single user message and returns the text of the reply.
   *
   * @param {string} prompt - User message content.
   * @param {number} maxTokens - Value for the request's `max_tokens` field.
   * @returns {Promise<string>} Text of the first content block carrying text.
   * @throws {Error} On a non-OK HTTP status or a reply without any text block.
   */
  async #requestText(prompt, maxTokens) {
    const response = await fetch(this.apiUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': this.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: this.modelId,
        max_tokens: maxTokens,
        messages: [
          {
            role: 'user',
            content: prompt,
          },
        ],
      }),
    });
    if (!response.ok) {
      throw new Error(`Anthropic API error: ${response.status}`);
    }
    const data = await response.json();
    // Fail with a readable message instead of a TypeError when the reply is
    // empty or its blocks carry no text.
    const textBlock = Array.isArray(data?.content)
      ? data.content.find((block) => typeof block?.text === 'string')
      : undefined;
    if (!textBlock) {
      throw new Error('Anthropic API response contained no text content');
    }
    return textBlock.text;
  }

  /**
   * Classifies several files with one request.
   *
   * @param {Array<{fileName: string, filePath: string, contentPreview: string}>} files
   *   Files to classify.
   * @param {Array<{name: string, isRoot: boolean}>} repositories
   *   Repositories listed in the prompt as possible destinations.
   * @returns {Promise<Array<object>>} The JSON array extracted from the reply,
   *   returned as parsed; an empty array when `files` is empty.
   * @throws {Error} "Batch AI classification failed: …" wrapping the request or
   *   parse error (original error available as `cause`).
   */
  async classifyBatch(files, repositories) {
    // Nothing to classify: skip the (billable) API round trip entirely.
    if (!files?.length) {
      return [];
    }
    const repoList = repositories
      .map((r) => `- ${r.name} (${r.isRoot ? 'root' : 'sub-repo'})`)
      .join('\n');
    const filesList = files.map((f, idx) => {
      return `FILE ${idx + 1}: ${f.fileName}
Path: ${f.filePath}
Content preview (first 500 chars):
${f.contentPreview}
---`;
    }).join('\n\n');
    const prompt = `Analyze these ${files.length} files from a monorepo and classify each one.
Available repositories:
${repoList}
FILES TO ANALYZE:
${filesList}
For each file, determine:
1. Category: documentation, script, test, source, config, or asset
2. Which repository it belongs to (based on content, not just filename)
3. Confidence level (0.0-1.0)
4. Brief reasoning
IMPORTANT:
- Look for technology mentions (iOS/Swift → iOS repo, API/Express → API repo, React/web → web repo)
- Test scripts (test-*, check-*, debug-*) are usually test utilities
- Documentation about a specific tech should go to that repo's documents/ folder
- If it's general project-wide documentation, it stays at root
Respond in JSON array format with one object per file:
[
{
"fileName": "exact filename",
"category": "documentation|script|test|source|config|asset",
"confidence": 0.0-1.0,
"reasoning": "brief explanation",
"suggestedRepo": "repo name from list or root"
},
...
]`;
    try {
      // Larger token budget because the reply covers every file in the batch.
      const text = await this.#requestText(prompt, 4000);
      const jsonMatch = text.match(/\[[\s\S]*\]/);
      if (!jsonMatch) {
        throw new Error('No JSON array in response');
      }
      return JSON.parse(jsonMatch[0]);
    }
    catch (error) {
      throw new Error(`Batch AI classification failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Classifies one file into a category.
   *
   * @param {string} filePath - Path of the file; echoed back as `path`.
   * @param {string} content - File content; only the first 2000 chars are sent.
   * @returns {Promise<{path: string, category: string, confidence: number, reasoning: string}>}
   * @throws {Error} "AI classification failed: …" when the request fails
   *   (original error available as `cause`). An unparseable reply does not
   *   throw; see `parseResponse`.
   */
  async classify(filePath, content) {
    const prompt = this.buildPrompt(filePath, content);
    try {
      const text = await this.#requestText(prompt, 500);
      return this.parseResponse(text, filePath);
    }
    catch (error) {
      throw new Error(`AI classification failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Builds the single-file classification prompt.
   *
   * @param {string} filePath - Only the base name is included in the prompt.
   * @param {string} content - File content; null/undefined is treated as empty.
   * @returns {string} Prompt text.
   */
  buildPrompt(filePath, content) {
    const truncatedContent = String(content ?? '').substring(0, 2000); // Limit content size
    return `Classify this file into ONE category: source, config, documentation, script, test, or asset.
File: ${path.basename(filePath)}
Content (first 2000 chars):
${truncatedContent}
Respond in JSON format:
{
"category": "source|config|documentation|script|test|asset",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}`;
  }

  /**
   * Asks the model which repository a file belongs to.
   *
   * Best effort: any request or parse failure yields a low-confidence answer
   * naming the root repository (or the first one, or '') instead of throwing.
   *
   * @param {string} filePath - Path of the file; base name and extension are used.
   * @param {string} content - File content; only the first 3000 chars are sent.
   * @param {Array<{name: string, isRoot: boolean}>} repositories - Candidates.
   * @returns {Promise<{repositoryName: string, confidence: number, reasoning: string}>}
   */
  async suggestRepository(filePath, content, repositories) {
    const truncatedContent = String(content ?? '').substring(0, 3000);
    const fileName = path.basename(filePath);
    const ext = path.extname(filePath);
    const repoList = repositories
      .map((r) => `- ${r.name} (${r.isRoot ? 'root' : 'sub-repo'})`)
      .join('\n');
    const prompt = `Analyze this file and determine which repository it belongs to in this monorepo.
File: ${fileName}
Content (first 3000 chars):
${truncatedContent}
Available repositories:
${repoList}
IMPORTANT ANALYSIS GUIDELINES:
${ext === '.md' ? `
- This is a MARKDOWN file. Look for:
* Mentions of specific technologies (iOS, Swift, Android, API, React, etc.)
* References to specific sub-projects or components
* If it discusses iOS/Swift implementation → belongs in iOS repo
* If it discusses API/backend → belongs in API repo
* If it discusses web/frontend → belongs in web/UI repo
* If it's a general project summary or documentation → keep at root
` : ''}
${ext === '.js' || ext === '.ts' ? `
- This is a SCRIPT file. Look for:
* Test scripts (test-*, check-*, debug-*) → should go to tests/ directory
* Import statements and dependencies
* Which codebase it's testing or interacting with
` : ''}
Based on the file's content, imports, purpose, and context clues, determine which repository this file should belong to.
Respond in JSON format:
{
"repositoryName": "exact name from list above",
"confidence": 0.0-1.0,
"reasoning": "brief explanation of why this file belongs to this repo"
}`;
    try {
      const text = await this.#requestText(prompt, 500);
      const jsonMatch = text.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No JSON in response');
      }
      const parsed = JSON.parse(jsonMatch[0]);
      return {
        repositoryName: parsed.repositoryName,
        confidence: parsed.confidence,
        reasoning: parsed.reasoning,
      };
    }
    catch {
      // Deliberate fallback: use root repository and signal the failure
      // through the low confidence and the reasoning text.
      const rootRepo = repositories.find((r) => r.isRoot);
      return {
        repositoryName: rootRepo?.name || repositories[0]?.name || '',
        confidence: 0.3,
        reasoning: 'AI analysis failed, defaulting to root',
      };
    }
  }

  /**
   * Extracts the classification object from the model's reply.
   *
   * The reply must contain a JSON object with a non-empty string `category`
   * and a numeric (or numeric-string) `confidence`; confidence is clamped to
   * the 0..1 range. Anything else yields the 'unknown' fallback result.
   *
   * @param {string} text - Raw reply text.
   * @param {string} filePath - Echoed back as `path`.
   * @returns {{path: string, category: string, confidence: number, reasoning: string}}
   */
  parseResponse(text, filePath) {
    try {
      // Extract JSON from response
      const jsonMatch = text.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No JSON found in response');
      }
      const parsed = JSON.parse(jsonMatch[0]);
      if (typeof parsed.category !== 'string' || parsed.category.trim() === '') {
        throw new Error('Missing category in response');
      }
      const confidence = typeof parsed.confidence === 'string'
        ? Number.parseFloat(parsed.confidence)
        : parsed.confidence;
      if (typeof confidence !== 'number' || !Number.isFinite(confidence)) {
        throw new Error('Missing confidence in response');
      }
      return {
        path: filePath,
        category: parsed.category,
        confidence: Math.min(1, Math.max(0, confidence)),
        reasoning: `AI: ${parsed.reasoning}`,
      };
    }
    catch {
      // Unusable reply: report an 'unknown' category with low confidence.
      return {
        path: filePath,
        category: 'unknown',
        confidence: 0.3,
        reasoning: 'AI parsing failed',
      };
    }
  }
}
/**
 * File classifier that sends prompts to the OpenAI chat-completions endpoint
 * configured in `apiUrl` and parses the JSON the model embeds in its reply.
 */
export class OpenAIClassifier {
  apiKey;
  name = 'openai';
  apiUrl = 'https://api.openai.com/v1/chat/completions';
  modelId;

  /**
   * @param {string} apiKey - Value sent as the Bearer token.
   * @param {string} [modelId] - Model identifier; falls back to the
   *   `AI_MODEL_ID` environment variable, then to the GPT-4o Mini default.
   */
  constructor(apiKey, modelId) {
    this.apiKey = apiKey;
    // Default to GPT-4o Mini (cheapest) if not specified
    this.modelId = modelId || process.env.AI_MODEL_ID || 'gpt-4o-mini';
  }

  /**
   * Sends a system + user message pair and returns the reply text.
   *
   * @param {string} systemPrompt - Content of the system message.
   * @param {string} prompt - Content of the user message.
   * @returns {Promise<string>} Content of the first choice's message.
   * @throws {Error} On a non-OK HTTP status or a reply without string content.
   */
  async #requestText(systemPrompt, prompt) {
    const response = await fetch(this.apiUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: this.modelId,
        messages: [
          {
            role: 'system',
            content: systemPrompt,
          },
          {
            role: 'user',
            content: prompt,
          },
        ],
        temperature: 0.1,
        max_tokens: 300,
      }),
    });
    if (!response.ok) {
      throw new Error(`OpenAI API error: ${response.status}`);
    }
    const data = await response.json();
    // Fail with a readable message instead of a TypeError when there is no
    // choice or its message content is null.
    const text = data?.choices?.[0]?.message?.content;
    if (typeof text !== 'string') {
      throw new Error('OpenAI API response contained no message content');
    }
    return text;
  }

  /**
   * Classifies one file into a category.
   *
   * @param {string} filePath - Path of the file; echoed back as `path`.
   * @param {string} content - File content; only the first 2000 chars are sent.
   * @returns {Promise<{path: string, category: string, confidence: number, reasoning: string}>}
   * @throws {Error} "AI classification failed: …" when the request fails
   *   (original error available as `cause`). An unparseable reply does not
   *   throw; see `parseResponse`.
   */
  async classify(filePath, content) {
    const prompt = this.buildPrompt(filePath, content);
    try {
      const text = await this.#requestText(
        'You are a file classification expert. Respond only with valid JSON.',
        prompt,
      );
      return this.parseResponse(text, filePath);
    }
    catch (error) {
      throw new Error(`AI classification failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Builds the single-file classification prompt.
   *
   * @param {string} filePath - Only the base name is included in the prompt.
   * @param {string} content - File content; null/undefined is treated as empty.
   * @returns {string} Prompt text.
   */
  buildPrompt(filePath, content) {
    const truncatedContent = String(content ?? '').substring(0, 2000);
    return `Classify this file into ONE category: source, config, documentation, script, test, or asset.
File: ${path.basename(filePath)}
Content (first 2000 chars):
${truncatedContent}
Respond with JSON only:
{
"category": "source|config|documentation|script|test|asset",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}`;
  }

  /**
   * Asks the model which repository a file belongs to.
   *
   * Best effort: any request or parse failure yields a low-confidence answer
   * naming the root repository (or the first one, or '') instead of throwing.
   *
   * @param {string} filePath - Path of the file; base name and extension are used.
   * @param {string} content - File content; only the first 3000 chars are sent.
   * @param {Array<{name: string, isRoot: boolean}>} repositories - Candidates.
   * @returns {Promise<{repositoryName: string, confidence: number, reasoning: string}>}
   */
  async suggestRepository(filePath, content, repositories) {
    const truncatedContent = String(content ?? '').substring(0, 3000);
    const fileName = path.basename(filePath);
    const ext = path.extname(filePath);
    const repoList = repositories
      .map((r) => `- ${r.name} (${r.isRoot ? 'root' : 'sub-repo'})`)
      .join('\n');
    const prompt = `Analyze this file and determine which repository it belongs to in this monorepo.
File: ${fileName}
Content (first 3000 chars):
${truncatedContent}
Available repositories:
${repoList}
IMPORTANT ANALYSIS GUIDELINES:
${ext === '.md' ? `
- This is a MARKDOWN file. Look for:
* Mentions of specific technologies (iOS, Swift, Android, API, React, etc.)
* References to specific sub-projects or components
* If it discusses iOS/Swift implementation → belongs in iOS repo
* If it discusses API/backend → belongs in API repo
* If it discusses web/frontend → belongs in web/UI repo
* If it's a general project summary or documentation → keep at root
` : ''}
${ext === '.js' || ext === '.ts' ? `
- This is a SCRIPT file. Look for:
* Test scripts (test-*, check-*, debug-*) → should go to tests/ directory
* Import statements and dependencies
* Which codebase it's testing or interacting with
` : ''}
Based on the file's content, imports, purpose, and context clues, determine which repository this file should belong to.
Respond with JSON only:
{
"repositoryName": "exact name from list above",
"confidence": 0.0-1.0,
"reasoning": "brief explanation of why this file belongs to this repo"
}`;
    try {
      const text = await this.#requestText(
        'You are a monorepo analysis expert. Respond only with valid JSON.',
        prompt,
      );
      const jsonMatch = text.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No JSON in response');
      }
      const parsed = JSON.parse(jsonMatch[0]);
      return {
        repositoryName: parsed.repositoryName,
        confidence: parsed.confidence,
        reasoning: parsed.reasoning,
      };
    }
    catch {
      // Deliberate fallback: use root repository and signal the failure
      // through the low confidence and the reasoning text.
      const rootRepo = repositories.find((r) => r.isRoot);
      return {
        repositoryName: rootRepo?.name || repositories[0]?.name || '',
        confidence: 0.3,
        reasoning: 'AI analysis failed, defaulting to root',
      };
    }
  }

  /**
   * Extracts the classification object from the model's reply.
   *
   * The reply must contain a JSON object with a non-empty string `category`
   * and a numeric (or numeric-string) `confidence`; confidence is clamped to
   * the 0..1 range. Anything else yields the 'unknown' fallback result.
   *
   * @param {string} text - Raw reply text.
   * @param {string} filePath - Echoed back as `path`.
   * @returns {{path: string, category: string, confidence: number, reasoning: string}}
   */
  parseResponse(text, filePath) {
    try {
      const jsonMatch = text.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No JSON found in response');
      }
      const parsed = JSON.parse(jsonMatch[0]);
      if (typeof parsed.category !== 'string' || parsed.category.trim() === '') {
        throw new Error('Missing category in response');
      }
      const confidence = typeof parsed.confidence === 'string'
        ? Number.parseFloat(parsed.confidence)
        : parsed.confidence;
      if (typeof confidence !== 'number' || !Number.isFinite(confidence)) {
        throw new Error('Missing confidence in response');
      }
      return {
        path: filePath,
        category: parsed.category,
        confidence: Math.min(1, Math.max(0, confidence)),
        reasoning: `AI: ${parsed.reasoning}`,
      };
    }
    catch {
      // Unusable reply: report an 'unknown' category with low confidence.
      return {
        path: filePath,
        category: 'unknown',
        confidence: 0.3,
        reasoning: 'AI parsing failed',
      };
    }
  }
}
/**
 * Static helpers that pick an AI provider/model and build the matching
 * classifier, using keys from the key manager in ./ai-key-manager.js and
 * model metadata from ./ai-model-config.js.
 */
export class AIClassifierFactory {
  /**
   * Builds a classifier for the requested provider.
   *
   * Only the key for the requested provider is looked up; providers without a
   * classifier implementation return null without touching the key manager.
   *
   * @param {string} [provider='anthropic'] - 'anthropic' or 'openai'.
   * @param {string} [modelId] - Forwarded to the classifier constructor.
   * @returns {Promise<AnthropicClassifier|OpenAIClassifier|null>} null when the
   *   provider is unsupported or no key is available for it.
   */
  static async create(provider = 'anthropic', modelId) {
    let Classifier;
    switch (provider) {
      case 'anthropic':
        Classifier = AnthropicClassifier;
        break;
      case 'openai':
        Classifier = OpenAIClassifier;
        break;
      default:
        // TODO: Add Google Gemini classifier ('google' is not supported yet)
        return null;
    }
    // Try to get key from stored keys first, then environment
    const { getKeyManager } = await import('./ai-key-manager.js');
    const apiKey = await getKeyManager().getKeyWithFallback(provider);
    return apiKey ? new Classifier(apiKey, modelId) : null;
  }

  /**
   * Reports whether any AI provider key is configured.
   *
   * @returns {Promise<boolean>} true when the key manager lists at least one
   *   configured provider or one of the provider API-key environment
   *   variables is set.
   */
  static async isAvailable() {
    const { getKeyManager } = await import('./ai-key-manager.js');
    const providers = await getKeyManager().getConfiguredProviders();
    // Also check environment variables
    const hasEnvKeys = Boolean(
      process.env.ANTHROPIC_API_KEY || process.env.OPENAI_API_KEY || process.env.GOOGLE_API_KEY,
    );
    return providers.length > 0 || hasEnvKeys;
  }

  /**
   * Chooses the provider to use.
   *
   * Order: the provider of the model named by the `AI_MODEL` environment
   * variable (when it is a known model), then stored keys (google, anthropic,
   * openai), then API-key environment variables (anthropic, openai, google).
   *
   * NOTE(review): this can return 'google', for which `create()` in this class
   * returns null — confirm callers handle that.
   *
   * @returns {Promise<string|null>} Provider name, or null when none is found.
   */
  static async getPreferredProvider() {
    // Check environment variable for model selection
    const selectedModel = process.env.AI_MODEL;
    if (selectedModel) {
      const { AVAILABLE_MODELS } = await import('./ai-model-config.js');
      const model = AVAILABLE_MODELS[selectedModel];
      if (model) {
        return model.provider;
      }
    }
    // Check stored keys
    const { getKeyManager } = await import('./ai-key-manager.js');
    const providers = await getKeyManager().getConfiguredProviders();
    // Prefer Google (cheapest), then Anthropic, then OpenAI
    const storedChoice = ['google', 'anthropic', 'openai'].find((name) => providers.includes(name));
    if (storedChoice) {
      return storedChoice;
    }
    // Fallback to environment variables
    if (process.env.ANTHROPIC_API_KEY) {
      return 'anthropic';
    }
    if (process.env.OPENAI_API_KEY) {
      return 'openai';
    }
    if (process.env.GOOGLE_API_KEY) {
      return 'google';
    }
    return null;
  }

  /**
   * Get the specific model to use based on config/environment
   *
   * When no key exists for the configured model's provider, falls back to the
   * preferred provider: the 'best-value' model is used if it belongs to that
   * provider; otherwise a known model of that provider is used when a key for
   * it is confirmed; otherwise the 'best-value' model is returned as before.
   *
   * @returns {Promise<{provider: string, modelId: string}|null>} null when no
   *   provider is available at all.
   */
  static async getModelConfig() {
    const { getModelConfig, selectModel, AVAILABLE_MODELS } = await import('./ai-model-config.js');
    const { getKeyManager } = await import('./ai-key-manager.js');
    const keyManager = getKeyManager();
    const config = getModelConfig();
    // Check if we have a key for this provider
    const key = await keyManager.getKeyWithFallback(config.provider);
    if (key) {
      return { provider: config.provider, modelId: config.id };
    }
    // Try to find another available provider
    const provider = await this.getPreferredProvider();
    if (!provider) {
      return null;
    }
    const fallbackModel = selectModel('best-value');
    if (fallbackModel.provider !== provider) {
      // The best-value model belongs to a different provider than the one that
      // was resolved; prefer a model of the resolved provider when it is usable.
      // Assumes AVAILABLE_MODELS entries expose `provider` and `id` like the
      // objects returned by selectModel()/getModelConfig() — TODO confirm.
      const providerModel = Object.values(AVAILABLE_MODELS ?? {})
        .find((model) => model?.provider === provider && model?.id);
      if (providerModel && await keyManager.getKeyWithFallback(provider)) {
        return { provider, modelId: providerModel.id };
      }
    }
    return { provider: fallbackModel.provider, modelId: fallbackModel.id };
  }
}
//# sourceMappingURL=ai-classifier.js.map