// devibe
// Version:
// Intelligent repository cleanup with auto mode, AI learning, markdown consolidation, auto-consolidate workflow, context-aware classification, and cost optimization
// 311 lines • 11.6 kB
// JavaScript
import * as path from 'path';
import * as fs from 'fs/promises';
import { AIClassifierFactory } from './ai-classifier.js';
/**
 * Classifies files into categories ('test', 'script', 'documentation',
 * 'config', 'source', 'asset', 'unknown') and suggests where they should live
 * inside a repository.
 *
 * An AI classifier obtained from AIClassifierFactory is tried first when one is
 * available; otherwise (or when it fails) a chain of local heuristics is used:
 * file content, then file name / directory, then file extension.
 */
export class FileClassifier {
    /**
     * Classify a single file.
     *
     * @param {string} filePath - Path of the file to classify.
     * @param {string} [content] - File content, if the caller already has it.
     *   When omitted and AI is available, files smaller than 100000 bytes are
     *   read from disk.
     * @returns {Promise<object>} For the heuristic path, an object of the form
     *   `{ path, category, confidence, reasoning }`. When the AI classifier
     *   answers, its result is returned unchanged (presumably the same shape;
     *   verify against the AI classifier implementation).
     */
    async classify(filePath, content) {
        let text = content;
        // Deliberately outside any try block (as before): a failure to resolve
        // the provider is reported to the caller instead of being swallowed.
        const aiProvider = await AIClassifierFactory.getPreferredProvider();
        if (aiProvider && text) {
            const aiResult = await this.classifyWithAI(filePath, text);
            if (aiResult) {
                return aiResult;
            }
        }
        // No content supplied: load it ourselves so the AI (and the content
        // heuristic further down) have something to look at.
        if (!text && (await AIClassifierFactory.isAvailable())) {
            try {
                const loaded = await this.readSmallFile(filePath);
                if (loaded !== null) {
                    text = loaded;
                    if (await AIClassifierFactory.getPreferredProvider()) {
                        const aiResult = await this.classifyWithAI(filePath, text);
                        if (aiResult) {
                            return aiResult;
                        }
                    }
                }
            }
            catch {
                // File not readable (or provider lookup failed): continue with heuristics.
            }
        }
        // Heuristic fallback, highest priority first.
        if (text) {
            const byContent = this.classifyByContent(text);
            if (byContent) {
                return { path: filePath, ...byContent };
            }
        }
        if (this.isTestFile(path.basename(filePath), path.dirname(filePath))) {
            return {
                path: filePath,
                category: 'test',
                confidence: 0.95,
                reasoning: 'File name/path indicates test file',
            };
        }
        const byExtension = this.classifyByExtension(path.extname(filePath), path.basename(filePath));
        return { path: filePath, ...byExtension };
    }
    /**
     * Internal helper: run the AI classifier on one file.
     *
     * @param {string} filePath - Path of the file being classified.
     * @param {string} content - Content handed to the AI classifier.
     * @returns {Promise<object|null>} The AI result, or null when no classifier
     *   could be created, the call failed, or it produced no result. Callers
     *   treat null as "fall back to heuristics".
     */
    async classifyWithAI(filePath, content) {
        try {
            const ai = await AIClassifierFactory.create();
            if (!ai) {
                return null;
            }
            const result = await ai.classify(filePath, content);
            return result || null;
        }
        catch {
            // AI failure is expected and non-fatal; heuristics take over.
            return null;
        }
    }
    /**
     * Internal helper: read a file as UTF-8 only if it is small enough to be
     * sent to the AI classifier.
     *
     * @param {string} filePath - File to read.
     * @returns {Promise<string|null>} The content, or null when the file is
     *   100000 bytes or larger.
     * @throws Propagates fs errors (missing or unreadable file) to the caller.
     */
    async readSmallFile(filePath) {
        const stats = await fs.stat(filePath);
        if (stats.size >= 100000) {
            return null;
        }
        return fs.readFile(filePath, 'utf-8');
    }
    /**
     * Classify several files, using the AI classifier's batch API when present.
     *
     * AI results come first, each mapped back to the full path of the file it
     * describes. Files that could not be read (and so were never sent to the
     * AI) are classified individually and appended, so they are no longer
     * silently dropped. Without an AI provider, or when the batch call fails,
     * every file is classified individually in input order.
     *
     * @param {string[]} files - Paths of the files to classify.
     * @returns {Promise<object[]>} Classifications of the form
     *   `{ path, category, confidence, reasoning }`.
     */
    async classifyBatch(files) {
        const classifyIndividually = () => Promise.all(files.map((file) => this.classify(file)));
        const aiProvider = await AIClassifierFactory.getPreferredProvider();
        if (!aiProvider || files.length === 0) {
            return classifyIndividually();
        }
        try {
            const ai = await AIClassifierFactory.create();
            if (ai && ai.classifyBatch) {
                // Note: this simplified version has no repository context (the
                // second argument to classifyBatch is an empty list).
                const batchFiles = await Promise.all(files.map(async (filePath) => {
                    try {
                        const content = await fs.readFile(filePath, 'utf-8');
                        return {
                            fileName: path.basename(filePath),
                            filePath,
                            contentPreview: content.substring(0, 500),
                        };
                    }
                    catch {
                        return null;
                    }
                }));
                const validFiles = batchFiles.filter((entry) => entry !== null);
                if (validFiles.length === 0) {
                    return classifyIndividually();
                }
                const results = await ai.classifyBatch(validFiles, []);
                // Results only carry `fileName` (a basename). Map each one back
                // to the full path: prefer the entry at the same index, then the
                // first entry with that name, and only as a last resort keep the
                // name the AI reported.
                const pathByName = new Map();
                for (const entry of validFiles) {
                    if (!pathByName.has(entry.fileName)) {
                        pathByName.set(entry.fileName, entry.filePath);
                    }
                    if (!pathByName.has(entry.filePath)) {
                        pathByName.set(entry.filePath, entry.filePath);
                    }
                }
                const resolvePath = (result, index) => {
                    const aligned = validFiles[index];
                    if (aligned && aligned.fileName === result.fileName) {
                        return aligned.filePath;
                    }
                    return pathByName.has(result.fileName)
                        ? pathByName.get(result.fileName)
                        : result.fileName;
                };
                const classified = results.map((result, index) => ({
                    path: resolvePath(result, index),
                    category: result.category,
                    confidence: result.confidence,
                    reasoning: result.reasoning,
                }));
                const unreadable = files.filter((_file, index) => batchFiles[index] === null);
                const leftovers = await Promise.all(unreadable.map((file) => this.classify(file)));
                return classified.concat(leftovers);
            }
        }
        catch {
            // AI batch failed; fall through to individual classification.
        }
        return classifyIndividually();
    }
    /**
     * Suggest a destination path for an already-classified file.
     *
     * @param {{path: string, category: string}} file - Classified file.
     * @param {Array<{path: string}>} repositories - Known repositories; the
     *   first one that contains the file is treated as its current repository.
     * @param {string} [content] - Optional file content forwarded to the AI
     *   when choosing between several repositories.
     * @returns {Promise<string|null>} `<repo>/<dir>/<basename>` where dir is
     *   'documents', 'scripts', 'tests' or 'src'; null when the file is in no
     *   known repository or its category has no target directory.
     */
    async suggestLocation(file, repositories, content) {
        // A repository contains the file only if the match ends on a path
        // boundary; a plain prefix test would let "/repo/app" claim
        // "/repo/app-utils/x".
        const isInside = (filePath, repoPath) => {
            if (!filePath.startsWith(repoPath)) {
                return false;
            }
            if (repoPath === '' || filePath.length === repoPath.length) {
                return true;
            }
            const lastChar = repoPath[repoPath.length - 1];
            if (lastChar === '/' || lastChar === '\\') {
                return true;
            }
            const nextChar = filePath[repoPath.length];
            return nextChar === '/' || nextChar === '\\';
        };
        const currentRepo = repositories.find((repo) => isInside(file.path, repo.path));
        if (!currentRepo) {
            return null;
        }
        // With several repositories and AI available, the AI may pick a
        // different repository than the one the file currently sits in.
        let targetRepo = currentRepo;
        if (repositories.length > 1 && (await AIClassifierFactory.isAvailable())) {
            const suggestedRepo = await this.suggestTargetRepository(file, repositories, content);
            if (suggestedRepo) {
                targetRepo = suggestedRepo;
            }
        }
        const fileName = path.basename(file.path);
        const placeIn = (directory) => path.join(targetRepo.path, directory, fileName);
        switch (file.category) {
            case 'documentation':
                return placeIn('documents');
            case 'script': {
                // Scripts named like ad-hoc test/check/debug helpers live with the tests.
                const lowered = fileName.toLowerCase();
                const isTestScript = lowered.startsWith('test-') ||
                    lowered.includes('-test') ||
                    lowered.startsWith('check-') ||
                    lowered.startsWith('debug-');
                return placeIn(isTestScript ? 'tests' : 'scripts');
            }
            case 'test':
                return placeIn('tests');
            case 'source':
                return placeIn('src');
            default:
                return null;
        }
    }
    /**
     * Ask the AI which repository a file belongs to.
     *
     * @param {{path: string}} file - File under consideration.
     * @param {Array<{path: string, isRoot: *}>} repositories - Candidates.
     * @param {string} [content] - File content; read from disk when omitted.
     * @returns {Promise<object|null>} The entry of `repositories` whose
     *   basename or path equals the AI's `repositoryName`; null when the file
     *   is unreadable or too large, no AI is available, the AI call fails, or
     *   nothing matches.
     */
    async suggestTargetRepository(file, repositories, content) {
        let text = content;
        if (!text) {
            try {
                text = await this.readSmallFile(file.path);
            }
            catch {
                return null;
            }
            if (text === null) {
                return null; // File too large
            }
        }
        const aiProvider = await AIClassifierFactory.getPreferredProvider();
        if (!aiProvider) {
            return null;
        }
        try {
            const ai = await AIClassifierFactory.create();
            if (!ai) {
                return null;
            }
            // Context handed to the AI: one descriptor per available repository.
            const repoNames = repositories.map((repo) => ({
                name: path.basename(repo.path),
                path: repo.path,
                isRoot: repo.isRoot,
            }));
            const result = await ai.suggestRepository(file.path, text, repoNames);
            const targetRepo = repositories.find((repo) => path.basename(repo.path) === result.repositoryName ||
                repo.path === result.repositoryName);
            return targetRepo || null;
        }
        catch {
            return null; // AI analysis failed, caller keeps the current repo
        }
    }
    /**
     * Classify from file content alone.
     *
     * @param {string} content - File content.
     * @returns {{category: string, confidence: number, reasoning: string}|null}
     *   'script' for a leading shebang, 'test' when a test-framework call
     *   (describe/it/test, including x/f-prefixed variants) is present, else null.
     */
    classifyByContent(content) {
        if (content.startsWith('#!')) {
            return {
                category: 'script',
                confidence: 0.9,
                reasoning: 'Contains shebang line',
            };
        }
        // The call name must not be the tail of a longer identifier or a member
        // access, otherwise emit(), init(), submit() or regex.test() would all
        // count as test-framework calls.
        const testCall = /(?:^|[^\w$.])(?:[xf]?describe|[xf]?it|x?test)\(/;
        if (testCall.test(content)) {
            return {
                category: 'test',
                confidence: 0.85,
                reasoning: 'Contains test framework functions',
            };
        }
        return null;
    }
    /**
     * Decide from name and location whether a file is a test file.
     *
     * @param {string} basename - File name without directory.
     * @param {string} dirname - Directory part of the path.
     * @returns {boolean} True for `*.test.*`, `*.spec.*`, `*_test.py`,
     *   `*_test.go`, or when any directory segment is `test`, `tests` or
     *   `__tests__`.
     */
    isTestFile(basename, dirname) {
        const nameLooksLikeTest = basename.includes('.test.') ||
            basename.includes('.spec.') ||
            basename.endsWith('_test.py') ||
            basename.endsWith('_test.go');
        if (nameLooksLikeTest) {
            return true;
        }
        // Compare whole path segments: a dirname has no trailing separator, so
        // a substring check for '/tests/' misses files directly inside tests/.
        // Splitting on both separators also covers Windows-style paths.
        const testDirectories = ['test', 'tests', '__tests__'];
        return dirname.split(/[\\/]/).some((segment) => testDirectories.includes(segment));
    }
    /**
     * Classify from the file extension (case-insensitive) and base name.
     *
     * @param {string} ext - Extension including the leading dot (may be '').
     * @param {string} basename - File name without directory.
     * @returns {{category: string, confidence: number, reasoning: string}}
     *   Always returns a verdict; 'unknown' with confidence 0.3 when nothing matches.
     */
    classifyByExtension(ext, basename) {
        const normalizedExt = ext.toLowerCase();
        // Rules are evaluated in order; the first match wins.
        const rules = [
            {
                matches: ['.md', '.txt', '.rst', '.adoc'].includes(normalizedExt),
                category: 'documentation',
                confidence: 0.9,
                reasoning: 'Documentation file extension',
            },
            {
                // Dotfiles (e.g. ".gitignore") count as configuration too.
                matches: ['.json', '.yaml', '.yml', '.toml', '.ini', '.env'].includes(normalizedExt) ||
                    basename.startsWith('.'),
                category: 'config',
                confidence: 0.85,
                reasoning: 'Configuration file',
            },
            {
                matches: ['.sh', '.bash', '.zsh', '.py', '.rb', '.pl'].includes(normalizedExt),
                category: 'script',
                confidence: 0.8,
                reasoning: 'Script file extension',
            },
            {
                matches: ['.ts', '.tsx', '.js', '.jsx', '.go', '.java', '.c', '.cpp', '.rs', '.swift']
                    .includes(normalizedExt),
                category: 'source',
                confidence: 0.9,
                reasoning: 'Source code file extension',
            },
            {
                matches: ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.woff', '.woff2', '.ttf', '.eot']
                    .includes(normalizedExt),
                category: 'asset',
                confidence: 0.95,
                reasoning: 'Asset file (image/font)',
            },
        ];
        const hit = rules.find((rule) => rule.matches);
        if (hit) {
            return {
                category: hit.category,
                confidence: hit.confidence,
                reasoning: hit.reasoning,
            };
        }
        return {
            category: 'unknown',
            confidence: 0.3,
            reasoning: 'Unknown file type',
        };
    }
}
//# sourceMappingURL=file-classifier.js.map