@hivetechs/hive-ai
Version:
Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API
420 lines • 19.5 kB
JavaScript
/**
* Database Cleanup Tool - Comprehensive Invalid Model Entry Removal
*
* This tool provides a robust, production-ready cleanup system for removing
* invalid model entries from the OpenRouter models database and updating
* pipeline profiles that reference removed models.
*/
import { z } from "zod";
/**
 * Zod schema for the arguments accepted by the cleanup-models tool.
 *
 * Every field is optional and falls back to the default declared here.
 */
export const CleanupModelsSchema = z.object({
    // Preview switch: report what would change without writing anything.
    dryRun: z
        .boolean()
        .optional()
        .default(false)
        .describe('If true, only shows what would be cleaned without making changes'),
    // Bypasses the interactive confirmation.
    force: z
        .boolean()
        .optional()
        .default(false)
        .describe('If true, skips confirmation prompts'),
    // How many models are grouped into one validation batch.
    batchSize: z
        .number()
        .optional()
        .default(50)
        .describe('Number of models to validate per batch (to avoid rate limiting)'),
    // Upper bound, in milliseconds, for a single API validation request.
    timeout: z
        .number()
        .optional()
        .default(30000)
        .describe('Timeout in milliseconds for each API validation request')
});
/**
 * Fetch the set of model ids currently listed by the OpenRouter models API.
 *
 * @param {string} apiKey - OpenRouter API key, sent as a Bearer token.
 * @param {number} [timeout=30000] - Milliseconds before the request is aborted.
 * @returns {Promise<Set<string>>} Ids of every listed model that has a truthy `id`.
 * @throws {Error} When the response is not OK, the request times out, or the
 *   payload has no `data` array.
 */
async function getValidModelsFromOpenRouter(apiKey, timeout = 30000) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);
    try {
        const response = await fetch('https://openrouter.ai/api/v1/models', {
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'HTTP-Referer': 'https://hivetechs.io',
                'X-Title': 'hive-ai-cleanup'
            },
            signal: controller.signal
        });
        if (!response.ok) {
            throw new Error(`OpenRouter API error: ${response.status} ${response.statusText}`);
        }
        // The abort timer stays armed while the body is read, so a stalled
        // download is covered by the same timeout as the initial request.
        const payload = await response.json();
        // A payload without a model list must not be mistaken for "no valid
        // models": callers treat every id missing from this set as invalid.
        if (!Array.isArray(payload?.data)) {
            throw new Error('OpenRouter API returned an unexpected response: missing "data" array');
        }
        const validModels = new Set();
        for (const model of payload.data) {
            if (model?.id) {
                validModels.add(model.id);
            }
        }
        return validModels;
    }
    catch (error) {
        if (error?.name === 'AbortError') {
            throw new Error(`OpenRouter API request timed out after ${timeout}ms`, { cause: error });
        }
        throw error;
    }
    finally {
        // Single cleanup point for both the success and the failure path.
        clearTimeout(timeoutId);
    }
}
/**
 * Classify local model rows as valid or invalid against a set of known ids.
 *
 * Models are walked in batches only so that progress can be logged per batch.
 * Validation itself is a local Set lookup, so no network request is made here
 * and no delay is inserted between batches.
 *
 * @param {Array<{openrouter_id: string, provider_name: string, name: string}>} models
 *   Local model rows to check.
 * @param {Set<string>} validModels - Ids considered valid.
 * @param {number} [batchSize=50] - Models per batch. Values that are not a
 *   finite number >= 1 fall back to 50; fractions are rounded down.
 * @returns {Promise<Array<{modelId: string, isValid: boolean, provider: string, name: string, error: (string|undefined)}>>}
 *   One entry per input model, in input order.
 */
async function validateModelsInBatches(models, validModels, batchSize = 50) {
    // A zero or negative step would never advance the loop and NaN would end
    // it after an empty first slice, so normalise to a positive integer.
    const step = Number.isFinite(batchSize) && batchSize >= 1 ? Math.floor(batchSize) : 50;
    const results = [];
    for (let start = 0; start < models.length; start += step) {
        const end = Math.min(start + step, models.length);
        console.log(`๐ Validating models ${start + 1}-${end} of ${models.length}...`);
        for (const model of models.slice(start, end)) {
            const isValid = validModels.has(model.openrouter_id);
            results.push({
                modelId: model.openrouter_id,
                isValid,
                provider: model.provider_name,
                name: model.name,
                error: isValid ? undefined : 'Model not found in OpenRouter API'
            });
        }
    }
    return results;
}
/**
 * Pick a replacement for a model id that is no longer valid.
 *
 * Selection order:
 *  1. A valid model from the same provider that also has an active row in
 *     `openrouter_models`, preferring the one whose name shares the most
 *     `-`/`_` separated words with the invalid model's name.
 *  2. Otherwise the active local model (among the valid ids) referenced by the
 *     most pipeline profiles.
 *
 * Only models with an active local row are returned, so the caller can always
 * resolve the result to an `internal_id`.
 *
 * @param {string} invalidModel - Id of the model being replaced (`provider/name`).
 * @param {Set<string>} validModels - Ids considered valid.
 * @param {{all: Function}} db - Database handle; `all(sql, params)` resolves to rows.
 * @returns {Promise<string|null>} Replacement model id, or null when none exists.
 */
async function findAlternativeModel(invalidModel, validModels, db) {
    const validIds = Array.from(validModels);
    // With nothing valid there is nothing to suggest; this also avoids
    // building an empty `IN ()` list below.
    if (validIds.length === 0) {
        return null;
    }
    const placeholders = (count) => Array.from({ length: count }, () => '?').join(',');
    // Empty tokens (from "a--b" or a trailing "-") are dropped: every string
    // includes '', so they would match everything and inflate the score.
    const tokenize = (name) => name.toLowerCase().split(/[-_]/).filter(Boolean);

    const [provider, invalidName = ''] = invalidModel.split('/');
    const sameProvider = validIds.filter((id) => {
        const [candidateProvider, candidateName] = id.split('/');
        return candidateProvider === provider && Boolean(candidateName) && id !== invalidModel;
    });
    if (sameProvider.length > 0) {
        // Keep only candidates that exist and are active locally.
        const localRows = await db.all(`SELECT openrouter_id FROM openrouter_models WHERE is_active = 1 AND openrouter_id IN (${placeholders(sameProvider.length)})`, sameProvider);
        const localIds = new Set(localRows.map((row) => row.openrouter_id));
        const candidates = sameProvider.filter((id) => localIds.has(id));
        if (candidates.length > 0) {
            const wantedWords = tokenize(invalidName);
            let best = candidates[0];
            let bestScore = -1;
            for (const candidate of candidates) {
                const candidateWords = tokenize(candidate.split('/')[1] || '');
                const score = wantedWords.filter((word) => candidateWords.some((other) => other.includes(word) || word.includes(other))).length;
                // Strict comparison keeps the earliest candidate on ties, so with
                // no name overlap at all the first same-provider model wins.
                if (score > bestScore) {
                    best = candidate;
                    bestScore = score;
                }
            }
            return best;
        }
    }
    // No usable model from the same provider: fall back to the most-used one.
    const popularAlternatives = await db.all(`
        SELECT om.openrouter_id, COUNT(pp.id) as usage_count
        FROM openrouter_models om
        LEFT JOIN pipeline_profiles pp ON (
        pp.generator_model_internal_id = om.internal_id OR
        pp.refiner_model_internal_id = om.internal_id OR
        pp.validator_model_internal_id = om.internal_id OR
        pp.curator_model_internal_id = om.internal_id
        )
        WHERE om.is_active = 1 AND om.openrouter_id IN (${placeholders(validIds.length)})
        GROUP BY om.openrouter_id
        ORDER BY usage_count DESC, om.openrouter_id
        LIMIT 10
    `, validIds);
    return popularAlternatives.length > 0 ? popularAlternatives[0].openrouter_id : null;
}
/**
 * Replace invalid models referenced by pipeline profiles.
 *
 * For each profile stage (generator, refiner, validator, curator) whose
 * current model is in `invalidModels`, a replacement is looked up via
 * `findAlternativeModel`. A change is reported only when the replacement
 * resolves to a row in `openrouter_models`, so a dry run previews exactly what
 * a live run would write.
 *
 * @param {Iterable<string>} invalidModels - Model ids that must be replaced.
 * @param {Set<string>} validModels - Ids considered valid.
 * @param {{all: Function, get: Function, run: Function}} db - Database handle.
 * @param {boolean} [dryRun=false] - When true, nothing is written.
 * @returns {Promise<Array<{id: *, name: *, changes: Object<string, {from: string, to: string}>}>>}
 *   One entry per profile with at least one replaced stage.
 */
async function updateProfilesWithInvalidModels(invalidModels, validModels, db, dryRun = false) {
    // Set lookup instead of Array#includes inside the nested loops below.
    const invalidIds = new Set(invalidModels);
    const stageNames = ['generator', 'refiner', 'validator', 'curator'];
    // Load every profile with the openrouter id of each stage's model.
    const profiles = await db.all(`
        SELECT pp.*,
        gen.openrouter_id as generator_model,
        ref.openrouter_id as refiner_model,
        val.openrouter_id as validator_model,
        cur.openrouter_id as curator_model
        FROM pipeline_profiles pp
        LEFT JOIN openrouter_models gen ON pp.generator_model_internal_id = gen.internal_id
        LEFT JOIN openrouter_models ref ON pp.refiner_model_internal_id = ref.internal_id
        LEFT JOIN openrouter_models val ON pp.validator_model_internal_id = val.internal_id
        LEFT JOIN openrouter_models cur ON pp.curator_model_internal_id = cur.internal_id
    `);
    const updatedProfiles = [];
    for (const profile of profiles) {
        const changes = {};
        for (const stage of stageNames) {
            // Column names come from the fixed stage list above, never from
            // input, so interpolating them into the UPDATE below is safe.
            const legacyField = `${stage}_model`;
            const internalIdField = `${stage}_model_internal_id`;
            const current = profile[legacyField];
            if (!current || !invalidIds.has(current)) {
                continue;
            }
            const alternative = await findAlternativeModel(current, validModels, db);
            if (!alternative) {
                continue;
            }
            // Resolved in dry-run mode too (read-only), so the preview cannot
            // promise a replacement that a live run would be unable to apply.
            const altModel = await db.get('SELECT internal_id FROM openrouter_models WHERE openrouter_id = ?', [alternative]);
            if (!altModel) {
                console.warn(`No local database entry for replacement model ${alternative}; leaving the ${stage} stage of profile "${profile.name}" unchanged`);
                continue;
            }
            if (!dryRun) {
                // One statement updates the internal id and the legacy
                // compatibility column together, so they cannot diverge.
                await db.run(`UPDATE pipeline_profiles SET ${internalIdField} = ?, ${legacyField} = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?`, [altModel.internal_id, alternative, profile.id]);
            }
            changes[stage] = { from: current, to: alternative };
        }
        if (Object.keys(changes).length > 0) {
            updatedProfiles.push({
                id: profile.id,
                name: profile.name,
                changes
            });
        }
    }
    return updatedProfiles;
}
/**
 * Run the cleanup: fetch the valid model list from OpenRouter, compare it with
 * the active local models, repoint pipeline profiles away from invalid models,
 * deactivate the invalid models and record a sync_metadata row.
 *
 * Failures inside the run are captured in `result.errors` (and logged) rather
 * than rejected, so the caller always receives a result object.
 *
 * NOTE(review): the log prefixes below look like mis-encoded emoji; they are
 * kept as found. Where the original literal continued on the next line after
 * `โ`, that break is written as the escape `\u0085` — confirm against the
 * upstream source.
 *
 * @param {{dryRun?: boolean, force?: boolean, batchSize?: number, timeout?: number}} [args]
 *   Options; `force` is accepted but not used by this function.
 * @returns {Promise<{validModels: string[], invalidModels: string[], removedModels: string[],
 *   updatedProfiles: Array, errors: string[], stats: {totalModelsChecked: number,
 *   modelsRemoved: number, profilesUpdated: number, executionTimeMs: number}}>}
 */
export async function runDatabaseCleanup(args) {
    const startTime = Date.now();
    // Tolerate a missing options object instead of throwing before the try.
    const { dryRun = false, batchSize = 50, timeout = 30000 } = args ?? {};
    const result = {
        validModels: [],
        invalidModels: [],
        removedModels: [],
        updatedProfiles: [],
        errors: [],
        stats: {
            totalModelsChecked: 0,
            modelsRemoved: 0,
            profilesUpdated: 0,
            executionTimeMs: 0
        }
    };
    try {
        // Initialize database and get API key
        const { getDatabase, getOpenRouterApiKey } = await import('../storage/unified-database.js');
        const db = await getDatabase();
        const apiKey = await getOpenRouterApiKey();
        if (!apiKey) {
            throw new Error('OpenRouter API key not configured. Please run: hive-ai provider configure openrouter <key>');
        }
        console.log('๐ Starting comprehensive database cleanup...');
        console.log(`Mode: ${dryRun ? 'DRY RUN (no changes will be made)' : 'LIVE CLEANUP'}`);
        // Step 1: Get current valid models from OpenRouter API
        console.log('๐ฅ Fetching current valid models from OpenRouter API...');
        const validModels = await getValidModelsFromOpenRouter(apiKey, timeout);
        // An empty list would classify every local model as invalid and
        // deactivate the whole table, so treat it as a failed fetch.
        if (validModels.size === 0) {
            throw new Error('OpenRouter returned no models; aborting so that local models are not all deactivated');
        }
        console.log(`โ\u0085Retrieved ${validModels.size} valid models from OpenRouter`);
        // Step 2: Get all models from our database
        console.log('๐ Analyzing local database models...');
        const localModels = await db.all(`
            SELECT internal_id, openrouter_id, provider_name, name, is_active
            FROM openrouter_models
            WHERE is_active = 1
            ORDER BY provider_name, openrouter_id
        `);
        result.stats.totalModelsChecked = localModels.length;
        console.log(`๐ Found ${localModels.length} active models in local database`);
        // Step 3: Validate models in batches
        console.log('๐ Validating models against OpenRouter API...');
        const validationResults = await validateModelsInBatches(localModels, validModels, batchSize);
        // Split the results once, keeping the full record of each invalid
        // model so the detail listing below needs no second search.
        const invalidDetails = [];
        for (const validation of validationResults) {
            if (validation.isValid) {
                result.validModels.push(validation.modelId);
            }
            else {
                result.invalidModels.push(validation.modelId);
                invalidDetails.push(validation);
            }
        }
        console.log(`โ\u0085Valid models: ${result.validModels.length}`);
        console.log(`โ Invalid models: ${result.invalidModels.length}`);
        if (result.invalidModels.length === 0) {
            console.log('๐ No invalid models found! Database is clean.');
            result.stats.executionTimeMs = Date.now() - startTime;
            return result;
        }
        // Step 4: Show detailed invalid model information
        console.log('\n๐ Invalid models found:');
        invalidDetails.forEach((validation, index) => {
            console.log(` ${index + 1}. ${validation.modelId} (${validation.provider || 'unknown'}) - ${validation.error || 'Not found'}`);
        });
        // Step 5: Update profiles with invalid model references
        console.log('\n๐ง Checking pipeline profiles for invalid model references...');
        const updatedProfiles = await updateProfilesWithInvalidModels(result.invalidModels, validModels, db, dryRun);
        result.updatedProfiles = updatedProfiles;
        result.stats.profilesUpdated = updatedProfiles.length;
        if (updatedProfiles.length > 0) {
            console.log(`๐ Profiles requiring updates: ${updatedProfiles.length}`);
            updatedProfiles.forEach(profile => {
                console.log(` โข ${profile.name}:`);
                Object.entries(profile.changes).forEach(([stage, change]) => {
                    console.log(` ${stage}: ${change.from} โ ${change.to}`);
                });
            });
        }
        else {
            console.log('โ\u0085No profiles reference invalid models');
        }
        if (!dryRun) {
            // Step 6: Remove invalid models from database (soft delete: the
            // row is kept and flagged inactive).
            console.log('\n๐๏ธ Removing invalid models from database...');
            for (const invalidModel of result.invalidModels) {
                try {
                    await db.run('UPDATE openrouter_models SET is_active = 0, last_updated = CURRENT_TIMESTAMP WHERE openrouter_id = ?', [invalidModel]);
                    result.removedModels.push(invalidModel);
                }
                catch (error) {
                    // One failing row must not stop the remaining removals.
                    result.errors.push(`Failed to remove ${invalidModel}: ${error.message}`);
                }
            }
            result.stats.modelsRemoved = result.removedModels.length;
            console.log(`โ\u0085Deactivated ${result.removedModels.length} invalid models`);
            // Step 7: Update sync metadata
            await db.run(`
                INSERT OR REPLACE INTO sync_metadata
                (id, sync_type, started_at, completed_at, status, models_synced, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            `, [
                `cleanup_${Date.now()}`,
                'cleanup_invalid_models',
                new Date(startTime).toISOString(),
                new Date().toISOString(),
                'completed',
                result.removedModels.length,
                new Date().toISOString()
            ]);
        }
        else {
            console.log(`\n๐ DRY RUN: Would deactivate ${result.invalidModels.length} invalid models`);
        }
    }
    catch (error) {
        result.errors.push(`Cleanup failed: ${error.message}`);
        console.error('โ Cleanup error:', error.message);
    }
    result.stats.executionTimeMs = Date.now() - startTime;
    return result;
}
/**
 * CLI entry point for the cleanup command.
 *
 * Unless `dryRun` or `force` is set, the user is asked to confirm on stdin.
 * The cleanup is then run through `runDatabaseCleanup` and its result is
 * formatted as a report. Any thrown error is turned into a troubleshooting
 * message instead of propagating.
 *
 * NOTE(review): the message prefixes below look like mis-encoded emoji; they
 * are kept as found. Where the original literal continued on the next line
 * after `โ`, that break is written as the escape `\u0085` — confirm against
 * the upstream source.
 *
 * @param {{dryRun?: boolean, force?: boolean, batchSize?: number, timeout?: number}} [args]
 * @returns {Promise<{result: string}>} Report text, cancellation notice or error message.
 */
export async function runCleanupModelsTool(args) {
    try {
        const options = args ?? {};
        const { dryRun = false, force = false } = options;
        console.log('๐งน Database Cleanup Tool - Invalid Model Removal\n');
        if (!dryRun && !force) {
            // Show safety warning and get confirmation
            const readline = await import('readline');
            const rl = readline.createInterface({
                input: process.stdin,
                output: process.stdout
            });
            let proceed;
            try {
                console.log('โ ๏ธ WARNING: This will modify your database by removing invalid model entries.');
                console.log(' Pipeline profiles will be automatically updated with valid alternatives.');
                console.log(' This operation cannot be undone.\n');
                console.log('๐ก Safety options:');
                console.log(' โข Use --dry-run to see what would be changed first');
                console.log(' โข Database backups are recommended before running\n');
                proceed = await new Promise((resolve) => {
                    rl.question('Continue with cleanup? (y/N): ', resolve);
                });
            }
            finally {
                // Release stdin even if prompting fails.
                rl.close();
            }
            // Ignore surrounding whitespace so "y " or " yes" still confirm.
            const answer = String(proceed).trim().toLowerCase();
            if (answer !== 'y' && answer !== 'yes') {
                return {
                    result: '๐ซ Cleanup cancelled by user.\n\n๐ก Use --dry-run to preview changes: hive-ai cleanup-models --dry-run'
                };
            }
        }
        const result = await runDatabaseCleanup(options);
        // Format results for display
        let output = `๐งน Database Cleanup ${dryRun ? 'Preview' : 'Results'}\n\n`;
        // Statistics
        output += `๐ Summary:\n`;
        output += ` โข Models checked: ${result.stats.totalModelsChecked}\n`;
        output += ` โข Valid models: ${result.validModels.length}\n`;
        output += ` โข Invalid models found: ${result.invalidModels.length}\n`;
        if (!dryRun) {
            output += ` โข Models removed: ${result.stats.modelsRemoved}\n`;
        }
        output += ` โข Profiles updated: ${result.stats.profilesUpdated}\n`;
        output += ` โข Execution time: ${(result.stats.executionTimeMs / 1000).toFixed(1)}s\n\n`;
        // Invalid models details
        if (result.invalidModels.length > 0) {
            // Say "Removed" only when every listed model really was removed;
            // after a partial or failed run the list is what was found.
            const allRemoved = !dryRun && result.stats.modelsRemoved === result.invalidModels.length;
            output += `โ Invalid Models ${allRemoved ? 'Removed' : 'Found'}:\n`;
            result.invalidModels.slice(0, 10).forEach((model, index) => {
                output += ` ${index + 1}. ${model}\n`;
            });
            if (result.invalidModels.length > 10) {
                output += ` ... and ${result.invalidModels.length - 10} more\n`;
            }
            output += '\n';
        }
        // Profile updates
        if (result.updatedProfiles.length > 0) {
            output += `๐ง Profile Updates ${dryRun ? 'Required' : 'Applied'}:\n`;
            result.updatedProfiles.forEach(profile => {
                output += ` โข ${profile.name}:\n`;
                Object.entries(profile.changes).forEach(([stage, change]) => {
                    output += ` ${stage}: ${change.from} โ ${change.to}\n`;
                });
            });
            output += '\n';
        }
        // Errors
        if (result.errors.length > 0) {
            output += `โ ๏ธ Errors:\n`;
            result.errors.forEach(error => {
                output += ` โข ${error}\n`;
            });
            output += '\n';
        }
        // Next steps
        if (dryRun) {
            output += `๐ Next Steps:\n`;
            output += ` โข Review the changes above\n`;
            output += ` โข Run without --dry-run to apply changes: hive-ai cleanup-models\n`;
            output += ` โข Or use --force to skip confirmation: hive-ai cleanup-models --force\n\n`;
        }
        else if (result.errors.length === 0 && (result.stats.modelsRemoved > 0 || result.stats.profilesUpdated > 0)) {
            // Success is claimed only when the run reported no errors.
            output += `โ\u0085Cleanup completed successfully!\n\n`;
            output += `๐ Recommendations:\n`;
            output += ` โข Test your profiles: hive-ai pipeline list\n`;
            output += ` โข Update model data: hive-ai models update\n`;
            output += ` โข Run consensus to verify: hive-ai consensus "test question"\n\n`;
        }
        output += `๐ก This cleanup tool is safe to run regularly to maintain database integrity.`;
        return { result: output };
    }
    catch (error) {
        return {
            result: `โ Cleanup tool error: ${error.message}\n\n` +
                `๐ก Troubleshooting:\n` +
                ` โข Check OpenRouter API key: hive-ai test-providers\n` +
                ` โข Verify database: hive-ai models list\n` +
                ` โข Try with --dry-run first\n`
        };
    }
}
// Name and description of this tool, exported for the CLI.
const cleanupModelsToolName = 'cleanup_models';
const cleanupModelsToolDescription = 'Remove invalid model entries and update affected pipeline profiles';
export { cleanupModelsToolName, cleanupModelsToolDescription };
//# sourceMappingURL=database-cleanup.js.map