/*
 * codecrucible-synth
 * Version:
 * Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
 * 572 lines (498 loc) • 15.9 kB
 * text/typescript
 */
import { logger } from '../core/logger.js';
import axios from 'axios';
/**
 * Settings accepted by the `HuggingFaceTool` constructor.
 */
export interface HuggingFaceConfig {
  /** Master switch; when false the tool's search/lookup methods throw and `testConnection()` returns false. */
  enabled: boolean;

  /** Optional access token; when set it is sent as `Authorization: Bearer <apiKey>`. */
  apiKey?: string;

  /** Root URL of the Hub API; a falsy value falls back to `https://huggingface.co/api`. */
  baseUrl: string;

  /** Request timeout handed to axios (milliseconds); a falsy value falls back to 30000. */
  timeout: number;
}
/**
 * Normalized description of a Hub model, as produced by the model
 * search and lookup methods of `HuggingFaceTool`.
 */
export interface ModelInfo {
  /** Repository identifier of the model. */
  id: string;
  /** Display name of the model. */
  name: string;
  /** Free-text description; empty string when the API supplies none. */
  description: string;
  /** Download count; 0 when the API supplies none. */
  downloads: number;
  /** Like count; 0 when the API supplies none. */
  likes: number;
  /** Tags attached to the model; empty array when the API supplies none. */
  tags: string[];

  /** Pipeline tag copied from the API payload, if any. */
  pipeline_tag?: string;
  /** Library name copied from the API payload, if any. */
  library_name?: string;
  /** Language list copied from the API payload, if any. */
  language?: string[];
  /** License identifier copied from the API payload, if any. */
  license?: string;

  /** Coarse category derived from `pipeline_tag`, falling back to the tags. */
  modelType:
    | 'text-generation'
    | 'text-classification'
    | 'translation'
    | 'summarization'
    | 'question-answering'
    | 'other';
}
/**
 * Normalized description of a Hub dataset, as produced by
 * `HuggingFaceTool.searchDatasets`.
 */
export interface DatasetInfo {
  /** Repository identifier of the dataset. */
  id: string;
  /** Display name; falls back to `id` when the API supplies none. */
  name: string;
  /** Free-text description; empty string when the API supplies none. */
  description: string;
  /** Download count; 0 when the API supplies none. */
  downloads: number;
  /** Like count; 0 when the API supplies none. */
  likes: number;
  /** Tags attached to the dataset; empty array when the API supplies none. */
  tags: string[];

  /** First entry of the payload's `size_categories`, if any. */
  size?: string;
  /** Language list copied from the API payload, if any. */
  language?: string[];
  /** License identifier copied from the API payload, if any. */
  license?: string;
}
/**
 * Normalized description of a Hub Space, as produced by
 * `HuggingFaceTool.searchSpaces`.
 */
export interface SpaceInfo {
  /** Repository identifier of the Space. */
  id: string;
  /** Display name; falls back to `id` when the API supplies none. */
  name: string;
  /** Free-text description; empty string when the API supplies none. */
  description: string;
  /** Like count; 0 when the API supplies none. */
  likes: number;
  /** SDK value copied from the API payload, if any. */
  sdk?: string;
  /** Tags attached to the Space; empty array when the API supplies none. */
  tags: string[];
  /** Runtime value copied from the API payload, if any. */
  runtime?: string;
}
/**
 * Hugging Face MCP Tool Integration
 *
 * Provides access to Hugging Face Hub for model discovery,
 * dataset exploration, and space browsing.
 *
 * Every method that talks to the Hub rejects with
 * `Error('Hugging Face integration is not enabled')` when the config has
 * `enabled: false`. Request failures are logged and then rethrown unchanged.
 */
export class HuggingFaceTool {
  /**
   * Public `sort` option -> field name sent to the Hub API.
   * The API sorts on `lastModified` / `createdAt`; unknown values are passed through.
   */
  private static readonly SORT_FIELDS = new Map<string, string>([
    ['downloads', 'downloads'],
    ['likes', 'likes'],
    ['updated', 'lastModified'],
    ['created', 'createdAt'],
  ]);

  /** Ordered keyword rules used by `analyzeUseCase`; the first rule with a matching stem wins. */
  private static readonly USE_CASE_RULES: ReadonlyArray<{
    stems: readonly string[];
    primaryTask: string;
    confidence: number;
  }> = [
    { stems: ['generat', 'creat', 'writ'], primaryTask: 'text-generation', confidence: 0.9 },
    { stems: ['classif', 'categor'], primaryTask: 'text-classification', confidence: 0.8 },
    { stems: ['translat'], primaryTask: 'translation', confidence: 0.9 },
    { stems: ['summar'], primaryTask: 'summarization', confidence: 0.8 },
    { stems: ['question', 'answer'], primaryTask: 'question-answering', confidence: 0.8 },
  ];

  /** Upper bound applied to every caller-supplied `limit`. */
  private static readonly MAX_LIMIT = 100;

  private readonly config: HuggingFaceConfig;

  /** Number of HTTP requests that completed successfully (failed requests are not counted). */
  private requestCount = 0;

  /**
   * @param config Tool settings. A falsy `baseUrl` falls back to
   *   `https://huggingface.co/api` and a falsy `timeout` to 30000. Trailing
   *   slashes on `baseUrl` are dropped so endpoint paths can be appended safely.
   */
  constructor(config: HuggingFaceConfig) {
    const defaultBaseUrl = 'https://huggingface.co/api';
    // `||` rather than `??` on purpose: '' and 0 also select the defaults.
    const trimmedBaseUrl = (config.baseUrl || '').replace(/\/+$/, '');
    this.config = {
      ...config,
      baseUrl: trimmedBaseUrl || defaultBaseUrl,
      timeout: config.timeout || 30000,
    };
  }

  /**
   * Search for models with advanced filtering.
   *
   * @param query Free-text search string; omitted from the request when empty.
   * @param options Optional filters. `limit` is capped at 100; `sort` and
   *   `direction` are translated to the Hub API's own spelling.
   * @returns One normalized `ModelInfo` per item returned by the API.
   * @throws Error when the integration is disabled, the request fails, or the
   *   response body is not an array.
   */
  async searchModels(
    query: string,
    options: {
      task?: string;
      library?: string;
      language?: string;
      license?: string;
      sort?: 'downloads' | 'likes' | 'updated' | 'created';
      direction?: 'asc' | 'desc';
      limit?: number;
      filter?: string;
    } = {}
  ): Promise<ModelInfo[]> {
    this.assertEnabled();
    const queryString = HuggingFaceTool.buildQuery([
      ['search', query],
      ['pipeline_tag', options.task],
      ['library', options.library],
      ['language', options.language],
      ['license', options.license],
      ['sort', HuggingFaceTool.toApiSort(options.sort)],
      ['direction', HuggingFaceTool.toApiDirection(options.direction)],
      ['limit', HuggingFaceTool.toApiLimit(options.limit)],
      ['filter', options.filter],
    ]);
    const items = await this.requestList(
      'models',
      queryString,
      'Hugging Face model search failed:'
    );
    return items.map(item => this.processModelInfo(item));
  }

  /**
   * Get detailed information about a specific model.
   *
   * @param modelId Repository id such as `org/name`. Each path segment is
   *   percent-encoded, so characters like `?` or `#` cannot alter the request URL.
   * @throws Error when the integration is disabled, `modelId` is empty, or the
   *   request fails.
   */
  async getModelInfo(modelId: string): Promise<ModelInfo> {
    this.assertEnabled();
    const trimmedId = typeof modelId === 'string' ? modelId.trim() : '';
    if (trimmedId === '') {
      // Without this guard the request would hit the list endpoint and an
      // array would be mis-read as a single model.
      throw new Error('modelId must be a non-empty string');
    }
    const data = await this.request(
      `models/${HuggingFaceTool.encodeRepoId(trimmedId)}`,
      '',
      'Failed to get model info:'
    );
    return this.processModelInfo(data);
  }

  /**
   * Search for datasets.
   *
   * @param query Free-text search string; omitted from the request when empty.
   * @param options Optional filters; `limit` is capped at 100.
   * @throws Error when the integration is disabled, the request fails, or the
   *   response body is not an array.
   */
  async searchDatasets(
    query: string,
    options: {
      task?: string;
      language?: string;
      size?: string;
      license?: string;
      sort?: 'downloads' | 'likes' | 'updated' | 'created';
      direction?: 'asc' | 'desc';
      limit?: number;
    } = {}
  ): Promise<DatasetInfo[]> {
    this.assertEnabled();
    const queryString = HuggingFaceTool.buildQuery([
      ['search', query],
      ['task_categories', options.task],
      ['language', options.language],
      ['size_categories', options.size],
      ['license', options.license],
      ['sort', HuggingFaceTool.toApiSort(options.sort)],
      ['direction', HuggingFaceTool.toApiDirection(options.direction)],
      ['limit', HuggingFaceTool.toApiLimit(options.limit)],
    ]);
    const items = await this.requestList(
      'datasets',
      queryString,
      'Hugging Face dataset search failed:'
    );
    return items.map(item => this.processDatasetInfo(item));
  }

  /**
   * Search for Spaces (demos/applications).
   *
   * @param query Free-text search string; omitted from the request when empty.
   * @param options Optional filters; `limit` is capped at 100.
   * @throws Error when the integration is disabled, the request fails, or the
   *   response body is not an array.
   */
  async searchSpaces(
    query: string,
    options: {
      sdk?: string;
      sort?: 'likes' | 'updated' | 'created';
      direction?: 'asc' | 'desc';
      limit?: number;
    } = {}
  ): Promise<SpaceInfo[]> {
    this.assertEnabled();
    const queryString = HuggingFaceTool.buildQuery([
      ['search', query],
      ['sdk', options.sdk],
      ['sort', HuggingFaceTool.toApiSort(options.sort)],
      ['direction', HuggingFaceTool.toApiDirection(options.direction)],
      ['limit', HuggingFaceTool.toApiLimit(options.limit)],
    ]);
    const items = await this.requestList(
      'spaces',
      queryString,
      'Hugging Face spaces search failed:'
    );
    return items.map(item => this.processSpaceInfo(item));
  }

  /**
   * Find models suitable for a specific coding task.
   *
   * Searches for `"<task> <language>"` (or `"<task> code"` when no language is
   * given), restricted to the pipeline tag mapped from the task and to the
   * `code` filter, returning up to 20 models ordered by downloads, descending.
   */
  async findCodeModels(
    task:
      | 'code-generation'
      | 'code-completion'
      | 'code-explanation'
      | 'bug-fixing'
      | 'code-translation',
    language?: string
  ): Promise<ModelInfo[]> {
    const taskMappings = {
      'code-generation': 'text-generation',
      'code-completion': 'text-generation',
      'code-explanation': 'text-generation',
      'bug-fixing': 'text-generation',
      'code-translation': 'translation',
    };
    const searchQuery = `${task} ${language || 'code'}`;
    return this.searchModels(searchQuery, {
      task: taskMappings[task],
      sort: 'downloads',
      direction: 'desc',
      limit: 20,
      filter: 'code',
    });
  }

  /**
   * Get trending models in a specific category.
   *
   * @param category Pipeline tag to search within.
   * @param timeframe Accepted for interface compatibility but currently unused:
   *   the result is the 15 most-downloaded models of the category.
   */
  async getTrendingModels(
    category: string = 'text-generation',
    timeframe: 'day' | 'week' | 'month' = 'week'
  ): Promise<ModelInfo[]> {
    void timeframe; // not part of the request yet; see the doc comment
    return this.searchModels('', {
      task: category,
      sort: 'downloads',
      direction: 'desc',
      limit: 15,
    });
  }

  /**
   * Find models by specific criteria for coding assistance.
   *
   * `language` and `size` are folded into the free-text query, `task` becomes
   * the pipeline-tag filter and `license` maps to one concrete license id
   * (commercial -> apache-2.0, research -> cc-by-nc-4.0, open -> mit).
   * `performance` is accepted but does not affect the search.
   */
  async findModelsByCriteria(criteria: {
    language?: string[];
    size?: 'small' | 'medium' | 'large';
    performance?: 'fast' | 'balanced' | 'quality';
    license?: 'commercial' | 'research' | 'open';
    task?: string;
  }): Promise<ModelInfo[]> {
    const sizeFilters = {
      small: '< 1B',
      medium: '1B - 10B',
      large: '> 10B',
    };
    const licenseMap = {
      commercial: 'apache-2.0',
      research: 'cc-by-nc-4.0',
      open: 'mit',
    };

    let query = '';
    if (criteria.language) {
      query += ` ${criteria.language.join(' ')}`;
    }
    if (criteria.size) {
      query += ` ${sizeFilters[criteria.size]} parameters`;
    }

    return this.searchModels(query.trim(), {
      limit: 25,
      sort: 'downloads',
      direction: 'desc',
      task: criteria.task || undefined,
      license: criteria.license ? licenseMap[criteria.license] : undefined,
    });
  }

  /**
   * Get model recommendations based on a use case description.
   *
   * The use case text is matched against keyword stems to pick a pipeline tag,
   * up to 30 models are fetched (highest-ranked first), filtered by the
   * constraints, and split into the top five and the next five.
   *
   * @param constraints `maxSize` is accepted but not enforced yet.
   */
  async getModelRecommendations(
    useCase: string,
    constraints: {
      maxSize?: string;
      requiresCommercialLicense?: boolean;
      preferredLibrary?: string;
      performanceRequirement?: 'speed' | 'quality' | 'balanced';
    } = {}
  ): Promise<{
    recommended: ModelInfo[];
    alternatives: ModelInfo[];
    reasoning: string;
  }> {
    const taskAnalysis = this.analyzeUseCase(useCase);
    const models = await this.searchModels(useCase, {
      task: taskAnalysis.primaryTask,
      library: constraints.preferredLibrary,
      license: constraints.requiresCommercialLicense ? 'apache-2.0' : undefined,
      sort: constraints.performanceRequirement === 'speed' ? 'downloads' : 'likes',
      // The slices below take the "top" entries, so ask for descending order explicitly.
      direction: 'desc',
      limit: 30,
    });
    const filtered = this.filterModelsByConstraints(models, constraints);
    return {
      recommended: filtered.slice(0, 5),
      alternatives: filtered.slice(5, 10),
      reasoning: this.generateRecommendationReasoning(taskAnalysis, constraints, filtered),
    };
  }

  /**
   * Private helper methods
   */

  /** Throws when the integration is switched off in the config. */
  private assertEnabled(): void {
    if (!this.config.enabled) {
      throw new Error('Hugging Face integration is not enabled');
    }
  }

  /** Serializes the truthy entries, in order, into a URL query string. */
  private static buildQuery(
    entries: ReadonlyArray<readonly [string, string | undefined]>
  ): string {
    const params = new URLSearchParams();
    for (const [key, value] of entries) {
      if (value) params.append(key, value);
    }
    return params.toString();
  }

  /** Maps the public sort option to the field name the Hub API sorts on. */
  private static toApiSort(sort: string | undefined): string | undefined {
    if (!sort) return undefined;
    return HuggingFaceTool.SORT_FIELDS.get(sort) ?? sort;
  }

  /**
   * Maps 'desc'/'asc' to the API's numeric direction. The Hub API treats `-1`
   * as descending and any other value as ascending, so the literal string
   * 'desc' would be read as ascending.
   */
  private static toApiDirection(direction: 'asc' | 'desc' | undefined): string | undefined {
    if (!direction) return undefined;
    return direction === 'desc' ? '-1' : '1';
  }

  /** Returns the limit as a whole number in [1, 100], or undefined when it is missing or unusable. */
  private static toApiLimit(limit: number | undefined): string | undefined {
    if (limit === undefined || !Number.isFinite(limit) || limit < 1) return undefined;
    return Math.min(Math.floor(limit), HuggingFaceTool.MAX_LIMIT).toString();
  }

  /** Percent-encodes each `/`-separated segment of a repository id, keeping the slashes. */
  private static encodeRepoId(repoId: string): string {
    return repoId
      .split('/')
      .map(segment => encodeURIComponent(segment))
      .join('/');
  }

  /**
   * Performs one GET against `<baseUrl>/<path>` and returns the response body.
   * Failures are logged with `failureMessage`, classified by `handleError`,
   * and rethrown unchanged.
   */
  private async request(path: string, queryString: string, failureMessage: string): Promise<unknown> {
    const endpoint = `${this.config.baseUrl}/${path}`;
    const url = queryString ? `${endpoint}?${queryString}` : endpoint;
    try {
      const response = await axios.get(url, {
        headers: this.getHeaders(),
        timeout: this.config.timeout,
      });
      this.requestCount++;
      return response.data;
    } catch (error: unknown) {
      logger.error(failureMessage, error);
      this.handleError(error);
      throw error;
    }
  }

  /** Like `request`, but requires the body to be an array and fails with a clear error otherwise. */
  private async requestList(
    path: string,
    queryString: string,
    failureMessage: string
  ): Promise<unknown[]> {
    const data = await this.request(path, queryString, failureMessage);
    if (!Array.isArray(data)) {
      const error = new Error(
        `Unexpected Hugging Face response for /${path}: expected an array`
      );
      logger.error(failureMessage, error);
      throw error;
    }
    return data;
  }

  /** Normalizes one raw model payload; missing fields get empty/zero defaults. */
  private processModelInfo(data: any): ModelInfo {
    const raw = data ?? {};
    const id = raw.id || raw.modelId || '';
    const tags = Array.isArray(raw.tags) ? raw.tags : [];
    return {
      id,
      // Fall back to the resolved id so payloads that only carry `modelId` still get a name.
      name: raw.name || id,
      description: raw.description || '',
      downloads: raw.downloads || 0,
      likes: raw.likes || 0,
      tags,
      pipeline_tag: raw.pipeline_tag,
      library_name: raw.library_name,
      language: raw.language,
      license: raw.license,
      modelType: this.categorizeModel(raw.pipeline_tag, tags),
    };
  }

  /** Normalizes one raw dataset payload; `size` is the first `size_categories` entry. */
  private processDatasetInfo(data: any): DatasetInfo {
    const raw = data ?? {};
    return {
      id: raw.id || '',
      name: raw.name || raw.id || '',
      description: raw.description || '',
      downloads: raw.downloads || 0,
      likes: raw.likes || 0,
      tags: raw.tags || [],
      size: raw.size_categories?.[0],
      language: raw.language,
      license: raw.license,
    };
  }

  /** Normalizes one raw Space payload. */
  private processSpaceInfo(data: any): SpaceInfo {
    const raw = data ?? {};
    return {
      id: raw.id || '',
      name: raw.name || raw.id || '',
      description: raw.description || '',
      likes: raw.likes || 0,
      sdk: raw.sdk,
      tags: raw.tags || [],
      runtime: raw.runtime,
    };
  }

  /**
   * Derives the coarse model category. A present `pipeline_tag` decides alone
   * (unknown tags map to 'other'); only without one are the tags inspected.
   */
  private categorizeModel(pipeline_tag?: string, tags: string[] = []): ModelInfo['modelType'] {
    if (pipeline_tag) {
      switch (pipeline_tag) {
        case 'text-generation':
        case 'text2text-generation':
          return 'text-generation';
        case 'text-classification':
          return 'text-classification';
        case 'translation':
          return 'translation';
        case 'summarization':
          return 'summarization';
        case 'question-answering':
          return 'question-answering';
        default:
          return 'other';
      }
    }
    const tagContains = (needle: string): boolean =>
      tags.some(tag => typeof tag === 'string' && tag.includes(needle));
    if (tagContains('generation')) return 'text-generation';
    if (tagContains('classification')) return 'text-classification';
    if (tagContains('translation')) return 'translation';
    return 'other';
  }

  /**
   * Guesses the pipeline tag for a free-text use case by keyword stems.
   * Falls back to text-generation with confidence 0.5 when nothing matches.
   */
  private analyzeUseCase(useCase: string): { primaryTask: string; confidence: number } {
    const useCaseLower = useCase.toLowerCase();
    const rule = HuggingFaceTool.USE_CASE_RULES.find(candidate =>
      candidate.stems.some(stem => useCaseLower.includes(stem))
    );
    return rule
      ? { primaryTask: rule.primaryTask, confidence: rule.confidence }
      : { primaryTask: 'text-generation', confidence: 0.5 };
  }

  /**
   * Applies the client-side constraints and returns a new array.
   *
   * Only the commercial-license constraint is enforced: models whose license
   * is known and not apache-2.0 / mit / bsd-3-clause are dropped, while models
   * without license information are kept. `maxSize` is not enforced because
   * the payload carries no parsed size information.
   */
  private filterModelsByConstraints(
    models: ModelInfo[],
    constraints: { maxSize?: string; requiresCommercialLicense?: boolean }
  ): ModelInfo[] {
    if (!constraints.requiresCommercialLicense) {
      return [...models];
    }
    const commercialLicenses = new Set(['apache-2.0', 'mit', 'bsd-3-clause']);
    return models.filter(
      model => !model.license || commercialLicenses.has(model.license.toLowerCase())
    );
  }

  /** Builds the human-readable explanation returned with recommendations. */
  private generateRecommendationReasoning(
    taskAnalysis: { primaryTask: string; confidence: number },
    constraints: { requiresCommercialLicense?: boolean; preferredLibrary?: string },
    models: ModelInfo[]
  ): string {
    // Round so float noise (0.57 * 100 = 56.99999999999999) never reaches the text.
    const confidencePercent = Math.round(taskAnalysis.confidence * 100);
    const sentences: string[] = [
      `Based on your use case, I identified this as a ${taskAnalysis.primaryTask} task with ${confidencePercent}% confidence. `,
    ];
    const [top] = models;
    if (top) {
      sentences.push(`I found ${models.length} suitable models. `);
      sentences.push(
        `The top recommendation is ${top.name} due to its ${top.downloads} downloads and popularity. `
      );
    }
    if (constraints.requiresCommercialLicense) {
      sentences.push(`Filtered for commercial-friendly licenses. `);
    }
    if (constraints.preferredLibrary) {
      sentences.push(`Prioritized ${constraints.preferredLibrary} library compatibility. `);
    }
    return sentences.join('');
  }

  /** Request headers: a fixed User-Agent plus a Bearer token when an API key is configured. */
  private getHeaders(): Record<string, string> {
    const headers: Record<string, string> = {
      'User-Agent': 'CodeCrucible-Synth',
    };
    if (this.config.apiKey) {
      headers['Authorization'] = `Bearer ${this.config.apiKey}`;
    }
    return headers;
  }

  /** Logs a specific message for auth failures (401), rate limiting (429) and timeouts. */
  private handleError(error: unknown): void {
    // Structural peek only: the thrown value may be an axios error, a plain Error, or anything else.
    const details = (typeof error === 'object' && error !== null ? error : {}) as {
      response?: { status?: unknown };
      code?: unknown;
    };
    const status = details.response?.status;
    if (status === 401) {
      logger.error('Hugging Face API authentication failed');
    } else if (status === 429) {
      logger.error('Hugging Face API rate limit exceeded');
    } else if (details.code === 'ECONNABORTED') {
      logger.error('Hugging Face API request timed out');
    }
  }

  /**
   * Public utility methods
   */

  /**
   * Test API connectivity.
   *
   * @returns true when a one-item model search succeeds; false when the
   *   integration is disabled or the search fails (the failure is logged as a warning).
   */
  async testConnection(): Promise<boolean> {
    if (!this.config.enabled) {
      return false;
    }
    try {
      await this.searchModels('test', { limit: 1 });
      return true;
    } catch (error: unknown) {
      logger.warn('Hugging Face connection test failed:', error);
      return false;
    }
  }

  /**
   * Get usage statistics.
   *
   * @returns The number of successful requests so far, whether the integration
   *   is enabled, and whether an API key is configured.
   */
  getUsageStats(): { requestCount: number; isEnabled: boolean; hasApiKey: boolean } {
    return {
      requestCount: this.requestCount,
      isEnabled: this.config.enabled,
      hasApiKey: !!this.config.apiKey,
    };
  }

  /**
   * Get model download URL for local use.
   *
   * Derived from the configured base URL with a trailing `/api` segment
   * removed. Only the trailing segment is stripped, so hosts such as
   * `https://api.example.com/...` are left intact.
   */
  getModelDownloadUrl(modelId: string): string {
    const siteRoot = this.config.baseUrl.replace(/\/api\/*$/, '');
    return `${siteRoot}/${modelId}`;
  }

  /**
   * Get model card URL for detailed information.
   *
   * Always points at huggingface.co, independent of the configured base URL.
   */
  getModelCardUrl(modelId: string): string {
    return `https://huggingface.co/${modelId}`;
  }
}