UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

418 lines 19.7 kB
/**
 * Topic Tagging System for Hive.AI MCP Server
 *
 * Implements a sophisticated tagging system for conversations and messages
 * to enable thematic relation discovery across the consensus pipeline.
 *
 * Uses provider-agnostic approach to ensure resilience across different LLM providers.
 */
// Removed legacy provider selection - OpenRouter handles all provider logic
import { createChatCompletion } from '../tools/hiveai/provider-client.js';
// Pipeline utils replaced with secure pipeline config
import { logger, formatTopicList } from '../utils/logging.js';
import { getDatabase } from './unified-database.js';

// Database connection helper - now uses unified database
async function getTopicDatabase() {
    return await getDatabase();
}

/**
 * Close a database handle without letting a close failure escape.
 * The exported functions in this module report failures by returning an
 * empty result, so a failing close() is logged instead of thrown.
 * @param db Handle returned by getTopicDatabase(); ignored when falsy
 */
async function closeTopicDatabase(db) {
    if (!db) {
        return;
    }
    try {
        await db.close();
    } catch (error) {
        logger.warn(`Error closing database: ${describeError(error)}`);
    }
}

/**
 * Produce a printable message for any thrown value (Error or otherwise).
 */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}

// Define a comprehensive topic hierarchy for technical domains
const TOPIC_HIERARCHY = {
    "artificial_intelligence": {
        related: ["machine_learning", "deep_learning", "neural_networks", "ai", "algorithms"],
        subtopics: {
            "machine_learning": ["supervised", "unsupervised", "reinforcement", "models", "algorithms", "training", "features"],
            "neural_networks": ["deep_learning", "cnn", "rnn", "transformer", "attention", "layers", "activation"],
            "nlp": ["language_models", "chatbots", "text_processing", "sentiment", "tokens", "embedding"],
            "computer_vision": ["image_recognition", "object_detection", "segmentation", "face_recognition"],
            "ethics": ["bias", "fairness", "transparency", "accountability", "privacy", "safety"]
        }
    },
    "programming": {
        related: ["software_development", "coding", "languages", "frameworks", "algorithms"],
        subtopics: {
            "languages": ["javascript", "python", "java", "typescript", "c++", "go", "rust", "php"],
            "web_development": ["frontend", "backend", "fullstack", "frameworks", "html", "css", "react", "angular", "vue"],
            "mobile_development": ["ios", "android", "react_native", "flutter", "mobile_apps"],
            "databases": ["sql", "nosql", "relational", "document", "graph", "key_value"],
            "devops": ["ci_cd", "containers", "kubernetes", "deployment", "cloud"]
        }
    },
    "data": {
        related: ["database", "analysis", "storage", "processing", "visualization"],
        subtopics: {
            "databases": ["relational", "nosql", "sql", "mysql", "postgresql", "mongodb", "cassandra"],
            "big_data": ["hadoop", "spark", "distributed", "processing", "streams"],
            "data_science": ["analysis", "statistics", "visualization", "insights", "pandas", "jupyter"],
            "data_engineering": ["pipelines", "etl", "warehousing", "lake", "governance"]
        }
    },
    "blockchain": {
        related: ["cryptocurrency", "distributed_ledger", "smart_contracts", "decentralized"],
        subtopics: {
            "cryptocurrency": ["bitcoin", "ethereum", "tokens", "mining", "wallets"],
            "smart_contracts": ["solidity", "execution", "conditions", "automation"],
            "consensus": ["proof_of_work", "proof_of_stake", "algorithms", "validation", "mining"],
            "defi": ["decentralized_finance", "lending", "trading", "yield", "staking"]
        }
    },
    "security": {
        related: ["cybersecurity", "encryption", "protection", "threats", "privacy"],
        subtopics: {
            "encryption": ["cryptography", "algorithms", "keys", "hashing", "secure_communication"],
            "network_security": ["firewall", "protocols", "detection", "prevention", "monitoring"],
            "application_security": ["vulnerabilities", "testing", "secure_coding", "owasp"],
            "authentication": ["identity", "authorization", "tokens", "biometrics", "mfa"]
        }
    },
    "cloud_computing": {
        related: ["aws", "azure", "gcp", "iaas", "paas", "saas", "serverless"],
        subtopics: {
            "infrastructure": ["servers", "storage", "networking", "virtualization", "containers"],
            "platforms": ["services", "apis", "functions", "managed", "solutions"],
            "deployment": ["ci_cd", "automation", "scaling", "monitoring", "reliability"],
            "architecture": ["microservices", "serverless", "event_driven", "distributed"]
        }
    }
};

// System prompt sent to the model for AI-based topic extraction
const TOPIC_EXTRACTION_PROMPT = `You are a topic extraction system. Extract key technical topics from the text as a JSON array of snake_case strings. Focus on technical domains, programming concepts, and specific technologies. Return ONLY the JSON array, nothing else.`;

/**
 * Convert a snake_case hierarchy term into the lower-case, space-separated
 * phrase that is searched for in free text.
 *
 * Uses a global regex: String.prototype.replace with a string pattern only
 * replaces the FIRST underscore, which left terms such as "proof_of_work"
 * as "proof of_work" and therefore unmatchable.
 */
function toSearchPhrase(term) {
    return term.toLowerCase().replace(/_/g, ' ');
}

/**
 * Extract topics from a text using the topic hierarchy
 *
 * A main topic is reported when its own name or any of its related terms
 * occurs in the text; a subtopic is reported as "main:sub" when its name or
 * any of its keywords occurs.
 *
 * NOTE: matching is plain substring containment, so short terms (e.g. "ai",
 * "go") also match inside longer words.
 *
 * @param text The text to scan; non-string or empty input yields []
 * @returns Array of unique topic identifiers in hierarchy order
 */
function extractTopicsFromText(text) {
    if (typeof text !== 'string' || text.length === 0) {
        return [];
    }
    const lowerText = text.toLowerCase();
    const isMentioned = (term) => lowerText.includes(toSearchPhrase(term));
    const topics = new Set();
    for (const [mainTopic, details] of Object.entries(TOPIC_HIERARCHY)) {
        if (isMentioned(mainTopic) || details.related.some(isMentioned)) {
            topics.add(mainTopic);
        }
        for (const [subtopic, keywords] of Object.entries(details.subtopics)) {
            if (isMentioned(subtopic) || keywords.some(isMentioned)) {
                topics.add(`${mainTopic}:${subtopic}`);
            }
        }
    }
    return Array.from(topics);
}

/**
 * Keep only non-empty string entries, trimmed and de-duplicated, preserving
 * first-seen order.
 */
function normalizeTopicList(items) {
    const cleaned = items
        .filter((item) => typeof item === 'string')
        .map((item) => item.trim())
        .filter((item) => item.length > 0);
    return [...new Set(cleaned)];
}

/**
 * Turn a model reply into a list of topic strings.
 *
 * 1. Non-string content yields [].
 * 2. Content that parses as a JSON array yields its string entries.
 * 3. Otherwise (invalid JSON, or valid JSON that is not an array) the first
 *    bracketed span is scanned for double-quoted items.
 * 4. If nothing usable is found, ['general'] is returned.
 *
 * @param content The `content` field of the chat completion result
 * @returns Array of topic strings
 */
function parseTopicsFromAIResponse(content) {
    if (typeof content !== 'string') {
        return [];
    }
    try {
        const parsed = JSON.parse(content);
        if (Array.isArray(parsed)) {
            return normalizeTopicList(parsed);
        }
    } catch (jsonError) {
        logger.debug("Failed to parse JSON response, attempting to extract array content");
    }
    // Not a bare JSON array: look for array-like content embedded in the reply
    const arrayMatch = content.match(/\[(.*?)\]/s);
    if (arrayMatch && arrayMatch[1]) {
        // Extract items that look like they're in quotes
        const quotedItems = arrayMatch[1].match(/"([^"]*)"/g);
        if (quotedItems) {
            return normalizeTopicList(quotedItems.map((item) => item.replace(/"/g, '')));
        }
    }
    // If all else fails, return a default topic
    return ['general'];
}

/**
 * Extract topics from text using AI
 *
 * Both paths send the same prompt with model 'auto' and parse the reply as a
 * JSON array of topics. If the completion call (or reading its result)
 * throws, keyword-based extraction over `text` is used instead.
 *
 * @param text The text to extract topics from
 * @param providerName Optional provider name to use. It is only used for
 *   logging and to select the request shape; the request itself always uses
 *   model 'auto'.
 * @returns Array of topic strings. Without `providerName`, an empty AI
 *   result is replaced by ['general'].
 */
export async function extractTopicsWithAI(text, providerName) {
    const messages = [
        { role: 'system', content: TOPIC_EXTRACTION_PROMPT },
        { role: 'user', content: text }
    ];
    try {
        if (providerName) {
            logger.debug(`Attempting topic extraction with provider: ${providerName}`);
            const result = await createChatCompletion({ model: 'auto', messages });
            return parseTopicsFromAIResponse(result.content);
        }
        // Use createChatCompletion directly which has built-in fallback
        const result = await createChatCompletion({
            model: 'auto', // Use 'auto' to let the system pick the best available model
            messages,
            temperature: 0.1,
            max_tokens: 200
        });
        const topics = parseTopicsFromAIResponse(result.content);
        return topics.length > 0 ? topics : ['general'];
    } catch (error) {
        if (providerName) {
            logger.debug(`Provider ${providerName} failed: ${describeError(error)}`);
        } else {
            logger.debug(`Error extracting topics with AI: ${describeError(error)}`);
        }
        // If the AI call fails, fall back to keyword extraction
        logger.debug("Using keyword-based topic extraction as fallback");
        return extractTopicsFromText(text);
    }
}

/**
 * Tag a conversation with topics based on its content
 *
 * Loads every message of the conversation, extracts topics from the combined
 * content, then replaces the conversation's rows in `conversation_topics`
 * with one row per topic (weight 1.0).
 *
 * Never rejects: connection, query and extraction failures are logged and
 * reported as [].
 *
 * @param conversationId Conversation whose messages should be tagged
 * @returns The stored topics, or [] when there is nothing to tag or on error
 */
export async function tagConversation(conversationId) {
    let db;
    try {
        db = await getTopicDatabase();
    } catch (dbError) {
        logger.error(`Error connecting to database: ${describeError(dbError)}`);
        return [];
    }
    try {
        // Get all messages for this conversation
        const messages = await db.all('SELECT * FROM messages WHERE conversation_id = ? ORDER BY timestamp ASC', conversationId);
        if (!messages || messages.length === 0) {
            logger.warn(`No messages found for conversation ${conversationId}`);
            return [];
        }
        // Combine all message content
        const combinedContent = messages.map((m) => m.content).join('\n\n');
        // Extract topics directly without using provider selection
        const topics = await extractTopicsWithAI(combinedContent);
        if (!topics || topics.length === 0) {
            logger.debug(`No topics extracted for conversation ${conversationId}`);
            return [];
        }
        // Replace existing topics. NOTE: the DELETE and the INSERTs are not
        // wrapped in a transaction, so a failure part-way leaves a partial set.
        await db.run("DELETE FROM conversation_topics WHERE conversation_id = ?", [conversationId]);
        for (const topic of topics) {
            await db.run("INSERT INTO conversation_topics (conversation_id, topic, weight) VALUES (?, ?, ?)", [conversationId, topic, 1.0]);
        }
        logger.info(`Tagged conversation ${conversationId} with topics: ${formatTopicList(topics)}`);
        return topics;
    } catch (error) {
        logger.error(`Error tagging conversation: ${describeError(error)}`);
        return [];
    } finally {
        // Single close point for every exit path
        await closeTopicDatabase(db);
    }
}

/**
 * Collect every hierarchy entry connected to `topic`.
 *
 * - If `topic` is a main topic or one of its related terms: that main topic
 *   plus all of its related terms.
 * - If `topic` is a "main:sub" identifier or one of a subtopic's keywords:
 *   that "main:sub" identifier plus all of the subtopic's keywords.
 *
 * @returns Unique related topics in discovery order (possibly empty)
 */
function collectRelatedTopics(topic) {
    const related = new Set();
    for (const [mainTopic, details] of Object.entries(TOPIC_HIERARCHY)) {
        if (mainTopic === topic || details.related.includes(topic)) {
            related.add(mainTopic);
            details.related.forEach((term) => related.add(term));
        }
        for (const [subtopic, keywords] of Object.entries(details.subtopics)) {
            const fullSubtopic = `${mainTopic}:${subtopic}`;
            if (fullSubtopic === topic || keywords.includes(topic)) {
                related.add(fullSubtopic);
                keywords.forEach((keyword) => related.add(keyword));
            }
        }
    }
    return [...related];
}

/**
 * Find conversations related to a specific topic
 *
 * Tries, in order, until one yields rows: exact topic match, substring
 * (LIKE) match, then any topic connected to `topic` in TOPIC_HIERARCHY.
 *
 * @param topic Topic to look up
 * @param limit Maximum number of conversation ids to return (default 3)
 * @returns Array of conversation ids; [] when nothing matches or on error
 */
export async function findConversationsByTopic(topic, limit = 3) {
    let db;
    try {
        db = await getTopicDatabase();
        // Search for exact topic match
        let conversations = await db.all('SELECT DISTINCT conversation_id FROM conversation_topics WHERE topic = ? LIMIT ?', topic, limit);
        // If no exact matches, search for partial matches
        if (conversations.length === 0) {
            conversations = await db.all('SELECT DISTINCT conversation_id FROM conversation_topics WHERE topic LIKE ? LIMIT ?', `%${topic}%`, limit);
        }
        // If still no matches, check for related topics in our hierarchy
        if (conversations.length === 0) {
            const relatedTopics = collectRelatedTopics(topic);
            if (relatedTopics.length > 0) {
                // Create placeholders for the IN clause
                const placeholders = relatedTopics.map(() => '?').join(',');
                conversations = await db.all(`SELECT DISTINCT conversation_id FROM conversation_topics WHERE topic IN (${placeholders}) LIMIT ?`, ...relatedTopics, limit);
            }
        }
        return conversations.map((c) => c.conversation_id);
    } catch (error) {
        logger.error('Error finding conversations by topic:', error);
        return [];
    } finally {
        // Release the handle even when a query throws
        await closeTopicDatabase(db);
    }
}

/**
 * Find topics related to a specific query
 *
 * @param query Free-text query
 * @returns Union (without duplicates) of the keyword-based topics and the
 *   AI-extracted topics, keyword-based topics first
 */
export async function findTopicsForQuery(query) {
    // First extract topics directly from the query
    const directTopics = extractTopicsFromText(query);
    // Also use AI to extract topics
    const aiTopics = await extractTopicsWithAI(query);
    // Combine both sets
    return [...new Set([...directTopics, ...aiTopics])];
}

/**
 * Extract topics from text (exported for external use)
 *
 * @param text The text to scan
 * @returns Keyword-based topics found via the topic hierarchy
 */
export function extractTopics(text) {
    return extractTopicsFromText(text);
}

/**
 * Extract keywords from text using AI (exported for external use)
 *
 * @param text The text to extract keywords from
 * @param providerName Optional provider name, forwarded to extractTopicsWithAI
 * @returns AI-extracted topics, or keyword-based topics if that call throws
 */
export async function extractKeywords(text, providerName) {
    try {
        // Use AI topic extraction as keyword extraction
        return await extractTopicsWithAI(text, providerName);
    } catch (error) {
        // Fall back to topic extraction
        return extractTopicsFromText(text);
    }
}

/**
 * Wrap the collected knowledge in the instruction text for a pipeline stage.
 * Stages other than generator/refiner/validator/curator get the knowledge
 * back unchanged.
 */
function buildStageContext(stage, queryTopics, relevantKnowledge) {
    const topicList = queryTopics.join(', ');
    switch (stage) {
        case 'generator':
            return `Here is relevant information from previous conversations on similar topics (${topicList}):\n${relevantKnowledge}\nUse this context to generate a comprehensive initial response.`;
        case 'refiner':
            return `Here is relevant information and previous refinements on similar topics (${topicList}):\n${relevantKnowledge}\nUse this context to enhance and refine the response.`;
        case 'validator':
            return `Here is relevant information for fact-checking on similar topics (${topicList}):\n${relevantKnowledge}\nUse this context to validate and correct any inaccuracies.`;
        case 'curator':
            return `Here is relevant information for curating responses on similar topics (${topicList}):\n${relevantKnowledge}\nUse this context to create a polished and consistent final response.`;
        default:
            return relevantKnowledge;
    }
}

/**
 * Find relevant knowledge for each AI in the pipeline based on the query
 * Uses provider-agnostic approach for topic extraction and knowledge retrieval
 *
 * For every conversation related to the query's topics, collects its first
 * five messages plus up to two of the most recent pipeline results recorded
 * for `stage`, and wraps the result in stage-specific instructions.
 *
 * @param query Free-text query to find context for
 * @param stage Pipeline stage name (e.g. 'generator', 'refiner',
 *   'validator', 'curator')
 * @returns Context text for the stage, or "" when no topics or related
 *   conversations are found, or on error
 */
export async function findRelevantKnowledgeForAI(query, stage) {
    let db;
    try {
        logger.info(`Finding thematically relevant knowledge for ${stage} stage...`);
        // Extract topics from the query using our provider-agnostic approach
        const queryTopics = await findTopicsForQuery(query);
        if (queryTopics.length === 0) {
            logger.info(`No topics found for query in ${stage} stage`);
            return "";
        }
        // Format the topics for display
        const formattedTopics = formatTopicList(queryTopics);
        logger.info(`Topics for ${stage} stage:\n${formattedTopics}`);
        // Find conversations related to these topics
        const allConversations = new Set();
        for (const topic of queryTopics) {
            const topicConversations = await findConversationsByTopic(topic);
            topicConversations.forEach((c) => allConversations.add(c));
        }
        // If no related conversations found, return empty
        if (allConversations.size === 0) {
            return "";
        }
        // For each conversation, get messages and previous results for this stage
        db = await getTopicDatabase();
        const topicLabel = queryTopics.join(', ');
        let relevantKnowledge = "";
        for (const conversationId of allConversations) {
            // Get conversation history
            const messages = await db.all('SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY timestamp ASC LIMIT 5', conversationId);
            if (messages.length === 0) {
                continue;
            }
            // Format conversation
            const conversationText = messages
                .map((msg) => `${msg.role.toUpperCase()}: ${msg.content}`)
                .join('\n\n');
            // Get pipeline results for this stage if available
            let stageResults = "";
            const results = await db.all(`SELECT pr.content FROM pipeline_results pr JOIN messages m ON pr.message_id = m.id WHERE m.conversation_id = ? AND pr.stage = ? ORDER BY pr.timestamp DESC LIMIT 2`, conversationId, stage);
            if (results.length > 0) {
                stageResults = `\n\nPrevious ${stage.toUpperCase()} stage outputs:\n${results.map((r) => r.content).join('\n---\n')}`;
            }
            relevantKnowledge += `\n\n--- RELATED CONVERSATION (${topicLabel}) ---\n${conversationText}${stageResults}\n`;
        }
        // Custom-tailor the knowledge based on the stage
        return buildStageContext(stage, queryTopics, relevantKnowledge);
    } catch (error) {
        logger.error(`Error finding relevant knowledge for ${stage} stage`, error);
        return "";
    } finally {
        // Release the handle even when a query or formatting step throws
        await closeTopicDatabase(db);
    }
}
//# sourceMappingURL=topicTagging.js.map