@hivetechs/hive-ai
Version:
Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API
418 lines • 19.7 kB
JavaScript
/**
* Topic Tagging System for Hive.AI MCP Server
*
* Implements a sophisticated tagging system for conversations and messages
* to enable thematic relation discovery across the consensus pipeline.
*
* Uses a provider-agnostic approach to ensure resilience across different LLM providers.
*/
// Removed legacy provider selection - OpenRouter handles all provider logic
import { createChatCompletion } from '../tools/hiveai/provider-client.js';
// Pipeline utils replaced with secure pipeline config
import { logger, formatTopicList } from '../utils/logging.js';
import { getDatabase } from './unified-database.js';
/**
 * Resolve the database handle used by the topic-tagging helpers.
 *
 * Thin indirection over `getDatabase()` from the unified database module.
 *
 * @returns {Promise<*>} Whatever `getDatabase()` resolves to.
 */
async function getTopicDatabase() {
    const database = await getDatabase();
    return database;
}
/**
 * Topic hierarchy for technical domains.
 *
 * Each top-level key is a main topic mapped to:
 *   - `related`:   terms whose presence in a text implies the main topic;
 *   - `subtopics`: subtopic name -> keywords that imply `<main>:<subtopic>`.
 *
 * All identifiers are snake_case; key order is significant because the
 * extraction helpers iterate the hierarchy in insertion order.
 */
const TOPIC_HIERARCHY = {
    artificial_intelligence: {
        related: ["machine_learning", "deep_learning", "neural_networks", "ai", "algorithms"],
        subtopics: {
            machine_learning: ["supervised", "unsupervised", "reinforcement", "models", "algorithms", "training", "features"],
            neural_networks: ["deep_learning", "cnn", "rnn", "transformer", "attention", "layers", "activation"],
            nlp: ["language_models", "chatbots", "text_processing", "sentiment", "tokens", "embedding"],
            computer_vision: ["image_recognition", "object_detection", "segmentation", "face_recognition"],
            ethics: ["bias", "fairness", "transparency", "accountability", "privacy", "safety"],
        },
    },
    programming: {
        related: ["software_development", "coding", "languages", "frameworks", "algorithms"],
        subtopics: {
            languages: ["javascript", "python", "java", "typescript", "c++", "go", "rust", "php"],
            web_development: ["frontend", "backend", "fullstack", "frameworks", "html", "css", "react", "angular", "vue"],
            mobile_development: ["ios", "android", "react_native", "flutter", "mobile_apps"],
            databases: ["sql", "nosql", "relational", "document", "graph", "key_value"],
            devops: ["ci_cd", "containers", "kubernetes", "deployment", "cloud"],
        },
    },
    data: {
        related: ["database", "analysis", "storage", "processing", "visualization"],
        subtopics: {
            databases: ["relational", "nosql", "sql", "mysql", "postgresql", "mongodb", "cassandra"],
            big_data: ["hadoop", "spark", "distributed", "processing", "streams"],
            data_science: ["analysis", "statistics", "visualization", "insights", "pandas", "jupyter"],
            data_engineering: ["pipelines", "etl", "warehousing", "lake", "governance"],
        },
    },
    blockchain: {
        related: ["cryptocurrency", "distributed_ledger", "smart_contracts", "decentralized"],
        subtopics: {
            cryptocurrency: ["bitcoin", "ethereum", "tokens", "mining", "wallets"],
            smart_contracts: ["solidity", "execution", "conditions", "automation"],
            consensus: ["proof_of_work", "proof_of_stake", "algorithms", "validation", "mining"],
            defi: ["decentralized_finance", "lending", "trading", "yield", "staking"],
        },
    },
    security: {
        related: ["cybersecurity", "encryption", "protection", "threats", "privacy"],
        subtopics: {
            encryption: ["cryptography", "algorithms", "keys", "hashing", "secure_communication"],
            network_security: ["firewall", "protocols", "detection", "prevention", "monitoring"],
            application_security: ["vulnerabilities", "testing", "secure_coding", "owasp"],
            authentication: ["identity", "authorization", "tokens", "biometrics", "mfa"],
        },
    },
    cloud_computing: {
        related: ["aws", "azure", "gcp", "iaas", "paas", "saas", "serverless"],
        subtopics: {
            infrastructure: ["servers", "storage", "networking", "virtualization", "containers"],
            platforms: ["services", "apis", "functions", "managed", "solutions"],
            deployment: ["ci_cd", "automation", "scaling", "monitoring", "reliability"],
            architecture: ["microservices", "serverless", "event_driven", "distributed"],
        },
    },
};
/**
 * Extract topics from a text by keyword-matching against TOPIC_HIERARCHY.
 *
 * Matching is a case-insensitive substring test. Every hierarchy term is
 * compared in its "spaced" form (all underscores turned into spaces), so
 * `proof_of_work` matches the phrase "proof of work".
 *
 * A main topic is reported when its own name or any of its `related` terms
 * occurs; a subtopic is reported as `<main>:<subtopic>` when its name or any
 * of its keywords occurs.
 *
 * @param {string} text Text to scan.
 * @returns {string[]} Unique topics in hierarchy order; empty for empty or
 *   non-string input.
 */
function extractTopicsFromText(text) {
    // Callers pass model output and user input through here; treat anything
    // that is not text as "no topics" instead of throwing.
    if (typeof text !== 'string' || text.length === 0) {
        return [];
    }
    const haystack = text.toLowerCase();
    // A string pattern in replace() only touches the first underscore, which
    // made multi-underscore terms unmatchable; the global regex converts all.
    const mentions = (term) => haystack.includes(term.toLowerCase().replace(/_/g, ' '));
    const topics = new Set();
    for (const [mainTopic, details] of Object.entries(TOPIC_HIERARCHY)) {
        if (mentions(mainTopic) || details.related.some((term) => mentions(term))) {
            topics.add(mainTopic);
        }
        for (const [subtopic, keywords] of Object.entries(details.subtopics)) {
            if (mentions(subtopic) || keywords.some((keyword) => mentions(keyword))) {
                topics.add(`${mainTopic}:${subtopic}`);
            }
        }
    }
    return Array.from(topics);
}
/**
 * Recover a list of topic strings from a model reply.
 *
 * Tries strict JSON first; if the reply is not a JSON array (prose around the
 * list, a wrapping object, a code fence), falls back to pulling the
 * double-quoted items out of the first bracketed span.
 *
 * @param {string} content Raw reply text.
 * @returns {string[]|null} The topic strings, or `null` when no list could be
 *   recovered from the reply.
 */
function parseTopicArray(content) {
    try {
        const parsed = JSON.parse(content);
        if (Array.isArray(parsed)) {
            // Only non-blank strings are usable as topics.
            return parsed.filter((item) => typeof item === 'string' && item.trim() !== '');
        }
    }
    catch (jsonError) {
        logger.debug("Failed to parse JSON response, attempting to extract array content");
    }
    const bracketed = content.match(/\[(.*?)\]/s);
    if (bracketed && bracketed[1]) {
        // Extract items that look like they're in quotes
        const quoted = bracketed[1].match(/"([^"]*)"/g);
        if (quoted) {
            return quoted.map((item) => item.replace(/"/g, ''));
        }
    }
    return null;
}
/**
 * Extract topics from text using AI.
 *
 * Sends the text to `createChatCompletion` with a prompt requesting a JSON
 * array of snake_case topics and parses the reply as such a list. The request
 * always uses `model: 'auto'`; `providerName` only selects the legacy
 * result-handling rules described below and is echoed in debug logs.
 *
 * With `providerName`: a non-string reply yields `[]`, an unparseable reply
 * yields `['general']`, otherwise the parsed list is returned as-is.
 * Without `providerName`: an unparseable reply is keyword-scanned, and an
 * empty result is replaced by `['general']`.
 * In both modes a failed request falls back to keyword extraction on `text`.
 *
 * @param text The text to extract topics from
 * @param providerName Optional provider name to use
 * @returns {Promise<string[]>} Extracted topics; never rejects.
 */
export async function extractTopicsWithAI(text, providerName) {
    try {
        const prompt = `You are a topic extraction system. Extract key technical topics from the text as a JSON array of snake_case strings. Focus on technical domains, programming concepts, and specific technologies. Return ONLY the JSON array, nothing else.`;
        const messages = [
            { role: 'system', content: prompt },
            { role: 'user', content: text }
        ];
        // Keyword extraction on the original text, used when the model call fails.
        const extractWithFallback = () => {
            logger.debug("Using keyword-based topic extraction as fallback");
            return extractTopicsFromText(text);
        };
        if (providerName) {
            logger.debug(`Attempting topic extraction with provider: ${providerName}`);
            try {
                const result = await createChatCompletion({ model: 'auto', messages });
                if (typeof result.content !== 'string') {
                    return [];
                }
                const parsed = parseTopicArray(result.content);
                return parsed === null ? ['general'] : parsed;
            }
            catch (error) {
                logger.debug(`Provider ${providerName} failed: ${error.message}`);
                return extractWithFallback();
            }
        }
        try {
            const result = await createChatCompletion({
                model: 'auto', // Use 'auto' to let the system pick the best available model
                messages,
                temperature: 0.1,
                max_tokens: 200
            });
            const reply = result.content;
            if (typeof reply !== 'string') {
                return extractWithFallback();
            }
            // The prompt asks for a JSON array, so parse it as one. Keyword
            // scanning is kept only for replies that contain no list at all,
            // because it cannot match snake_case topics such as
            // "machine_learning".
            const parsed = parseTopicArray(reply);
            const topics = parsed === null ? extractTopicsFromText(reply) : parsed;
            return topics.length > 0 ? topics : ['general'];
        }
        catch (error) {
            logger.debug(`AI topic extraction failed: ${error.message}`);
            return extractWithFallback();
        }
    }
    catch (error) {
        // Reached only if the keyword fallback itself throws.
        logger.debug(`Error extracting topics with AI: ${error.message}`);
        return ['general'];
    }
}
/**
 * Tag a conversation with topics based on its content.
 *
 * Loads every message of the conversation, joins their `content`, extracts
 * topics with `extractTopicsWithAI`, and replaces the conversation's rows in
 * `conversation_topics` with one row per unique topic (weight 1.0). Existing
 * rows are left untouched when no topics are extracted.
 *
 * @param conversationId Identifier of the conversation to tag.
 * @returns {Promise<string[]>} The stored topics, or `[]` when there are no
 *   messages, no topics, or an error occurred (errors are logged, not thrown).
 */
export async function tagConversation(conversationId) {
    let db;
    try {
        db = await getTopicDatabase();
    }
    catch (dbError) {
        logger.error(`Error connecting to database: ${dbError.message}`);
        return [];
    }
    try {
        // Get all messages for this conversation
        const messages = await db.all('SELECT * FROM messages WHERE conversation_id = ? ORDER BY timestamp ASC', conversationId);
        if (!messages || messages.length === 0) {
            logger.warn(`No messages found for conversation ${conversationId}`);
            return [];
        }
        const combinedContent = messages.map((m) => m.content).join('\n\n');
        const extracted = await extractTopicsWithAI(combinedContent);
        // The extractor may repeat a topic; store each one once so the
        // conversation does not end up with duplicate topic rows.
        const topics = Array.isArray(extracted)
            ? [...new Set(extracted.filter((topic) => typeof topic === 'string' && topic !== ''))]
            : [];
        if (topics.length === 0) {
            logger.debug(`No topics extracted for conversation ${conversationId}`);
            return [];
        }
        // First delete existing topics for this conversation
        await db.run("DELETE FROM conversation_topics WHERE conversation_id = ?", [conversationId]);
        // Insert each topic separately with a weight of 1.0
        for (const topic of topics) {
            await db.run("INSERT INTO conversation_topics (conversation_id, topic, weight) VALUES (?, ?, ?)", [conversationId, topic, 1.0]);
        }
        logger.info(`Tagged conversation ${conversationId} with topics: ${formatTopicList(topics)}`);
        return topics;
    }
    catch (error) {
        logger.error(`Error tagging conversation: ${error.message}`);
        return [];
    }
    finally {
        // Single close point for every exit path. A failing close must not
        // discard a result that was already stored, so it is only logged.
        try {
            await db.close();
        }
        catch (closeError) {
            logger.warn(`Error closing database: ${closeError.message}`);
        }
    }
}
/**
 * Collect hierarchy entries related to a topic, without duplicates.
 *
 * A main topic contributes itself and its `related` terms when `topic` is the
 * main topic or one of those terms; a subtopic contributes `<main>:<subtopic>`
 * and its keywords when `topic` is that qualified name or one of the keywords.
 *
 * @param {string} topic Topic to look up.
 * @returns {string[]} Related topics in first-seen order.
 */
function collectRelatedTopics(topic) {
    const related = new Set();
    for (const [mainTopic, details] of Object.entries(TOPIC_HIERARCHY)) {
        if (mainTopic === topic || details.related.includes(topic)) {
            related.add(mainTopic);
            details.related.forEach((term) => related.add(term));
        }
        for (const [subtopic, keywords] of Object.entries(details.subtopics)) {
            const fullSubtopic = `${mainTopic}:${subtopic}`;
            if (fullSubtopic === topic || keywords.includes(topic)) {
                related.add(fullSubtopic);
                keywords.forEach((keyword) => related.add(keyword));
            }
        }
    }
    return Array.from(related);
}
/**
 * Find conversations related to a specific topic.
 *
 * Tries, in order and stopping at the first non-empty result: an exact match
 * on `conversation_topics.topic`, a `LIKE %topic%` match, and finally a match
 * on any topic the hierarchy relates to `topic`.
 *
 * @param {string} topic Topic to search for.
 * @param {number} [limit=3] Maximum number of conversation ids to return.
 * @returns {Promise<Array>} Matching conversation ids; `[]` for a blank topic
 *   or when an error occurs (errors are logged, not thrown).
 */
export async function findConversationsByTopic(topic, limit = 3) {
    // A blank topic would turn the LIKE search into '%%' and match every row.
    if (typeof topic !== 'string' || topic.trim() === '') {
        return [];
    }
    let db;
    try {
        db = await getTopicDatabase();
        // Search for exact topic match
        let conversations = await db.all('SELECT DISTINCT conversation_id FROM conversation_topics WHERE topic = ? LIMIT ?', topic, limit);
        // If no exact matches, search for partial matches
        if (conversations.length === 0) {
            conversations = await db.all('SELECT DISTINCT conversation_id FROM conversation_topics WHERE topic LIKE ? LIMIT ?', `%${topic}%`, limit);
        }
        // If still no matches, check for related topics in our hierarchy
        if (conversations.length === 0) {
            const relatedTopics = collectRelatedTopics(topic);
            if (relatedTopics.length > 0) {
                // Create placeholders for the IN clause
                const placeholders = relatedTopics.map(() => '?').join(',');
                conversations = await db.all(`SELECT DISTINCT conversation_id FROM conversation_topics WHERE topic IN (${placeholders}) LIMIT ?`, ...relatedTopics, limit);
            }
        }
        return conversations.map(c => c.conversation_id);
    }
    catch (error) {
        logger.error('Error finding conversations by topic:', error);
        return [];
    }
    finally {
        // Close on every exit path, including failed queries.
        if (db) {
            try {
                await db.close();
            }
            catch (closeError) {
                logger.warn(`Error closing database: ${closeError.message}`);
            }
        }
    }
}
/**
 * Find topics related to a specific query.
 *
 * Merges the keyword-based topics with the AI-extracted ones, keeping the
 * first occurrence of each topic (keyword results come first).
 *
 * @param query Query text to analyse.
 * @returns {Promise<Array>} De-duplicated list of topics.
 */
export async function findTopicsForQuery(query) {
    const keywordTopics = extractTopicsFromText(query);
    const modelTopics = await extractTopicsWithAI(query);
    const merged = new Set();
    for (const topic of keywordTopics) {
        merged.add(topic);
    }
    for (const topic of modelTopics) {
        merged.add(topic);
    }
    return Array.from(merged);
}
/**
 * Public entry point for keyword-based topic extraction.
 *
 * Delegates directly to `extractTopicsFromText`.
 *
 * @param text Text to scan for topics.
 * @returns Whatever `extractTopicsFromText` returns for `text`.
 */
export function extractTopics(text) {
    const topics = extractTopicsFromText(text);
    return topics;
}
/**
 * Extract keywords from text using AI (exported for external use).
 *
 * Keywords are the topics returned by `extractTopicsWithAI`; if that call
 * rejects, the keyword-based `extractTopicsFromText` result is used instead.
 *
 * @param text Text to extract keywords from.
 * @param providerName Optional provider name forwarded to the AI extractor.
 * @returns {Promise<Array>} Extracted keywords.
 */
export async function extractKeywords(text, providerName) {
    let keywords;
    try {
        keywords = await extractTopicsWithAI(text, providerName);
    }
    catch (aiError) {
        keywords = extractTopicsFromText(text);
    }
    return keywords;
}
/**
 * Find relevant knowledge for each AI in the pipeline based on the query.
 *
 * Extracts topics from the query, looks up conversations tagged with those
 * topics, and builds a text block from each conversation's first five
 * messages plus up to two of the most recent `pipeline_results` rows for the
 * given stage. For the stages `generator`, `refiner`, `validator` and
 * `curator` the block is wrapped in stage-specific instructions; any other
 * stage receives the raw block.
 *
 * @param query User query to find related knowledge for.
 * @param stage Pipeline stage name the knowledge is intended for.
 * @returns {Promise<string>} The context text, or `""` when no topics,
 *   conversations or content were found, or when an error occurred (errors
 *   are logged, not thrown).
 */
export async function findRelevantKnowledgeForAI(query, stage) {
    let db;
    try {
        logger.info(`Finding thematically relevant knowledge for ${stage} stage...`);
        const queryTopics = await findTopicsForQuery(query);
        if (queryTopics.length === 0) {
            logger.info(`No topics found for query in ${stage} stage`);
            return "";
        }
        const formattedTopics = formatTopicList(queryTopics);
        logger.info(`Topics for ${stage} stage:\n${formattedTopics}`);
        // Find conversations related to these topics
        const allConversations = new Set();
        for (const topic of queryTopics) {
            const topicConversations = await findConversationsByTopic(topic);
            topicConversations.forEach(c => allConversations.add(c));
        }
        if (allConversations.size === 0) {
            return "";
        }
        // Same for every conversation and every stage template.
        const topicSummary = queryTopics.join(', ');
        db = await getTopicDatabase();
        let relevantKnowledge = "";
        for (const conversationId of allConversations) {
            const messages = await db.all('SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY timestamp ASC LIMIT 5', conversationId);
            if (messages.length === 0) {
                continue;
            }
            const conversationText = messages
                .map(msg => `${msg.role.toUpperCase()}: ${msg.content}`)
                .join('\n\n');
            // Get pipeline results for this stage if available
            let stageResults = "";
            const results = await db.all(`SELECT pr.content
FROM pipeline_results pr
JOIN messages m ON pr.message_id = m.id
WHERE m.conversation_id = ? AND pr.stage = ?
ORDER BY pr.timestamp DESC
LIMIT 2`, conversationId, stage);
            if (results.length > 0) {
                stageResults = `\n\nPrevious ${stage.toUpperCase()} stage outputs:\n${results.map(r => r.content).join('\n---\n')}`;
            }
            relevantKnowledge += `\n\n--- RELATED CONVERSATION (${topicSummary}) ---\n${conversationText}${stageResults}\n`;
        }
        // Tagged conversations without any messages contribute nothing; do not
        // hand the stage a framing text that wraps no content.
        if (relevantKnowledge === "") {
            return "";
        }
        // Custom-tailor the knowledge based on the stage
        switch (stage) {
            case 'generator':
                return `Here is relevant information from previous conversations on similar topics (${topicSummary}):\n${relevantKnowledge}\nUse this context to generate a comprehensive initial response.`;
            case 'refiner':
                return `Here is relevant information and previous refinements on similar topics (${topicSummary}):\n${relevantKnowledge}\nUse this context to enhance and refine the response.`;
            case 'validator':
                return `Here is relevant information for fact-checking on similar topics (${topicSummary}):\n${relevantKnowledge}\nUse this context to validate and correct any inaccuracies.`;
            case 'curator':
                return `Here is relevant information for curating responses on similar topics (${topicSummary}):\n${relevantKnowledge}\nUse this context to create a polished and consistent final response.`;
            default:
                return relevantKnowledge;
        }
    }
    catch (error) {
        logger.error(`Error finding relevant knowledge for ${stage} stage`, error);
        return "";
    }
    finally {
        // Close on every exit path, including failed queries. A failing close
        // is only logged so it cannot discard knowledge already collected.
        if (db) {
            try {
                await db.close();
            }
            catch (closeError) {
                logger.warn(`Error closing database: ${closeError.message}`);
            }
        }
    }
}
//# sourceMappingURL=topicTagging.js.map