UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

502 lines (485 loc) 21.7 kB
import { open } from 'sqlite';
import * as path from 'path';
import * as os from 'os';
import * as fs from 'fs';
/**
 * Comprehensive Database for Hive.AI Knowledge Base
 * Stores every word from every stage for permanent knowledge retention
 */
const DB_DIR = path.join(os.homedir(), '.hive-ai');
const DB_PATH = path.join(DB_DIR, 'hive-ai-knowledge.db');
let db;
// Set once the schema script has run to completion on the open handle, so
// repeated initialization calls neither reopen the file nor replay the schema.
let schemaInitialized = false;

// NOTE(review): `conversations_fts` declares content='knowledge_base' with
// content_rowid='id', but knowledge_base.id is TEXT and that table has no
// `question` / `source_of_truth` columns; the curator_truths_fts trigger also
// inserts without an explicit rowid. Both look inconsistent with FTS5
// external-content rules -- confirm before any code starts querying the FTS
// tables. The statements are kept as they were so existing databases created
// with IF NOT EXISTS stay in step with this script.
const SCHEMA_SQL = `
      -- Conversations: Core question-answer pairs with metadata
      CREATE TABLE IF NOT EXISTS conversations (
        id TEXT PRIMARY KEY,
        question TEXT NOT NULL,              -- Original user question
        final_answer TEXT NOT NULL,          -- Final pipeline result
        source_of_truth TEXT NOT NULL,       -- Curator output for knowledge retrieval
        conversation_context TEXT,           -- Any additional context
        profile_id TEXT,                     -- User/project profile
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        last_updated TEXT DEFAULT CURRENT_TIMESTAMP
      );

      -- PERFORMANCE INDEXES for timestamp-based queries
      CREATE INDEX IF NOT EXISTS idx_conversations_created_at ON conversations(created_at DESC);
      CREATE INDEX IF NOT EXISTS idx_conversations_source_search ON conversations(created_at DESC, source_of_truth);
      CREATE INDEX IF NOT EXISTS idx_conversations_question_search ON conversations(created_at DESC, question);

      -- Stage Outputs: Every word from every stage preserved forever
      CREATE TABLE IF NOT EXISTS stage_outputs (
        id TEXT PRIMARY KEY,
        conversation_id TEXT NOT NULL,
        stage_name TEXT NOT NULL CHECK (stage_name IN ('generator', 'refiner', 'validator', 'curator')),
        stage_number INTEGER NOT NULL CHECK (stage_number IN (1, 2, 3, 4)),
        provider TEXT NOT NULL,              -- Gemini, Grok, OpenAI, etc.
        model TEXT NOT NULL,                 -- Specific model used
        full_output TEXT NOT NULL,           -- Complete unedited AI response
        character_count INTEGER NOT NULL,
        word_count INTEGER NOT NULL,
        temperature REAL,
        processing_time_ms INTEGER,
        tokens_used INTEGER,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- PERFORMANCE INDEXES for stage outputs queries
      CREATE INDEX IF NOT EXISTS idx_stage_outputs_conversation ON stage_outputs(conversation_id, stage_name);
      CREATE INDEX IF NOT EXISTS idx_stage_outputs_created_at ON stage_outputs(created_at DESC);
      CREATE INDEX IF NOT EXISTS idx_stage_outputs_stage_time ON stage_outputs(stage_name, created_at DESC);

      -- Knowledge Base: Curator outputs flagged as source of truth
      CREATE TABLE IF NOT EXISTS knowledge_base (
        id TEXT PRIMARY KEY,
        conversation_id TEXT NOT NULL,
        curator_content TEXT NOT NULL,       -- Full curator output - source of truth
        topics TEXT NOT NULL,                -- JSON array of topics
        keywords TEXT NOT NULL,              -- JSON array of keywords
        semantic_embedding BLOB,             -- Vector embedding for similarity search
        is_source_of_truth INTEGER DEFAULT 1, -- Always 1 for curator outputs
        relevance_score REAL DEFAULT 1.0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- Topics: Searchable topic extraction
      CREATE TABLE IF NOT EXISTS conversation_topics (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id TEXT NOT NULL,
        topic TEXT NOT NULL,
        confidence REAL DEFAULT 1.0,
        is_primary INTEGER DEFAULT 0,        -- Main topic flag
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- Keywords: Full-text search support
      CREATE TABLE IF NOT EXISTS conversation_keywords (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id TEXT NOT NULL,
        keyword TEXT NOT NULL,
        frequency INTEGER DEFAULT 1,
        weight REAL DEFAULT 1.0,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- Stage Comparisons: Track how content evolves through pipeline
      CREATE TABLE IF NOT EXISTS stage_evolution (
        id TEXT PRIMARY KEY,
        conversation_id TEXT NOT NULL,
        generator_length INTEGER,
        refiner_length INTEGER,
        validator_length INTEGER,
        curator_length INTEGER,
        total_evolution_percentage REAL,
        quality_score REAL,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- Performance indices for fast querying
      CREATE INDEX IF NOT EXISTS idx_conversations_created ON conversations(created_at DESC);
      CREATE INDEX IF NOT EXISTS idx_conversations_question ON conversations(question);
      CREATE INDEX IF NOT EXISTS idx_stage_outputs_conversation ON stage_outputs(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_stage_outputs_stage ON stage_outputs(stage_name);
      CREATE INDEX IF NOT EXISTS idx_knowledge_base_conversation ON knowledge_base(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_knowledge_base_source ON knowledge_base(is_source_of_truth);
      CREATE INDEX IF NOT EXISTS idx_topics_conversation ON conversation_topics(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_topics_topic ON conversation_topics(topic);
      CREATE INDEX IF NOT EXISTS idx_keywords_conversation ON conversation_keywords(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON conversation_keywords(keyword);

      -- Full-text search virtual table
      CREATE VIRTUAL TABLE IF NOT EXISTS conversations_fts USING fts5(
        conversation_id,
        question,
        source_of_truth,
        content='knowledge_base',
        content_rowid='id'
      );

      -- Enhanced tables for new consensus features

      -- Conversation context tracking for cross-conversation memory
      CREATE TABLE IF NOT EXISTS conversation_context (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id TEXT NOT NULL,
        referenced_conversation_id TEXT NOT NULL,
        relevance_score REAL DEFAULT 1.0,
        context_type TEXT CHECK (context_type IN ('recent_24h', 'thematic', 'direct_reference')),
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id),
        FOREIGN KEY (referenced_conversation_id) REFERENCES conversations(id)
      );

      -- Stage confidence and reasoning tracking
      CREATE TABLE IF NOT EXISTS stage_confidence (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        stage_output_id TEXT NOT NULL,
        confidence_score REAL NOT NULL,
        reasoning TEXT,
        sources_used TEXT, -- JSON array of source conversation IDs
        content_quality_score REAL,
        technical_depth_score REAL,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (stage_output_id) REFERENCES stage_outputs(id)
      );

      -- Consensus metrics for each conversation
      CREATE TABLE IF NOT EXISTS consensus_metrics (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id TEXT NOT NULL UNIQUE,
        final_confidence REAL NOT NULL,
        stage_agreement REAL,
        content_quality REAL,
        provider_reliability REAL,
        context_utilization REAL,
        agreement_matrix TEXT, -- JSON representation of agreement matrix
        processing_time_ms INTEGER,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- Curator truths tracking (single source of truth)
      CREATE TABLE IF NOT EXISTS curator_truths (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id TEXT NOT NULL UNIQUE,
        curator_output TEXT NOT NULL,
        confidence_score REAL NOT NULL,
        topic_summary TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
      );

      -- Indexes for enhanced features
      CREATE INDEX IF NOT EXISTS idx_conversation_context_conv ON conversation_context(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_conversation_context_ref ON conversation_context(referenced_conversation_id);
      CREATE INDEX IF NOT EXISTS idx_conversation_context_type ON conversation_context(context_type);
      CREATE INDEX IF NOT EXISTS idx_stage_confidence_stage ON stage_confidence(stage_output_id);
      CREATE INDEX IF NOT EXISTS idx_consensus_metrics_conv ON consensus_metrics(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_consensus_metrics_confidence ON consensus_metrics(final_confidence DESC);
      CREATE INDEX IF NOT EXISTS idx_curator_truths_conv ON curator_truths(conversation_id);
      CREATE INDEX IF NOT EXISTS idx_curator_truths_topic ON curator_truths(topic_summary);
      CREATE INDEX IF NOT EXISTS idx_curator_truths_confidence ON curator_truths(confidence_score DESC);

      -- Enhanced FTS table for curator truths
      CREATE VIRTUAL TABLE IF NOT EXISTS curator_truths_fts USING fts5(
        conversation_id,
        curator_output,
        topic_summary,
        content='curator_truths',
        content_rowid='id'
      );

      -- Trigger to keep FTS table in sync
      CREATE TRIGGER IF NOT EXISTS knowledge_base_fts_insert AFTER INSERT ON knowledge_base
      BEGIN
        INSERT INTO conversations_fts(conversation_id, question, source_of_truth)
        SELECT NEW.conversation_id, c.question, NEW.curator_content
        FROM conversations c WHERE c.id = NEW.conversation_id;
      END;

      -- Trigger to keep curator truths FTS in sync
      CREATE TRIGGER IF NOT EXISTS curator_truths_fts_insert AFTER INSERT ON curator_truths
      BEGIN
        INSERT INTO curator_truths_fts(conversation_id, curator_output, topic_summary)
        VALUES (NEW.conversation_id, NEW.curator_output, NEW.topic_summary);
      END;
    `;

// Recency tiers tried in order by searchSourceOfTruth. `window` is a fixed SQL
// fragment defined here (never user input), so it is safe to splice into the
// query text; the search term itself is always bound as a parameter.
const SEARCH_TIERS = [
    {
        name: 'recent',
        window: "datetime(c.created_at) >= datetime('now', '-24 hours')",
        report: (count) => `🕐 Found ${count} matches in recent conversations (24h)`,
    },
    {
        name: 'week',
        window: "datetime(c.created_at) >= datetime('now', '-7 days') AND datetime(c.created_at) < datetime('now', '-24 hours')",
        report: (count) => `📅 Found ${count} additional matches in past week`,
    },
    {
        name: 'historical',
        window: "datetime(c.created_at) < datetime('now', '-7 days')",
        report: (count) => `📚 Found ${count} matches in historical data`,
    },
];

/**
 * Initialize comprehensive database schema.
 *
 * Creates the data directory if needed, opens the SQLite file at DB_PATH and
 * runs the schema script. The call is idempotent: once a handle is open and
 * the schema script has completed, later calls return immediately instead of
 * opening another connection. If the schema script failed on an earlier call,
 * the existing handle is reused and the script is retried.
 *
 * @returns {Promise<boolean>} true when the database is ready, false when
 *   opening or schema creation failed (the error is logged, not thrown).
 */
export async function initializeComprehensiveDatabase() {
    if (db && schemaInitialized) {
        return true;
    }
    try {
        // Ensure directory exists
        if (!fs.existsSync(DB_DIR)) {
            fs.mkdirSync(DB_DIR, { recursive: true });
        }
        // Open database (only once; a second open would leak the first handle)
        if (!db) {
            const sqlite3Driver = await import('sqlite3');
            db = await open({
                filename: DB_PATH,
                driver: sqlite3Driver.default.Database,
            });
        }
        // Create comprehensive schema with optimized indexing
        await db.exec(SCHEMA_SQL);
        schemaInitialized = true;
        console.log('✅ Comprehensive database schema initialized');
        return true;
    }
    catch (error) {
        console.error('❌ Failed to initialize comprehensive database:', error);
        return false;
    }
}

/**
 * Count whitespace-separated words; empty or whitespace-only text counts as 0.
 */
function countWords(text) {
    const trimmed = text.trim();
    return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}

/**
 * Store complete conversation with all stage outputs.
 *
 * Writes the conversation row, one stage_outputs row per pipeline stage, the
 * knowledge_base entry, topic/keyword rows and the stage_evolution metrics
 * inside a single transaction.
 *
 * @param {string} conversationId - Primary key of the conversation.
 * @param {string} question - Original user question.
 * @param {object} stageOutputs - Object with `generator`, `refiner`,
 *   `validator` and `curator` entries; each is read for `content`, `provider`,
 *   `model`, `temperature`, `processingTime` and `tokens`.
 * @param {string} finalAnswer - Final pipeline result.
 * @param {string[]} [topics=[]] - Topics; the first one is flagged primary.
 * @param {string[]} [keywords=[]] - Keywords.
 * @returns {Promise<boolean>} true on commit, false on any failure (logged).
 */
export async function storeCompleteConversation(conversationId, question, stageOutputs, finalAnswer, topics = [], keywords = []) {
    // Tracks whether BEGIN succeeded so the error path only rolls back a
    // transaction that actually exists.
    let transactionOpen = false;
    try {
        if (!db)
            await initializeComprehensiveDatabase();
        if (!db) {
            throw new Error('Database is not available');
        }
        const stages = [
            { name: 'generator', number: 1, data: stageOutputs.generator },
            { name: 'refiner', number: 2, data: stageOutputs.refiner },
            { name: 'validator', number: 3, data: stageOutputs.validator },
            { name: 'curator', number: 4, data: stageOutputs.curator }
        ];
        // Validate before touching the database so bad input never opens a transaction.
        for (const stage of stages) {
            if (typeof stage.data?.content !== 'string') {
                throw new Error(`Stage "${stage.name}" is missing string content`);
            }
        }
        // Start transaction
        await db.run('BEGIN TRANSACTION');
        transactionOpen = true;
        // Store main conversation record
        await db.run(`
      INSERT OR REPLACE INTO conversations (id, question, final_answer, source_of_truth, created_at, last_updated)
      VALUES (?, ?, ?, ?, datetime('now'), datetime('now'))
    `, [conversationId, question, finalAnswer, stageOutputs.curator.content]);
        // Store each stage output with full preservation
        for (const stage of stages) {
            const stageId = `${conversationId}-${stage.name}`;
            const wordCount = countWords(stage.data.content);
            await db.run(`
        INSERT OR REPLACE INTO stage_outputs (
          id, conversation_id, stage_name, stage_number, provider, model,
          full_output, character_count, word_count, temperature,
          processing_time_ms, tokens_used, created_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
      `, [
                stageId,
                conversationId,
                stage.name,
                stage.number,
                stage.data.provider,
                stage.data.model,
                stage.data.content,
                stage.data.content.length,
                wordCount,
                stage.data.temperature,
                stage.data.processingTime,
                stage.data.tokens || 0
            ]);
        }
        // Store curator output as source of truth in knowledge base
        const knowledgeId = `${conversationId}-knowledge`;
        await db.run(`
      INSERT OR REPLACE INTO knowledge_base (
        id, conversation_id, curator_content, topics, keywords, is_source_of_truth, created_at
      ) VALUES (?, ?, ?, ?, ?, 1, datetime('now'))
    `, [knowledgeId, conversationId, stageOutputs.curator.content, JSON.stringify(topics), JSON.stringify(keywords)]);
        // Topic/keyword rows have an autoincrement id and no unique key, so
        // INSERT OR REPLACE never replaces anything; clear the previous rows
        // first so re-storing a conversation does not pile up duplicates.
        await db.run('DELETE FROM conversation_topics WHERE conversation_id = ?', [conversationId]);
        await db.run('DELETE FROM conversation_keywords WHERE conversation_id = ?', [conversationId]);
        // Store topics for searchability (only the first entry is primary)
        for (const [index, topic] of topics.entries()) {
            await db.run(`
        INSERT OR REPLACE INTO conversation_topics (conversation_id, topic, confidence, is_primary)
        VALUES (?, ?, 1.0, ?)
      `, [conversationId, topic, index === 0 ? 1 : 0]);
        }
        // Store keywords for full-text search
        for (const keyword of keywords) {
            await db.run(`
        INSERT OR REPLACE INTO conversation_keywords (conversation_id, keyword, frequency, weight)
        VALUES (?, ?, 1, 1.0)
      `, [conversationId, keyword]);
        }
        // Store evolution metrics
        const evolutionId = `${conversationId}-evolution`;
        const generatorLength = stageOutputs.generator.content.length;
        const curatorLength = stageOutputs.curator.content.length;
        // An empty generator output has no meaningful ratio; store NULL rather
        // than the Infinity/NaN a division by zero would produce.
        const evolutionPercentage = generatorLength === 0
            ? null
            : (curatorLength / generatorLength) * 100;
        await db.run(`
      INSERT OR REPLACE INTO stage_evolution (
        id, conversation_id, generator_length, refiner_length, validator_length,
        curator_length, total_evolution_percentage, quality_score, created_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
    `, [
            evolutionId,
            conversationId,
            generatorLength,
            stageOutputs.refiner.content.length,
            stageOutputs.validator.content.length,
            curatorLength,
            evolutionPercentage,
            1.0
        ]);
        // Commit transaction
        await db.run('COMMIT');
        transactionOpen = false;
        console.log(`✅ Stored complete conversation ${conversationId} with all stage outputs`);
        return true;
    }
    catch (error) {
        if (transactionOpen) {
            try {
                await db.run('ROLLBACK');
            }
            catch (rollbackError) {
                // A failed rollback must not mask the original error or make
                // this function reject; callers rely on the boolean result.
                console.error('❌ Failed to roll back conversation transaction:', rollbackError);
            }
        }
        console.error('❌ Failed to store complete conversation:', error);
        return false;
    }
}

/**
 * Query conversation by ID with all stage outputs.
 *
 * @param {string} conversationId - Conversation primary key.
 * @returns {Promise<{conversation: object, stages: object[], knowledge: object}|null>}
 *   The conversation row, its stage rows ordered by stage_number and its
 *   knowledge_base row; null when the conversation does not exist or the
 *   lookup fails (the error is logged).
 */
export async function getCompleteConversation(conversationId) {
    try {
        if (!db)
            await initializeComprehensiveDatabase();
        // Get main conversation
        const conversation = await db.get(`
      SELECT * FROM conversations WHERE id = ?
    `, [conversationId]);
        if (!conversation)
            return null;
        // Get all stage outputs
        const stages = await db.all(`
      SELECT * FROM stage_outputs WHERE conversation_id = ? ORDER BY stage_number
    `, [conversationId]);
        // Get knowledge base entry
        const knowledge = await db.get(`
      SELECT * FROM knowledge_base WHERE conversation_id = ?
    `, [conversationId]);
        return {
            conversation,
            stages,
            knowledge
        };
    }
    catch (error) {
        console.error('❌ Failed to get complete conversation:', error);
        return null;
    }
}

/**
 * Search source of truth entries with temporal prioritization.
 * Checks recent conversations first (past 24 hours), then the past week, then
 * everything older, stopping as soon as `limit` rows have been collected.
 *
 * The query is matched as a literal substring: `%`, `_` and `\` in `query`
 * are escaped so they are not interpreted as LIKE wildcards.
 *
 * @param {string} query - Text to look for in conversations.source_of_truth.
 * @param {number} [limit=5] - Maximum number of rows to return.
 * @returns {Promise<object[]>} Matching rows, newest tier first; an empty
 *   array when nothing matches or the search fails (the error is logged).
 */
export async function searchSourceOfTruth(query, limit = 5) {
    try {
        if (!db)
            await initializeComprehensiveDatabase();
        const pattern = `%${String(query).replace(/[\\%_]/g, '\\$&')}%`;
        const results = [];
        for (const [tierIndex, tier] of SEARCH_TIERS.entries()) {
            // The first tier always runs; later tiers only fill the shortfall.
            if (tierIndex > 0 && results.length >= limit) {
                break;
            }
            const tierRows = await db.all(`
        SELECT
          c.id,
          c.question,
          c.source_of_truth as curator_content,
          '[]' as topics,
          '[]' as keywords,
          1 as is_source_of_truth,
          c.created_at,
          '${tier.name}' as search_tier
        FROM conversations c
        WHERE c.source_of_truth LIKE ? ESCAPE '\\'
          AND ${tier.window}
        ORDER BY c.created_at DESC
        LIMIT ?
      `, [pattern, limit - results.length]);
            results.push(...tierRows);
            console.log(tier.report(tierRows.length));
        }
        console.log(`🎯 Total search results: ${results.length} (prioritized by recency)`);
        return results.map(row => ({
            id: row.id,
            conversation_id: row.id,
            curator_content: row.curator_content,
            topics: row.topics,
            keywords: row.keywords,
            is_source_of_truth: Boolean(row.is_source_of_truth),
            created_at: row.created_at
        })).slice(0, limit);
    }
    catch (error) {
        console.error('❌ Failed to search source of truth:', error);
        return [];
    }
}

/**
 * Get all conversations with basic info for browsing.
 *
 * @returns {Promise<object[]>} Rows with id, question, source_of_truth,
 *   created_at and last_updated, newest first; an empty array on failure
 *   (the error is logged).
 */
export async function getAllConversations() {
    try {
        if (!db)
            await initializeComprehensiveDatabase();
        return await db.all(`
      SELECT id, question, source_of_truth, created_at, last_updated
      FROM conversations
      ORDER BY created_at DESC
    `);
    }
    catch (error) {
        console.error('❌ Failed to get all conversations:', error);
        return [];
    }
}

/**
 * Get conversation details by ID.
 *
 * @param {string} conversationId - Conversation primary key.
 * @returns {Promise<object|null>} The conversations row, or null when it does
 *   not exist or the lookup fails (the error is logged).
 */
export async function getConversationDetails(conversationId) {
    try {
        // Initialize lazily, like the other readers, instead of on every call.
        if (!db)
            await initializeComprehensiveDatabase();
        const conversation = await db.get('SELECT * FROM conversations WHERE id = ?', [conversationId]);
        return conversation || null;
    }
    catch (error) {
        console.error('Error getting conversation details:', error);
        return null;
    }
}

/**
 * Get all stage outputs for a conversation
 */
export async function getAllStageOutputs(conversationId) { try { await initializeComprehensiveDatabase(); const outputs = await db.all('SELECT * FROM stage_outputs WHERE conversation_id = ? ORDER BY created_at ASC', [conversationId]); return outputs || []; } catch (error) { console.error('Error getting stage outputs:', error); return []; } } /** * Retrieve source of truth knowledge for a query */ export async function retrieveSourceOfTruthKnowledge(query) { try { const sourceOfTruthEntries = await searchSourceOfTruth(query, 3); if (sourceOfTruthEntries.length === 0) { return null; } // Format the knowledge from curator outputs only let relevantKnowledge = ''; for (const entry of sourceOfTruthEntries) { const topicsParsed = typeof entry.topics === 'string' ? JSON.parse(entry.topics || '[]') : (entry.topics || []); const topics = Array.isArray(topicsParsed) ? topicsParsed : []; const topicsString = topics.length > 0 ? `Topics: ${topics.map(String).join(', ')}` : ''; relevantKnowledge += `--- Source of Truth Entry ---\n` + `Conversation: ${entry.conversation_id.substring(0, 8)}\n` + `${topicsString}\n` + `${entry.curator_content}\n\n`; } return relevantKnowledge.trim(); } catch (error) { console.error('Error retrieving source of truth knowledge:', error); return null; } } export { db }; //# sourceMappingURL=comprehensive-database.js.map