@hivetechs/hive-ai
Version:
Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API
502 lines (485 loc) • 21.7 kB
JavaScript
import { open } from 'sqlite';
import * as path from 'path';
import * as os from 'os';
import * as fs from 'fs';
/**
* Comprehensive Database for Hive.AI Knowledge Base
* Stores every word from every stage for permanent knowledge retention
*/
// Directory that holds the knowledge-base database: `.hive-ai` inside the current user's home directory.
const DB_DIR = path.join(os.homedir(), '.hive-ai');
// Full path of the SQLite database file inside DB_DIR.
const DB_PATH = path.join(DB_DIR, 'hive-ai-knowledge.db');
// Shared database handle used by every function in this module.
// It stays undefined until initializeComprehensiveDatabase() assigns the opened connection.
let db;
// Set to true once the schema DDL has run successfully on the current handle, so
// repeated calls neither open a second connection nor re-run the DDL.
let schemaInitialized = false;
/**
 * Initialize comprehensive database schema.
 *
 * Ensures DB_DIR exists, opens the SQLite file at DB_PATH (only if no handle is
 * open yet) and executes the full schema script: tables, indexes, FTS5 virtual
 * tables and the triggers that feed them. Every statement uses IF NOT EXISTS,
 * so running the script against an existing database is harmless.
 *
 * The call is idempotent: once the schema has been applied, later calls return
 * `true` immediately. Previously each call opened a new connection and
 * overwrote `db` without closing the old one, which leaked handles and could
 * swap the connection underneath a transaction that was still in flight.
 *
 * @returns {Promise<boolean>} `true` when the database is ready, `false` when
 *   opening the file or applying the schema failed (the error is logged, never thrown).
 */
export async function initializeComprehensiveDatabase() {
  try {
    // Fast path: handle already open and schema already applied.
    if (db && schemaInitialized) {
      return true;
    }
    // Ensure directory exists
    if (!fs.existsSync(DB_DIR)) {
      fs.mkdirSync(DB_DIR, { recursive: true });
    }
    // Open database only when there is no handle yet; an existing handle is
    // reused so a retry after a failed schema run does not leak a connection.
    if (!db) {
      // sqlite3 is loaded lazily so importing this module does not require the native driver.
      const sqlite3Driver = await import('sqlite3');
      db = await open({
        filename: DB_PATH,
        driver: sqlite3Driver.default.Database,
      });
    }
    // Create comprehensive schema with optimized indexing.
    //
    // NOTE(review): idx_stage_outputs_conversation is declared twice below (first on
    // (conversation_id, stage_name), later on (conversation_id) alone); because of
    // IF NOT EXISTS the second declaration is a no-op. idx_conversations_created
    // likewise duplicates idx_conversations_created_at.
    //
    // NOTE(review): conversations_fts is declared with content='knowledge_base', but its
    // `question` and `source_of_truth` columns are not columns of knowledge_base, and
    // knowledge_base.id is TEXT while content_rowid is expected to name an integer
    // rowid column. Reading column values back through that FTS table will probably
    // fail — confirm against the FTS5 external-content documentation before relying on it.
    await db.exec(`
-- Conversations: Core question-answer pairs with metadata
CREATE TABLE IF NOT EXISTS conversations (
id TEXT PRIMARY KEY,
question TEXT NOT NULL, -- Original user question
final_answer TEXT NOT NULL, -- Final pipeline result
source_of_truth TEXT NOT NULL, -- Curator output for knowledge retrieval
conversation_context TEXT, -- Any additional context
profile_id TEXT, -- User/project profile
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);
-- PERFORMANCE INDEXES for timestamp-based queries
CREATE INDEX IF NOT EXISTS idx_conversations_created_at ON conversations(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_search ON conversations(created_at DESC, source_of_truth);
CREATE INDEX IF NOT EXISTS idx_conversations_question_search ON conversations(created_at DESC, question);
-- Stage Outputs: Every word from every stage preserved forever
CREATE TABLE IF NOT EXISTS stage_outputs (
id TEXT PRIMARY KEY,
conversation_id TEXT NOT NULL,
stage_name TEXT NOT NULL CHECK (stage_name IN ('generator', 'refiner', 'validator', 'curator')),
stage_number INTEGER NOT NULL CHECK (stage_number IN (1, 2, 3, 4)),
provider TEXT NOT NULL, -- Gemini, Grok, OpenAI, etc.
model TEXT NOT NULL, -- Specific model used
full_output TEXT NOT NULL, -- Complete unedited AI response
character_count INTEGER NOT NULL,
word_count INTEGER NOT NULL,
temperature REAL,
processing_time_ms INTEGER,
tokens_used INTEGER,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- PERFORMANCE INDEXES for stage outputs queries
CREATE INDEX IF NOT EXISTS idx_stage_outputs_conversation ON stage_outputs(conversation_id, stage_name);
CREATE INDEX IF NOT EXISTS idx_stage_outputs_created_at ON stage_outputs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_stage_outputs_stage_time ON stage_outputs(stage_name, created_at DESC);
-- Knowledge Base: Curator outputs flagged as source of truth
CREATE TABLE IF NOT EXISTS knowledge_base (
id TEXT PRIMARY KEY,
conversation_id TEXT NOT NULL,
curator_content TEXT NOT NULL, -- Full curator output - source of truth
topics TEXT NOT NULL, -- JSON array of topics
keywords TEXT NOT NULL, -- JSON array of keywords
semantic_embedding BLOB, -- Vector embedding for similarity search
is_source_of_truth INTEGER DEFAULT 1, -- Always 1 for curator outputs
relevance_score REAL DEFAULT 1.0,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- Topics: Searchable topic extraction
CREATE TABLE IF NOT EXISTS conversation_topics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
conversation_id TEXT NOT NULL,
topic TEXT NOT NULL,
confidence REAL DEFAULT 1.0,
is_primary INTEGER DEFAULT 0, -- Main topic flag
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- Keywords: Full-text search support
CREATE TABLE IF NOT EXISTS conversation_keywords (
id INTEGER PRIMARY KEY AUTOINCREMENT,
conversation_id TEXT NOT NULL,
keyword TEXT NOT NULL,
frequency INTEGER DEFAULT 1,
weight REAL DEFAULT 1.0,
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- Stage Comparisons: Track how content evolves through pipeline
CREATE TABLE IF NOT EXISTS stage_evolution (
id TEXT PRIMARY KEY,
conversation_id TEXT NOT NULL,
generator_length INTEGER,
refiner_length INTEGER,
validator_length INTEGER,
curator_length INTEGER,
total_evolution_percentage REAL,
quality_score REAL,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- Performance indices for fast querying
CREATE INDEX IF NOT EXISTS idx_conversations_created ON conversations(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_question ON conversations(question);
CREATE INDEX IF NOT EXISTS idx_stage_outputs_conversation ON stage_outputs(conversation_id);
CREATE INDEX IF NOT EXISTS idx_stage_outputs_stage ON stage_outputs(stage_name);
CREATE INDEX IF NOT EXISTS idx_knowledge_base_conversation ON knowledge_base(conversation_id);
CREATE INDEX IF NOT EXISTS idx_knowledge_base_source ON knowledge_base(is_source_of_truth);
CREATE INDEX IF NOT EXISTS idx_topics_conversation ON conversation_topics(conversation_id);
CREATE INDEX IF NOT EXISTS idx_topics_topic ON conversation_topics(topic);
CREATE INDEX IF NOT EXISTS idx_keywords_conversation ON conversation_keywords(conversation_id);
CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON conversation_keywords(keyword);
-- Full-text search virtual table
CREATE VIRTUAL TABLE IF NOT EXISTS conversations_fts USING fts5(
conversation_id,
question,
source_of_truth,
content='knowledge_base',
content_rowid='id'
);
-- Enhanced tables for new consensus features
-- Conversation context tracking for cross-conversation memory
CREATE TABLE IF NOT EXISTS conversation_context (
id INTEGER PRIMARY KEY AUTOINCREMENT,
conversation_id TEXT NOT NULL,
referenced_conversation_id TEXT NOT NULL,
relevance_score REAL DEFAULT 1.0,
context_type TEXT CHECK (context_type IN ('recent_24h', 'thematic', 'direct_reference')),
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (conversation_id) REFERENCES conversations(id),
FOREIGN KEY (referenced_conversation_id) REFERENCES conversations(id)
);
-- Stage confidence and reasoning tracking
CREATE TABLE IF NOT EXISTS stage_confidence (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stage_output_id TEXT NOT NULL,
confidence_score REAL NOT NULL,
reasoning TEXT,
sources_used TEXT, -- JSON array of source conversation IDs
content_quality_score REAL,
technical_depth_score REAL,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stage_output_id) REFERENCES stage_outputs(id)
);
-- Consensus metrics for each conversation
CREATE TABLE IF NOT EXISTS consensus_metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
conversation_id TEXT NOT NULL UNIQUE,
final_confidence REAL NOT NULL,
stage_agreement REAL,
content_quality REAL,
provider_reliability REAL,
context_utilization REAL,
agreement_matrix TEXT, -- JSON representation of agreement matrix
processing_time_ms INTEGER,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- Curator truths tracking (single source of truth)
CREATE TABLE IF NOT EXISTS curator_truths (
id INTEGER PRIMARY KEY AUTOINCREMENT,
conversation_id TEXT NOT NULL UNIQUE,
curator_output TEXT NOT NULL,
confidence_score REAL NOT NULL,
topic_summary TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (conversation_id) REFERENCES conversations(id)
);
-- Indexes for enhanced features
CREATE INDEX IF NOT EXISTS idx_conversation_context_conv ON conversation_context(conversation_id);
CREATE INDEX IF NOT EXISTS idx_conversation_context_ref ON conversation_context(referenced_conversation_id);
CREATE INDEX IF NOT EXISTS idx_conversation_context_type ON conversation_context(context_type);
CREATE INDEX IF NOT EXISTS idx_stage_confidence_stage ON stage_confidence(stage_output_id);
CREATE INDEX IF NOT EXISTS idx_consensus_metrics_conv ON consensus_metrics(conversation_id);
CREATE INDEX IF NOT EXISTS idx_consensus_metrics_confidence ON consensus_metrics(final_confidence DESC);
CREATE INDEX IF NOT EXISTS idx_curator_truths_conv ON curator_truths(conversation_id);
CREATE INDEX IF NOT EXISTS idx_curator_truths_topic ON curator_truths(topic_summary);
CREATE INDEX IF NOT EXISTS idx_curator_truths_confidence ON curator_truths(confidence_score DESC);
-- Enhanced FTS table for curator truths
CREATE VIRTUAL TABLE IF NOT EXISTS curator_truths_fts USING fts5(
conversation_id,
curator_output,
topic_summary,
content='curator_truths',
content_rowid='id'
);
-- Trigger to keep FTS table in sync
CREATE TRIGGER IF NOT EXISTS knowledge_base_fts_insert AFTER INSERT ON knowledge_base BEGIN
INSERT INTO conversations_fts(conversation_id, question, source_of_truth)
SELECT NEW.conversation_id, c.question, NEW.curator_content
FROM conversations c WHERE c.id = NEW.conversation_id;
END;
-- Trigger to keep curator truths FTS in sync
CREATE TRIGGER IF NOT EXISTS curator_truths_fts_insert AFTER INSERT ON curator_truths BEGIN
INSERT INTO curator_truths_fts(conversation_id, curator_output, topic_summary)
VALUES (NEW.conversation_id, NEW.curator_output, NEW.topic_summary);
END;
`);
    // Only mark the schema as applied after the whole script succeeded, so a
    // failed run is retried on the next call.
    schemaInitialized = true;
    console.log('✅ Comprehensive database schema initialized');
    return true;
  }
  catch (error) {
    console.error('❌ Failed to initialize comprehensive database:', error);
    return false;
  }
}
/**
 * Store complete conversation with all stage outputs.
 *
 * Inside a single transaction this writes: the conversation row, one
 * stage_outputs row per pipeline stage (generator, refiner, validator,
 * curator), the knowledge_base row holding the curator output, the topic and
 * keyword rows, and a stage_evolution row with per-stage lengths.
 *
 * Re-storing an existing conversation id replaces its rows. Topic and keyword
 * rows are deleted first because those tables have an auto-increment key and
 * no unique constraint, so INSERT OR REPLACE alone would accumulate duplicates.
 *
 * @param {string} conversationId - Primary key of the conversation; also used as the prefix of derived row ids.
 * @param {string} question - Original user question.
 * @param {object} stageOutputs - Object with `generator`, `refiner`, `validator` and `curator` entries; each entry
 *   is read for `content`, `provider`, `model`, `temperature`, `processingTime` and `tokens`.
 * @param {string} finalAnswer - Final pipeline result.
 * @param {string[]} [topics=[]] - Topics; the first one is flagged as primary.
 * @param {string[]} [keywords=[]] - Keywords.
 * @returns {Promise<boolean>} `true` when everything was committed, `false` on any failure (logged, never thrown).
 */
export async function storeCompleteConversation(conversationId, question, stageOutputs, finalAnswer, topics = [], keywords = []) {
  // True only between a successful BEGIN and a successful COMMIT, so the catch
  // block never issues ROLLBACK when there is no transaction to roll back.
  let transactionOpen = false;
  // ''.split(/\s+/) yields [''] (length 1), and leading/trailing whitespace adds
  // empty tokens, so trim first and treat blank text as zero words.
  const countWords = (text) => {
    const trimmed = text.trim();
    return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
  };
  try {
    if (!db)
      await initializeComprehensiveDatabase();
    // Start transaction
    await db.run('BEGIN TRANSACTION');
    transactionOpen = true;
    // Store main conversation record
    await db.run(`
INSERT OR REPLACE INTO conversations (id, question, final_answer, source_of_truth, created_at, last_updated)
VALUES (?, ?, ?, ?, datetime('now'), datetime('now'))
`, [conversationId, question, finalAnswer, stageOutputs.curator.content]);
    // Store each stage output with full preservation
    const stages = [
      { name: 'generator', number: 1, data: stageOutputs.generator },
      { name: 'refiner', number: 2, data: stageOutputs.refiner },
      { name: 'validator', number: 3, data: stageOutputs.validator },
      { name: 'curator', number: 4, data: stageOutputs.curator }
    ];
    for (const stage of stages) {
      const stageId = `${conversationId}-${stage.name}`;
      const wordCount = countWords(stage.data.content);
      await db.run(`
INSERT OR REPLACE INTO stage_outputs (
id, conversation_id, stage_name, stage_number, provider, model,
full_output, character_count, word_count, temperature,
processing_time_ms, tokens_used, created_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
`, [
        stageId, conversationId, stage.name, stage.number, stage.data.provider, stage.data.model,
        stage.data.content, stage.data.content.length, wordCount, stage.data.temperature,
        stage.data.processingTime, stage.data.tokens || 0
      ]);
    }
    // Store curator output as source of truth in knowledge base
    const knowledgeId = `${conversationId}-knowledge`;
    await db.run(`
INSERT OR REPLACE INTO knowledge_base (
id, conversation_id, curator_content, topics, keywords, is_source_of_truth, created_at
) VALUES (?, ?, ?, ?, ?, 1, datetime('now'))
`, [knowledgeId, conversationId, stageOutputs.curator.content, JSON.stringify(topics), JSON.stringify(keywords)]);
    // Store topics for searchability. Old rows are removed first so that storing
    // the same conversation again does not duplicate its topics.
    await db.run('DELETE FROM conversation_topics WHERE conversation_id = ?', [conversationId]);
    for (const [position, topic] of topics.entries()) {
      // Only the first list position is primary (indexOf would also flag later duplicates of it).
      await db.run(`
INSERT OR REPLACE INTO conversation_topics (conversation_id, topic, confidence, is_primary)
VALUES (?, ?, 1.0, ?)
`, [conversationId, topic, position === 0 ? 1 : 0]);
    }
    // Store keywords for full-text search, replacing any previous set for this conversation.
    await db.run('DELETE FROM conversation_keywords WHERE conversation_id = ?', [conversationId]);
    for (const keyword of keywords) {
      await db.run(`
INSERT OR REPLACE INTO conversation_keywords (conversation_id, keyword, frequency, weight)
VALUES (?, ?, 1, 1.0)
`, [conversationId, keyword]);
    }
    // Store evolution metrics
    const evolutionId = `${conversationId}-evolution`;
    const generatorLength = stageOutputs.generator.content.length;
    const curatorLength = stageOutputs.curator.content.length;
    // Curator length as a percentage of generator length; an empty generator
    // output is recorded as 0 instead of dividing by zero (Infinity / NaN).
    const evolutionPercentage = generatorLength === 0 ? 0 : (curatorLength / generatorLength) * 100;
    await db.run(`
INSERT OR REPLACE INTO stage_evolution (
id, conversation_id, generator_length, refiner_length, validator_length, curator_length,
total_evolution_percentage, quality_score, created_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
`, [
      evolutionId, conversationId, generatorLength, stageOutputs.refiner.content.length,
      stageOutputs.validator.content.length, curatorLength, evolutionPercentage, 1.0
    ]);
    // Commit transaction
    await db.run('COMMIT');
    transactionOpen = false;
    console.log(`✅ Stored complete conversation ${conversationId} with all stage outputs`);
    return true;
  }
  catch (error) {
    if (transactionOpen) {
      try {
        await db.run('ROLLBACK');
      }
      catch (rollbackError) {
        // A failing ROLLBACK must not mask the original error or reject the promise.
        console.error('❌ Failed to roll back transaction:', rollbackError);
      }
    }
    console.error('❌ Failed to store complete conversation:', error);
    return false;
  }
}
/**
 * Load one conversation together with its stage outputs and knowledge-base entry.
 *
 * @param {string} conversationId - Id of the conversation to look up.
 * @returns {Promise<{conversation: object, stages: object[], knowledge: object}|null>}
 *   The conversation row, its stage rows ordered by stage number and its
 *   knowledge_base row; `null` when the conversation does not exist or a
 *   query fails (the error is logged).
 */
export async function getCompleteConversation(conversationId) {
  const conversationSql = `
SELECT * FROM conversations WHERE id = ?
`;
  const stagesSql = `
SELECT * FROM stage_outputs WHERE conversation_id = ? ORDER BY stage_number
`;
  const knowledgeSql = `
SELECT * FROM knowledge_base WHERE conversation_id = ?
`;
  try {
    if (!db) {
      await initializeComprehensiveDatabase();
    }
    // The main row decides whether the remaining lookups are worth running.
    const conversationRow = await db.get(conversationSql, [conversationId]);
    if (conversationRow) {
      const stageRows = await db.all(stagesSql, [conversationId]);
      const knowledgeRow = await db.get(knowledgeSql, [conversationId]);
      return {
        conversation: conversationRow,
        stages: stageRows,
        knowledge: knowledgeRow,
      };
    }
    return null;
  }
  catch (lookupError) {
    console.error('❌ Failed to get complete conversation:', lookupError);
    return null;
  }
}
// Search tiers in priority order. Each tier contributes the created_at filter
// appended to the shared query and the log line printed after it has run.
const SOURCE_OF_TRUTH_SEARCH_TIERS = [
  {
    tier: 'recent',
    ageFilter: `AND datetime(c.created_at) >= datetime('now', '-24 hours')`,
    describe: (count) => `🕐 Found ${count} matches in recent conversations (24h)`,
  },
  {
    tier: 'week',
    ageFilter: `AND datetime(c.created_at) >= datetime('now', '-7 days')
AND datetime(c.created_at) < datetime('now', '-24 hours')`,
    describe: (count) => `📅 Found ${count} additional matches in past week`,
  },
  {
    tier: 'historical',
    ageFilter: `AND datetime(c.created_at) < datetime('now', '-7 days')`,
    describe: (count) => `📚 Found ${count} matches in historical data`,
  },
];
// Escape the LIKE metacharacters (%, _ and the escape character itself) so the
// text is matched literally when used with `LIKE ? ESCAPE '\'`.
function escapeLikePattern(text) {
  return String(text).replace(/[\\%_]/g, '\\$&');
}
/**
 * Search source of truth entries with temporal prioritization.
 *
 * Performs a substring match of `query` against conversations.source_of_truth,
 * tier by tier: last 24 hours first, then the rest of the last 7 days, then
 * everything older. A later tier is only queried while fewer than `limit` rows
 * have been collected, and each tier is asked only for the remaining number.
 *
 * `%`, `_` and `\` inside `query` are escaped, so they match literally instead
 * of acting as LIKE wildcards (previously "100%" or "snake_case" matched
 * unintended rows).
 *
 * @param {string} query - Text to look for inside the curator output.
 * @param {number} [limit=5] - Maximum number of entries to return; a non-positive limit yields no results.
 * @returns {Promise<Array<{id: string, conversation_id: string, curator_content: string, topics: string,
 *   keywords: string, is_source_of_truth: boolean, created_at: string}>>}
 *   Matches, most recent tier first. `topics` and `keywords` are always the
 *   JSON string '[]' because the query reads the conversations table only.
 *   Returns an empty array on any failure (the error is logged).
 */
export async function searchSourceOfTruth(query, limit = 5) {
  try {
    if (!db)
      await initializeComprehensiveDatabase();
    const likePattern = `%${escapeLikePattern(query)}%`;
    const results = [];
    for (const { tier, ageFilter, describe } of SOURCE_OF_TRUTH_SEARCH_TIERS) {
      // Stop as soon as earlier (more recent) tiers have filled the quota.
      if (results.length >= limit) {
        break;
      }
      // `tier` and `ageFilter` come from the constant table above, never from
      // user input; the user-supplied text is always bound as a parameter.
      const tierRows = await db.all(`
SELECT
c.id,
c.question,
c.source_of_truth as curator_content,
'[]' as topics,
'[]' as keywords,
1 as is_source_of_truth,
c.created_at,
'${tier}' as search_tier
FROM conversations c
WHERE c.source_of_truth LIKE ? ESCAPE '\\'
${ageFilter}
ORDER BY c.created_at DESC
LIMIT ?
`, [likePattern, limit - results.length]);
      results.push(...tierRows);
      console.log(describe(tierRows.length));
    }
    console.log(`🎯 Total search results: ${results.length} (prioritized by recency)`);
    return results.map(row => ({
      id: row.id,
      conversation_id: row.id,
      curator_content: row.curator_content,
      topics: row.topics,
      keywords: row.keywords,
      is_source_of_truth: Boolean(row.is_source_of_truth),
      created_at: row.created_at
    })).slice(0, limit);
  }
  catch (error) {
    console.error('❌ Failed to search source of truth:', error);
    return [];
  }
}
/**
 * List every stored conversation for browsing, newest first.
 *
 * @returns {Promise<object[]>} Rows with `id`, `question`, `source_of_truth`,
 *   `created_at` and `last_updated`; an empty array when the query fails
 *   (the error is logged).
 */
export async function getAllConversations() {
  const listingSql = `
SELECT id, question, source_of_truth, created_at, last_updated
FROM conversations
ORDER BY created_at DESC
`;
  try {
    if (!db) {
      await initializeComprehensiveDatabase();
    }
    const conversationRows = await db.all(listingSql);
    return conversationRows;
  }
  catch (listingError) {
    console.error('❌ Failed to get all conversations:', listingError);
    return [];
  }
}
/**
 * Get conversation details by ID.
 *
 * The database is initialized only when no handle exists yet, matching the
 * other query helpers in this module; calling the initializer on every lookup
 * would open a fresh connection each time.
 *
 * @param {string} conversationId - Id of the conversation to fetch.
 * @returns {Promise<object|null>} The conversations row, or `null` when it
 *   does not exist or the query fails (the error is logged).
 */
export async function getConversationDetails(conversationId) {
  try {
    if (!db)
      await initializeComprehensiveDatabase();
    const conversation = await db.get('SELECT * FROM conversations WHERE id = ?', [conversationId]);
    return conversation || null;
  }
  catch (error) {
    console.error('Error getting conversation details:', error);
    return null;
  }
}
/**
 * Get all stage outputs for a conversation.
 *
 * The database is initialized only when no handle exists yet, matching the
 * other query helpers in this module. Rows are ordered by creation time with
 * `stage_number` as tie-breaker: created_at is written with datetime('now'),
 * which has one-second resolution, so the stages of one conversation usually
 * share the same timestamp and would otherwise come back in undefined order.
 *
 * @param {string} conversationId - Id of the conversation whose stages are wanted.
 * @returns {Promise<object[]>} The stage_outputs rows; an empty array when
 *   there are none or the query fails (the error is logged).
 */
export async function getAllStageOutputs(conversationId) {
  try {
    if (!db)
      await initializeComprehensiveDatabase();
    const outputs = await db.all('SELECT * FROM stage_outputs WHERE conversation_id = ? ORDER BY created_at ASC, stage_number ASC', [conversationId]);
    return outputs || [];
  }
  catch (error) {
    console.error('Error getting stage outputs:', error);
    return [];
  }
}
/**
 * Build a plain-text knowledge digest for a query from curator outputs.
 *
 * Asks searchSourceOfTruth() for up to three entries and renders each one as a
 * small section: a header line, the first eight characters of the conversation
 * id, an optional "Topics: ..." line (left blank when there are no topics) and
 * the curator content.
 *
 * @param {string} query - Text to search the source-of-truth entries for.
 * @returns {Promise<string|null>} The concatenated, trimmed sections, or
 *   `null` when nothing matched or an error occurred (the error is logged).
 */
export async function retrieveSourceOfTruthKnowledge(query) {
  try {
    const matches = await searchSourceOfTruth(query, 3);
    if (matches.length === 0) {
      return null;
    }
    // Render curator outputs only, one section per match.
    const sections = matches.map((match) => {
      const rawTopics = match.topics;
      let decodedTopics;
      if (typeof rawTopics === 'string') {
        // Topics may arrive as a JSON-encoded array.
        decodedTopics = JSON.parse(rawTopics || '[]');
      }
      else {
        decodedTopics = rawTopics || [];
      }
      const topicList = Array.isArray(decodedTopics) ? decodedTopics : [];
      const topicsLine = topicList.length > 0 ? `Topics: ${topicList.map(String).join(', ')}` : '';
      return [
        `--- Source of Truth Entry ---`,
        `Conversation: ${match.conversation_id.substring(0, 8)}`,
        topicsLine,
        `${match.curator_content}`,
        '',
        '',
      ].join('\n');
    });
    return sections.join('').trim();
  }
  catch (retrievalError) {
    console.error('Error retrieving source of truth knowledge:', retrievalError);
    return null;
  }
}
// Export the shared handle as a live binding: importers see `undefined` until
// initializeComprehensiveDatabase() has assigned the opened connection.
export { db };
//# sourceMappingURL=comprehensive-database.js.map