@shirokuma-library/mcp-knowledge-base

MCP server for AI-powered knowledge management with semantic search, graph analysis, and automatic enrichment

import { spawn } from 'child_process';

// Thin wrapper around the `claude` CLI. Every failure path resolves with a
// usable value (never rejects), so callers can always fall back gracefully.
export class ClaudeInterface {
  // Spawns `claude --model sonnet <prompt>`, pipes `input` to stdin, and
  // resolves with trimmed stdout. On any failure (spawn error, non-zero exit,
  // empty output) it resolves with '{}' so downstream JSON.parse still works.
  async callClaude(prompt, input) {
    return new Promise((resolve) => {
      try {
        const claudeProcess = spawn('claude', ['--model', 'sonnet', prompt], {
          env: { ...process.env },
          timeout: 30000 // kill the CLI if it runs longer than 30 s
        });

        let stdout = '';
        let stderr = '';

        claudeProcess.stdin.write(input);
        claudeProcess.stdin.end();

        claudeProcess.stdout.on('data', (data) => {
          stdout += data.toString();
        });
        // stderr is collected but intentionally unused below.
        claudeProcess.stderr.on('data', (data) => {
          stderr += data.toString();
        });

        claudeProcess.on('close', (code) => {
          if (code === 0 && stdout) {
            resolve(stdout.trim());
          } else {
            resolve('{}');
          }
        });
        claudeProcess.on('error', () => {
          resolve('{}');
        });
      } catch {
        resolve('{}');
      }
    });
  }

  // Asks the model for weighted keywords, high-level concepts, and a summary,
  // then derives a quantized embedding. Falls back to local frequency-based
  // extraction when the text is empty or the model response is unusable.
  async extractWeightedKeywords(content) {
    const text = `${content.title} ${content.description} ${content.content}`.trim();
    if (!text) {
      return this.fallbackExtraction('');
    }

    const prompt = `Analyze this text and extract important keywords.

Rules:
1. Extract keywords in ENGLISH whenever possible (translate common concepts to English)
2. Break down compound words and technical terms:
   - "GraphDB" → extract both "graph" and "database" as separate keywords
   - "GraphRAG" → extract "graph", "rag", "retrieval"
   - "MLOps" → extract "ml", "machine learning", "ops", "operations"
   - "TensorFlow" → extract "tensor", "flow", "tensorflow"
3. Normalize to base/singular forms (e.g., "running" -> "run", "databases" -> "database")
4. Include both the original compound term AND its components as keywords
5. Assign weights: compound terms get 0.6-1.0, component parts get 0.4-0.8
6. Maximum 20 keywords total
7. Concepts should be high-level categories like "authentication", "database", "optimization", etc.

You MUST output valid JSON only with this exact structure:
{
  "keywords": [{"keyword": "example", "weight": 0.9}],
  "concepts": [{"concept": "category", "confidence": 0.8}],
  "summary": "brief summary in English"
}

No additional text, only the JSON object.`;

    try {
      const result = await this.callClaude(prompt, text);

      // Models sometimes wrap JSON in a markdown fence; strip it before parsing.
      let cleanedResult = result;
      if (result.includes('```json')) {
        cleanedResult = result.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
      }

      const parsed = JSON.parse(cleanedResult);
      if (!parsed.keywords || !Array.isArray(parsed.keywords) || parsed.keywords.length === 0) {
        return this.fallbackExtraction(text);
      }

      const embedding = this.generateEmbedding(parsed.keywords);
      // Tolerate either {keyword} or {word} in the model's output.
      const keywords = parsed.keywords.map((k) => ({
        keyword: k.keyword || k.word,
        weight: k.weight
      }));

      return {
        keywords: keywords.slice(0, 15),
        concepts: parsed.concepts || [],
        embedding: this.quantizeEmbedding(embedding),
        summary: parsed.summary || `${text.substring(0, 200)}...`,
        searchIndex: text.substring(0, 500)
      };
    } catch {
      // Covers JSON.parse failures and anything thrown above.
      return this.fallbackExtraction(text);
    }
  }

  // Maps each float in [-1, 1] to a byte in [0, 255]
  // (-1 -> 0, 0 -> 128, 1 -> 255) and returns the result as a Buffer.
  quantizeEmbedding(embedding) {
    const quantized = new Uint8Array(embedding.length);
    for (let i = 0; i < embedding.length; i++) {
      const clamped = Math.max(-1, Math.min(1, embedding[i]));
      quantized[i] = Math.round((clamped + 1) * 127.5);
    }
    return Buffer.from(quantized);
  }

  // Builds a deterministic 128-dimensional embedding without a model: each of
  // the top 10 keywords is hashed, its weight is spread across 8 dimensions
  // derived from the hash, and the vector is L2-normalized.
  generateEmbedding(keywords) {
    const embedding = new Array(128).fill(0);

    for (let i = 0; i < keywords.length && i < 10; i++) {
      const word = (keywords[i].keyword || keywords[i].word || '').toLowerCase();
      const weight = keywords[i].weight;

      // 31x rolling string hash, kept in 32-bit range.
      let hash = 0;
      for (let j = 0; j < word.length; j++) {
        hash = ((hash << 5) - hash + word.charCodeAt(j)) & 0xffffffff;
      }

      for (let dim = 0; dim < 8; dim++) {
        const index = (Math.abs(hash) + dim * 16) % 128;
        embedding[index] += weight * (0.5 + 0.5 * Math.sin(hash + dim));
      }
    }

    // L2-normalize so dot products behave like cosine similarity.
    const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
    if (magnitude > 0) {
      for (let i = 0; i < embedding.length; i++) {
        embedding[i] /= magnitude;
      }
    }
    return embedding;
  }

  // Local, model-free fallback: rank words of 3+ characters by frequency and
  // turn the top 10 into weighted keywords.
  fallbackExtraction(text) {
    const words = text.toLowerCase().match(/\b\w{3,}\b/g) || [];
    const wordFreq = new Map();
    words.forEach((word) => {
      wordFreq.set(word, (wordFreq.get(word) || 0) + 1);
    });

    const keywords = Array.from(wordFreq.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10)
      .map(([word, freq]) => ({
        keyword: word,
        weight: Math.min((freq / words.length) * 10, 1.0)
      }));

    // Guarantee at least one keyword so the embedding is never all zeros.
    if (keywords.length === 0) {
      keywords.push({ keyword: 'content', weight: 0.5 });
    }

    const embedding = this.generateEmbedding(keywords);
    return {
      keywords,
      concepts: [],
      embedding: this.quantizeEmbedding(embedding),
      summary: text.length > 200 ? `${text.substring(0, 200)}...` : (text || 'No content available'),
      searchIndex: text.substring(0, 500)
    };
  }
}
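
For context, a minimal usage sketch follows. It assumes the `claude` CLI is installed and on PATH, and the import specifier is hypothetical (it depends on how the package actually exposes this module); the `title`/`description`/`content` field names are the ones `extractWeightedKeywords` reads.

// Hypothetical import; adjust to wherever this module is exported from.
import { ClaudeInterface } from '@shirokuma-library/mcp-knowledge-base';

const claude = new ClaudeInterface();
const enriched = await claude.extractWeightedKeywords({
  title: 'GraphRAG pipeline notes',
  description: 'Retrieval-augmented generation over a graph database',
  content: 'We index documents into a GraphDB and run semantic search over them.'
});

console.log(enriched.keywords);  // up to 15 entries, e.g. { keyword: 'graph', weight: 0.8 }
console.log(enriched.embedding); // 128-byte Buffer (quantized, L2-normalized vector)
console.log(enriched.summary);   // model summary, or truncated source text on fallback

If the CLI is missing or returns malformed JSON, the same call still succeeds via fallbackExtraction, so this sketch also works offline, just with frequency-based keywords instead of model output.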