@shirokuma-library/mcp-knowledge-base
MCP server for AI-powered knowledge management with semantic search, graph analysis, and automatic enrichment
import { spawn } from 'child_process';

export class ClaudeInterface {
    // Runs the `claude` CLI with the given prompt, piping `input` through
    // stdin. Resolves with the trimmed stdout on success, or with '{}' on
    // any failure, so callers can JSON.parse the result unconditionally.
    async callClaude(prompt, input) {
        return new Promise((resolve) => {
            try {
                const claudeProcess = spawn('claude', ['--model', 'sonnet', prompt], {
                    env: { ...process.env },
                    timeout: 30000
                });
                let stdout = '';
                let stderr = '';
                // Swallow stdin stream errors (e.g. EPIPE when the CLI is
                // missing) so they cannot surface as unhandled errors.
                claudeProcess.stdin.on('error', () => {});
                claudeProcess.stdin.write(input);
                claudeProcess.stdin.end();
                claudeProcess.stdout.on('data', (data) => {
                    stdout += data.toString();
                });
                // stderr is captured for completeness but not otherwise used.
                claudeProcess.stderr.on('data', (data) => {
                    stderr += data.toString();
                });
                claudeProcess.on('close', (code) => {
                    if (code === 0 && stdout) {
                        resolve(stdout.trim());
                    }
                    else {
                        resolve('{}');
                    }
                });
                claudeProcess.on('error', () => {
                    resolve('{}');
                });
            }
            catch {
                resolve('{}');
            }
        });
    }
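
    // Illustrative call (not from the original source):
    //   const raw = await this.callClaude('Return {"ok":true} as JSON', text);
    // `raw` is either the model's stdout or the literal string '{}'.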
    // Enriches a knowledge item ({ title, description, content }) with
    // weighted keywords, high-level concepts, a quantized embedding, and a
    // summary. Falls back to local frequency-based extraction when Claude
    // fails or returns unusable output.
    async extractWeightedKeywords(content) {
        const text = `${content.title} ${content.description} ${content.content}`.trim();
        if (!text) {
            return this.fallbackExtraction('');
        }
        const prompt = `Analyze this text and extract important keywords.
Rules:
1. Extract keywords in ENGLISH whenever possible (translate common concepts to English)
2. Break down compound words and technical terms:
- "GraphDB" → extract both "graph" and "database" as separate keywords
- "GraphRAG" → extract "graph", "rag", "retrieval"
- "MLOps" → extract "ml", "machine learning", "ops", "operations"
- "TensorFlow" → extract "tensor", "flow", "tensorflow"
3. Normalize to base/singular forms (e.g., "running" -> "run", "databases" -> "database")
4. Include both the original compound term AND its components as keywords
5. Assign weights: compound terms get 0.6-1.0, component parts get 0.4-0.8
6. Maximum 20 keywords total
7. Concepts should be high-level categories like "authentication", "database", "optimization", etc.
You MUST output valid JSON only with this exact structure:
{
"keywords": [{"keyword": "example", "weight": 0.9}],
"concepts": [{"concept": "category", "confidence": 0.8}],
"summary": "brief summary in English"
}
No additional text, only the JSON object.`;
        try {
            const result = await this.callClaude(prompt, text);
            // Strip markdown code fences in case the model wrapped its JSON.
            let cleanedResult = result;
            if (result.includes('```json')) {
                cleanedResult = result.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
            }
            const parsed = JSON.parse(cleanedResult);
            if (!parsed.keywords || !Array.isArray(parsed.keywords) || parsed.keywords.length === 0) {
                return this.fallbackExtraction(text);
            }
            const embedding = this.generateEmbedding(parsed.keywords);
            // Tolerate models that answer with "word" instead of "keyword".
            const keywords = parsed.keywords.map((k) => ({
                keyword: k.keyword || k.word,
                weight: k.weight
            }));
            return {
                // The prompt allows up to 20 keywords; only the top 15 are kept.
                keywords: keywords.slice(0, 15),
                concepts: parsed.concepts || [],
                embedding: this.quantizeEmbedding(embedding),
                summary: parsed.summary || `${text.substring(0, 200)}...`,
                searchIndex: text.substring(0, 500)
            };
        }
        catch {
            return this.fallbackExtraction(text);
        }
    }
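
    // For reference, the enrichment object returned above (and by
    // fallbackExtraction below) has the shape:
    //   { keywords: [{ keyword, weight }],
    //     concepts: [{ concept, confidence }],
    //     embedding: <128-byte Buffer>,
    //     summary: string, searchIndex: string }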
    // Quantizes a float embedding with values in [-1, 1] to one byte per
    // dimension: q = round((clamp(x) + 1) * 127.5), so -1 -> 0 and 1 -> 255.
    quantizeEmbedding(embedding) {
        const quantized = new Uint8Array(embedding.length);
        for (let i = 0; i < embedding.length; i++) {
            const clamped = Math.max(-1, Math.min(1, embedding[i]));
            quantized[i] = Math.round((clamped + 1) * 127.5);
        }
        return Buffer.from(quantized);
    }
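
    // For illustration only (not part of this class), the inverse mapping
    // would be:
    //   dequantize(buf) { return Array.from(buf, (q) => q / 127.5 - 1); }
    // Round-tripping introduces at most ~0.004 absolute error per value.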
    // Builds a deterministic 128-dimensional pseudo-embedding by feature
    // hashing: each of the top 10 keywords is hashed, its weight is spread
    // across 8 dimensions, and the result is L2-normalized. This is a cheap
    // lexical stand-in for a learned embedding model.
    generateEmbedding(keywords) {
        const embedding = new Array(128).fill(0);
        for (let i = 0; i < keywords.length && i < 10; i++) {
            const word = (keywords[i].keyword || keywords[i].word || '').toLowerCase();
            const weight = keywords[i].weight;
            // 31x rolling hash (as in Java's String.hashCode), kept in
            // signed 32-bit range.
            let hash = 0;
            for (let j = 0; j < word.length; j++) {
                hash = ((hash << 5) - hash + word.charCodeAt(j)) & 0xffffffff;
            }
            // Scatter the keyword's weight over 8 of the 128 dimensions,
            // modulated by a deterministic pseudo-random factor.
            for (let dim = 0; dim < 8; dim++) {
                const index = (Math.abs(hash) + dim * 16) % 128;
                embedding[index] += weight * (0.5 + 0.5 * Math.sin(hash + dim));
            }
        }
        // L2-normalize so dot products act as cosine similarity.
        const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
        if (magnitude > 0) {
            for (let i = 0; i < embedding.length; i++) {
                embedding[i] /= magnitude;
            }
        }
        return embedding;
    }
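
    // Because generateEmbedding L2-normalizes its output, cosine similarity
    // between two raw (unquantized) vectors is just a dot product, e.g.:
    //   a.reduce((sum, v, i) => sum + v * b[i], 0)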
    // Local fallback: ranks words of 3+ characters by frequency and scales
    // the counts into [0, 1] weights. Used whenever Claude is unavailable
    // or its response cannot be parsed.
    fallbackExtraction(text) {
        const words = text.toLowerCase().match(/\b\w{3,}\b/g) || [];
        const wordFreq = new Map();
        words.forEach(word => {
            wordFreq.set(word, (wordFreq.get(word) || 0) + 1);
        });
        const keywords = Array.from(wordFreq.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, 10)
            .map(([word, freq]) => ({
                keyword: word,
                weight: Math.min(freq / words.length * 10, 1.0)
            }));
        // Guarantee at least one keyword so downstream consumers never see
        // an empty list.
        if (keywords.length === 0) {
            keywords.push({ keyword: 'content', weight: 0.5 });
        }
        const embedding = this.generateEmbedding(keywords);
        return {
            keywords,
            concepts: [],
            embedding: this.quantizeEmbedding(embedding),
            summary: text.length > 200 ? `${text.substring(0, 200)}...` : (text || 'No content available'),
            searchIndex: text.substring(0, 500)
        };
    }
}
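
A minimal usage sketch, assuming the claude CLI is installed and on PATH; the import path and sample fields are hypothetical, and the { title, description, content } argument shape follows extractWeightedKeywords above:

import { ClaudeInterface } from './claude-interface.js'; // hypothetical path

const ai = new ClaudeInterface();
const enriched = await ai.extractWeightedKeywords({
    title: 'Graph databases',
    description: 'Notes on GraphDB indexing',
    content: 'How graph databases index adjacency lists for fast traversal.'
});
console.log(enriched.keywords);  // e.g. [{ keyword: 'graph', weight: 0.9 }, ...]
console.log(enriched.embedding); // 128-byte Buffer of quantized values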