UNPKG

mcp-magma-handbook

Version:

Enhanced MCP server with multi-query search, hybrid search, and collections for MAGMA computational algebra system

519 lines 21.4 kB
// Polyfill fetch for Node.js import fetch from 'node-fetch'; // @ts-ignore global.fetch = fetch; import { createClient } from '@supabase/supabase-js'; import { OpenAIEmbeddings } from '@langchain/openai'; import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; export class AdvancedMagmaKnowledgeBase { supabase; embeddings; textSplitter; isInitialized = false; // Cache for frequent queries queryCache = new Map(); CACHE_TTL = 1000 * 60 * 60; // 1 hour constructor() { const supabaseUrl = process.env.SUPABASE_URL; const supabaseKey = process.env.SUPABASE_KEY; if (!supabaseUrl || !supabaseKey) { throw new Error('SUPABASE_URL and SUPABASE_KEY must be set'); } this.supabase = createClient(supabaseUrl, supabaseKey); this.embeddings = new OpenAIEmbeddings({ modelName: 'text-embedding-3-small', // Supabase 호환성을 위해 small 사용 dimensions: 1536, }); // Advanced text splitter for better context preservation this.textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 2000, chunkOverlap: 400, separators: [ '\n\n\n\n', // Chapter breaks '\n\n\n', // Section breaks '\n\n', // Paragraph breaks '\nExample ', // Example sections '\n>', // MAGMA command prompts '\nFunction ', // Function definitions '\nIntrinsic ', // Intrinsic definitions ';', // MAGMA statement ends '\n', // Line breaks '.', // Sentence ends ' ', // Word breaks '' ], }); } async initialize() { if (this.isInitialized) return; try { // Test connection with new schema const { error } = await this.supabase .from('magma_documents_v2') .select('id') .limit(1); if (error) { console.error('V2 schema not found. Please run schema-v2.sql first.'); console.error('Error:', error.message); throw new Error('Database schema v2 not initialized. Please run supabase/schema-v2.sql'); } // Check document count const { count } = await this.supabase .from('magma_documents_v2') .select('*', { count: 'exact', head: true }); if (count === 0) { console.error('No documents in v2 database. Please run advanced indexing.'); } else { console.error(`Loaded advanced database with ${count} documents`); } this.isInitialized = true; } catch (error) { console.error('Failed to initialize advanced knowledge base:', error); throw error; } } async indexPDF(pdfPath) { console.log(`Advanced indexing PDF: ${pdfPath}`); // Load and process PDF const loader = new PDFLoader(pdfPath); const docs = await loader.load(); // Split documents with enhanced chunking const splitDocs = await this.textSplitter.splitDocuments(docs); // Generate embeddings in batches const batchSize = 50; const allProcessedDocs = []; for (let i = 0; i < splitDocs.length; i += batchSize) { const batch = splitDocs.slice(i, i + batchSize); const contents = batch.map(doc => this.cleanText(doc.pageContent)); console.log(`Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(splitDocs.length / batchSize)}`); // Generate embeddings for batch const embeddings = await this.embeddings.embedDocuments(contents); // Process documents with enhanced metadata const processedBatch = batch.map((doc, batchIndex) => { const cleanContent = contents[batchIndex]; const enhanced = this.enhanceMetadata(doc, cleanContent); return { content: doc.pageContent, content_clean: cleanContent, embedding: embeddings[batchIndex], metadata: enhanced.metadata, chapter: enhanced.chapter, section: enhanced.section, category: enhanced.category, has_code: enhanced.hasCode, has_example: enhanced.hasExample, word_count: enhanced.wordCount, }; }); allProcessedDocs.push(...processedBatch); } // Insert in batches const insertBatchSize = 25; for (let i = 0; i < allProcessedDocs.length; i += insertBatchSize) { const insertBatch = allProcessedDocs.slice(i, i + insertBatchSize); const { error } = await this.supabase .from('magma_documents_v2') .insert(insertBatch); if (error) { console.error('Error inserting batch:', error); throw error; } console.log(`Inserted batch ${Math.floor(i / insertBatchSize) + 1}/${Math.ceil(allProcessedDocs.length / insertBatchSize)}`); } // Extract and index functions await this.extractAndIndexFunctions(allProcessedDocs); console.log(`Advanced indexing completed: ${allProcessedDocs.length} documents`); } cleanText(content) { return content .replace(/\s+/g, ' ') // Normalize whitespace .replace(/[^\w\s\-\+\*\/\(\)\[\]{}.:;,]/g, ' ') // Remove special chars .replace(/\d{4,}/g, ' ') // Remove long numbers .trim(); } enhanceMetadata(doc, cleanContent) { const content = doc.pageContent; const lowerContent = content.toLowerCase(); const words = cleanContent.split(' ').filter(w => w.length > 2); // Extract chapter and section info const chapterMatch = content.match(/Chapter\s+(\d+[\w\s]*)/i) || content.match(/Ch\.\s*(\d+[\w\s]*)/i); const sectionMatch = content.match(/(\d+\.\d+(?:\.\d+)?)\s+([A-Z][^.]*)/); // Detect code and examples const hasCode = /^>\s+/m.test(content) || /intrinsic|procedure|function/i.test(content); const hasExample = /example\s+h\d+e\d+/i.test(content) || /^>\s+[A-Z]/m.test(content); return { metadata: { ...doc.metadata, chapter: chapterMatch?.[1]?.trim(), section: sectionMatch?.[0]?.trim(), hasCode, hasExample, wordCount: words.length, indexed_at: new Date().toISOString(), }, chapter: chapterMatch?.[1]?.trim() || 'Unknown', section: sectionMatch?.[0]?.trim() || '', category: this.categorizeContent(content), hasCode, hasExample, wordCount: words.length, }; } categorizeContent(content) { const lowerContent = content.toLowerCase(); // Advanced pattern matching for categories const patterns = { function: [ /intrinsic\s+[A-Z][a-zA-Z]*\s*\(/, /procedure\s+[A-Z][a-zA-Z]*\s*\(/, /function\s+[A-Z][a-zA-Z]*\s*\(/, /^[A-Z][a-zA-Z]*\s*\([^)]*\)\s*$/m, ], example: [ /example\s+h\d+e\d+/i, /^>\s+[A-Z]/m, /^> /m, ], syntax: [ /::=/, /grammar/i, /syntax/i, /<[a-z-]+>/, ], algorithm: [ /algorithm/i, /procedure/i, /method/i, /step\s+\d+/i, ], theory: [ /theorem/i, /lemma/i, /proposition/i, /proof/i, /corollary/i, ] }; for (const [category, patternList] of Object.entries(patterns)) { if (patternList.some(pattern => pattern.test(content))) { return category; } } return 'general'; } async extractAndIndexFunctions(documents) { console.log('Extracting function definitions...'); const functions = []; for (const doc of documents) { const functionMatches = this.extractFunctionDefinitions(doc.content); for (const func of functionMatches) { functions.push({ function_name: func.name, function_signature: func.signature, description: func.description, category: func.category, chapter: doc.chapter, usage_examples: func.examples, related_functions: func.related, document_id: null, // Will be set after document insertion }); } } if (functions.length > 0) { const { error } = await this.supabase .from('magma_functions') .insert(functions); if (error) { console.error('Error inserting functions:', error); } else { console.log(`Indexed ${functions.length} function definitions`); } } } extractFunctionDefinitions(content) { const functions = []; // Pattern for intrinsic definitions const intrinsicPattern = /intrinsic\s+([A-Z][a-zA-Z]*)\s*\(([^)]*)\)\s*->?\s*([^{]*)\s*\{([^}]*)\}/gi; let match; while ((match = intrinsicPattern.exec(content)) !== null) { functions.push({ name: match[1], signature: `${match[1]}(${match[2]})`, description: match[4].trim(), category: 'intrinsic', examples: [], related: [], }); } // Pattern for function calls in examples const examplePattern = />\s+([A-Z][a-zA-Z]*)\s*\([^)]*\)/g; let exampleMatch; while ((exampleMatch = examplePattern.exec(content)) !== null) { const functionName = exampleMatch[1]; const fullMatch = exampleMatch[0]; if (functionName && !functions.some(f => f.name === functionName)) { functions.push({ name: functionName, signature: fullMatch.replace('> ', ''), description: 'Function found in examples', category: 'function', examples: [fullMatch], related: [], }); } } return functions; } // Hybrid search combining vector similarity and BM25 async hybridSearch(query, limit = 5, category = 'all', vectorWeight = 0.6, // BM25를 더 중요하게 (코딩 이론에서는 키워드가 중요) bm25Weight = 0.4) { const cacheKey = `hybrid:${query}:${limit}:${category}:${vectorWeight}:${bm25Weight}`; // Check cache first if (this.queryCache.has(cacheKey)) { const cached = this.queryCache.get(cacheKey); if (Date.now() - cached.timestamp < this.CACHE_TTL) { return cached.results; } } // Generate query embedding const queryEmbedding = await this.embeddings.embedQuery(query); // Call hybrid search function const { data, error } = await this.supabase.rpc('search_magma_hybrid', { query_text: query, query_embedding: queryEmbedding, similarity_threshold: 0.4, bm25_weight: bm25Weight, vector_weight: vectorWeight, match_count: limit, category_filter: category === 'all' ? null : category }); if (error) { console.error('Hybrid search error:', error); throw error; } const results = data.map((row) => ({ content: row.content, metadata: row.metadata, score: row.combined_score, vectorSimilarity: row.vector_similarity, bm25Score: row.bm25_score, rank: row.rank, })); // Cache results this.queryCache.set(cacheKey, { results, timestamp: Date.now(), }); return results; } // Function-specific search with fuzzy matching async searchFunctions(functionQuery, limit = 10) { const { data, error } = await this.supabase.rpc('search_magma_functions', { function_query: functionQuery, similarity_threshold: 0.3, match_count: limit }); if (error) { console.error('Function search error:', error); throw error; } return data || []; } // Enhanced search with query expansion async enhancedSearch(query, limit = 5, category = 'all') { // Expand query with synonyms and related terms const expandedQuery = this.expandQuery(query); // Perform hybrid search const results = await this.hybridSearch(expandedQuery, limit * 2, category); // Re-rank results based on additional criteria const rerankedResults = this.rerankResults(results, query); return rerankedResults.slice(0, limit); } expandQuery(query) { const synonyms = { // 코딩 이론 확장 'hamming': ['error', 'correction', 'linear', 'code', 'generator'], 'reed': ['solomon', 'polynomial', 'evaluation', 'error'], 'bch': ['cyclic', 'polynomial', 'primitive', 'code'], 'code': ['algorithm', 'implementation', 'function', 'linear', 'block'], 'generator': ['matrix', 'basis', 'span', 'linear'], 'matrix': ['linear', 'transformation', 'operator', 'generator'], // 군론 확장 'group': ['algebra', 'structure', 'set', 'permutation', 'symmetric'], 'permutation': ['symmetric', 'alternating', 'cycle', 'transposition'], 'sylow': ['subgroup', 'theorem', 'prime', 'power'], // 체론 확장 'field': ['ring', 'domain', 'arithmetic', 'finite', 'galois'], 'finite': ['field', 'galois', 'primitive', 'polynomial'], 'polynomial': ['expression', 'equation', 'formula', 'irreducible'], // 타원곡선 확장 'elliptic': ['curve', 'point', 'addition', 'weierstrass', 'jacobian'], 'curve': ['elliptic', 'algebraic', 'geometry', 'point', 'rational'], }; let expanded = query; const words = query.toLowerCase().split(' '); for (const word of words) { if (synonyms[word]) { expanded += ' ' + synonyms[word].join(' '); } } return expanded; } rerankResults(results, originalQuery) { return results.map(result => { let bonusScore = 0; // Bonus for exact function name matches const queryWords = originalQuery.toLowerCase().split(' '); for (const word of queryWords) { if (result.content.includes(word + '(')) { bonusScore += 0.1; } } // Bonus for code examples if (result.metadata.hasCode) { bonusScore += 0.05; } // Bonus for examples if (result.metadata.hasExample) { bonusScore += 0.03; } return { ...result, score: result.score + bonusScore, }; }).sort((a, b) => b.score - a.score); } // Backwards compatibility async search(query, limit = 5, category = 'all') { return this.enhancedSearch(query, limit, category); } async getExamples(topic, complexity = 'basic') { const results = await this.hybridSearch(`${topic} example code`, 10, 'example'); return results .map(result => this.extractExampleFromContent(result, topic)) .filter(Boolean) .filter(ex => complexity === 'all' || ex?.complexity === complexity); } async explainCode(code, context) { const functions = this.extractFunctions(code); // Search for function documentation const functionSearches = await Promise.all(functions.map(func => this.searchFunctions(func))); const functionDocs = functionSearches.flat(); // Search for contextual information const contextResults = await this.hybridSearch(`${code} ${context || ''}`, 5, 'function'); return this.buildCodeExplanation(code, functionDocs, contextResults, context); } extractExampleFromContent(result, topic) { // Implementation for extracting examples (similar to previous version) const codeMatches = result.content.match(/```magma([\s\S]*?)```|>(.*?)$/gm); if (codeMatches) { return { title: this.extractTitle(result.content), code: this.cleanMagmaCode(codeMatches[0]), explanation: this.extractExplanation(result.content), complexity: this.assessComplexity(codeMatches[0]), source: result.metadata.source, page: result.metadata.page, }; } return null; } extractFunctions(code) { const functionPattern = /([A-Z][a-zA-Z0-9]*)\s*\(/g; const matches = code.match(functionPattern) || []; return [...new Set(matches.map(m => m.replace('(', '').trim()))]; } buildCodeExplanation(code, functionDocs, contextResults, context) { let explanation = '# MAGMA Code Explanation\n\n'; explanation += '## Code:\n```magma\n' + code + '\n```\n\n'; if (context) { explanation += `## Context:\n${context}\n\n`; } explanation += '## Analysis:\n'; const lines = code.split('\n').filter(line => line.trim()); for (const line of lines) { if (line.trim().startsWith('//')) continue; const analysis = this.analyzeLine(line, functionDocs); if (analysis) { explanation += `- ${analysis}\n`; } } if (functionDocs.length > 0) { explanation += '\n## Function Documentation:\n'; const uniqueFunctions = [...new Set(functionDocs.map(f => f.function_name))]; for (const func of uniqueFunctions.slice(0, 5)) { const funcDoc = functionDocs.find(f => f.function_name === func); if (funcDoc) { explanation += `\n### ${func}\n`; explanation += `**Signature**: \`${funcDoc.function_signature}\`\n`; explanation += `**Description**: ${funcDoc.description}\n`; } } } return explanation; } analyzeLine(line, functionDocs) { const trimmed = line.trim(); if (trimmed.includes(':=')) { const [varName, value] = trimmed.split(':=').map(s => s.trim()); return `Assigns \`${value}\` to variable \`${varName}\``; } const funcMatch = trimmed.match(/([A-Z][a-zA-Z0-9]*)\s*\(/); if (funcMatch) { const funcName = funcMatch[1]; const funcDoc = functionDocs.find(f => f.function_name === funcName); if (funcDoc) { return `Calls \`${funcName}\`: ${funcDoc.description.substring(0, 100)}...`; } return `Calls function \`${funcName}\``; } return null; } extractTitle(content) { const lines = content.split('\n'); for (const line of lines) { if (line.trim() && !line.startsWith('>') && line.length < 100) { return line.trim(); } } return 'MAGMA Example'; } cleanMagmaCode(code) { return code .replace(/```magma/g, '') .replace(/```/g, '') .replace(/^>\s*/gm, '') .trim(); } extractExplanation(content) { const parts = content.split(/```magma[\s\S]*?```|>.*$/gm); return parts .map(p => p.trim()) .filter(p => p.length > 20) .join(' ') .substring(0, 200); } assessComplexity(code) { const lines = code.split('\n').length; const hasLoops = /for|while/.test(code); const hasFunctions = /function|procedure/.test(code); const complexFunctions = /Factorization|IsIrreducible|GaloisGroup/.test(code); if (complexFunctions || (hasLoops && hasFunctions) || lines > 20) { return 'advanced'; } else if (hasLoops || hasFunctions || lines > 10) { return 'intermediate'; } return 'basic'; } } //# sourceMappingURL=advanced-knowledge-base.js.map