UNPKG

agr-mcp-server-enhanced

Version:

Enhanced Alliance of Genome Resources MCP Server - High-performance JavaScript implementation with simplified search capabilities

473 lines (416 loc) 14.5 kB
#!/usr/bin/env node

/**
 * Super Simple AGR MCP Server - Natural Language Only
 *
 * Just one tool: ask anything in plain English about genomics
 */

import { realpathSync } from 'node:fs';
import { fileURLToPath } from 'node:url';

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import axios from 'axios';

import { LiteratureMiningClient } from './scientific/literature-mining.js';

const API_BASE = 'https://www.alliancegenome.org/api';
const TIMEOUT = 30000;

// Gene-like tokens. The CURIE alternative must come first: otherwise the
// generic upper-case alternative matches only the "HGNC" prefix of "HGNC:1100".
const GENE_TOKEN_PATTERN = /\b(?:(?:HGNC|MGI|RGD):\d+|[A-Z][A-Z0-9]+|[a-z]+\d+)\b/g;

// Upper-case words that look like gene symbols to GENE_TOKEN_PATTERN but are
// Boolean operators.
const BOOLEAN_KEYWORDS = new Set(['AND', 'OR', 'NOT', 'BUT', 'EXCEPT']);

// Any Boolean/negation word, matched as a whole word. The capture group makes
// String.prototype.split keep the operators in its output.
const BOOLEAN_OPERATOR_PATTERN = /\b(and|or|not|but|except)\b/i;

// Signals that the user really is writing a Boolean query: an upper-case
// AND/OR, or a negation word in any case.
const UPPERCASE_CONNECTIVE_PATTERN = /\b(?:AND|OR)\b/;
const NEGATION_PATTERN = /\b(?:not|but|except)\b/i;

// "in <species>" phrases; a trailing plural "s" is tolerated ("in humans").
const SPECIES_PHRASE_PATTERN = /\bin\s+(human|mouse|zebrafish|rat|worm|fly|yeast)s?\b/i;
const SPECIES_PHRASE_PATTERN_GLOBAL = new RegExp(SPECIES_PHRASE_PATTERN.source, 'gi');

const GENE_WORD_PATTERN = /\bgenes?\b/gi;

const SPECIES_BY_COMMON_NAME = Object.freeze({
  human: 'Homo sapiens',
  mouse: 'Mus musculus',
  zebrafish: 'Danio rerio',
  rat: 'Rattus norvegicus',
  worm: 'Caenorhabditis elegans',
  fly: 'Drosophila melanogaster',
  yeast: 'Saccharomyces cerevisiae'
});

/**
 * Thin client that turns a plain-English question into one AGR API call
 * (or one literature-mining call) and returns a JSON-serializable result.
 */
class SimpleAGRClient {
  constructor() {
    this.client = axios.create({
      timeout: TIMEOUT,
      headers: {
        'User-Agent': 'AGR-MCP-Simple-Natural/1.0.0'
      }
    });
  }

  /**
   * GET `${API_BASE}${endpoint}` and return the response body.
   *
   * @param {string} endpoint - Path appended to API_BASE (must start with "/").
   * @param {object} [params] - Query-string parameters.
   * @returns {Promise<any>} The `data` field of the axios response.
   * @throws {Error} "API request failed: ..." wrapping the underlying error as `cause`.
   */
  async request(endpoint, params = {}) {
    try {
      const response = await this.client.get(`${API_BASE}${endpoint}`, { params });
      return response.data;
    } catch (error) {
      throw new Error(`API request failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Return the first gene-like token in the query, or null.
   *
   * Recognizes HGNC:/MGI:/RGD: identifiers, upper-case symbols (BRCA1) and
   * lower-case-plus-digits symbols (p53). Upper-case Boolean operators are
   * skipped so that "kinase AND repair" does not yield the gene "AND".
   *
   * @param {string} query
   * @returns {string|null}
   */
  extractGene(query) {
    for (const match of query.matchAll(GENE_TOKEN_PATTERN)) {
      if (!BOOLEAN_KEYWORDS.has(match[0])) {
        return match[0];
      }
    }
    return null;
  }

  /**
   * Smart routing based on natural language query.
   *
   * Checks run in priority order; the first one that matches wins.
   *
   * @param {string} query - The user's question.
   * @returns {{intent: string, gene?: (string|null), query: string}}
   */
  parseIntent(query) {
    const lower = query.toLowerCase();
    const possibleGene = this.extractGene(query);
    const mentions = (...keywords) => keywords.some((keyword) => lower.includes(keyword));

    if (mentions('disease', 'cancer', 'syndrome')) {
      return { intent: 'diseases', gene: possibleGene, query };
    }

    if (mentions('ortholog', 'homolog', 'mouse', 'human')) {
      return { intent: 'orthologs', gene: possibleGene, query };
    }

    if (mentions('expression', 'tissue', 'where')) {
      return { intent: 'expression', gene: possibleGene, query };
    }

    // Must precede the literature check: "research trends" and
    // "publications by year" also contain literature keywords and would
    // otherwise never reach this branch.
    if (mentions('trends', 'over time', 'publications by year', 'research trends')) {
      return { intent: 'research_trends', gene: possibleGene, query };
    }

    if (mentions('literature', 'papers', 'publications', 'research', 'pubmed', 'articles')) {
      return { intent: 'literature', gene: possibleGene, query };
    }

    if (mentions('relationships', 'interactions', 'related genes', 'co-occurrence', 'partners')) {
      return { intent: 'gene_relationships', gene: possibleGene, query };
    }

    // An explicit Boolean query beats a single-gene lookup; otherwise
    // "DNA repair genes but not p53" is treated as a lookup of the gene "DNA".
    if (UPPERCASE_CONNECTIVE_PATTERN.test(query) || NEGATION_PATTERN.test(query)) {
      return { intent: 'complex_search', query };
    }

    if (mentions('detail', 'info', 'about') || possibleGene) {
      return { intent: 'gene_info', gene: possibleGene, query };
    }

    // Whole-word match so that "for" or "factor" does not count as "or".
    if (BOOLEAN_OPERATOR_PATTERN.test(query)) {
      return { intent: 'complex_search', query };
    }

    // Default to gene search
    return { intent: 'search', query };
  }

  /**
   * Execute the appropriate action based on intent.
   *
   * Gene-specific intents fall back to a plain gene search when no gene was
   * detected, so this method always resolves to a result object.
   *
   * @param {string} intent - One of the intents produced by parseIntent.
   * @param {string} query - The user's question.
   * @param {string|null} [gene] - Gene symbol or identifier, if one was detected.
   * @returns {Promise<object>} A result object; failures are reported as
   *   `{ type: 'error', ... }` rather than thrown.
   */
  async executeIntent(intent, query, gene) {
    try {
      switch (intent) {
        case 'diseases':
          return gene ? await this.fetchGeneDiseases(gene) : await this.searchDiseases(query);

        case 'complex_search':
          return await this.complexSearch(query);

        case 'orthologs':
          if (gene) {
            return await this.fetchOrthologs(gene);
          }
          break;

        case 'expression':
          if (gene) {
            return await this.fetchExpression(gene);
          }
          break;

        case 'gene_info':
          if (gene) {
            return await this.fetchGeneInfo(gene);
          }
          break;

        case 'literature':
          if (gene) {
            return await this.fetchLiterature(gene);
          }
          break;

        case 'gene_relationships':
          if (gene) {
            return await this.fetchGeneRelationships(gene);
          }
          break;

        case 'research_trends':
          if (gene) {
            return await this.fetchResearchTrends(gene);
          }
          break;

        default:
          break;
      }

      // Either the intent is 'search'/unknown, or it needed a gene and none
      // was found in the question.
      return await this.searchGenes(query);
    } catch (error) {
      return {
        type: 'error',
        error: error.message,
        query: query,
        suggestion: 'Try rephrasing your question or use simpler terms'
      };
    }
  }

  /** Diseases associated with one gene. */
  async fetchGeneDiseases(gene) {
    const diseaseData = await this.request(`/gene/${encodeURIComponent(gene)}/diseases`);
    const diseases = diseaseData.results || [];
    return {
      type: 'diseases',
      gene: gene,
      diseases,
      summary: `Found ${diseases.length} diseases associated with ${gene}`
    };
  }

  /** Free-text disease search, used when the question names no gene. */
  async searchDiseases(query) {
    const searchData = await this.request('/search', {
      q: query,
      category: 'disease',
      limit: 10
    });
    return {
      type: 'disease_search',
      results: searchData.results || [],
      total: searchData.total || 0,
      summary: `Found ${searchData.total || 0} diseases matching "${query}"`
    };
  }

  /** Orthologs of one gene. */
  async fetchOrthologs(gene) {
    const orthData = await this.request(`/gene/${encodeURIComponent(gene)}/orthologs`);
    const orthologs = orthData.results || [];
    return {
      type: 'orthologs',
      gene: gene,
      orthologs,
      summary: `Found ${orthologs.length} orthologs for ${gene}`
    };
  }

  /** Expression records for one gene. */
  async fetchExpression(gene) {
    const exprData = await this.request(`/gene/${encodeURIComponent(gene)}/expression`);
    const expression = exprData.results || [];
    return {
      type: 'expression',
      gene: gene,
      expression,
      summary: `Found expression data for ${gene} across ${expression.length} conditions`
    };
  }

  /** The full gene record for one gene. */
  async fetchGeneInfo(gene) {
    const geneData = await this.request(`/gene/${encodeURIComponent(gene)}`);
    return {
      type: 'gene_info',
      gene: geneData,
      summary: `Detailed information for ${geneData.symbol || gene}`
    };
  }

  /** Up to ten papers about one gene, via the literature-mining client. */
  async fetchLiterature(gene) {
    const litResult = await literatureMiningClient.searchLiterature(gene, {
      maxResults: 10,
      sortBy: 'relevance'
    });
    return {
      type: 'literature',
      gene: gene,
      papers: litResult.papers.map((paper) => ({
        title: paper.title,
        authors: paper.authors,
        journal: paper.journal,
        date: paper.date,
        url: paper.url,
        relevanceScore: paper.relevanceScore
      })),
      total: litResult.total,
      summary: `Found ${litResult.returned} recent papers about ${gene} from ${litResult.total} total publications`
    };
  }

  /** Genes that co-occur with one gene in the literature. */
  async fetchGeneRelationships(gene) {
    const relResult = await literatureMiningClient.findGeneRelationships(gene, {
      maxGenes: 10,
      minCoOccurrence: 2
    });
    return {
      type: 'gene_relationships',
      primaryGene: gene,
      relatedGenes: relResult.relatedGenes,
      totalPapers: relResult.totalPapers,
      summary: `Found ${relResult.relatedGenes.length} genes frequently mentioned with ${gene} in scientific literature`
    };
  }

  /** Publication trend for one gene from 2020 through the current year. */
  async fetchResearchTrends(gene) {
    const trendsResult = await literatureMiningClient.analyzeResearchTrends(gene, {
      startYear: 2020,
      endYear: new Date().getFullYear()
    });
    return {
      type: 'research_trends',
      gene: gene,
      totalPublications: trendsResult.totalPublications,
      trendDirection: trendsResult.trendDirection,
      yearlyData: trendsResult.yearlyData,
      summary: `${gene} has ${trendsResult.totalPublications} publications with ${trendsResult.trendDirection} trend in recent years`
    };
  }

  /** Free-text gene search; the default action. */
  async searchGenes(query) {
    const searchData = await this.request('/search', {
      q: query,
      category: 'gene',
      limit: 10
    });
    return {
      type: 'search',
      query: query,
      results: (searchData.results || []).map((hit) => ({
        symbol: hit.symbol,
        name: hit.name,
        species: hit.species,
        id: hit.id || hit.primaryKey
      })),
      total: searchData.total || 0,
      summary: `Found ${searchData.total || 0} genes matching "${query}"`
    };
  }

  /**
   * Complex search with Boolean operators.
   *
   * @param {string} query - The user's question.
   * @returns {Promise<object>} A `complex_search` result with at most 15 hits.
   */
  async complexSearch(query) {
    const parsed = this.parseComplexQuery(query);
    const searchQuery = this.buildQuery(parsed);

    const params = {
      q: searchQuery,
      category: 'gene',
      limit: 15
    };
    if (parsed.species) {
      params.species = parsed.species;
    }

    const response = await this.request('/search', params);

    return {
      type: 'complex_search',
      query: query,
      searchQuery: searchQuery,
      total: response.total || 0,
      results: (response.results || []).slice(0, 15).map((hit) => ({
        symbol: hit.symbol || 'Unknown',
        name: hit.name || 'Unknown',
        species: hit.species || 'Unknown',
        id: hit.id || hit.primaryKey,
        score: Math.round(hit.score || 0)
      })),
      operators: parsed.operators,
      species: parsed.species,
      summary: `Found ${response.total || 0} genes with complex query: "${searchQuery}"`
    };
  }

  /**
   * Break a Boolean-style question into search terms, operators and species.
   *
   * "but" and "except" are normalized to NOT. A term is negative when the
   * nearest operator before it is NOT. Words of two characters or fewer and
   * the words "gene"/"genes" are dropped.
   *
   * @param {string} query
   * @returns {{terms: string[], negativeTerms: string[], operators: string[],
   *   species: (string|null), hasNot: boolean}} `terms` holds every kept term,
   *   including the negative ones.
   */
  parseComplexQuery(query) {
    const parsed = {
      terms: [],
      negativeTerms: [],
      operators: [],
      species: null,
      hasNot: false
    };

    const speciesMatch = query.match(SPECIES_PHRASE_PATTERN);
    if (speciesMatch) {
      parsed.species = SPECIES_BY_COMMON_NAME[speciesMatch[1].toLowerCase()];
    }

    const stripped = query
      .replace(SPECIES_PHRASE_PATTERN_GLOBAL, '')
      .replace(GENE_WORD_PATTERN, '');

    // Splitting on a pattern with a capture group interleaves the output:
    // even indices are free text, odd indices are the operators between them.
    const pieces = stripped.split(BOOLEAN_OPERATOR_PATTERN);
    let isNegated = false;

    pieces.forEach((piece, index) => {
      if (index % 2 === 1) {
        const operator = piece.toUpperCase().replace('BUT', 'NOT').replace('EXCEPT', 'NOT');
        parsed.operators.push(operator);
        isNegated = operator === 'NOT';
        return;
      }

      const words = piece.trim().split(/\s+/).filter((word) => word.length > 2);
      parsed.terms.push(...words);
      if (isNegated) {
        parsed.negativeTerms.push(...words);
      }
    });

    parsed.hasNot = parsed.operators.includes('NOT');
    return parsed;
  }

  /**
   * Render a parsed query as the `q` string sent to the search endpoint.
   *
   * NOTE(review): this assumes the endpoint understands "NOT" and
   * "(a OR b)" syntax - confirm against the AGR search API.
   *
   * @param {object} parsed - Output of parseComplexQuery. An object without
   *   `negativeTerms` is still accepted; in that case only the term "p53" is
   *   treated as negative, as in earlier versions.
   * @returns {string}
   */
  buildQuery(parsed) {
    if (parsed.hasNot) {
      let negativeTerms;
      if (Array.isArray(parsed.negativeTerms)) {
        negativeTerms = parsed.negativeTerms;
      } else {
        negativeTerms = parsed.terms.includes('p53') ? ['p53'] : [];
      }

      if (negativeTerms.length > 0) {
        const excluded = new Set(negativeTerms);
        const positiveTerms = parsed.terms.filter((term) => !excluded.has(term));
        const negation = negativeTerms.map((term) => `NOT ${term}`).join(' ');
        return `${positiveTerms.join(' ')} ${negation}`.trim();
      }
    }

    if (parsed.operators.includes('OR')) {
      return `(${parsed.terms.join(' OR ')})`;
    }

    return parsed.terms.join(' ');
  }

  /**
   * Main natural language processor.
   *
   * @param {string} userQuery - The user's question.
   * @returns {Promise<object>} The executeIntent result, annotated with
   *   `originalQuery`, `detectedIntent` and an ISO `timestamp`.
   */
  async processNaturalQuery(userQuery) {
    // Parse user intent
    const { intent, gene, query } = this.parseIntent(userQuery);

    // Execute the appropriate action
    const result = await this.executeIntent(intent, query, gene);

    // Add helpful context
    result.originalQuery = userQuery;
    result.detectedIntent = intent;
    result.timestamp = new Date().toISOString();

    return result;
  }
}

// Initialize
const agrClient = new SimpleAGRClient();

// Initialize scientific modules for simple interface
const literatureMiningClient = new LiteratureMiningClient({
  email: 'agr-mcp-simple@example.com',
  tool: 'AGR-MCP-Simple',
  retmax: 50 // Smaller limit for simple interface
});

// Create MCP server
const server = new Server(
  {
    name: 'agr-genomics-simple',
    version: '1.0.0'
  },
  {
    capabilities: {
      tools: {}
    }
  }
);

// Single natural language tool
const TOOLS = [
  {
    name: 'ask',
    description: 'Ask any genomics question in plain English - find genes, diseases, orthologs, or get detailed information',
    inputSchema: {
      type: 'object',
      properties: {
        question: {
          type: 'string',
          description: 'Your genomics question in natural language (e.g. "find BRCA1 genes", "what diseases are linked to p53", "show me insulin genes in mouse")'
        }
      },
      required: ['question']
    }
  }
];

/** Wrap a JSON-serializable payload as a single-text-item MCP tool result. */
function toTextResult(payload) {
  return {
    content: [
      {
        type: 'text',
        text: JSON.stringify(payload, null, 2)
      }
    ]
  };
}

// Register handlers
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return { tools: TOOLS };
});

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  if (name !== 'ask') {
    return toTextResult({
      error: `Unknown tool: ${name}. Use 'ask' with your genomics question.`
    });
  }

  // `arguments` may be absent from the request; read it once, defensively,
  // so the error path below cannot itself throw.
  const question = args?.question;

  try {
    if (typeof question !== 'string' || question.trim() === '') {
      throw new Error('The "question" argument must be a non-empty string');
    }
    const result = await agrClient.processNaturalQuery(question);
    return toTextResult(result);
  } catch (error) {
    return toTextResult({
      error: error.message,
      query: question,
      suggestion: 'Try rephrasing your question or use simpler terms',
      examples: [
        'find BRCA1 genes',
        'what diseases are linked to p53',
        'show me insulin genes in mouse',
        'get information about HGNC:1100',
        'DNA repair genes but not p53'
      ]
    });
  }
});

/**
 * Start the server on the stdio transport.
 *
 * Diagnostics go to stderr: with this transport stdout carries the JSON-RPC
 * messages, and stray text there corrupts the protocol stream.
 */
async function main() {
  console.error('Starting Simple Natural Language AGR MCP Server...');

  const transport = new StdioServerTransport();
  await server.connect(transport);

  console.error('Simple Natural Language AGR MCP Server started');
}

/**
 * True when this file is the script Node was launched with.
 *
 * Both sides are resolved to real filesystem paths so the check also holds
 * when the file is started through a symlink (e.g. an npm bin link) or from a
 * path that needs percent-encoding in a file: URL.
 */
function isMainModule() {
  const entryPath = process.argv[1];
  if (!entryPath) {
    return false;
  }
  try {
    return realpathSync(entryPath) === realpathSync(fileURLToPath(import.meta.url));
  } catch {
    // Unresolvable entry path: treat as "imported", i.e. do not auto-start.
    return false;
  }
}

if (isMainModule()) {
  main().catch((error) => {
    console.error('Failed to start server:', error);
    process.exit(1);
  });
}