agr-mcp-server-enhanced
Version:
Enhanced Alliance of Genome Resources MCP Server - High-performance JavaScript implementation with simplified search capabilities
1,726 lines (1,569 loc) • 62.5 kB
JavaScript
#!/usr/bin/env node
/**
* Enhanced Alliance of Genome Resources (AGR) MCP Server - JavaScript Implementation
*
* A high-performance, modern JavaScript implementation of the AGR MCP server
* with enhanced features, better error handling, caching, and TypeScript-style documentation.
*
* Improvements over Python version:
* - Modern async/await with better error handling
* - Intelligent caching system for API responses
* - Rate limiting and connection pooling
* - Enhanced logging with structured output
* - Flexible configuration system
* - Better input validation
* - Performance optimizations
* - TypeScript-style JSDoc documentation
*
* @author Genomics Team
* @version 3.0.0
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema
} from '@modelcontextprotocol/sdk/types.js';
import axios from 'axios';
import NodeCache from 'node-cache';
import pino from 'pino';
import { LiteratureMiningClient } from './scientific/literature-mining.js';
import { PhylogeneticAnalysisClient } from './scientific/phylogenetic-analysis.js';
import { PathwayAnalysisClient } from './scientific/pathway-analysis.js';
import { VariantAnalysisClient } from './scientific/variant-analysis.js';
import { DrugGeneInteractionsClient } from './scientific/drug-gene-interactions.js';
import { ProteinStructureClient } from './scientific/protein-structure.js';
import { GeneExpressionClient } from './scientific/gene-expression.js';
import { FunctionalEnrichmentClient } from './scientific/functional-enrichment.js';
import { ScientificNLPProcessor } from './nlp/scientific-nlp-processor.js';
/**
 * Central runtime configuration for the server.
 *
 * - `endpoints`: base URLs of the Alliance services used by this module.
 * - `timeout` / `maxRetries` / `retryDelay`: HTTP timeout (ms), maximum number of
 *   attempts per request, and the base delay (ms) for exponential backoff.
 * - `cache`: options passed to the NodeCache constructor.
 * - `rateLimit`: sliding-window limit applied per endpoint string.
 * - `logging`: options passed to pino.
 */
const CONFIG = {
  // API endpoints
  endpoints: {
    base: 'https://www.alliancegenome.org/api',
    blast: 'https://blast.alliancegenome.org',
    fms: 'https://fms.alliancegenome.org/api',
    jbrowse: 'https://jbrowse.alliancegenome.org',
    textpresso: 'https://textpresso.alliancegenome.org',
    alliancemine: 'https://www.alliancegenome.org/alliancemine'
  },
  // Performance settings
  timeout: 30000,
  maxRetries: 3,
  retryDelay: 1000,
  // Caching configuration
  cache: {
    ttl: 300, // 5 minutes default TTL (seconds)
    checkperiod: 60, // Check for expired keys every minute
    maxKeys: 1000,
    useClones: false // Cached values are returned by reference; callers must not mutate them
  },
  // Rate limiting
  rateLimit: {
    windowMs: 60000, // 1 minute
    maxRequests: 100
  },
  // Logging
  logging: {
    level: process.env.LOG_LEVEL || 'info',
    transport: {
      target: 'pino-pretty',
      options: {
        colorize: true,
        translateTime: 'SYS:standard',
        // This server talks MCP over stdio, so stdout must carry protocol
        // messages only. Send log output to stderr (file descriptor 2).
        destination: 2
      }
    }
  }
};
// Module-wide structured logger, configured from CONFIG.logging.
const logger = pino(CONFIG.logging);
// Module-wide response cache, configured from CONFIG.cache.
// EnhancedAGRClient.makeRequest reads and writes it for GET requests.
const cache = new NodeCache(CONFIG.cache);
// Rate-limit bookkeeping: endpoint string -> array of request timestamps
// (Date.now() values), maintained by EnhancedAGRClient.checkRateLimit.
const rateLimitMap = new Map();
/**
 * Enhanced AGR client with caching, rate limiting, and robust error handling.
 *
 * Wraps an axios instance and relies on the module-level `CONFIG`, `logger`,
 * `cache`, and `rateLimitMap`. Cached responses are shared by reference
 * (`useClones: false`), so methods that decorate a response return a copy
 * instead of mutating the object received from `makeRequest`.
 */
class EnhancedAGRClient {
  constructor() {
    // Create axios instance with shared defaults
    this.client = axios.create({
      timeout: CONFIG.timeout,
      headers: {
        'User-Agent': 'AGR-MCP-Server-JS/3.0.0',
        Accept: 'application/json',
        'Content-Type': 'application/json'
      },
      maxRedirects: 3,
      // Responses below 500 resolve normally; only 5xx responses reject and
      // therefore reach the retry logic in makeRequest.
      validateStatus: (status) => status < 500
    });

    // Add request interceptor for logging
    this.client.interceptors.request.use(
      (config) => {
        logger.debug({ url: config.url, method: config.method }, 'Making API request');
        return config;
      },
      (error) => {
        logger.error({ error: error.message }, 'Request interceptor error');
        return Promise.reject(error);
      }
    );

    // Add response interceptor for logging
    this.client.interceptors.response.use(
      (response) => {
        // Serializing the payload only to measure it is wasted work unless
        // debug output is actually enabled.
        if (logger.isLevelEnabled('debug')) {
          logger.debug({
            url: response.config.url,
            status: response.status,
            size: JSON.stringify(response.data)?.length
          }, 'API response received');
        }
        return response;
      },
      (error) => {
        logger.error({
          url: error.config?.url,
          status: error.response?.status,
          message: error.message
        }, 'API request failed');
        return Promise.reject(error);
      }
    );
  }

  /**
   * Check (and record) a request against the sliding-window rate limit.
   * @param {string} endpoint - The endpoint being called (used as the bucket key)
   * @returns {boolean} - Whether the request is within rate limits
   */
  checkRateLimit(endpoint) {
    const now = Date.now();
    const windowStart = now - CONFIG.rateLimit.windowMs;
    // Keep only the timestamps that still fall inside the window
    const recentRequests = (rateLimitMap.get(endpoint) ?? []).filter((time) => time > windowStart);
    rateLimitMap.set(endpoint, recentRequests);

    if (recentRequests.length >= CONFIG.rateLimit.maxRequests) {
      logger.warn({ endpoint }, 'Rate limit exceeded');
      return false;
    }

    recentRequests.push(now);
    return true;
  }

  /**
   * Make HTTP request with caching, retry logic, and rate limiting.
   *
   * GET responses are cached under `cacheKey` (or a key derived from method,
   * URL and params). Responses with an HTTP status of 400 or above are returned
   * to the caller but never cached, so a transient client error is not replayed
   * for the whole TTL.
   *
   * @param {string} endpoint - API endpoint
   * @param {Object} options - Request options
   * @param {Object} [options.params] - Query params (GET) or request body (other methods)
   * @param {string} [options.baseURL] - Base URL, defaults to CONFIG.endpoints.base
   * @param {string} [options.method] - HTTP method, defaults to 'GET'
   * @param {string|null} [options.cacheKey] - Explicit cache key
   * @param {number} [options.cacheTTL] - Cache TTL in seconds
   * @returns {Promise<Object>} - API response data
   * @throws {Error} When rate limited, on timeout, on network failure, or on a 5xx response
   */
  async makeRequest(endpoint, options = {}) {
    const {
      params = {},
      baseURL = CONFIG.endpoints.base,
      method = 'GET',
      cacheKey = null,
      cacheTTL = CONFIG.cache.ttl
    } = options;
    const isGet = method === 'GET';

    // Generate cache key if not provided
    const finalCacheKey = cacheKey || `${method}:${baseURL}${endpoint}:${JSON.stringify(params)}`;

    // Check cache first
    if (isGet) {
      const cachedResult = cache.get(finalCacheKey);
      if (cachedResult) {
        logger.debug({ cacheKey: finalCacheKey }, 'Cache hit');
        return cachedResult;
      }
    }

    // Check rate limit
    if (!this.checkRateLimit(endpoint)) {
      throw new Error(`Rate limit exceeded for endpoint: ${endpoint}`);
    }

    const url = `${baseURL}/${endpoint.replace(/^\//, '')}`;
    // Always make at least one attempt, even if maxRetries is misconfigured as 0
    const maxAttempts = Math.max(1, CONFIG.maxRetries);
    let lastError;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        const response = await this.client({
          method,
          url,
          params: isGet ? params : undefined,
          data: isGet ? undefined : params
        });
        const { data, status } = response;
        // validateStatus lets 4xx responses resolve; do not cache those bodies
        const isErrorStatus = typeof status === 'number' && status >= 400;

        if (isGet && data && !isErrorStatus) {
          cache.set(finalCacheKey, data, cacheTTL);
          logger.debug({ cacheKey: finalCacheKey, ttl: cacheTTL }, 'Cached response');
        }
        return data;
      } catch (error) {
        lastError = error;
        const isRetryable = attempt < maxAttempts && error.response?.status >= 500;
        if (!isRetryable) {
          break;
        }
        const delay = CONFIG.retryDelay * Math.pow(2, attempt - 1); // Exponential backoff
        logger.warn({
          attempt,
          delay,
          error: error.message
        }, `Retrying request in ${delay}ms`);
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }

    // Translate the last failure into a descriptive error
    if (lastError.response) {
      const { status, statusText, data } = lastError.response;
      throw new Error(
        `API Error ${status} (${statusText}): ${data?.message || 'Unknown error'}`,
        { cause: lastError }
      );
    }
    if (lastError.code === 'ECONNABORTED') {
      throw new Error(`Request timeout after ${CONFIG.timeout}ms`, { cause: lastError });
    }
    throw new Error(`Network error: ${lastError.message}`, { cause: lastError });
  }

  /**
   * Validate gene ID format.
   *
   * Accepts either a prefixed identifier (`PREFIX:local-id`, e.g. `HGNC:5`,
   * `MGI:95892`, `ZFIN:ZDB-GENE-990415-72`, `WB:WBGene00000898`) or a bare
   * symbol made of letters, digits, `_` and `-` (e.g. `BRCA1`).
   *
   * @param {string} geneId - Gene identifier
   * @returns {boolean} - Whether ID format is valid
   */
  validateGeneId(geneId) {
    if (!geneId || typeof geneId !== 'string' || geneId.trim() === '') {
      return false;
    }
    // The local part is not always numeric (ZFIN, WB, FB, SGD, Xenbase IDs),
    // so allow letters, digits, '_', '.' and '-' after the colon.
    const prefixedIdPattern = /^[A-Za-z][A-Za-z0-9_.]*:[A-Za-z0-9][A-Za-z0-9_.-]*$/;
    const symbolPattern = /^[A-Z][A-Z0-9_-]*$/i;
    return prefixedIdPattern.test(geneId) || symbolPattern.test(geneId);
  }

  /**
   * Sanitize query strings.
   * @param {string} query - Input query
   * @returns {string} - Sanitized query ('' for empty or non-string input)
   */
  sanitizeQuery(query) {
    if (!query || typeof query !== 'string') {
      return '';
    }
    return query
      .trim()
      .replace(/<[^>]*>/g, '') // Remove HTML tags
      .replace(/[^\w\s.-]/g, '') // Remove special chars except word chars, spaces, dots, dashes
      .replace(/\s+/g, ' '); // Normalize whitespace
  }

  // =================== COMPLEX QUERY PARSING ===================

  /**
   * Parse complex natural language queries into structured search.
   *
   * @param {string} query - Natural language query (non-strings are treated as '')
   * @returns {Object} - Parsed query structure:
   *   `terms` (all search terms), `excludedTerms` (terms that follow a
   *   NOT / BUT NOT operator, up to the next operator), `filters`,
   *   `operators` (upper-cased) and `entities`.
   */
  parseComplexQuery(query) {
    const text = typeof query === 'string' ? query : '';
    const parsed = {
      terms: [],
      excludedTerms: [],
      filters: {},
      operators: [],
      entities: []
    };

    const booleanPattern = /\b(AND|OR|NOT|BUT NOT)\b/gi;
    const fillerPattern = /\b(genes?|proteins?|variants?|associated with|related to|involved in)\b/gi;
    // Strip filler words, then keep whitespace-separated tokens longer than 2 chars
    const toTerms = (fragment) => fragment
      .replace(fillerPattern, '')
      .trim()
      .split(/\s+/)
      .filter((term) => term.length > 2);

    // Extract Boolean operators
    parsed.operators = (text.match(booleanPattern) || []).map((op) => op.toUpperCase());

    // Extract species filters
    const speciesPattern = /\b(in|for|from)\s+(human|mouse|rat|zebrafish|fly|worm|yeast|xenopus)/gi;
    const speciesMatches = [...text.matchAll(speciesPattern)];
    if (speciesMatches.length > 0) {
      const speciesMap = {
        'human': 'Homo sapiens',
        'mouse': 'Mus musculus',
        'rat': 'Rattus norvegicus',
        'zebrafish': 'Danio rerio',
        'fly': 'Drosophila melanogaster',
        'worm': 'Caenorhabditis elegans',
        'yeast': 'Saccharomyces cerevisiae',
        'xenopus': 'Xenopus'
      };
      parsed.filters.species = speciesMatches.map((m) => speciesMap[m[2].toLowerCase()] || m[2]);
    }

    // Extract disease context
    const diseases = text.match(/\b(cancer|diabetes|alzheimer|parkinson|autism|epilepsy|syndrome)\b/gi) || [];
    if (diseases.length > 0) {
      parsed.filters.diseases = diseases;
    }

    // Extract biological process filters
    const processes = text.match(/\b(repair|apoptosis|metabolism|signaling|transcription|translation|development|proliferation)\b/gi) || [];
    if (processes.length > 0) {
      parsed.filters.biologicalProcess = processes;
    }

    // Extract molecular function filters
    const functions = text.match(/\b(kinase|phosphatase|transcription factor|receptor|channel|transporter|enzyme)\b/gi) || [];
    if (functions.length > 0) {
      parsed.filters.molecularFunction = functions;
    }

    // Extract chromosome/location filters
    const chromosomes = [...text.matchAll(/\b(chromosome|chr)\s*(\d+|[XY])/gi)];
    if (chromosomes.length > 0) {
      parsed.filters.chromosomes = chromosomes.map((m) => m[2]);
    }

    // Main search terms: drop species phrases, operators and filler words
    const withoutSpecies = text.replace(speciesPattern, '');
    parsed.terms = toTerms(withoutSpecies.replace(booleanPattern, ' '));

    // Excluded terms: splitting on a pattern with one capture group yields
    // [text, operator, text, operator, ...]; a text segment is negated when
    // the operator right before it is NOT or BUT NOT.
    const segments = withoutSpecies.split(/\b(AND|OR|NOT|BUT NOT)\b/i);
    const excluded = new Set();
    let isNegated = false;
    segments.forEach((segment, index) => {
      if (index % 2 === 1) {
        isNegated = /NOT$/i.test(segment);
        return;
      }
      if (isNegated) {
        toTerms(segment).forEach((term) => excluded.add(term));
      }
    });
    parsed.excludedTerms = [...excluded];

    // Determine entity types to search
    if (/\b(gene|protein|transcript)\b/i.test(text)) {
      parsed.entities.push('gene');
    }
    if (/\b(disease|disorder|syndrome|condition)\b/i.test(text)) {
      parsed.entities.push('disease');
    }
    if (/\b(phenotype|trait|characteristic)\b/i.test(text)) {
      parsed.entities.push('phenotype');
    }
    if (/\b(variant|mutation|allele|polymorphism)\b/i.test(text)) {
      parsed.entities.push('allele');
    }
    // Default to gene search if no entity specified
    if (parsed.entities.length === 0) {
      parsed.entities.push('gene');
    }
    return parsed;
  }

  /**
   * Build advanced search query from parsed structure.
   *
   * Example: "breast cancer genes in human AND DNA repair NOT p53"
   * builds "breast cancer DNA repair NOT p53".
   *
   * @param {Object} parsed - Parsed query structure (see parseComplexQuery)
   * @returns {string} - Advanced query string
   */
  buildAdvancedQuery(parsed) {
    const terms = [...parsed.terms];
    const operators = parsed.operators ?? [];

    let excludedTerms = [];
    if (Array.isArray(parsed.excludedTerms)) {
      excludedTerms = [...new Set(parsed.excludedTerms)];
    } else if (operators.includes('NOT')) {
      // Backward compatibility for parsed objects built without
      // `excludedTerms`: keep the historical p53/tp53 exclusion.
      excludedTerms = ['p53', 'tp53'].filter((term) => terms.includes(term));
    }

    if (excludedTerms.length > 0) {
      const excludedSet = new Set(excludedTerms);
      const positiveTerms = terms.filter((term) => !excludedSet.has(term));
      return `${positiveTerms.join(' ')} NOT ${excludedTerms.join(' ')}`.trim();
    }

    // Handle OR operator
    if (operators.includes('OR')) {
      return `(${terms.join(' OR ')})`;
    }
    // Default: join with spaces for AND behavior
    return terms.join(' ');
  }

  // =================== CORE GENE FUNCTIONS ===================

  /**
   * Search for genes with complex query support.
   *
   * @param {string} query - Search term (supports natural language)
   * @param {Object} options - Search options
   * @param {number} [options.limit] - Maximum results (default 20)
   * @param {number} [options.offset] - Result offset (default 0)
   * @param {string} [options.species] - Species filter; takes precedence over a
   *   species detected in the query text
   * @param {boolean} [options.parseComplex] - Set to false to send `query` verbatim
   * @returns {Promise<Object>} - Search results. In the parsed path the result is
   *   a copy of the response with `queryParsed` and `queryAdvanced` added.
   */
  async searchGenes(query, options = {}) {
    const params = {
      q: query,
      category: 'gene',
      limit: options.limit || 20,
      offset: options.offset || 0
    };

    // Simple search: send the query as-is
    if (options.parseComplex === false) {
      if (options.species) {
        params.species = options.species;
      }
      return this.makeRequest('/search', { params });
    }

    const parsed = this.parseComplexQuery(query);
    const advancedQuery = this.buildAdvancedQuery(parsed);
    params.q = advancedQuery;

    // An explicit species option wins; otherwise use the first detected species
    const species = options.species || parsed.filters.species?.[0];
    if (species) {
      params.species = species;
    }
    const chromosome = parsed.filters.chromosomes?.[0];
    if (chromosome) {
      params.chromosome = chromosome;
    }

    const results = await this.makeRequest('/search', { params });
    // Copy instead of mutating: the response object may be shared with the cache
    return { ...results, queryParsed: parsed, queryAdvanced: advancedQuery };
  }

  /**
   * Get gene information.
   * @param {string} geneId - Gene identifier
   * @returns {Promise<Object>} - Gene information
   * @throws {Error} When the gene ID format is invalid
   */
  async getGeneInfo(geneId) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}`);
  }

  /**
   * Get gene summary (cached for 10 minutes).
   * @param {string} geneId - Gene identifier
   * @returns {Promise<Object>} - Gene summary
   * @throws {Error} When the gene ID format is invalid
   */
  async getGeneSummary(geneId) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}/summary`, {
      cacheTTL: 600 // Cache summaries longer (10 minutes)
    });
  }

  // =================== DISEASE FUNCTIONS ===================

  /**
   * Get disease associations for a gene.
   * @param {string} geneId - Gene identifier
   * @returns {Promise<Object>} - Disease associations
   * @throws {Error} When the gene ID format is invalid
   */
  async getGeneDiseases(geneId) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}/diseases`);
  }

  /**
   * Search diseases - simplified with response filtering.
   *
   * Never rejects: a failed request is reported as
   * `{ query, error, message }` instead.
   *
   * @param {string} query - Disease search term
   * @returns {Promise<Object>} - At most 5 diseases reduced to `{ name, id }`
   */
  async searchDiseases(query) {
    try {
      const result = await this.makeRequest('/search', {
        params: {
          q: query,
          category: 'disease',
          limit: 5
        }
      });

      if (!result?.results) {
        return { query, total: 0, results: [] };
      }
      // Return only the top 5 diseases with minimal data
      const topDiseases = result.results.slice(0, 5).map(({ name, id }) => ({ name, id }));
      return {
        query,
        total: result.total,
        results: topDiseases
      };
    } catch (error) {
      return {
        query,
        error: 'Disease search failed',
        message: error.message
      };
    }
  }

  // =================== EXPRESSION FUNCTIONS ===================

  /**
   * Get gene expression data.
   * @param {string} geneId - Gene identifier
   * @returns {Promise<Object>} - Expression data
   * @throws {Error} When the gene ID format is invalid
   */
  async getGeneExpression(geneId) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}/expression`);
  }

  /**
   * Get expression ribbon summary for visualization.
   * @param {string} geneId - Gene identifier
   * @returns {Promise<Object>} - Expression ribbon data
   * @throws {Error} When the gene ID format is invalid
   */
  async getExpressionRibbonSummary(geneId) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}/expression-ribbon-summary`);
  }

  // =================== ORTHOLOGY FUNCTIONS ===================

  /**
   * Find orthologous genes across species.
   * @param {string} geneId - Gene identifier
   * @returns {Promise<Object>} - Ortholog data
   * @throws {Error} When the gene ID format is invalid
   */
  async findOrthologs(geneId) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}/orthologs`);
  }

  /**
   * Get homologs for a specific species.
   * @param {string} geneId - Gene identifier
   * @param {string} species - Target species
   * @returns {Promise<Object>} - Species-specific homologs
   * @throws {Error} When the gene ID format is invalid or species is missing
   */
  async getHomologsBySpecies(geneId, species) {
    if (!this.validateGeneId(geneId)) {
      throw new Error(`Invalid gene ID format: ${geneId}`);
    }
    if (!species || typeof species !== 'string') {
      throw new Error('Species must be specified');
    }
    const params = { species: species.trim() };
    return this.makeRequest(`/gene/${encodeURIComponent(geneId)}/orthologs`, { params });
  }

  // =================== SEQUENCE FUNCTIONS ===================

  /**
   * Perform BLAST sequence search with validation.
   *
   * Whitespace is stripped and the sequence is upper-cased before validation.
   * Nucleotide sequences may use A/C/G/T/U/N (DNA or RNA); anything else must
   * consist of the 20 standard amino-acid letters.
   *
   * @param {string} sequence - DNA/RNA/Protein sequence
   * @param {Object} options - BLAST options
   * @param {string} [options.database] - Target database (default 'all')
   * @param {string} [options.program] - BLAST program; defaults to 'blastn' for
   *   nucleotide input and 'blastp' otherwise
   * @param {number} [options.maxTargetSeqs] - Maximum targets, clamped to 1-100 (default 50)
   * @returns {Promise<Object>} - BLAST results
   * @throws {Error} When the sequence is missing, too short, or has invalid characters
   */
  async blastSequence(sequence, options = {}) {
    if (!sequence || typeof sequence !== 'string') {
      throw new Error('Sequence is required');
    }

    // Basic sequence validation
    const cleanSequence = sequence.replace(/\s/g, '').toUpperCase();
    if (cleanSequence.length < 10) {
      throw new Error('Sequence must be at least 10 nucleotides/amino acids');
    }

    // Validate sequence characters; 'U' is accepted so RNA input is not rejected
    const isNucleotide = /^[ACGTUN]+$/.test(cleanSequence);
    const isProtein = /^[ACDEFGHIKLMNPQRSTVWY]+$/.test(cleanSequence);
    if (!isNucleotide && !isProtein) {
      throw new Error('Sequence contains invalid characters');
    }

    const {
      database = 'all',
      program = isNucleotide ? 'blastn' : 'blastp',
      maxTargetSeqs = 50
    } = options;

    const params = {
      sequence: cleanSequence,
      database,
      program,
      max_target_seqs: Math.max(1, Math.min(maxTargetSeqs, 100))
    };

    return this.makeRequest('/blast', {
      params,
      baseURL: CONFIG.endpoints.blast,
      cacheTTL: 900 // Cache BLAST results for 15 minutes
    });
  }

  // =================== ADVANCED COMPLEX QUERY FUNCTIONS ===================

  /**
   * Execute complex cross-entity search.
   *
   * Runs the gene, disease and allele searches selected by the parsed query in
   * parallel; a failing search is recorded as `{ error }` under its entity key
   * rather than failing the whole call.
   *
   * @param {string} query - Complex query string
   * @param {Object} options - Search options (forwarded to the gene and allele searches)
   * @returns {Promise<Object>} - `{ query, parsed, entities, aggregations, relationships }`
   */
  async complexSearch(query, options = {}) {
    try {
      // Diagnostics go through the logger: console.log would write to stdout
      logger.debug({ query }, 'Starting complex search');
      const parsed = this.parseComplexQuery(query);
      const results = {
        query,
        parsed,
        entities: {},
        aggregations: {},
        relationships: []
      };

      // Search multiple entity types in parallel
      const searchPromises = [];

      // Gene search
      if (parsed.entities.includes('gene') || parsed.entities.length === 0) {
        searchPromises.push(
          this.searchGenes(query, { ...options, parseComplex: true })
            .then((r) => { results.entities.genes = r; })
            .catch((e) => { results.entities.genes = { error: e.message }; })
        );
      }

      // Disease search if requested
      if (parsed.entities.includes('disease')) {
        searchPromises.push(
          this.searchDiseases(query)
            .then((r) => { results.entities.diseases = r; })
            .catch((e) => { results.entities.diseases = { error: e.message }; })
        );
      }

      // Phenotype search (using allele endpoint)
      if (parsed.entities.includes('phenotype') || parsed.entities.includes('allele')) {
        searchPromises.push(
          this.searchAlleles(query, options)
            .then((r) => { results.entities.alleles = r; })
            .catch((e) => { results.entities.alleles = { error: e.message }; })
        );
      }

      await Promise.all(searchPromises);

      logger.debug({ entities: Object.keys(results.entities) }, 'Computing aggregations');
      results.aggregations = this.computeAggregations(results.entities);
      results.relationships = await this.findRelationships(results.entities);
      return results;
    } catch (error) {
      logger.error({ error: error.message, stack: error.stack }, 'Complex search failed');
      throw error;
    }
  }

  /**
   * Search for alleles/variants.
   * @param {string} query - Search query
   * @param {Object} options - Search options (`limit` default 10, `offset`, `species`)
   * @returns {Promise<Object>} - Allele search results
   */
  async searchAlleles(query, options = {}) {
    const params = {
      q: query,
      category: 'allele',
      limit: options.limit || 10,
      offset: options.offset || 0
    };
    if (options.species) {
      params.species = options.species;
    }
    return this.makeRequest('/search', { params });
  }

  /**
   * Compute aggregations across entity results.
   *
   * Reads `entities.genes.aggregations` (an array of `{ key, values }` facets)
   * plus the `total` of each entity result. Missing or malformed parts are
   * ignored.
   *
   * @param {Object} entities - Entity search results
   * @returns {Object} - Aggregated statistics
   */
  computeAggregations(entities) {
    const { genes, diseases, alleles } = entities ?? {};
    const aggregations = {
      totalResults: 0,
      byCategory: {},
      topSpecies: {},
      topDiseases: [],
      topProcesses: [],
      topFunctions: []
    };

    const geneFacets = Array.isArray(genes?.aggregations) ? genes.aggregations : [];
    // Values of the facet with the given key, or [] when absent or malformed
    const facetValues = (key) => {
      const facet = geneFacets.find((a) => a?.key === key);
      return Array.isArray(facet?.values) ? facet.values : [];
    };

    // Species distribution
    for (const value of facetValues('species')) {
      if (value?.key && typeof value.total === 'number') {
        aggregations.topSpecies[value.key] = value.total;
      }
    }
    // Disease associations, biological processes, molecular functions
    aggregations.topDiseases = facetValues('diseasesAgrSlim').slice(0, 10);
    aggregations.topProcesses = facetValues('biologicalProcessAgrSlim').slice(0, 10);
    aggregations.topFunctions = facetValues('molecularFunctionAgrSlim').slice(0, 10);

    const geneTotal = genes?.total || 0;
    aggregations.totalResults += geneTotal;
    aggregations.byCategory.genes = geneTotal;

    // Aggregate disease results
    if (diseases?.total) {
      aggregations.totalResults += diseases.total;
      aggregations.byCategory.diseases = diseases.total;
    }
    // Aggregate allele results
    if (alleles?.total) {
      aggregations.totalResults += alleles.total;
      aggregations.byCategory.alleles = alleles.total;
    }
    return aggregations;
  }

  /**
   * Find relationships between entities in search results.
   *
   * Looks up disease associations for the first 5 genes (only when a disease
   * search was part of the results) and orthologs for the top gene. The
   * lookups are independent and run in parallel; a failed lookup is skipped.
   *
   * @param {Object} entities - Entity search results
   * @returns {Promise<Array>} - Relationships found (gene-disease first, then orthology)
   */
  async findRelationships(entities) {
    const geneResults = Array.isArray(entities?.genes?.results) ? entities.genes.results : [];
    if (geneResults.length === 0) {
      return [];
    }

    const lookupDiseases = async (geneId) => {
      try {
        const diseases = await this.getGeneDiseases(geneId);
        if (!diseases?.results) {
          return null;
        }
        return {
          type: 'gene-disease',
          source: geneId,
          targets: diseases.results.slice(0, 3).map((d) => d.diseaseId)
        };
      } catch (error) {
        // Best effort: skip genes whose diseases cannot be fetched
        logger.debug({ geneId, error: error.message }, 'Skipping gene-disease lookup');
        return null;
      }
    };

    const lookupOrthologs = async (geneId) => {
      try {
        const orthologs = await this.findOrthologs(geneId);
        if (!orthologs?.results) {
          return null;
        }
        return {
          type: 'orthology',
          source: geneId,
          targets: orthologs.results
            .slice(0, 3)
            .map((o) => o.geneToGeneOrthologyGenerated?.objectGene?.primaryExternalId)
            .filter(Boolean)
        };
      } catch (error) {
        // Best effort: skip if orthologs cannot be fetched
        logger.debug({ geneId, error: error.message }, 'Skipping ortholog lookup');
        return null;
      }
    };

    const diseaseLookups = entities.diseases
      ? geneResults.slice(0, 5).map((gene) => lookupDiseases(gene?.id))
      : [];
    const topGene = geneResults[0];

    const [diseaseRelationships, orthologyRelationship] = await Promise.all([
      Promise.all(diseaseLookups),
      topGene ? lookupOrthologs(topGene.id) : null
    ]);
    return [...diseaseRelationships, orthologyRelationship].filter(Boolean);
  }

  /**
   * Advanced faceted search with multiple filters.
   *
   * `genes`, `diseases`, `processes` and `functions` are each OR-ed internally
   * and the groups are AND-ed together with the space-joined `keywords`.
   *
   * @param {Object} filters - Filter object with multiple criteria
   * @returns {Promise<Object>} - Copy of the search response with `appliedFilters` added
   */
  async facetedSearch(filters = {}) {
    const params = {
      category: filters.category || 'gene',
      limit: filters.limit || 20,
      offset: filters.offset || 0
    };

    // Build query from filters
    const queryParts = [];
    for (const group of [filters.genes, filters.diseases, filters.processes, filters.functions]) {
      if (group && group.length > 0) {
        queryParts.push(`(${group.join(' OR ')})`);
      }
    }
    if (filters.keywords && filters.keywords.length > 0) {
      queryParts.push(filters.keywords.join(' '));
    }
    params.q = queryParts.join(' AND ') || '*';

    // Add specific filters
    if (filters.species) {
      params.species = filters.species;
    }
    if (filters.chromosome) {
      params.chromosome = filters.chromosome;
    }
    if (filters.biotype) {
      params.biotype = filters.biotype;
    }

    const results = await this.makeRequest('/search', { params });
    // Copy instead of mutating: the response object may be shared with the cache
    return { ...results, appliedFilters: filters };
  }

  /**
   * Get list of supported species (cached for 1 hour).
   * @returns {Promise<Object>} - Species list
   */
  async getSpeciesList() {
    return this.makeRequest('/species', {
      cacheTTL: 3600 // Cache species list for 1 hour
    });
  }

  /**
   * Clear cache (useful for development/testing).
   * @param {string} pattern - Optional substring; only keys containing it are removed
   */
  clearCache(pattern = null) {
    if (!pattern) {
      cache.flushAll();
      logger.info('Cache cleared completely');
      return;
    }
    const keys = cache.keys().filter((key) => key.includes(pattern));
    cache.del(keys);
    logger.info({ pattern, keysCleared: keys.length }, 'Partial cache cleared');
  }

  /**
   * Get cache statistics.
   * @returns {Object} - Cache stats (`keys`, `hits`, `misses`, `ksize`, `vsize`)
   */
  getCacheStats() {
    const { hits, misses, ksize, vsize } = cache.getStats();
    return {
      keys: cache.keys().length,
      hits,
      misses,
      ksize,
      vsize
    };
  }
}
// Module-level singletons. Each is constructed once at import time with the
// options shown; what the scientific clients do with those options is defined
// in their own modules (not visible here).

// Initialize the enhanced AGR client
const agrClient = new EnhancedAGRClient();
// Initialize scientific modules
// NOTE(review): the email below uses the reserved example.com domain — confirm
// whether the literature/variant clients need a real contact address.
const literatureMiningClient = new LiteratureMiningClient({
email: 'agr-mcp-server@example.com',
tool: 'AGR-MCP-Server',
retmax: 100
});
// Shares the AGR base URL and HTTP timeout from CONFIG.
const phylogeneticClient = new PhylogeneticAnalysisClient({
apiBase: CONFIG.endpoints.base,
timeout: CONFIG.timeout
});
const pathwayClient = new PathwayAnalysisClient({
timeout: CONFIG.timeout
});
const variantClient = new VariantAnalysisClient({
timeout: CONFIG.timeout,
email: 'agr-mcp-server@example.com',
tool: 'AGR-MCP-Server'
});
const drugClient = new DrugGeneInteractionsClient({
timeout: CONFIG.timeout
});
const proteinClient = new ProteinStructureClient({
timeout: CONFIG.timeout
});
const expressionClient = new GeneExpressionClient({
timeout: CONFIG.timeout
});
const enrichmentClient = new FunctionalEnrichmentClient({
timeout: CONFIG.timeout
});
// Constructed with no options.
const nlpProcessor = new ScientificNLPProcessor();
// Create the MCP server.
// First argument: server identity (name and version).
// Second argument: declared capabilities; `tools` is declared as an empty
// object here.
const server = new Server(
{
name: 'agr-genomics-enhanced-js',
version: '3.0.0'
},
{
capabilities: {
tools: {}
}
}
);
// Define enhanced tools with better validation and documentation
const TOOLS = [
{
name: 'search_genes',
description: 'Search for genes by symbol or name',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Gene symbol or name'
},
limit: {
type: 'integer',
description: 'Maximum results (default: 20)',
default: 20
},
species: {
type: 'string',
description: 'Species filter (optional)'
}
},
required: ['query']
}
},
{
name: 'get_gene_info',
description: 'Retrieve comprehensive gene information with validation',
inputSchema: {
type: 'object',
properties: {
gene_id: {
type: 'string',
description: 'Valid gene identifier (e.g., HGNC:5, MGI:95892)'
}
},
required: ['gene_id']
}
},
{
name: 'get_gene_diseases',
description: 'Get disease associations for a gene',
inputSchema: {
type: 'object',
properties: {
gene_id: {
type: 'string',
description: 'Gene identifier'
}
},
required: ['gene_id']
}
},
{
name: 'search_diseases',
description: 'Search for diseases',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Disease name or term'
}
},
required: ['query']
}
},
{
name: 'get_gene_expression',
description: 'Get comprehensive gene expression data',
inputSchema: {
type: 'object',
properties: {
gene_id: {
type: 'string',
description: 'Gene identifier'
}
},
required: ['gene_id']
}
},
{
name: 'find_orthologs',
description: 'Find orthologous genes across all species',
inputSchema: {
type: 'object',
properties: {
gene_id: {
type: 'string',
description: 'Gene identifier'
}
},
required: ['gene_id']
}
},
{
name: 'blast_sequence',
description: 'Perform BLAST sequence search with validation',
inputSchema: {
type: 'object',
properties: {
sequence: {
type: 'string',
description: 'DNA, RNA, or protein sequence (min 10 chars)'
},
database: {
type: 'string',
description: 'Target database (default: all)',
default: 'all'
},
program: {
type: 'string',
description: 'BLAST program (auto-detected if not specified)'
},
max_target_seqs: {
type: 'integer',
description: 'Maximum targets (1-100, default: 50)',
minimum: 1,
maximum: 100,
default: 50
}
},
required: ['sequence']
}
},
{
name: 'complex_search',
description: 'Execute complex natural language queries with cross-entity search',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Natural language query (supports "AND", "OR", "NOT", species filters, etc.)'
},
limit: {
type: 'integer',
description: 'Maximum results per entity type',
default: 10
}
},
required: ['query']
}
},
{
name: 'faceted_search',
description: 'Advanced faceted search with multiple filters',
inputSchema: {
type: 'object',
properties: {
genes: {
type: 'array',
items: { type: 'string' },
description: 'Gene symbols to search'
},
diseases: {
type: 'array',
items: { type: 'string' },
description: 'Disease terms to search'
},
processes: {
type: 'array',
items: { type: 'string' },
description: 'Biological processes to filter'
},
functions: {
type: 'array',
items: { type: 'string' },
description: 'Molecular functions to filter'
},
species: {
type: 'string',
description: 'Species filter'
},
chromosome: {
type: 'string',
description: 'Chromosome filter'
},
limit: {
type: 'integer',
description: 'Maximum results',
default: 20
}
},
required: []
}
},
{
name: 'get_species_list',
description: 'Get list of all supported model organisms',
inputSchema: {
type: 'object',
properties: {},
required: []
}
},
{
name: 'get_cache_stats',
description: 'Get performance statistics and cache information',
inputSchema: {
type: 'object',
properties: {},
required: []
}
},
{
name: 'clear_cache',
description: 'Clear cache (development/testing tool)',
inputSchema: {
type: 'object',
properties: {
pattern: {
type: 'string',
description: 'Optional pattern to match specific cache keys'
}
},
required: []
}
},
{
name: 'search_literature',
description: 'Search PubMed for gene-related scientific literature',
inputSchema: {
type: 'object',
properties: {
gene_symbol: {
type: 'string',
description: 'Gene symbol to search for (e.g., BRCA1, TP53)'
},
keywords: {
type: 'array',
items: { type: 'string' },
description: 'Additional keywords to include in search'
},
date_range: {
type: 'object',
properties: {
start_year: { type: 'integer' },
end_year: { type: 'integer' }
},
description: 'Publication date range filter'
},
max_results: {
type: 'integer',
description: 'Maximum number of papers to return (default: 50)',
default: 50
},
sort_by: {
type: 'string',
enum: ['relevance', 'date'],
description: 'Sort results by relevance or publication date',
default: 'relevance'
}
},
required: ['gene_symbol']
}
},
{
name: 'find_gene_relationships',
description: 'Find gene relationships from literature co-mentions',
inputSchema: {
type: 'object',
properties: {
gene_symbol: {
type: 'string',
description: 'Primary gene symbol to analyze'
},
max_genes: {
type: 'integer',
description: 'Maximum related genes to return (default: 20)',
default: 20
},
min_co_occurrence: {
type: 'integer',
description: 'Minimum co-occurrence threshold (default: 2)',
default: 2
}
},
required: ['gene_symbol']
}
},
{
name: 'analyze_research_trends',
description: 'Track research publication trends for a gene over time',
inputSchema: {
type: 'object',
properties: {
gene_symbol: {
type: 'string',
description: 'Gene symbol to analyze trends for'
},
start_year: {
type: 'integer',
description: 'Start year for trend analysis (default: 2000)',
default: 2000
},
end_year: {
type: 'integer',
description: 'End year for trend analysis (default: current year)'
}
},
required: ['gene_symbol']
}
},
{
name: 'build_phylogenetic_tree',
description: 'Build phylogenetic tree for a gene family across species',
inputSchema: {
type: 'object',
properties: {
gene_id: {
type: 'string',
description: 'Gene identifier (e.g., HGNC:1100)'
},
species: {
type: 'array',
items: { type: 'string' },
description: 'Species to include (default: all)',
default: ['all']
},
tree_method: {
type: 'string',
enum: ['neighbor_joining', 'upgma'],
description: 'Tree construction method',
default: 'neighbor_joining'
},
include_paralogs: {
type: 'boolean',
description: 'Include paralogs in tree',
default: false
}
},
required: ['gene_id']
}
},
{
name: 'get_conservation_score',
description: 'Calculate evolutionary conservation score for a gene',
inputSchema: {
type: 'object',
properties: {
gene_id: {
type: 'string',
description: 'Gene identifier'
},
species: {
type: 'array',
items: { type: 'string' },
description: 'Species to compare',
default: ['Homo sapiens', 'Mus musculus', 'Danio rerio']
},
metric: {
type: 'string',
enum: ['identity', 'similarity'],
description: 'Conservation metric',
default: 'identity'
}
},
required: ['gene_id']
}
},
{
name: 'get_gene_pathways',
description: 'Get pathway information for a gene from KEGG, Reactome, and GO',
inputSchema: {
type: 'object',
properties: {
gene_symbol: {
type: 'string',
description: 'Gene symbol (e.g., BRCA1, TP53)'
},
species: {
type: 'string',
description: 'Species name',
default: 'Homo sapiens'
},
databases: {
type: 'array',
items: {
type: 'string',
enum: ['kegg', 'reactome', 'go']
},
description: 'Pathway databases to query',
default: ['kegg', 'reactome', 'go']
},
include_interactions: {
type: 'boolean',
description: 'Include pathway interactions',
default: false
}
},
required: ['gene_symbol']
}
},
{
name: 'pathway_enrichment',
description: 'Perform pathway enrichment analysis on a gene list',
inputSchema: {
type: 'object',
properties: {
gene_list: {
type: 'array',
items: { type: 'string' },
description: 'List of gene symbols to analyze'
},
species: {
type: 'string',
description: 'Species name',
default: 'Homo sapiens'
},
background: {
type: 'string',
description: 'Background gene set (genome or custom size)',
default: 'genome'
},
p_value_threshold: {
type: 'number',
description: 'P-value significance threshold',
default: 0.05
},
databases: {
type: 'array',
items: {
type: 'string',
enum: ['kegg', 'reactome', 'go']
},
description: 'Databases for enrichment',
default: ['kegg', 'go']
},
correction_method: {
type: 'string',
enum: ['bonferroni', 'fdr', 'none'],
description: 'Multiple testing correction method',
default: 'bonferroni'
}
},
required: ['gene_list']
}
},
{
name: 'analyze_variant',
description: 'Comprehensive variant analysis with ClinVar, gnomAD, and VEP integration',
inputSchema: {
type: 'object',
properties: {
variant: {
type: 'string',
description: 'Variant identifier (rs ID, HGVS notation, or chr:pos:ref:alt)'
},
include_clinical: {
type: 'boolean',
description: 'Include clinical significance from ClinVar',
default: true
},
include_population: {
type: 'boolean',
description: 'Include population frequency from gnomAD',
default: true
},
include_functional: {
type: 'boolean',
description: 'Include functional predictions from VEP',
default: true
},
assembly: {
type: 'string',
description: 'Genome assembly version',
enum: ['GRCh37', 'GRCh38'],
default: 'GRCh38'
}
},
required: ['variant']
}
},
{
name: 'get_drug_interactions',
description: 'Find drug-gene interactions using DGIdb and PharmGKB databases',
inputSchema: {
type: 'object',
properties: {
gene_symbol: {
type: 'string',
description: 'Gene symbol to search for drug interactions'
},
interaction_types: {
type: 'array',
items: { type: 'string' },
description: 'Types of interactions to include',
default: ['inhibitor', 'activator', 'antagonist', 'agonist']
},
source_databases: {
type: 'array',
items: { type: 'string' },
description: 'Source databases to query',
default: ['dgidb', 'pharmgkb']
},
include_clinical_trials: {
type: 'boolean',
description: 'Include clinical trial information',
default: true
}
},
required: ['gene_symbol']
}
},
{
name: 'get_protein_structure',
description: 'Retrieve protein structure information from PDB and AlphaFold',
inputSchema: {
type: 'object',
properties: {
identifier: {
type: 'string',
description: 'Protein identifier (gene symbol, UniProt ID, or PDB ID)'
},
structure_source: {
type: 'array',
items: {
type: 'string',
enum: ['pdb', 'alphafold']
},
description: 'Structure databases to query',
default: ['pdb', 'alphafold']
},
include_variants: {
type: 'boolean',
description: 'Include structure-variant mapping',
default: false
},
quality_threshold: {
type: 'number',
description: 'Minimum confidence score for AlphaFold structures',
minimum: 0,
maximum: 100,
default: 70
}
},
required: ['identifier']
}
},
{
name: 'get_expression_heatmap',
description: 'Generate gene expression heatmap data across tissues and cell types',
inputSchema: {
type: 'object',
properties: {
genes: {
type: 'array',
items: { type: 'string' },
description: 'List of gene symbols to analyze'
},
data_sources: {
type: 'array',
items: {
type: 'string',
enum: ['gtex', 'hpa']
},
description: 'Expression data sources',
default: ['gtex', 'hpa']
},
tissue_filter: {
type: 'array',
items: { type: 'string' },
description: 'Specific tissues to include (optional)'
},
normalization: {
type: 'string',
enum: ['tpm', 'fpkm', 'log2', 'zscore'],
description: 'Expression normalization method',
default: 'tpm'
},
clustering: {
type: 'boolean',
description: 'Perform hierarchical clustering',
default: true
}
},
required: ['genes']
}
},
{
name: 'functional_enrichment_analysis',
description: 'Comprehensive functional enrichment analysis with GO, KEGG, and GSEA',
inputSchema: {
type: 'object',
properties: {
gene_list: {
type: 'array',
items: { type: 'string' },
description: 'List of gene symbols for enrichment analysis'
},
databases: {
type: 'array',
items: {
type: 'string',
enum: ['go', 'kegg', 'reactome', 'hallmark']
},
description: 'Pathway databases to query',
default: ['go', 'kegg', 'reactome']
},
species: {
type: 'string',
description: 'Species for analysis',
default: 'Homo sapiens'
},
p_value_threshold: {
type: 'number',
description: 'P-value significance threshold',
default: 0.05
},
correction_method: {
type: 'string',
enum: ['bonferroni', 'fdr', 'none'],
description: 'Multiple testing correction method',
default: 'fdr'
},
min_overlap: {
type: 'integer',
description: 'Minimum gene overlap for pathway inclusion',
default: 2
},
include_gsea: {
type: 'boolean',
description: 'Include GSEA analysis if ranked gene list provided',
default: false
}
},
required: ['gene_list']
}
},
// TRUE Natural Language Processing Tools
{
name: 'process_natural_query',
description: 'Process natural language scientific queries with semantic understanding (TRUE NLP)',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Natural language query about genes, diseases, or biological processes'
},
conversation_id: {
type: 'string',
description: 'Optional conversation ID for context'
}
},
required: ['query']
}
},
{
name: 'continue_conversation',
description: 'Continue a conversation with follow-up questions and context awareness',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Follow-up question or query'
},
conversation_id: {
type: 'string',
description: 'Conversation ID for context'
}
},
required: ['query', 'conversation_id']
}
},
{
name: 'explain_understanding',
description: 'Explain how the NLP system understood and p