UNPKG

pubmed_mcp_server2

Version:

Advanced Model Context Protocol server for PubMed database access with MeSH optimization and citation analysis

1,140 lines 48.3 kB
import { parseString } from 'xml2js'; // PubMed E-utilities API base URLs const ESEARCH_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'; const EFETCH_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'; const ESUMMARY_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'; const ELINK_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'; // Note: Now using E-utilities efetch for PMC full text instead of BioC API // Literature Citation Exporter API const LIT_CITATION_URL = 'https://api.ncbi.nlm.nih.gov/lit/ctxp/v1'; // XML parser utility function parseXML(xml) { return new Promise((resolve, reject) => { parseString(xml, { explicitArray: false }, (err, result) => { if (err) { reject(err); } else { resolve(result); } }); }); } // Build query URL with parameters function buildUrl(baseUrl, params) { const url = new URL(baseUrl); Object.entries(params).forEach(([key, value]) => { url.searchParams.set(key, value.toString()); }); return url.toString(); } // Search PubMed articles export async function searchPubMed(query, maxResults = 20, startIndex = 0) { const params = { db: 'pubmed', term: query, retmax: maxResults, retstart: startIndex, retmode: 'xml', tool: 'mcp-pubmed-server', email: 'user@example.com' // Replace with actual email }; try { const url = buildUrl(ESEARCH_URL, params); const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const xmlData = await response.text(); const parsed = await parseXML(xmlData); const eSearchResult = parsed.eSearchResult; const idList = eSearchResult.IdList?.Id || []; return { idList: Array.isArray(idList) ? idList : [idList].filter(Boolean), count: parseInt(eSearchResult.Count || '0'), retMax: parseInt(eSearchResult.RetMax || '0'), retStart: parseInt(eSearchResult.RetStart || '0'), queryTranslation: eSearchResult.QueryTranslation }; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`PubMed search failed: ${errorMessage}`); } } // Get article summaries by PMIDs export async function getArticleSummaries(pmids) { if (pmids.length === 0) return []; const params = { db: 'pubmed', id: pmids.join(','), retmode: 'xml', tool: 'mcp-pubmed-server', email: 'user@example.com' }; try { const url = buildUrl(ESUMMARY_URL, params); const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const xmlData = await response.text(); const parsed = await parseXML(xmlData); const docSums = parsed.eSummaryResult?.DocSum || []; const summaries = Array.isArray(docSums) ? docSums : [docSums]; return summaries.map((docSum) => { const items = Array.isArray(docSum.Item) ? docSum.Item : [docSum.Item]; const itemMap = {}; items.forEach((item) => { if (item && item.$.Name) { itemMap[item.$.Name] = item._; } }); // Parse authors const authorList = itemMap.AuthorList || ''; const authors = authorList.split(',').map((author) => author.trim()).filter(Boolean); return { pmid: docSum.Id, title: itemMap.Title || 'No title available', authors: authors, journal: itemMap.Source || 'Unknown journal', publicationDate: itemMap.PubDate || 'Unknown date', doi: itemMap.DOI, pmcId: itemMap.PMCID }; }); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Failed to get article summaries: ${errorMessage}`); } } // Get full article details by PMIDs export async function getArticleDetails(pmids) { if (pmids.length === 0) return []; const params = { db: 'pubmed', id: pmids.join(','), retmode: 'xml', rettype: 'abstract', tool: 'mcp-pubmed-server', email: 'user@example.com' }; try { const url = buildUrl(EFETCH_URL, params); const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const xmlData = await response.text(); const parsed = await parseXML(xmlData); const pubmedArticles = parsed.PubmedArticleSet?.PubmedArticle || []; const articles = Array.isArray(pubmedArticles) ? pubmedArticles : [pubmedArticles]; return articles.map((article) => { const medlineCitation = article.MedlineCitation; const pmid = medlineCitation.PMID._ || medlineCitation.PMID; const articleData = medlineCitation.Article; // Extract title const title = articleData.ArticleTitle || 'No title available'; // Extract authors const authorList = articleData.AuthorList?.Author || []; const authors = (Array.isArray(authorList) ? authorList : [authorList]) .map((author) => { if (author.ForeName && author.LastName) { return `${author.ForeName} ${author.LastName}`; } else if (author.CollectiveName) { return author.CollectiveName; } return 'Unknown Author'; }) .filter(Boolean); // Extract journal info const journal = articleData.Journal?.Title || 'Unknown journal'; // Extract publication date const pubDate = articleData.Journal?.JournalIssue?.PubDate; let publicationDate = 'Unknown date'; if (pubDate) { const year = pubDate.Year || ''; const month = pubDate.Month || ''; const day = pubDate.Day || ''; publicationDate = [year, month, day].filter(Boolean).join(' '); } // Extract abstract const abstractTexts = articleData.Abstract?.AbstractText || []; let abstract = ''; if (Array.isArray(abstractTexts)) { abstract = abstractTexts.map((text) => { if (typeof text === 'string') return text; if (text._ && text.$.Label) return `${text.$.Label}: ${text._}`; return text._ || text; }).join('\n\n'); } else if (typeof abstractTexts === 'string') { abstract = abstractTexts; } else if (abstractTexts._) { abstract = abstractTexts._; } // Extract DOI and PMC ID const articleIds = article.PubmedData?.ArticleIdList?.ArticleId || []; const ids = Array.isArray(articleIds) ? articleIds : [articleIds]; let doi = ''; let pmcId = ''; ids.forEach((id) => { if (id.$.IdType === 'doi') { doi = id._; } else if (id.$.IdType === 'pmc') { pmcId = id._; } }); return { pmid, title, authors, journal, publicationDate, abstract, doi, pmcId, url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/` }; }); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Failed to get article details: ${errorMessage}`); } } // Get full abstract for specific PMIDs export async function getFullAbstract(pmids) { if (pmids.length === 0) return []; const params = { db: 'pubmed', id: pmids.join(','), retmode: 'xml', rettype: 'abstract', tool: 'mcp-pubmed-server', email: 'user@example.com' }; try { const url = buildUrl(EFETCH_URL, params); const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const xmlData = await response.text(); const parsed = await parseXML(xmlData); const pubmedArticles = parsed.PubmedArticleSet?.PubmedArticle || []; const articles = Array.isArray(pubmedArticles) ? pubmedArticles : [pubmedArticles]; return articles.map((article) => { const medlineCitation = article.MedlineCitation; const pmid = medlineCitation.PMID._ || medlineCitation.PMID; const articleData = medlineCitation.Article; // Extract title const title = articleData.ArticleTitle || 'No title available'; // Extract authors const authorList = articleData.AuthorList?.Author || []; const authors = (Array.isArray(authorList) ? authorList : [authorList]) .map((author) => { if (author.ForeName && author.LastName) { return `${author.ForeName} ${author.LastName}`; } else if (author.CollectiveName) { return author.CollectiveName; } return 'Unknown Author'; }) .filter(Boolean); // Extract journal info const journal = articleData.Journal?.Title || 'Unknown journal'; // Extract publication date const pubDate = articleData.Journal?.JournalIssue?.PubDate; let publicationDate = 'Unknown date'; if (pubDate) { const year = pubDate.Year || ''; const month = pubDate.Month || ''; const day = pubDate.Day || ''; publicationDate = [year, month, day].filter(Boolean).join(' '); } // Extract FULL abstract (without truncation) const abstractTexts = articleData.Abstract?.AbstractText || []; let fullAbstract = ''; if (Array.isArray(abstractTexts)) { fullAbstract = abstractTexts.map((text) => { if (typeof text === 'string') return text; if (text._ && text.$.Label) return `${text.$.Label}: ${text._}`; return text._ || text; }).join('\n\n'); } else if (typeof abstractTexts === 'string') { fullAbstract = abstractTexts; } else if (abstractTexts._) { fullAbstract = abstractTexts._; } // Extract DOI and PMC ID const articleIds = article.PubmedData?.ArticleIdList?.ArticleId || []; const ids = Array.isArray(articleIds) ? articleIds : [articleIds]; let doi = ''; let pmcId = ''; ids.forEach((id) => { if (id.$.IdType === 'doi') { doi = id._; } else if (id.$.IdType === 'pmc') { pmcId = id._; } }); return { pmid, title, authors, journal, publicationDate, fullAbstract, doi, pmcId }; }); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Failed to get full abstracts: ${errorMessage}`); } } // Get full text from PMC for articles with PMC ID using E-utilities export async function getFullText(pmcIds) { if (pmcIds.length === 0) return []; const results = []; // Process each PMC ID individually for (const pmcId of pmcIds) { try { // Clean PMC ID (remove PMC prefix if present) const cleanPmcId = pmcId.replace(/^PMC/, ''); // Use E-utilities efetch API for PMC database const params = { db: 'pmc', id: cleanPmcId, retmode: 'xml', tool: 'mcp-pubmed-server', email: 'user@example.com' }; const url = buildUrl(EFETCH_URL, params); const response = await fetch(url); if (!response.ok) { console.warn(`Failed to fetch full text for PMC${cleanPmcId}: ${response.status}`); continue; } const xmlData = await response.text(); // Check if we got an error response if (xmlData.includes('Error occurred') || xmlData.includes('esearchresult')) { console.warn(`No full text available for PMC${cleanPmcId}`); continue; } const parsed = await parseXML(xmlData); // Parse NLM XML format const articleSet = parsed['pmc-articleset']; if (!articleSet || !articleSet.article) { console.warn(`No article found in PMC${cleanPmcId}`); continue; } const article = Array.isArray(articleSet.article) ? articleSet.article[0] : articleSet.article; const front = article.front; const body = article.body; // Extract basic metadata const articleMeta = front?.[0]?.['article-meta']?.[0] || front?.['article-meta']; // Extract title let title = 'Unknown title'; const titleGroup = articleMeta?.['title-group']; if (titleGroup) { const articleTitle = Array.isArray(titleGroup) ? titleGroup[0]?.['article-title'] : titleGroup['article-title']; if (articleTitle) { title = Array.isArray(articleTitle) ? articleTitle[0] : articleTitle; // Clean up XML content in title if (typeof title === 'object' && title._) { title = title._; } } } // Extract PMID let pmid = ''; const articleIds = articleMeta?.['article-id']; if (articleIds) { const ids = Array.isArray(articleIds) ? articleIds : [articleIds]; const pmidEntry = ids.find((id) => id.$?.['pub-id-type'] === 'pmid'); if (pmidEntry) { pmid = pmidEntry._ || pmidEntry; } } // Extract sections from body const sections = []; let fullText = ''; if (body) { const bodyContent = Array.isArray(body) ? body[0] : body; // Function to extract text from any element recursively function extractText(element) { if (typeof element === 'string') { return element; } if (typeof element === 'object') { if (element._) { return element._; } let text = ''; Object.values(element).forEach((value) => { if (Array.isArray(value)) { value.forEach((item) => { text += extractText(item) + ' '; }); } else { text += extractText(value) + ' '; } }); return text.trim(); } return ''; } // Function to process sections function processSections(element, sectionTitle = 'Content') { if (element.sec) { const secs = Array.isArray(element.sec) ? element.sec : [element.sec]; secs.forEach((section) => { // Extract section title let secTitle = sectionTitle; if (section.title) { const titleText = extractText(section.title); if (titleText.trim()) { secTitle = titleText.trim(); } } // Extract section content let secContent = ''; // Get paragraphs if (section.p) { const paragraphs = Array.isArray(section.p) ? section.p : [section.p]; paragraphs.forEach((para) => { const paraText = extractText(para); if (paraText.trim()) { secContent += paraText.trim() + '\n\n'; } }); } // Process nested sections if (section.sec) { processSections(section, secTitle); } else if (secContent.trim()) { sections.push({ title: secTitle, content: secContent.trim() }); fullText += `${secTitle}\n${secContent}\n`; } }); } // Also check for direct paragraphs if (element.p) { const paragraphs = Array.isArray(element.p) ? element.p : [element.p]; let content = ''; paragraphs.forEach((para) => { const paraText = extractText(para); if (paraText.trim()) { content += paraText.trim() + '\n\n'; } }); if (content.trim()) { sections.push({ title: sectionTitle, content: content.trim() }); fullText += `${sectionTitle}\n${content}\n`; } } } // Process the body content processSections(bodyContent); } // If no sections were found, try to extract any available text if (sections.length === 0 && fullText.trim() === '') { // Fallback extraction function for the entire article function fallbackExtractText(element) { if (typeof element === 'string') { return element; } if (typeof element === 'object') { if (element._) { return element._; } let text = ''; Object.values(element).forEach((value) => { if (Array.isArray(value)) { value.forEach((item) => { text += fallbackExtractText(item) + ' '; }); } else if (typeof value === 'object' || typeof value === 'string') { text += fallbackExtractText(value) + ' '; } }); return text.trim(); } return ''; } const allText = fallbackExtractText(article); if (allText.trim()) { fullText = allText.trim(); sections.push({ title: 'Full Article Content', content: fullText }); } } if (fullText.trim() || sections.length > 0) { results.push({ pmid, pmcId: `PMC${cleanPmcId}`, title, fullText: fullText.trim(), sections }); } else { console.warn(`No extractable content found for PMC${cleanPmcId}`); } } catch (error) { console.warn(`Error processing PMC${pmcId}: ${error}`); continue; } } return results; } // Export citations in RIS format using Literature Citation Exporter API export async function exportRIS(pmids) { if (pmids.length === 0) { return { pmids: [], risData: '', successCount: 0, errorCount: 0, errors: [] }; } const errors = []; let allRISData = ''; let successCount = 0; // Process in batches to respect rate limits (3 requests/second) const batchSize = 10; // Process 10 PMIDs at a time const delayBetweenRequests = 400; // 400ms delay to stay under 3 req/sec for (let i = 0; i < pmids.length; i += batchSize) { const batch = pmids.slice(i, i + batchSize); try { // Construct URL for Literature Citation Exporter API const url = `${LIT_CITATION_URL}/pubmed/?format=ris&id=${batch.join(',')}`; const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const risData = await response.text(); // Check if we got valid RIS data if (risData.trim() && !risData.includes('Error') && !risData.includes('error')) { allRISData += risData; if (!risData.endsWith('\n')) { allRISData += '\n'; } // Count successful entries by counting 'TY -' lines const tyCount = (risData.match(/^TY -/gm) || []).length; successCount += tyCount; } else { errors.push(`No valid RIS data for PMIDs: ${batch.join(', ')}`); } // Rate limiting: wait between requests to respect 3 requests/second limit if (i + batchSize < pmids.length) { await new Promise(resolve => setTimeout(resolve, delayBetweenRequests)); } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); errors.push(`Failed to fetch RIS for PMIDs ${batch.join(', ')}: ${errorMessage}`); } } return { pmids, risData: allRISData, successCount, errorCount: pmids.length - successCount, errors }; } // Combined search and fetch function export async function searchAndFetchArticles(query, maxResults = 10) { try { // First, search for article IDs const searchResult = await searchPubMed(query, maxResults); if (searchResult.idList.length === 0) { return []; } // Then fetch full details for the articles const articles = await getArticleDetails(searchResult.idList); return articles; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Search and fetch failed: ${errorMessage}`); } } // Get citation count for specific PMIDs using elink export async function getCitationCounts(pmids) { if (pmids.length === 0) return []; console.error(`getCitationCounts called with ${pmids.length} PMIDs: ${pmids.join(', ')}`); const results = []; // Process each PMID individually to get accurate citation data for (const pmid of pmids) { try { // First get article title for display const articleDetails = await getArticleDetails([pmid]); const title = articleDetails.length > 0 ? articleDetails[0].title : 'Unknown title'; // Use elink to find articles that cite this PMID const params = { dbfrom: 'pubmed', db: 'pubmed', id: pmid, linkname: 'pubmed_pubmed_citedin', retmode: 'xml', tool: 'mcp-pubmed-server', email: 'user@example.com' }; const url = buildUrl(ELINK_URL, params); const response = await fetch(url); if (!response.ok) { results.push({ pmid, title, citationCount: 0, citingPmids: [], error: `HTTP error! status: ${response.status}` }); continue; } const xmlData = await response.text(); const parsed = await parseXML(xmlData); // Parse elink response const linkSets = parsed.eLinkResult?.LinkSet || []; const linkSetArray = Array.isArray(linkSets) ? linkSets : [linkSets]; let citingPmids = []; // Find the linkset with pubmed_pubmed_citedin for (const linkSet of linkSetArray) { if (linkSet.LinkSetDb) { const linkSetDbs = Array.isArray(linkSet.LinkSetDb) ? linkSet.LinkSetDb : [linkSet.LinkSetDb]; for (const linkSetDb of linkSetDbs) { if (linkSetDb.LinkName === 'pubmed_pubmed_citedin') { const links = linkSetDb.Link || []; const linkArray = Array.isArray(links) ? links : [links]; citingPmids = linkArray.map((link) => link.Id).filter(Boolean); break; } } } } results.push({ pmid, title, citationCount: citingPmids.length, citingPmids: citingPmids.slice(0, 100) // Limit to first 100 citing PMIDs for performance }); // Rate limiting to be respectful to NCBI servers await new Promise(resolve => setTimeout(resolve, 200)); // 200ms delay } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); results.push({ pmid, title: 'Unknown title', citationCount: 0, citingPmids: [], error: `Failed to get citation count: ${errorMessage}` }); } } return results; } // Common medical term mappings to MeSH terms const MESH_MAPPINGS = { // COVID-19 related 'covid': ['COVID-19', 'SARS-CoV-2'], 'covid-19': ['COVID-19', 'SARS-CoV-2'], 'coronavirus': ['COVID-19', 'SARS-CoV-2', 'Coronavirus'], 'sars-cov-2': ['SARS-CoV-2'], // Vaccination related 'vaccine': ['Vaccination', 'Vaccines', 'Immunization'], 'vaccination': ['Vaccination', 'Immunization'], 'immunization': ['Immunization', 'Vaccination'], 'immunisation': ['Immunization', 'Vaccination'], // Heart disease 'heart attack': ['Myocardial Infarction'], 'myocardial infarction': ['Myocardial Infarction'], 'heart disease': ['Heart Disease', 'Cardiovascular Diseases'], 'cardiac': ['Heart', 'Cardiovascular System'], 'cardiovascular': ['Cardiovascular Diseases'], // Cancer 'cancer': ['Neoplasms'], 'tumor': ['Neoplasms'], 'tumour': ['Neoplasms'], 'carcinoma': ['Carcinoma'], 'oncology': ['Medical Oncology', 'Neoplasms'], // Diabetes 'diabetes': ['Diabetes Mellitus'], 'diabetic': ['Diabetes Mellitus'], 'insulin': ['Insulin'], 'blood sugar': ['Blood Glucose'], 'glucose': ['Glucose', 'Blood Glucose'], // Mental health 'depression': ['Depression', 'Depressive Disorder'], 'anxiety': ['Anxiety', 'Anxiety Disorders'], 'mental health': ['Mental Health'], 'psychiatric': ['Mental Disorders', 'Psychiatry'], 'psychology': ['Psychology'], // Age groups 'elderly': ['Aged', 'Aged, 80 and over'], 'older adults': ['Aged'], 'seniors': ['Aged'], 'children': ['Child'], 'pediatric': ['Child', 'Pediatrics'], 'paediatric': ['Child', 'Pediatrics'], 'infant': ['Infant'], 'adolescent': ['Adolescent'], // Treatment types 'treatment': ['Therapeutics', 'Therapy'], 'therapy': ['Therapy'], 'drug': ['Pharmaceutical Preparations', 'Drug Therapy'], 'medication': ['Pharmaceutical Preparations'], 'surgery': ['Surgical Procedures, Operative'], 'operation': ['Surgical Procedures, Operative'], // Study types 'clinical trial': ['Clinical Trials as Topic', 'Randomized Controlled Trials as Topic'], 'randomized': ['Randomized Controlled Trials as Topic'], 'rct': ['Randomized Controlled Trials as Topic'], 'meta-analysis': ['Meta-Analysis as Topic'], 'systematic review': ['Systematic Reviews as Topic'], 'cohort': ['Cohort Studies'], 'case-control': ['Case-Control Studies'], // Common symptoms 'pain': ['Pain'], 'fever': ['Fever'], 'cough': ['Cough'], 'fatigue': ['Fatigue'], 'headache': ['Headache'], // Effectiveness terms 'effectiveness': ['Treatment Outcome', 'Efficacy'], 'efficacy': ['Treatment Outcome'], 'outcome': ['Treatment Outcome'], 'results': ['Treatment Outcome'] }; // Field tag mappings const FIELD_TAGS = { 'title': '[ti]', 'abstract': '[ab]', 'author': '[au]', 'journal': '[ta]', 'text word': '[tw]', 'mesh': '[MeSH Terms]', 'major': '[MeSH Major Topic]', 'publication type': '[pt]', 'language': '[la]', 'publication date': '[pdat]' }; // Optimize search query by adding MeSH terms and proper formatting export async function optimizeSearchQuery(originalQuery) { const improvements = []; const meshTermsUsed = []; const fieldTagsUsed = []; // Convert to lowercase for processing const lowerQuery = originalQuery.toLowerCase(); // Split query into words and phrases const words = lowerQuery.split(/\s+/); const queryParts = []; // Track which terms we've processed to avoid duplicates const processedTerms = new Set(); // Process each word and find MeSH mappings for (let i = 0; i < words.length; i++) { const word = words[i].replace(/[^\w\s-]/g, ''); // Remove punctuation except hyphens // Check for exact matches first if (MESH_MAPPINGS[word] && !processedTerms.has(word)) { const meshTerms = MESH_MAPPINGS[word]; const meshQuery = meshTerms.map(term => `"${term}"[MeSH Terms]`).join(' OR '); if (meshTerms.length > 1) { queryParts.push(`(${meshQuery})`); } else { queryParts.push(meshQuery); } meshTermsUsed.push(...meshTerms); fieldTagsUsed.push('[MeSH Terms]'); processedTerms.add(word); improvements.push(`Added MeSH terms for "${word}": ${meshTerms.join(', ')}`); } // Check for multi-word phrases else if (i < words.length - 1) { const twoWordPhrase = `${word} ${words[i + 1]}`; const threeWordPhrase = i < words.length - 2 ? `${word} ${words[i + 1]} ${words[i + 2]}` : ''; if (MESH_MAPPINGS[threeWordPhrase] && !processedTerms.has(threeWordPhrase)) { const meshTerms = MESH_MAPPINGS[threeWordPhrase]; const meshQuery = meshTerms.map(term => `"${term}"[MeSH Terms]`).join(' OR '); if (meshTerms.length > 1) { queryParts.push(`(${meshQuery})`); } else { queryParts.push(meshQuery); } meshTermsUsed.push(...meshTerms); fieldTagsUsed.push('[MeSH Terms]'); processedTerms.add(threeWordPhrase); improvements.push(`Added MeSH terms for "${threeWordPhrase}": ${meshTerms.join(', ')}`); i += 2; // Skip next two words } else if (MESH_MAPPINGS[twoWordPhrase] && !processedTerms.has(twoWordPhrase)) { const meshTerms = MESH_MAPPINGS[twoWordPhrase]; const meshQuery = meshTerms.map(term => `"${term}"[MeSH Terms]`).join(' OR '); if (meshTerms.length > 1) { queryParts.push(`(${meshQuery})`); } else { queryParts.push(meshQuery); } meshTermsUsed.push(...meshTerms); fieldTagsUsed.push('[MeSH Terms]'); processedTerms.add(twoWordPhrase); improvements.push(`Added MeSH terms for "${twoWordPhrase}": ${meshTerms.join(', ')}`); i += 1; // Skip next word } else if (!processedTerms.has(word)) { // Keep original word with text word tag for broader search queryParts.push(`"${word}"[tw]`); fieldTagsUsed.push('[tw]'); processedTerms.add(word); } } else if (!processedTerms.has(word)) { // Single word with no MeSH mapping queryParts.push(`"${word}"[tw]`); fieldTagsUsed.push('[tw]'); processedTerms.add(word); } } // Join query parts with AND let optimizedQuery = queryParts.join(' AND '); // Add general improvements if (meshTermsUsed.length > 0) { improvements.push(`Applied MeSH standardization for better precision`); } if (fieldTagsUsed.includes('[tw]')) { improvements.push(`Added text word tags for comprehensive search`); } // Add parentheses for complex queries if (queryParts.length > 2) { improvements.push(`Structured query with proper boolean logic`); } // If no improvements were made, provide a basic optimization if (improvements.length === 0) { optimizedQuery = `"${originalQuery}"[tw]`; improvements.push(`Added text word field tag for better search targeting`); fieldTagsUsed.push('[tw]'); } // Get estimated results by running a quick search let estimatedResults; try { const searchResult = await searchPubMed(optimizedQuery, 1); estimatedResults = searchResult.count; improvements.push(`Estimated ${estimatedResults.toLocaleString()} results with optimized query`); } catch (error) { // If search fails, don't include estimated results } return { originalQuery, optimizedQuery, improvements, meshTermsUsed: [...new Set(meshTermsUsed)], // Remove duplicates fieldTagsUsed: [...new Set(fieldTagsUsed)], // Remove duplicates estimatedResults }; } // Find similar articles using PubMed's ELink API export async function findSimilarArticles(pmid, maxResults = 10) { try { // Step 1: Use ELink to get similar articles from PubMed const elinkParams = { dbfrom: 'pubmed', db: 'pubmed', id: pmid, linkname: 'pubmed_pubmed', // Use the full similar articles linkname cmd: 'neighbor', retmode: 'xml', retmax: maxResults + 10, // Request extra to account for filtering tool: 'mcp-pubmed-server', email: 'user@example.com' }; const elinkUrl = buildUrl(ELINK_URL, elinkParams); const elinkResponse = await fetch(elinkUrl); if (!elinkResponse.ok) { throw new Error(`HTTP error! status: ${elinkResponse.status}`); } const elinkXml = await elinkResponse.text(); const elinkParsed = await parseXML(elinkXml); // Debug: Log the raw response structure console.error('ELink Response:', JSON.stringify(elinkParsed, null, 2).substring(0, 500)); // Extract similar PMIDs with scores // Check if we have any LinkSet if (!elinkParsed.eLinkResult || !elinkParsed.eLinkResult.LinkSet) { console.error('No LinkSet in response'); return []; } const linkSets = Array.isArray(elinkParsed.eLinkResult.LinkSet) ? elinkParsed.eLinkResult.LinkSet : [elinkParsed.eLinkResult.LinkSet]; const linkSet = linkSets[0]; if (!linkSet) { console.error('Empty LinkSet'); return []; } // Check if we have LinkSetDb (contains the similar articles) if (!linkSet.LinkSetDb) { console.error('No LinkSetDb found - article may not have similar articles'); return []; } const linkSetDbs = Array.isArray(linkSet.LinkSetDb) ? linkSet.LinkSetDb : [linkSet.LinkSetDb]; // Find the linkSetDb with our linkname let targetLinkSetDb = null; for (const lsdb of linkSetDbs) { if (lsdb.LinkName === 'pubmed_pubmed' || lsdb.LinkName === 'pubmed_pubmed_five') { targetLinkSetDb = lsdb; break; } } if (!targetLinkSetDb || !targetLinkSetDb.Link) { console.error('No similar articles links found'); return []; } const similarLinks = Array.isArray(targetLinkSetDb.Link) ? targetLinkSetDb.Link : [targetLinkSetDb.Link]; console.error(`Found ${similarLinks.length} similar articles for PMID ${pmid}`); // Get PMIDs and scores const similarPmids = similarLinks .slice(0, maxResults + 1) // Get one extra in case we need to filter out the original .map((link) => { // Handle both direct ID and nested structure const linkId = link.Id || link; return { pmid: linkId.toString(), score: link.Score ? link.Score.toString() : undefined }; }) .filter((item) => item.pmid && item.pmid !== pmid) // Exclude the original article .slice(0, maxResults); // Ensure we don't exceed maxResults if (similarPmids.length === 0) { return []; } // Step 2: Get detailed information for similar articles const pmidList = similarPmids.map(item => item.pmid); const articles = await getArticleDetails(pmidList); // Step 3: Combine article details with similarity scores const results = articles.map((article, index) => { const similarItem = similarPmids.find(item => item.pmid === article.pmid); return { pmid: article.pmid, title: article.title, authors: article.authors, journal: article.journal, publicationDate: article.publicationDate, abstract: article.abstract, similarityScore: similarItem?.score ? parseFloat(similarItem.score) : undefined, doi: article.doi, pmcId: article.pmcId }; }); // Sort by similarity score (higher is better) results.sort((a, b) => { if (a.similarityScore && b.similarityScore) { return b.similarityScore - a.similarityScore; } return 0; }); return results; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Failed to find similar articles: ${errorMessage}`); } } // Utility function to delay execution function delay(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } // Batch processing function export async function batchProcess(pmids, operations, maxConcurrency = 3) { const taskId = `batch_${Date.now()}`; const batchOperations = []; // Initialize operations for each PMID for (const pmid of pmids) { for (const operation of operations) { batchOperations.push({ pmid, operation: operation, status: 'pending' }); } } const results = {}; // Group operations by type for efficiency const operationGroups = {}; for (const pmid of pmids) { for (const operation of operations) { if (!operationGroups[operation]) { operationGroups[operation] = []; } operationGroups[operation].push(pmid); } } try { // Process each operation type for (const [operation, pmidList] of Object.entries(operationGroups)) { console.error(`Processing ${operation} for ${pmidList.length} PMIDs...`); // Update status to processing batchOperations .filter(op => op.operation === operation) .forEach(op => op.status = 'processing'); try { switch (operation) { case 'abstract': // Process in chunks to respect rate limits results.abstracts = []; for (let i = 0; i < pmidList.length; i += 20) { const chunk = pmidList.slice(i, i + 20); const abstracts = await getFullAbstract(chunk); results.abstracts.push(...abstracts); if (i + 20 < pmidList.length) { await delay(300); // Rate limiting } } break; case 'citations': results.citations = []; console.error(`Processing citations for ${pmidList.length} PMIDs...`); for (let i = 0; i < pmidList.length; i += 10) { const chunk = pmidList.slice(i, i + 10); console.error(`Processing citation chunk ${i / 10 + 1}: PMIDs ${chunk.join(', ')}`); const citations = await getCitationCounts(chunk); console.error(`Got ${citations.length} citation results for chunk`); results.citations.push(...citations); if (i + 10 < pmidList.length) { await delay(400); // Rate limiting } } console.error(`Total citation results: ${results.citations.length}`); break; case 'similar': results.similar = {}; for (const pmid of pmidList) { try { const similar = await findSimilarArticles(pmid, 5); // Limit to 5 for batch results.similar[pmid] = similar; await delay(300); // Rate limiting } catch (error) { console.error(`Error finding similar articles for ${pmid}:`, error); results.similar[pmid] = []; } } break; case 'ris_export': // Process in chunks for RIS export const risChunks = []; for (let i = 0; i < pmidList.length; i += 50) { const chunk = pmidList.slice(i, i + 50); try { const risResult = await exportRIS(chunk); if (risResult.risData) { risChunks.push(risResult.risData); } if (i + 50 < pmidList.length) { await delay(500); // Rate limiting } } catch (error) { console.error(`Error exporting RIS for chunk:`, error); } } results.risExports = risChunks.join('\n\n'); break; case 'full_text': results.fullTexts = []; // Filter PMIDs that have PMC IDs first const articlesWithPMC = await getArticleDetails(pmidList); const pmcIds = articlesWithPMC .filter(article => article.pmcId) .map(article => article.pmcId); if (pmcIds.length > 0) { for (let i = 0; i < pmcIds.length; i += 10) { const chunk = pmcIds.slice(i, i + 10); try { const fullTexts = await getFullText(chunk); results.fullTexts.push(...fullTexts); if (i + 10 < pmcIds.length) { await delay(600); // Rate limiting } } catch (error) { console.error(`Error getting full text for chunk:`, error); } } } break; } // Mark operations as completed batchOperations .filter(op => op.operation === operation) .forEach(op => op.status = 'completed'); } catch (error) { console.error(`Error processing ${operation}:`, error); // Mark operations as failed batchOperations .filter(op => op.operation === operation) .forEach(op => { op.status = 'error'; op.error = error instanceof Error ? error.message : String(error); }); } } // Calculate summary const summary = { total: batchOperations.length, completed: batchOperations.filter(op => op.status === 'completed').length, failed: batchOperations.filter(op => op.status === 'error').length, processing: batchOperations.filter(op => op.status === 'processing').length }; return { taskId, operations: batchOperations, summary, results }; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); throw new Error(`Batch processing failed: ${errorMessage}`); } } //# sourceMappingURL=pubmed-api.js.map