UNPKG

@cyanheads/pubmed-mcp-server

Version:

Search PubMed/Europe PMC, fetch articles and full text (PMC/EPMC/Unpaywall), citations, MeSH terms via MCP. STDIO or Streamable HTTP.

284 lines 13.6 kB
/**
 * @fileoverview PubMed fetch tool. Fetches full article metadata by PubMed IDs,
 * including abstracts, authors, journal info, and MeSH terms.
 * @module src/mcp-server/tools/definitions/fetch-articles.tool
 */
import { tool, z } from '@cyanheads/mcp-ts-core';
import { JsonRpcErrorCode } from '@cyanheads/mcp-ts-core/errors';
import { NCBI_SERVICE_ERRORS } from '../../../services/error-contracts.js';
import { getNcbiService } from '../../../services/ncbi/ncbi-service.js';
import { parseFullArticle } from '../../../services/ncbi/parsing/article-parser.js';
import { ensureArray } from '../../../services/ncbi/parsing/xml-helpers.js';
import {
  conceptMeta,
  EDAM_DATA_RETRIEVAL,
  EDAM_PUBMED_ID,
  SCHEMA_SCHOLARLY_ARTICLE,
} from './_concepts.js';
import { pmidStringSchema } from './_schemas.js';

/** One author entry; every field is optional. */
const AuthorSchema = z
  .object({
    lastName: z.string().optional().describe('Last name'),
    firstName: z.string().optional().describe('First/given name'),
    initials: z.string().optional().describe('Author initials'),
    collectiveName: z.string().optional().describe('Group/collective author name'),
    affiliationIndices: z
      .array(z.number())
      .optional()
      .describe('Indices into the top-level affiliations array'),
    orcid: z.string().optional().describe('ORCID identifier'),
  })
  .describe('Author record');

/** Journal issue date, either structured (year/month/day) or a free-form string. */
const JournalPublicationDateSchema = z
  .object({
    year: z.string().optional().describe('Publication year'),
    month: z.string().optional().describe('Publication month'),
    day: z.string().optional().describe('Publication day'),
    medlineDate: z.string().optional().describe('Non-standard date string (e.g. "2000 Spring")'),
  })
  .describe('Journal publication date');

/** Journal-level bibliographic details for an article. */
const JournalInfoSchema = z
  .object({
    title: z.string().optional().describe('Full journal title'),
    isoAbbreviation: z.string().optional().describe('ISO journal abbreviation'),
    issn: z.string().optional().describe('Print ISSN'),
    eIssn: z.string().optional().describe('Electronic ISSN'),
    volume: z.string().optional().describe('Volume number'),
    issue: z.string().optional().describe('Issue number'),
    pages: z.string().optional().describe('Page range (e.g. "48-55")'),
    publicationDate: JournalPublicationDateSchema.optional(),
  })
  .describe('Journal information');

/** A qualifier (subheading) attached to a MeSH descriptor. */
const MeshQualifierSchema = z
  .object({
    qualifierName: z.string().describe('Qualifier/subheading name'),
    qualifierUi: z.string().optional().describe('Qualifier unique ID'),
    isMajorTopic: z.boolean().describe('Whether this qualifier is a major topic'),
  })
  .describe('MeSH qualifier/subheading');

/** A MeSH descriptor with its optional qualifiers. */
const MeshTermSchema = z
  .object({
    descriptorName: z.string().optional().describe('MeSH descriptor name'),
    descriptorUi: z.string().optional().describe('MeSH descriptor unique ID'),
    isMajorTopic: z.boolean().describe('Whether this is a major topic of the article'),
    qualifiers: z.array(MeshQualifierSchema).optional().describe('MeSH qualifiers/subheadings'),
  })
  .describe('MeSH descriptor term');

/** A funding grant entry. */
const GrantSchema = z
  .object({
    grantId: z.string().optional().describe('Grant identifier'),
    acronym: z.string().optional().describe('Grant acronym'),
    agency: z.string().optional().describe('Funding agency'),
    country: z.string().optional().describe('Agency country'),
  })
  .describe('Grant record');

/** A typed date associated with the article. */
const ArticleDateSchema = z
  .object({
    dateType: z.string().optional().describe('Date type'),
    year: z.string().optional().describe('Year'),
    month: z.string().optional().describe('Month'),
    day: z.string().optional().describe('Day'),
  })
  .describe('Dated article event');

/** Shape of one article in the tool output; every field is optional. */
const FetchedArticleSchema = z
  .object({
    pmid: z.string().optional().describe('PubMed ID'),
    title: z.string().optional().describe('Article title'),
    abstractText: z.string().optional().describe('Abstract text'),
    affiliations: z.array(z.string()).optional().describe('Deduplicated author affiliations'),
    authors: z.array(AuthorSchema).optional().describe('Author list'),
    journalInfo: JournalInfoSchema.optional(),
    doi: z.string().optional().describe('DOI'),
    pmcId: z.string().optional().describe('PMC ID'),
    pubmedUrl: z.string().optional().describe('PubMed article URL'),
    pmcUrl: z.string().optional().describe('PMC full text URL'),
    publicationTypes: z.array(z.string()).optional().describe('Publication types'),
    keywords: z.array(z.string()).optional().describe('Keywords'),
    meshTerms: z.array(MeshTermSchema).optional().describe('MeSH terms'),
    grantList: z.array(GrantSchema).optional().describe('Grant information'),
    articleDates: z.array(ArticleDateSchema).optional().describe('Article dates'),
  })
  .describe('Parsed PubMed article');

/**
 * `pubmed_fetch_articles` tool definition.
 *
 * The handler calls EFetch on the `pubmed` database for the requested PMIDs,
 * parses each returned `PubmedArticle` that has a `MedlineCitation`, attaches
 * PubMed/PMC URLs, and reports requested PMIDs that produced no article.
 * `format` renders the result as a Markdown text block.
 */
export const fetchArticlesTool = tool('pubmed_fetch_articles', {
  description:
    'Fetch full article metadata by PubMed IDs. Returns detailed article information including abstract, authors, journal, MeSH terms.',
  annotations: { readOnlyHint: true, openWorldHint: true },
  _meta: conceptMeta([SCHEMA_SCHOLARLY_ARTICLE, EDAM_DATA_RETRIEVAL, EDAM_PUBMED_ID]),
  sourceUrl:
    'https://github.com/cyanheads/pubmed-mcp-server/blob/main/src/mcp-server/tools/definitions/fetch-articles.tool.ts',
  errors: [
    ...NCBI_SERVICE_ERRORS,
    {
      reason: 'invalid_efetch_response',
      code: JsonRpcErrorCode.SerializationError,
      when: 'NCBI EFetch returned a payload missing the PubmedArticleSet wrapper.',
      recovery:
        'Retry once; if it persists, NCBI returned malformed data — try fewer PMIDs at once.',
    },
  ],
  input: z.object({
    pmids: z.array(pmidStringSchema).min(1).max(200).describe('PubMed IDs to fetch'),
    includeMesh: z.boolean().default(true).describe('Include MeSH terms'),
    includeGrants: z.boolean().default(false).describe('Include grant information'),
  }),
  output: z.object({
    articles: z.array(FetchedArticleSchema).describe('Parsed articles'),
    totalReturned: z.number().describe('Number of articles returned'),
    unavailablePmids: z
      .array(z.string())
      .optional()
      .describe('PMIDs that returned no article data'),
  }),

  /**
   * Fetches and parses the requested articles.
   *
   * @param input - Validated tool input (`pmids`, `includeMesh`, `includeGrants`).
   * @param ctx - Tool context; this handler uses `log`, `signal`, `fail` and `recoveryFor`.
   * @returns `{ articles, totalReturned, unavailablePmids? }`.
   * @throws The error built by `ctx.fail('invalid_efetch_response', ...)` when the
   *   response has no `PubmedArticleSet` key.
   */
  async handler(input, ctx) {
    ctx.log.info('Executing pubmed_fetch', { pmidCount: input.pmids.length });

    const xmlData = await getNcbiService().eFetch(
      { db: 'pubmed', id: input.pmids.join(','), retmode: 'xml' },
      // POST is requested once the batch reaches 100 PMIDs.
      // NOTE(review): presumably to keep the request URL short — confirm in the NCBI service.
      { retmode: 'xml', usePost: input.pmids.length >= 100, signal: ctx.signal },
    );

    if (!xmlData || !('PubmedArticleSet' in xmlData)) {
      throw ctx.fail(
        'invalid_efetch_response',
        'Invalid EFetch response from NCBI: missing PubmedArticleSet',
        {
          requestedPmids: input.pmids.length,
          ...ctx.recoveryFor('invalid_efetch_response'),
        },
      );
    }

    const rawArticles = xmlData.PubmedArticleSet?.PubmedArticle;
    const xmlArticles = rawArticles ? ensureArray(rawArticles) : [];

    const articles = xmlArticles
      .filter((a) => a?.MedlineCitation)
      .map((a) => {
        const parsed = parseFullArticle(a, {
          includeMesh: input.includeMesh,
          includeGrants: input.includeGrants,
        });
        return {
          ...parsed,
          // Only build a PubMed link when a PMID was parsed; otherwise the
          // template would yield ".../undefined/".
          ...(parsed.pmid && {
            pubmedUrl: `https://pubmed.ncbi.nlm.nih.gov/${parsed.pmid}/`,
          }),
          ...(parsed.pmcId && {
            pmcUrl: `https://www.ncbi.nlm.nih.gov/pmc/articles/${parsed.pmcId}/`,
          }),
        };
      });

    const returnedPmids = new Set(articles.map((a) => a.pmid).filter(Boolean));
    const unavailable = input.pmids.filter((id) => !returnedPmids.has(id));

    ctx.log.info('pubmed_fetch completed', {
      requested: input.pmids.length,
      returned: articles.length,
    });

    return {
      articles,
      totalReturned: articles.length,
      ...(unavailable.length > 0 && { unavailablePmids: unavailable }),
    };
  },

  /**
   * Renders the handler result as Markdown.
   *
   * @param result - Value returned by `handler`.
   * @returns A single-element array holding one `text` content item.
   */
  format: (result) => {
    const lines = [`## PubMed Articles`, `**Articles Returned:** ${result.totalReturned}`];

    if (result.unavailablePmids?.length) {
      lines.push(`**Unavailable PMIDs:** ${result.unavailablePmids.join(', ')}`);
    }
    if (result.totalReturned === 0) {
      lines.push(
        `\n> No articles were returned. These PMIDs may be invalid, unpublished, or withdrawn. Try \`pubmed_search_articles\` to discover valid PMIDs.`,
      );
    }

    for (const a of result.articles) {
      lines.push(`\n### ${a.title ?? a.pmid ?? 'Unknown'}`);

      if (a.authors?.length) {
        lines.push(`\n**Authors (${a.authors.length}):**`);
        for (const au of a.authors) {
          lines.push(`- ${formatAuthor(au)}`);
        }
      }

      if (a.affiliations?.length) {
        lines.push(`\n**Affiliations:**`);
        // The printed index is what authors' "[aff …]" markers refer to.
        for (const [i, aff] of a.affiliations.entries()) {
          lines.push(`- [${i}] ${aff}`);
        }
      }

      const ji = a.journalInfo;
      if (ji) {
        const parts = [];
        if (ji.title) parts.push(ji.title);
        if (ji.isoAbbreviation && ji.isoAbbreviation !== ji.title) {
          parts.push(ji.title ? `(${ji.isoAbbreviation})` : ji.isoAbbreviation);
        }
        const pubDateStr = formatPublicationDate(ji.publicationDate);
        if (pubDateStr) parts.push(pubDateStr);
        if (ji.volume) parts.push(`**${ji.volume}**${ji.issue ? `(${ji.issue})` : ''}`);
        if (ji.pages) parts.push(ji.pages);
        if (ji.issn) parts.push(`ISSN ${ji.issn}`);
        if (ji.eIssn) parts.push(`eISSN ${ji.eIssn}`);
        if (parts.length) lines.push(`\n**Journal:** ${parts.join(', ')}`);
      }

      if (a.publicationTypes?.length) lines.push(`**Type:** ${a.publicationTypes.join(', ')}`);
      if (a.pmid) lines.push(`**PMID:** ${a.pmid}`);
      if (a.doi) lines.push(`**DOI:** ${a.doi}`);
      if (a.pmcId) lines.push(`**PMCID:** ${a.pmcId}`);
      if (a.pubmedUrl) lines.push(`**PubMed:** ${a.pubmedUrl}`);
      if (a.pmcUrl) lines.push(`**PMC:** ${a.pmcUrl}`);

      if (a.articleDates?.length) {
        // Drop entries that render to nothing so the list has no empty slots.
        const dates = a.articleDates.map(formatArticleDate).filter(Boolean);
        if (dates.length) lines.push(`**Article Dates:** ${dates.join('; ')}`);
      }

      if (a.abstractText) lines.push(`\n#### Abstract\n${a.abstractText}`);
      if (a.keywords?.length) lines.push(`\n**Keywords:** ${a.keywords.join(', ')}`);

      if (a.meshTerms?.length) {
        lines.push(`\n#### MeSH Terms`);
        for (const m of a.meshTerms) {
          // descriptorName is optional in the schema; never interpolate it raw.
          const descriptor =
            [m.descriptorName, m.descriptorUi && `[${m.descriptorUi}]`]
              .filter(Boolean)
              .join(' ') || 'Unknown';
          const major = m.isMajorTopic ? ' (major)' : '';
          const qualifiers = m.qualifiers?.length
            ? ` (${m.qualifiers
                .map((q) => {
                  const name = q.qualifierUi
                    ? `${q.qualifierName} [${q.qualifierUi}]`
                    : q.qualifierName;
                  return `${name}${q.isMajorTopic ? ' (major)' : ''}`;
                })
                .join(', ')})`
            : '';
          lines.push(`- ${descriptor}${major}${qualifiers}`);
        }
      }

      if (a.grantList?.length) {
        // Render grants first so the heading is only emitted when at least
        // one grant has something to show.
        const grantLines = [];
        for (const g of a.grantList) {
          const grantId =
            g.grantId && g.acronym ? `${g.grantId} (${g.acronym})` : (g.grantId ?? g.acronym ?? '');
          const parts = [grantId, g.agency, g.country].filter(Boolean);
          if (parts.length) grantLines.push(`- ${parts.join(' — ')}`);
        }
        if (grantLines.length) {
          lines.push(`\n#### Grants`);
          lines.push(...grantLines);
        }
      }
    }

    return [{ type: 'text', text: lines.join('\n') }];
  },
});

/**
 * Formats one author as a single line: collective name, personal name (or
 * initials when no name is present), initials in parentheses, affiliation
 * indices, and ORCID — each only when available.
 *
 * @param au - Author record.
 * @returns The formatted author, or `'Unknown'` when no field is populated.
 */
function formatAuthor(au) {
  const parts = [];
  if (au.collectiveName) parts.push(`${au.collectiveName} (collective)`);
  const name = [au.firstName, au.lastName].filter(Boolean).join(' ');
  if (name) parts.push(name);
  else if (au.initials) parts.push(au.initials);
  if (au.initials && name) parts.push(`(${au.initials})`);
  if (au.affiliationIndices?.length) {
    parts.push(`[aff ${au.affiliationIndices.join(',')}]`);
  }
  if (au.orcid) parts.push(`· ORCID ${au.orcid}`);
  return parts.join(' ') || 'Unknown';
}

/**
 * Formats a journal publication date. When both a free-form `medlineDate` and
 * structured parts exist, the structured parts follow in parentheses.
 *
 * @param pd - Publication date record, possibly undefined.
 * @returns The formatted date, or `undefined` when nothing is available.
 */
function formatPublicationDate(pd) {
  if (!pd) return;
  const ymd = [pd.year, pd.month, pd.day].filter(Boolean).join(' ');
  if (pd.medlineDate && ymd) return `${pd.medlineDate} (${ymd})`;
  return pd.medlineDate || ymd || undefined;
}

/**
 * Formats an article date as `"<dateType> <year>-<month>-<day>"`, omitting
 * whichever parts are missing (no trailing space when only the type is set).
 *
 * @param ad - Article date record.
 * @returns The formatted date; an empty string when no field is populated.
 */
function formatArticleDate(ad) {
  const datePart = [ad.year, ad.month, ad.day].filter(Boolean).join('-');
  return [ad.dateType, datePart].filter(Boolean).join(' ');
}
//# sourceMappingURL=fetch-articles.tool.js.map