UNPKG

@cyanheads/pubmed-mcp-server

Version:

A Model Context Protocol (MCP) server enabling AI agents to intelligently search, retrieve, and analyze biomedical literature from PubMed via NCBI E-utilities. Built on the mcp-ts-template for robust, production-ready performance.

311 lines (310 loc) 13.6 kB
/**
 * @fileoverview Logic for the fetch_pubmed_content MCP tool.
 * Handles EFetch queries for specific PMIDs and formats the results.
 * This tool can fetch various details from PubMed including abstracts, full XML,
 * MEDLINE text, and citation data.
 * @module src/mcp-server/tools/fetchPubMedContent/logic
 */
import { z } from "zod";
import { getNcbiService } from "../../../services/NCBI/ncbiService.js";
import { BaseErrorCode, McpError } from "../../../types-global/errors.js";
import { logger, requestContextService, sanitizeInputForLogging, } from "../../../utils/index.js";
import { ensureArray, extractAbstractText, extractArticleDates, extractAuthors, extractDoi, extractGrants, extractJournalInfo, extractKeywords, extractMeshTerms, extractPmid, extractPublicationTypes, getText, } from "../../../utils/parsing/ncbi-parsing/index.js";

/**
 * Input schema for the fetch_pubmed_content tool.
 *
 * Records are selected either by an explicit `pmids` list or by an ESearch
 * history reference (`queryKey` + `webEnv`); the refinement below rejects
 * inputs that supply neither, both, or only half of the history pair, and
 * rejects `retstart`/`retmax` outside of history mode.
 */
export const FetchPubMedContentInputSchema = z
  .object({
    pmids: z
      .array(z.string().regex(/^\d+$/))
      .max(200, "Max 200 PMIDs per call if not using history.")
      .optional()
      .describe("An array of PubMed Unique Identifiers (PMIDs) for which to fetch content. Use this OR queryKey/webEnv."),
    queryKey: z
      .string()
      .optional()
      .describe("Query key from ESearch history server. If used, webEnv must also be provided. Use this OR pmids."),
    webEnv: z
      .string()
      .optional()
      .describe("Web environment from ESearch history server. If used, queryKey must also be provided. Use this OR pmids."),
    retstart: z
      .number()
      .int()
      .min(0)
      .optional()
      .describe("Sequential index of the first record to retrieve (0-based). Used with queryKey/webEnv."),
    retmax: z
      .number()
      .int()
      .min(1)
      .optional()
      .describe("Maximum number of records to retrieve. Used with queryKey/webEnv."),
    detailLevel: z
      .enum(["abstract_plus", "full_xml", "medline_text", "citation_data"])
      .optional()
      .default("abstract_plus")
      .describe("Specifies the level of detail for the fetched content. Options: 'abstract_plus' (parsed details including abstract, authors, journal, DOI, etc.), 'full_xml' (raw PubMedArticle XML), 'medline_text' (MEDLINE format), 'citation_data' (minimal parsed data for citations). Defaults to 'abstract_plus'."),
    includeMeshTerms: z
      .boolean()
      .optional()
      .default(true)
      .describe("Applies to 'abstract_plus' and 'citation_data' if parsed from XML."),
    includeGrantInfo: z
      .boolean()
      .optional()
      .default(false)
      .describe("Applies to 'abstract_plus' if parsed from XML."),
    outputFormat: z
      .enum(["json", "raw_text"])
      .optional()
      .default("json")
      .describe("Specifies the final output format of the tool. \n- 'json' (default): Wraps the data in a standard JSON object. \n- 'raw_text': Returns raw text for 'medline_text' or 'full_xml' detailLevels. For other detailLevels, 'outputFormat' defaults to 'json'."),
  })
  .superRefine((data, ctx) => {
    if (data.queryKey && !data.webEnv) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "webEnv is required if queryKey is provided.",
        path: ["webEnv"],
      });
    }
    if (!data.queryKey && data.webEnv) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "queryKey is required if webEnv is provided.",
        path: ["queryKey"],
      });
    }
    if ((!data.pmids || data.pmids.length === 0) && !(data.queryKey && data.webEnv)) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "Either pmids (non-empty array) or both queryKey and webEnv must be provided.",
        path: ["pmids"],
      });
    }
    if (data.pmids && data.pmids.length > 0 && (data.queryKey || data.webEnv)) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "Cannot use pmids and queryKey/webEnv simultaneously. Please choose one method.",
        path: ["pmids"],
      });
    }
    if ((data.retstart !== undefined || data.retmax !== undefined) && !(data.queryKey && data.webEnv)) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "retstart/retmax can only be used with queryKey and webEnv.",
        path: ["retstart"],
      });
    }
  });

/** Endpoint used only to build the informational URL reported back to the caller. */
const EFETCH_BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";

/**
 * Produces a short string preview of an arbitrary value for error context.
 *
 * `JSON.stringify` returns `undefined` for `undefined`/functions/symbols and
 * throws on cyclic structures, so both cases are handled here instead of
 * letting a secondary TypeError mask the error being reported.
 *
 * @param {unknown} value - Value to preview.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string} Truncated textual preview.
 */
function previewForLog(value, maxLength = 200) {
  try {
    return (JSON.stringify(value) ?? String(value)).substring(0, maxLength);
  } catch {
    return "[unserializable]";
  }
}

/**
 * Converts a parsed EFetch XML payload into a list of article objects.
 *
 * Entries without a MedlineCitation or without an extractable PMID are skipped.
 * `meshTerms` and `grantList` are only attached when the corresponding
 * `input.includeMeshTerms` / `input.includeGrantInfo` flag is truthy.
 *
 * @param {unknown} xmlData - Parsed EFetch response; must be an object with a
 *   `PubmedArticleSet` property.
 * @param {object} input - Tool input; only `includeMeshTerms` and
 *   `includeGrantInfo` are read here.
 * @param {object} parentContext - Request context; its `requestId` is recorded
 *   as the parent of the context created for this operation.
 * @returns {object[]} Parsed articles (empty when the set holds no articles).
 * @throws {McpError} PARSING_ERROR when `xmlData` is not an object containing
 *   `PubmedArticleSet`.
 */
function parsePubMedArticleSet(xmlData, input, parentContext) {
  const articles = [];
  const operationContext = requestContextService.createRequestContext({
    parentRequestId: parentContext.requestId,
    operation: "parsePubMedArticleSet",
  });

  if (!xmlData || typeof xmlData !== "object" || !("PubmedArticleSet" in xmlData)) {
    throw new McpError(BaseErrorCode.PARSING_ERROR, "Invalid or unexpected structure for xmlData in parsePubMedArticleSet.", {
      ...operationContext,
      xmlDataType: typeof xmlData,
      // previewForLog never throws, so an undefined payload still surfaces as McpError.
      xmlDataPreview: sanitizeInputForLogging(previewForLog(xmlData)),
    });
  }

  const articleSet = xmlData.PubmedArticleSet;
  if (!articleSet || !articleSet.PubmedArticle) {
    logger.warning("PubmedArticleSet or PubmedArticle array not found in EFetch XML response.", operationContext);
    return articles;
  }

  // A single article may arrive as a bare object rather than an array.
  const pubmedArticlesXml = ensureArray(articleSet.PubmedArticle);
  for (const articleXml of pubmedArticlesXml) {
    if (!articleXml || typeof articleXml !== "object") {
      continue;
    }
    const medlineCitation = articleXml.MedlineCitation;
    if (!medlineCitation) {
      continue;
    }
    const pmid = extractPmid(medlineCitation);
    if (!pmid) {
      continue;
    }

    const articleNode = medlineCitation.Article;
    const parsedArticle = {
      pmid,
      title: articleNode?.ArticleTitle ? getText(articleNode.ArticleTitle) : undefined,
      abstractText: articleNode?.Abstract ? extractAbstractText(articleNode.Abstract) : undefined,
      authors: articleNode?.AuthorList ? extractAuthors(articleNode.AuthorList) : undefined,
      journalInfo: articleNode?.Journal ? extractJournalInfo(articleNode.Journal, medlineCitation) : undefined,
      publicationTypes: articleNode?.PublicationTypeList
        ? extractPublicationTypes(articleNode.PublicationTypeList)
        : undefined,
      keywords: articleNode?.KeywordList ? extractKeywords(articleNode.KeywordList) : undefined,
      doi: articleNode ? extractDoi(articleNode) : undefined,
      articleDates: articleNode?.ArticleDate ? extractArticleDates(articleNode) : undefined,
    };
    if (input.includeMeshTerms) {
      parsedArticle.meshTerms = medlineCitation.MeshHeadingList
        ? extractMeshTerms(medlineCitation.MeshHeadingList)
        : undefined;
    }
    if (input.includeGrantInfo) {
      parsedArticle.grantList = articleNode?.GrantList ? extractGrants(articleNode.GrantList) : undefined;
    }
    articles.push(parsedArticle);
  }
  return articles;
}

/**
 * Builds the EFetch query parameters for validated tool input.
 *
 * Key insertion order (db, selection keys, retmode, rettype) is kept stable
 * because the same object is serialized into the URL reported to the caller.
 *
 * @param {object} params - Validated tool input.
 * @returns {Record<string, string>} EFetch parameters; `rettype` is present
 *   only for `medline_text`.
 */
function buildEFetchParams(params) {
  const eFetchParams = { db: "pubmed" };
  if (params.queryKey && params.webEnv) {
    eFetchParams.query_key = params.queryKey;
    eFetchParams.WebEnv = params.webEnv;
    if (params.retstart !== undefined) {
      eFetchParams.retstart = String(params.retstart);
    }
    if (params.retmax !== undefined) {
      eFetchParams.retmax = String(params.retmax);
    }
  } else if (params.pmids && params.pmids.length > 0) {
    eFetchParams.id = params.pmids.join(",");
  }

  // Only MEDLINE output is requested as text; every other detail level uses XML.
  const isMedline = params.detailLevel === "medline_text";
  eFetchParams.retmode = isMedline ? "text" : "xml";
  if (isMedline) {
    eFetchParams.rettype = "medline";
  }
  return eFetchParams;
}

/**
 * Serializes the standard JSON envelope returned for `outputFormat: "json"`.
 *
 * @param {object} params - Validated tool input (reads `pmids`).
 * @param {unknown[]} articles - Article payload for the `articles` field.
 * @param {Set<string>} foundPmids - PMIDs present in the response; requested
 *   PMIDs missing from this set are listed under `notFoundPmids`.
 * @param {string} eFetchUrl - URL reported under `eFetchDetails.urls`.
 * @returns {string} JSON text.
 */
function buildJsonEnvelope(params, articles, foundPmids, eFetchUrl) {
  return JSON.stringify({
    requestedPmids: params.pmids ?? "N/A (history query)",
    articles,
    notFoundPmids: params.pmids?.filter((pmid) => !foundPmids.has(pmid)) ?? [],
    eFetchDetails: { urls: [eFetchUrl] },
  });
}

/**
 * Reduces a fully parsed article to the subset of fields emitted for
 * `detailLevel: "citation_data"`.
 *
 * @param {object} article - Article produced by parsePubMedArticleSet.
 * @param {boolean} includeMeshTerms - Whether to carry over `meshTerms`.
 * @returns {object} Citation-oriented projection of the article.
 */
function toCitationData(article, includeMeshTerms) {
  return {
    pmid: article.pmid,
    title: article.title,
    authors: article.authors?.map((author) => ({
      lastName: author.lastName,
      initials: author.initials,
    })),
    journalInfo: {
      title: article.journalInfo?.title,
      isoAbbreviation: article.journalInfo?.isoAbbreviation,
      volume: article.journalInfo?.volume,
      issue: article.journalInfo?.issue,
      pages: article.journalInfo?.pages,
      year: article.journalInfo?.publicationDate?.year,
    },
    doi: article.doi,
    ...(includeMeshTerms && { meshTerms: article.meshTerms }),
  };
}

/**
 * Executes the fetch_pubmed_content tool: validates the input, performs the
 * EFetch call through the NCBI service, and formats the response according to
 * `detailLevel` and `outputFormat`.
 *
 * The validated (schema-parsed) input is used for all further processing so
 * that schema defaults apply even when the caller passes un-parsed input.
 *
 * NOTE(review): the shape of the value resolved by `ncbiService.eFetch` is not
 * visible in this module; the code below treats it as text for `medline_text`
 * and raw `full_xml`, and as a parsed object otherwise — confirm against the
 * service implementation.
 *
 * @param {object} input - Tool input matching FetchPubMedContentInputSchema.
 * @param {object} parentRequestContext - Request context of the caller; its
 *   `requestId` becomes the parent of the context created here.
 * @returns {Promise<{content: string, articlesReturned: number, eFetchUrl: string}>}
 *   Formatted output text, number of articles detected, and the EFetch URL.
 * @throws {McpError} VALIDATION_ERROR when the input fails schema validation;
 *   PARSING_ERROR (from parsePubMedArticleSet) when the parsed-XML payload is
 *   not usable.
 */
export async function fetchPubMedContentLogic(input, parentRequestContext) {
  const toolLogicContext = requestContextService.createRequestContext({
    parentRequestId: parentRequestContext.requestId,
    operation: "fetchPubMedContentLogic",
    input: sanitizeInputForLogging(input),
  });

  const validationResult = FetchPubMedContentInputSchema.safeParse(input);
  if (!validationResult.success) {
    throw new McpError(
      BaseErrorCode.VALIDATION_ERROR,
      validationResult.error.issues[0]?.message || "Invalid input",
      { ...toolLogicContext, details: validationResult.error.flatten() },
    );
  }
  // Use the parsed data, not the raw input, so schema defaults are honored.
  const params = validationResult.data;

  const ncbiService = getNcbiService();
  logger.info("Executing fetch_pubmed_content tool", toolLogicContext);

  const eFetchParams = buildEFetchParams(params);
  const eFetchUrl = `${EFETCH_BASE_URL}?${new URLSearchParams(eFetchParams).toString()}`;
  const shouldReturnRawXml = params.detailLevel === "full_xml" && params.outputFormat === "raw_text";

  const eFetchResponseData = await ncbiService.eFetch(eFetchParams, toolLogicContext, {
    retmode: eFetchParams.retmode,
    rettype: eFetchParams.rettype,
    returnRawXml: shouldReturnRawXml,
  });

  let finalOutputText;
  let articlesCount = 0;

  if (params.detailLevel === "medline_text") {
    const medlineText = String(eFetchResponseData);
    // Each MEDLINE record is detected by a line starting with "PMID- <digits>".
    const foundPmidsInMedline = new Set(
      Array.from(medlineText.matchAll(/^PMID- (\d+)/gm), (match) => match[1]),
    );
    articlesCount = foundPmidsInMedline.size;
    finalOutputText =
      params.outputFormat === "raw_text"
        ? medlineText
        : buildJsonEnvelope(params, [{ medlineText }], foundPmidsInMedline, eFetchUrl);
  } else if (params.detailLevel === "full_xml") {
    if (params.outputFormat === "raw_text") {
      finalOutputText = String(eFetchResponseData);
      articlesCount = (finalOutputText.match(/<PubmedArticle>/g) || []).length;
    } else {
      const articlesXml = ensureArray(eFetchResponseData?.PubmedArticleSet?.PubmedArticle || []);
      articlesCount = articlesXml.length;
      const foundPmidsInXml = new Set();
      const articlesPayload = articlesXml.map((articleXml) => {
        // Guard like parsePubMedArticleSet does: an entry may lack MedlineCitation.
        const medlineCitation = articleXml?.MedlineCitation;
        const pmid = (medlineCitation && extractPmid(medlineCitation)) || "unknown_pmid";
        if (pmid !== "unknown_pmid") {
          foundPmidsInXml.add(pmid);
        }
        return { pmid, fullXmlContent: articleXml };
      });
      finalOutputText = buildJsonEnvelope(params, articlesPayload, foundPmidsInXml, eFetchUrl);
    }
  } else {
    // "abstract_plus" and "citation_data" are both derived from the parsed XML.
    const parsedArticles = parsePubMedArticleSet(eFetchResponseData, params, toolLogicContext);
    articlesCount = parsedArticles.length;
    const foundPmids = new Set(parsedArticles.map((article) => article.pmid));
    const articlesToReturn =
      params.detailLevel === "citation_data"
        ? parsedArticles.map((article) => toCitationData(article, params.includeMeshTerms))
        : parsedArticles;
    finalOutputText = buildJsonEnvelope(params, articlesToReturn, foundPmids, eFetchUrl);
  }

  logger.notice("Successfully executed fetch_pubmed_content tool.", {
    ...toolLogicContext,
    articlesReturned: articlesCount,
  });

  return {
    content: finalOutputText,
    articlesReturned: articlesCount,
    eFetchUrl,
  };
}