@cyanheads/pubmed-mcp-server
Version:
A Model Context Protocol (MCP) server enabling AI agents to intelligently search, retrieve, and analyze biomedical literature from PubMed via NCBI E-utilities. Built on the mcp-ts-template for robust, production-ready performance.
311 lines (310 loc) • 13.6 kB
JavaScript
/**
* @fileoverview Logic for the fetch_pubmed_content MCP tool.
* Handles EFetch queries for specific PMIDs and formats the results.
* This tool can fetch various details from PubMed including abstracts, full XML,
* MEDLINE text, and citation data.
* @module src/mcp-server/tools/fetchPubMedContent/logic
*/
import { z } from "zod";
import { getNcbiService } from "../../../services/NCBI/ncbiService.js";
import { BaseErrorCode, McpError } from "../../../types-global/errors.js";
import { logger, requestContextService, sanitizeInputForLogging, } from "../../../utils/index.js";
import { ensureArray, extractAbstractText, extractArticleDates, extractAuthors, extractDoi, extractGrants, extractJournalInfo, extractKeywords, extractMeshTerms, extractPmid, extractPublicationTypes, getText, } from "../../../utils/parsing/ncbi-parsing/index.js";
/**
 * Zod schema describing the input of the fetch_pubmed_content tool.
 *
 * Records are addressed either by an explicit `pmids` list or by an ESearch
 * history reference (`queryKey` together with `webEnv`). The cross-field
 * refinement below reports an issue when the two modes are mixed, when a
 * history reference is incomplete, when neither mode is supplied, or when
 * `retstart`/`retmax` are given without a complete history reference.
 */
export const FetchPubMedContentInputSchema = z
  .object({
    pmids: z
      .array(z.string().regex(/^\d+$/))
      .max(200, "Max 200 PMIDs per call if not using history.")
      .optional()
      .describe("An array of PubMed Unique Identifiers (PMIDs) for which to fetch content. Use this OR queryKey/webEnv."),
    queryKey: z
      .string()
      .optional()
      .describe("Query key from ESearch history server. If used, webEnv must also be provided. Use this OR pmids."),
    webEnv: z
      .string()
      .optional()
      .describe("Web environment from ESearch history server. If used, queryKey must also be provided. Use this OR pmids."),
    retstart: z
      .number()
      .int()
      .min(0)
      .optional()
      .describe("Sequential index of the first record to retrieve (0-based). Used with queryKey/webEnv."),
    retmax: z
      .number()
      .int()
      .min(1)
      .optional()
      .describe("Maximum number of records to retrieve. Used with queryKey/webEnv."),
    detailLevel: z
      .enum(["abstract_plus", "full_xml", "medline_text", "citation_data"])
      .optional()
      .default("abstract_plus")
      .describe("Specifies the level of detail for the fetched content. Options: 'abstract_plus' (parsed details including abstract, authors, journal, DOI, etc.), 'full_xml' (raw PubMedArticle XML), 'medline_text' (MEDLINE format), 'citation_data' (minimal parsed data for citations). Defaults to 'abstract_plus'."),
    includeMeshTerms: z
      .boolean()
      .optional()
      .default(true)
      .describe("Applies to 'abstract_plus' and 'citation_data' if parsed from XML."),
    includeGrantInfo: z
      .boolean()
      .optional()
      .default(false)
      .describe("Applies to 'abstract_plus' if parsed from XML."),
    outputFormat: z
      .enum(["json", "raw_text"])
      .optional()
      .default("json")
      .describe("Specifies the final output format of the tool. \n- 'json' (default): Wraps the data in a standard JSON object. \n- 'raw_text': Returns raw text for 'medline_text' or 'full_xml' detailLevels. For other detailLevels, 'outputFormat' defaults to 'json'."),
  })
  .superRefine((data, ctx) => {
    // Presence is judged by truthiness, so an empty string counts as "not provided".
    const hasQueryKey = Boolean(data.queryKey);
    const hasWebEnv = Boolean(data.webEnv);
    const hasHistoryRef = hasQueryKey && hasWebEnv;
    const hasPmids = (data.pmids?.length ?? 0) > 0;
    const hasPaging = data.retstart !== undefined || data.retmax !== undefined;

    // Each rule is [violated, message, offending field]; issues are reported in this order.
    const rules = [
      [hasQueryKey && !hasWebEnv, "webEnv is required if queryKey is provided.", "webEnv"],
      [!hasQueryKey && hasWebEnv, "queryKey is required if webEnv is provided.", "queryKey"],
      [
        !hasPmids && !hasHistoryRef,
        "Either pmids (non-empty array) or both queryKey and webEnv must be provided.",
        "pmids",
      ],
      [
        hasPmids && (hasQueryKey || hasWebEnv),
        "Cannot use pmids and queryKey/webEnv simultaneously. Please choose one method.",
        "pmids",
      ],
      [
        hasPaging && !hasHistoryRef,
        "retstart/retmax can only be used with queryKey and webEnv.",
        "retstart",
      ],
    ];

    for (const [violated, message, field] of rules) {
      if (violated) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message,
          path: [field],
        });
      }
    }
  });
/**
 * Converts a parsed EFetch XML payload into an array of plain article objects.
 *
 * Entries of `PubmedArticleSet.PubmedArticle` that are not objects, lack a
 * `MedlineCitation`, or yield no PMID are skipped.
 *
 * @param {unknown} xmlData - Parsed EFetch response; must be an object that
 *   has a `PubmedArticleSet` key.
 * @param {object} input - Tool input; `includeMeshTerms` and `includeGrantInfo`
 *   decide whether `meshTerms` / `grantList` are attached to each article.
 * @param {object} parentContext - Request context whose `requestId` becomes
 *   the parent of the context created for this operation.
 * @returns {Array<object>} Parsed articles; empty when the set holds no
 *   `PubmedArticle` entries.
 * @throws {McpError} With `BaseErrorCode.PARSING_ERROR` when `xmlData` is not
 *   an object containing `PubmedArticleSet`.
 */
function parsePubMedArticleSet(xmlData, input, parentContext) {
  const articles = [];
  const operationContext = requestContextService.createRequestContext({
    parentRequestId: parentContext.requestId,
    operation: "parsePubMedArticleSet",
  });

  const hasExpectedShape =
    Boolean(xmlData) &&
    typeof xmlData === "object" &&
    "PubmedArticleSet" in xmlData;
  if (!hasExpectedShape) {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols, and throws on circular structures or BigInt. Building the
    // preview defensively keeps this error path from failing with a TypeError
    // before the intended McpError can be raised.
    let preview;
    try {
      preview = JSON.stringify(xmlData) ?? String(xmlData);
    } catch {
      preview = String(xmlData);
    }
    throw new McpError(
      BaseErrorCode.PARSING_ERROR,
      "Invalid or unexpected structure for xmlData in parsePubMedArticleSet.",
      {
        ...operationContext,
        xmlDataType: typeof xmlData,
        xmlDataPreview: sanitizeInputForLogging(preview.substring(0, 200)),
      },
    );
  }

  const articleSet = xmlData.PubmedArticleSet;
  if (!articleSet || !articleSet.PubmedArticle) {
    logger.warning("PubmedArticleSet or PubmedArticle array not found in EFetch XML response.", operationContext);
    return articles;
  }

  // ensureArray is applied because PubmedArticle is not guaranteed to be an array here.
  const pubmedArticlesXml = ensureArray(articleSet.PubmedArticle);
  for (const articleXml of pubmedArticlesXml) {
    if (!articleXml || typeof articleXml !== "object") {
      continue;
    }
    const medlineCitation = articleXml.MedlineCitation;
    if (!medlineCitation) {
      continue;
    }
    const pmid = extractPmid(medlineCitation);
    if (!pmid) {
      continue;
    }

    const articleNode = medlineCitation.Article;
    const parsedArticle = {
      pmid,
      title: articleNode?.ArticleTitle
        ? getText(articleNode.ArticleTitle)
        : undefined,
      abstractText: articleNode?.Abstract
        ? extractAbstractText(articleNode.Abstract)
        : undefined,
      authors: articleNode?.AuthorList
        ? extractAuthors(articleNode.AuthorList)
        : undefined,
      journalInfo: articleNode?.Journal
        ? extractJournalInfo(articleNode.Journal, medlineCitation)
        : undefined,
      publicationTypes: articleNode?.PublicationTypeList
        ? extractPublicationTypes(articleNode.PublicationTypeList)
        : undefined,
      keywords: articleNode?.KeywordList
        ? extractKeywords(articleNode.KeywordList)
        : undefined,
      doi: articleNode ? extractDoi(articleNode) : undefined,
      articleDates: articleNode?.ArticleDate
        ? extractArticleDates(articleNode)
        : undefined,
    };

    // Optional sections are attached only on request; the key is still set
    // (to undefined) when requested but absent from the record.
    if (input.includeMeshTerms) {
      parsedArticle.meshTerms = medlineCitation.MeshHeadingList
        ? extractMeshTerms(medlineCitation.MeshHeadingList)
        : undefined;
    }
    if (input.includeGrantInfo) {
      parsedArticle.grantList = articleNode?.GrantList
        ? extractGrants(articleNode.GrantList)
        : undefined;
    }
    articles.push(parsedArticle);
  }
  return articles;
}
/**
 * Serializes the JSON envelope shared by every non-raw response of the tool.
 *
 * @param {string[]|undefined} requestedPmids - PMIDs supplied by the caller, or
 *   undefined for a history query.
 * @param {Array<object>} articles - Article payloads to embed.
 * @param {Set<string>} foundPmids - PMIDs that were present in the EFetch response.
 * @param {string} eFetchUrl - URL reported under `eFetchDetails.urls`.
 * @returns {string} The JSON string.
 */
function serializeFetchResult(requestedPmids, articles, foundPmids, eFetchUrl) {
  const notFoundPmids =
    requestedPmids?.filter((pmid) => !foundPmids.has(pmid)) || [];
  return JSON.stringify({
    requestedPmids: requestedPmids || "N/A (history query)",
    articles,
    notFoundPmids,
    eFetchDetails: { urls: [eFetchUrl] },
  });
}

/**
 * Core logic of the fetch_pubmed_content tool: validates the input, runs an
 * EFetch request through the NCBI service and formats the response according
 * to `detailLevel` and `outputFormat`.
 *
 * @param {object} input - Raw tool input; validated against
 *   FetchPubMedContentInputSchema. The validated result (with schema defaults
 *   applied) is what drives the rest of the function.
 * @param {object} parentRequestContext - Request context whose `requestId`
 *   becomes the parent of the context created for this operation.
 * @returns {Promise<{content: string, articlesReturned: number, eFetchUrl: string}>}
 *   `content` is either a JSON string or, for `raw_text` with `medline_text` /
 *   `full_xml`, the stringified EFetch response.
 * @throws {McpError} With `BaseErrorCode.VALIDATION_ERROR` when the input does
 *   not satisfy the schema. Errors raised by the NCBI service or by
 *   parsePubMedArticleSet propagate unchanged.
 */
export async function fetchPubMedContentLogic(input, parentRequestContext) {
  const toolLogicContext = requestContextService.createRequestContext({
    parentRequestId: parentRequestContext.requestId,
    operation: "fetchPubMedContentLogic",
    input: sanitizeInputForLogging(input),
  });

  const validationResult = FetchPubMedContentInputSchema.safeParse(input);
  if (!validationResult.success) {
    throw new McpError(BaseErrorCode.VALIDATION_ERROR, validationResult.error.errors[0]?.message || "Invalid input", { ...toolLogicContext, details: validationResult.error.flatten() });
  }
  // Work from the validated data rather than the raw input so that schema
  // defaults (detailLevel, includeMeshTerms, includeGrantInfo, outputFormat)
  // take effect even when the caller omitted those fields.
  const params = validationResult.data;

  const ncbiService = getNcbiService();
  logger.info("Executing fetch_pubmed_content tool", toolLogicContext);

  const eFetchParams = { db: "pubmed" };
  if (params.queryKey && params.webEnv) {
    eFetchParams.query_key = params.queryKey;
    eFetchParams.WebEnv = params.webEnv;
    if (params.retstart !== undefined) {
      eFetchParams.retstart = String(params.retstart);
    }
    if (params.retmax !== undefined) {
      eFetchParams.retmax = String(params.retmax);
    }
  } else if (params.pmids && params.pmids.length > 0) {
    eFetchParams.id = params.pmids.join(",");
  }

  // Only medline_text is requested as text; every other detail level uses XML.
  const wantsMedline = params.detailLevel === "medline_text";
  const serviceRetmode = wantsMedline ? "text" : "xml";
  const rettype = wantsMedline ? "medline" : undefined;
  eFetchParams.retmode = serviceRetmode;
  if (rettype) {
    eFetchParams.rettype = rettype;
  }

  // URL reported back to the caller; built from the same parameters handed to the service.
  const eFetchBase = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";
  const eFetchQueryString = new URLSearchParams(eFetchParams).toString();
  const eFetchUrl = `${eFetchBase}?${eFetchQueryString}`;

  const shouldReturnRawXml =
    params.detailLevel === "full_xml" && params.outputFormat === "raw_text";
  const eFetchResponseData = await ncbiService.eFetch(eFetchParams, toolLogicContext, { retmode: serviceRetmode, rettype, returnRawXml: shouldReturnRawXml });

  let finalOutputText;
  let articlesCount = 0;

  if (params.detailLevel === "medline_text") {
    const medlineText = String(eFetchResponseData);
    // Each line starting with "PMID- <digits>" identifies one record.
    const foundPmidsInMedline = new Set(
      Array.from(medlineText.matchAll(/^PMID- (\d+)/gm), (match) => match[1]),
    );
    articlesCount = foundPmidsInMedline.size;
    finalOutputText =
      params.outputFormat === "raw_text"
        ? medlineText
        : serializeFetchResult(params.pmids, [{ medlineText }], foundPmidsInMedline, eFetchUrl);
  } else if (params.detailLevel === "full_xml") {
    if (params.outputFormat === "raw_text") {
      finalOutputText = String(eFetchResponseData);
      articlesCount = (finalOutputText.match(/<PubmedArticle>/g) || []).length;
    } else {
      const articlesXml = ensureArray(eFetchResponseData?.PubmedArticleSet?.PubmedArticle || []);
      articlesCount = articlesXml.length;
      const foundPmidsInXml = new Set();
      const articlesPayload = articlesXml.map((articleXml) => {
        const pmid = extractPmid(articleXml.MedlineCitation) || "unknown_pmid";
        if (pmid !== "unknown_pmid") {
          foundPmidsInXml.add(pmid);
        }
        return { pmid, fullXmlContent: articleXml };
      });
      finalOutputText = serializeFetchResult(params.pmids, articlesPayload, foundPmidsInXml, eFetchUrl);
    }
  } else {
    // abstract_plus and citation_data are both derived from the parsed XML.
    const parsedArticles = parsePubMedArticleSet(eFetchResponseData, params, toolLogicContext);
    articlesCount = parsedArticles.length;
    const foundPmids = new Set(parsedArticles.map((article) => article.pmid));
    let articlesToReturn = parsedArticles;
    if (params.detailLevel === "citation_data") {
      // Trim each article down to the fields needed for a citation.
      articlesToReturn = parsedArticles.map((article) => ({
        pmid: article.pmid,
        title: article.title,
        authors: article.authors?.map((author) => ({
          lastName: author.lastName,
          initials: author.initials,
        })),
        journalInfo: {
          title: article.journalInfo?.title,
          isoAbbreviation: article.journalInfo?.isoAbbreviation,
          volume: article.journalInfo?.volume,
          issue: article.journalInfo?.issue,
          pages: article.journalInfo?.pages,
          year: article.journalInfo?.publicationDate?.year,
        },
        doi: article.doi,
        ...(params.includeMeshTerms && { meshTerms: article.meshTerms }),
      }));
    }
    finalOutputText = serializeFetchResult(params.pmids, articlesToReturn, foundPmids, eFetchUrl);
  }

  logger.notice("Successfully executed fetch_pubmed_content tool.", {
    ...toolLogicContext,
    articlesReturned: articlesCount,
  });
  return {
    content: finalOutputText,
    articlesReturned: articlesCount,
    eFetchUrl,
  };
}