UNPKG

@cyanheads/pubmed-mcp-server

Version:

A Model Context Protocol (MCP) server enabling AI agents to intelligently search, retrieve, and analyze biomedical literature from PubMed via NCBI E-utilities. Built on the mcp-ts-template for robust, production-ready performance.

296 lines (295 loc) 13 kB
/**
 * @fileoverview Helper functions for parsing ESummary results from NCBI.
 * Handles different ESummary XML structures and formats the data into
 * consistent ParsedBriefSummary objects.
 * @module src/utils/parsing/ncbi-parsing/eSummaryResultParser
 */
import {
  dateParser,
  logger,
  requestContextService,
} from "../../../utils/index.js"; // Note: utils/index.js is the barrel file
import { ensureArray, getAttribute, getText } from "./xmlGenericHelpers.js";

/** Number of author names listed before the list is abbreviated with "et al.". */
const MAX_LISTED_AUTHORS = 3;

/**
 * Formats an array of ESummary authors into a string.
 * Lists at most the first three authors and appends "et al." when more exist.
 * @param authors - Array of ESummary author objects (normalized, each with a `name`).
 * @returns A string like "Doe J, Smith A, Brown B, et al." or "" when there are no authors.
 */
export function formatESummaryAuthors(authors) {
  if (!authors || authors.length === 0) return "";
  const listedNames = authors
    .slice(0, MAX_LISTED_AUTHORS)
    .map((author) => author.name) // Assumes author.name is the string representation
    .join(", ");
  return authors.length > MAX_LISTED_AUTHORS
    ? `${listedNames}, et al.`
    : listedNames;
}

/**
 * Standardizes date strings from ESummary to "YYYY-MM-DD" format.
 * Uses the dateParser utility.
 * @param dateStr - Date value from ESummary (e.g., "2023/01/15", "2023 Jan 15", "2023").
 *   Non-string values are stringified before parsing.
 * @param parentContext - Optional parent request context for logging. When omitted, a
 *   fresh context is created.
 * @returns A promise resolving to a standardized date string ("YYYY-MM-DD"), or undefined
 *   when the input is null/undefined, cannot be parsed, or parsing throws.
 */
export async function standardizeESummaryDate(dateStr, parentContext) {
  if (dateStr === undefined || dateStr === null) return undefined;
  const dateInputString = String(dateStr);
  const currentContext =
    parentContext ||
    requestContextService.createRequestContext({
      operation: "standardizeESummaryDateInternal",
      inputDate: dateInputString,
    });
  try {
    const parsedDate = await dateParser.parseDate(
      dateInputString,
      currentContext,
    );
    if (parsedDate) {
      // toISOString() reports the UTC calendar date and throws a RangeError for an
      // invalid Date; that throw is handled by the catch below.
      // NOTE(review): if dateParser builds dates in local time, the UTC conversion
      // could shift the day near midnight — confirm against dateParser.
      return parsedDate.toISOString().split("T")[0];
    }
    logger.debug(
      `standardizeESummaryDate: dateParser could not parse "${dateInputString}", returning undefined.`,
      currentContext,
    );
  } catch (e) {
    logger.warning(
      `standardizeESummaryDate: Error during dateParser.parseDate for "${dateInputString}", returning undefined.`,
      {
        ...currentContext,
        error: e instanceof Error ? e.message : String(e),
      },
    );
  }
  return undefined;
}

/**
 * Converts a raw record identifier into the string used as `pmid`.
 * Missing or blank identifiers become "" so callers can drop the record with a
 * plain truthiness check. (A bare `String(undefined)` would yield the truthy
 * text "undefined" and defeat that check.)
 * Internal helper function.
 */
function toPmidString(rawId) {
  if (rawId === undefined || rawId === null) return "";
  return String(rawId).trim();
}

/**
 * Normalizes one raw author entry (a plain string or a parsed XML/JSON object)
 * into `{ name, authtype, clusterid }`.
 * Returns undefined when no non-blank name can be derived.
 * Internal helper function.
 */
function normalizeRawESummaryAuthor(rawAuthInput) {
  let name = "";
  let authtype;
  let clusterid;

  if (typeof rawAuthInput === "string") {
    name = rawAuthInput;
  } else if (rawAuthInput && typeof rawAuthInput === "object") {
    const authorObj = rawAuthInput;
    // Try extracting text from the object itself (e.g., { '#text': 'Author Name' }),
    // then fall back to the common name properties.
    name = getText(authorObj, "");
    if (!name) {
      name = getText(authorObj.Name || authorObj.name, "");
    }
    authtype = getText(authorObj.AuthType || authorObj.authtype, undefined);
    clusterid = getText(authorObj.ClusterId || authorObj.clusterid, undefined);

    if (!name) {
      // Unhandled structure: log it, then make a best-effort guess at the name.
      const authInputString = JSON.stringify(authorObj);
      logger.warning(
        `Unhandled author structure in parseESummaryAuthorsFromDocumentSummary. authInput: ${authInputString.substring(0, 100)}`,
        requestContextService.createRequestContext({
          operation: "parseESummaryAuthorsFromDocumentSummary",
          detail: "Unhandled author structure",
        }),
      );
      const keys = Object.keys(authorObj);
      if (keys.length === 1 && typeof authorObj[keys[0]] === "string") {
        // A single string-valued property is most likely the name.
        name = authorObj[keys[0]];
      } else if (authInputString.length < 100) {
        // Not ideal, but a short stringified object beats an empty name when debugging.
        name = authInputString;
      }
    }
  }

  // Coerce before trimming so a non-string text value cannot throw here.
  const trimmedName = String(name ?? "").trim();
  return trimmedName ? { name: trimmedName, authtype, clusterid } : undefined;
}

/**
 * Parses an `Authors` value that arrived as a single string.
 * A string that is a valid JSON array is decoded and trusted as-is (string items
 * and objects carrying `name`/`Name` are kept, blank names are skipped, and an
 * empty array means "no authors"). Anything else is split on "," and ";".
 * Internal helper function.
 */
function parseESummaryAuthorsFromString(authorsText) {
  if (authorsText.startsWith("[") && authorsText.endsWith("]")) {
    try {
      const parsedJsonAuthors = JSON.parse(authorsText);
      if (Array.isArray(parsedJsonAuthors)) {
        const jsonAuthors = [];
        for (const authItem of parsedJsonAuthors) {
          if (typeof authItem === "string") {
            const trimmedItem = authItem.trim();
            if (trimmedItem) jsonAuthors.push({ name: trimmedItem });
          } else if (
            typeof authItem === "object" &&
            authItem !== null &&
            (authItem.name || authItem.Name)
          ) {
            const normalized = normalizeRawESummaryAuthor(authItem);
            if (normalized) jsonAuthors.push(normalized);
          }
        }
        // The text was a real JSON array: do not re-split it as delimited text,
        // which would turn input such as "[]" into an author literally named "[]".
        return jsonAuthors;
      }
    } catch (e) {
      logger.debug(
        `Failed to parse Authors string as JSON: ${authorsText.substring(0, 100)}`,
        requestContextService.createRequestContext({
          operation: "parseESummaryAuthorsFromString",
          input: authorsText.substring(0, 100),
          error: e instanceof Error ? e.message : String(e),
        }),
      );
    }
  }

  // Fallback: split string by common delimiters.
  return authorsText
    .split(/[,;]/)
    .map((namePart) => namePart.trim())
    .filter((namePart) => namePart)
    .map((namePart) => ({ name: namePart }));
}

/**
 * Parses authors from an ESummary DocumentSummary structure.
 * Accepts `Authors` as an array of entries, as `{ Author: ... }` (single entry or
 * array), or as a string (JSON array text or a delimited list).
 * Returns an array of normalized author objects; entries without a name are dropped.
 * Internal helper function.
 */
function parseESummaryAuthorsFromDocumentSummary(docSummary) {
  const authorsProp = docSummary.Authors;
  if (!authorsProp) return [];

  if (typeof authorsProp === "string") {
    return parseESummaryAuthorsFromString(authorsProp);
  }

  let rawAuthors = [];
  if (Array.isArray(authorsProp)) {
    rawAuthors = authorsProp;
  } else if (
    typeof authorsProp === "object" &&
    "Author" in authorsProp &&
    authorsProp.Author
  ) {
    rawAuthors = ensureArray(authorsProp.Author);
  }

  const parsedAuthors = [];
  for (const rawAuthor of rawAuthors) {
    const normalized = normalizeRawESummaryAuthor(rawAuthor);
    if (normalized) parsedAuthors.push(normalized);
  }
  return parsedAuthors;
}

/**
 * Parses a single ESummary DocumentSummary (newer XML format) into a raw summary object.
 * The DOI is taken from `DOI` when present, otherwise from the `ArticleIds` entry
 * whose id type is "doi". A missing `@_uid` yields an empty `pmid`.
 * Internal helper function.
 */
function parseSingleDocumentSummary(docSummary) {
  const authorsArray = parseESummaryAuthorsFromDocumentSummary(docSummary);

  let doiValue = getText(docSummary.DOI, undefined);
  if (!doiValue) {
    const articleIdsProp = docSummary.ArticleIds;
    if (articleIdsProp) {
      const idsArray = Array.isArray(articleIdsProp)
        ? articleIdsProp
        : ensureArray(articleIdsProp.ArticleId);
      // NOTE(review): the lowercase `idtype`/`value` keys are the original lookup;
      // the `IdType`/`Value` spelling is accepted as well because ESummary v2.0 XML
      // appears to use those element names — confirm against a live response.
      const doiEntry = idsArray.find(
        (id) =>
          id != null &&
          (id.idtype === "doi" || getText(id.IdType, undefined) === "doi"),
      );
      if (doiEntry) {
        doiValue = getText(doiEntry.value ?? doiEntry.Value, undefined);
      }
    }
  }

  return {
    pmid: toPmidString(docSummary["@_uid"]),
    title: getText(docSummary.Title, undefined),
    authors: formatESummaryAuthors(authorsArray),
    source:
      getText(docSummary.Source, undefined) ||
      getText(docSummary.FullJournalName, undefined) ||
      getText(docSummary.SO, undefined) ||
      undefined,
    doi: doiValue,
    rawPubDate: getText(docSummary.PubDate, undefined),
    rawEPubDate: getText(docSummary.EPubDate, undefined),
  };
}

/**
 * Parses a single ESummary DocSum (older XML item-based format) into a raw summary object.
 * Values are looked up among the DocSum's `Item` entries by their `_Name`/`_Type`
 * fields. A missing `Id` yields an empty `pmid`.
 * Internal helper function.
 */
function parseSingleDocSumOldXml(docSum) {
  const items = ensureArray(docSum.Item);

  // Returns the text of the first non-ERROR item matching one of the given names
  // (and the given type, when one is supplied), or undefined when none has text.
  const getItemValue = (name, type) => {
    for (const candidateName of ensureArray(name)) {
      const item = items.find(
        (i) =>
          i._Name === candidateName &&
          (type ? i._Type === type : true) &&
          i._Type !== "ERROR",
      );
      if (item) {
        const textVal = getText(item);
        if (textVal !== undefined) return String(textVal);
      }
    }
    return undefined;
  };

  const isAuthorItem = (i) => i._Name === "Author" && i._Type === "String";
  // Entries with an empty name are dropped, matching the DocumentSummary path, so
  // they cannot take up one of the listed author slots.
  const toAuthors = (authorItems) =>
    authorItems
      .filter(isAuthorItem)
      .map((a) => ({ name: getText(a, "") }))
      .filter((author) => author.name);

  const authorListItem = items.find(
    (i) => i._Name === "AuthorList" && i._Type === "List",
  );
  // Prefer the nested AuthorList; otherwise use Author items directly under the DocSum.
  const authorsArray =
    authorListItem && authorListItem.Item
      ? toAuthors(ensureArray(authorListItem.Item))
      : toAuthors(items);

  let doiFromItems = getItemValue("DOI", "String");
  if (!doiFromItems) {
    const articleIdsItem = items.find(
      (i) => i._Name === "ArticleIds" && i._Type === "List",
    );
    if (articleIdsItem && articleIdsItem.Item) {
      const doiIdItem = ensureArray(articleIdsItem.Item).find(
        (id) => getAttribute(id, "idtype") === "doi" || id._Name === "doi",
      );
      if (doiIdItem) {
        doiFromItems = getText(doiIdItem);
      }
    }
  }

  return {
    pmid: toPmidString(docSum.Id),
    title: getItemValue("Title", "String"),
    authors: formatESummaryAuthors(authorsArray),
    source: getItemValue(["Source", "FullJournalName", "SO"], "String"),
    doi: doiFromItems,
    rawPubDate: getItemValue(["PubDate", "ArticleDate"], "Date"),
    rawEPubDate: getItemValue("EPubDate", "Date"),
  };
}

/**
 * Extracts and formats brief summaries from ESummary XML result.
 * Handles both DocumentSummarySet (newer) and older DocSum structures.
 * Records without an identifier are dropped. Dates are standardized asynchronously.
 * @param eSummaryResult - The parsed XML object from ESummary (eSummaryResult part).
 * @param context - Request context for logging and passing to date standardization.
 *   When omitted, a fresh context is created.
 * @returns A promise resolving to an array of parsed brief summary objects, in input
 *   order. Resolves to [] when the result is empty or carries an `ERROR` member.
 */
export async function extractBriefSummaries(eSummaryResult, context) {
  if (!eSummaryResult) return [];
  const opContext =
    context ||
    requestContextService.createRequestContext({
      operation: "extractBriefSummariesInternal",
    });

  if (eSummaryResult.ERROR) {
    logger.warning("ESummary result contains an error", {
      ...opContext,
      errorDetails: eSummaryResult.ERROR,
    });
    return [];
  }

  let rawSummaries = [];
  if (eSummaryResult.DocumentSummarySet?.DocumentSummary) {
    rawSummaries = ensureArray(
      eSummaryResult.DocumentSummarySet.DocumentSummary,
    )
      .map((docSummary) => parseSingleDocumentSummary(docSummary))
      .filter((s) => s.pmid);
  } else if (eSummaryResult.DocSum) {
    rawSummaries = ensureArray(eSummaryResult.DocSum)
      .map((docSum) => parseSingleDocSumOldXml(docSum))
      .filter((s) => s.pmid);
  }

  const processedSummaries = [];
  for (const rawSummary of rawSummaries) {
    // The two dates are independent, so they are standardized concurrently.
    const [pubDate, epubDate] = await Promise.all([
      standardizeESummaryDate(rawSummary.rawPubDate, opContext),
      standardizeESummaryDate(rawSummary.rawEPubDate, opContext),
    ]);
    processedSummaries.push({
      pmid: rawSummary.pmid,
      title: rawSummary.title,
      authors: rawSummary.authors,
      source: rawSummary.source,
      doi: rawSummary.doi,
      pubDate,
      epubDate,
    });
  }
  return processedSummaries;
}