@cyanheads/pubmed-mcp-server
Version:
A Model Context Protocol (MCP) server enabling AI agents to intelligently search, retrieve, and analyze biomedical literature from PubMed via NCBI E-utilities. Built on the mcp-ts-template for robust, production-ready performance.
296 lines (295 loc) • 13 kB
JavaScript
/**
* @fileoverview Helper functions for parsing ESummary results from NCBI.
* Handles different ESummary XML structures and formats the data into
* consistent ParsedBriefSummary objects.
* @module src/utils/parsing/ncbi-parsing/eSummaryResultParser
*/
import { dateParser, logger, requestContextService, } from "../../../utils/index.js"; // Note: utils/index.js is the barrel file
import { ensureArray, getAttribute, getText } from "./xmlGenericHelpers.js";
/**
 * Builds a short, human-readable author string from ESummary author entries.
 * Only the first three authors are listed; when more are present the result
 * is suffixed with ", et al.".
 * @param authors - Array of normalized ESummary author objects (each read via its `name` property).
 * @returns A string such as "Doe J, Smith A, Brown B, et al.", or "" when the list is missing or empty.
 */
export function formatESummaryAuthors(authors) {
    if (!authors || authors.length === 0) {
        return "";
    }
    const MAX_LISTED = 3;
    const listedNames = authors.slice(0, MAX_LISTED).map((entry) => entry.name);
    const suffix = authors.length > MAX_LISTED ? ", et al." : "";
    return `${listedNames.join(", ")}${suffix}`;
}
/**
 * Converts an ESummary date value into a "YYYY-MM-DD" string.
 * The value is stringified and handed to the shared `dateParser` utility; the
 * resulting Date is rendered via `toISOString()` and cut at the "T".
 * @param dateStr - Date value from ESummary (e.g., "2023/01/15", "2023 Jan 15", "2023").
 * @param parentContext - Optional parent request context for logging; a new one is created when omitted.
 * @returns A promise resolving to the standardized date string, or undefined when the
 *   input is null/undefined, the parser yields nothing, or parsing throws.
 */
export async function standardizeESummaryDate(dateStr, parentContext) {
    // `== null` matches both null and undefined.
    if (dateStr == null) {
        return undefined;
    }
    const rawText = String(dateStr);
    let logContext = parentContext;
    if (!logContext) {
        logContext = requestContextService.createRequestContext({
            operation: "standardizeESummaryDateInternal",
            inputDate: rawText,
        });
    }
    try {
        const parsed = await dateParser.parseDate(rawText, logContext);
        if (!parsed) {
            logger.debug(`standardizeESummaryDate: dateParser could not parse "${rawText}", returning undefined.`, logContext);
            return undefined;
        }
        // Keep only the calendar-date portion of the ISO timestamp.
        const [isoDay] = parsed.toISOString().split("T");
        return isoDay;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        logger.warning(`standardizeESummaryDate: Error during dateParser.parseDate for "${rawText}", returning undefined.`, {
            ...logContext,
            error: reason,
        });
        return undefined;
    }
}
/**
 * Parses authors from an ESummary DocumentSummary structure.
 * Internal helper function.
 *
 * The `Authors` property is accepted in several shapes:
 *  - an array of strings and/or author objects,
 *  - an object of the form `{ Author: ... }` (single entry or array),
 *  - a string, either a JSON array literal or a "," / ";" delimited list.
 *
 * @param docSummary - A DocumentSummary object; only its `Authors` property is read.
 * @returns An array of normalized author objects (`name`, plus `authtype` / `clusterid`
 *   for entries that went through the object/string normalizer). Entries whose name is
 *   empty after trimming are never returned.
 */
function parseESummaryAuthorsFromDocumentSummary(docSummary) {
    const authorsProp = docSummary.Authors;
    if (!authorsProp)
        return [];
    const parsedAuthors = [];
    // Normalizes one raw author (string or object) and appends it when it has a non-blank name.
    const processRawAuthor = (rawAuthInput) => {
        let name = "";
        let authtype;
        let clusterid;
        if (typeof rawAuthInput === "string") {
            name = rawAuthInput;
        }
        else if (rawAuthInput && typeof rawAuthInput === "object") {
            const authorObj = rawAuthInput;
            // Try extracting text from the object itself (e.g., if it's { '#text': 'Author Name' })
            name = getText(authorObj, "");
            // If name is still empty, try common property names for author names
            if (!name) {
                name = getText(authorObj.Name || authorObj.name, "");
            }
            authtype = getText(authorObj.AuthType || authorObj.authtype, undefined);
            clusterid = getText(authorObj.ClusterId || authorObj.clusterid, undefined);
            // Fallback for unhandled structures: log and try to stringify
            if (!name) {
                const authInputString = JSON.stringify(authorObj);
                logger.warning(`Unhandled author structure in parseESummaryAuthorsFromDocumentSummary. authInput: ${authInputString.substring(0, 100)}`, requestContextService.createRequestContext({
                    operation: "parseESummaryAuthorsFromDocumentSummary",
                    detail: "Unhandled author structure",
                }));
                // As a last resort, if it's a simple object with a single string value, that might be the name
                const keys = Object.keys(authorObj);
                if (keys.length === 1 &&
                    typeof authorObj[keys[0]] === "string") {
                    name = authorObj[keys[0]];
                }
                else if (authInputString.length < 100) {
                    // Avoid overly long stringified objects
                    name = authInputString; // Not ideal, but better than empty for debugging
                }
            }
        }
        if (name.trim()) {
            parsedAuthors.push({
                name: name.trim(),
                authtype,
                clusterid,
            });
        }
    };
    if (Array.isArray(authorsProp)) {
        // authorsProp could be an array of strings or of raw author objects
        authorsProp.forEach(processRawAuthor);
    }
    else if (typeof authorsProp === "object" &&
        "Author" in authorsProp && // authorsProp is { Author: ... }
        authorsProp.Author) {
        ensureArray(authorsProp.Author).forEach(processRawAuthor);
    }
    else if (typeof authorsProp === "string") {
        try {
            // Attempt to parse if it looks like a JSON array string
            if (authorsProp.startsWith("[") && authorsProp.endsWith("]")) {
                const parsedJsonAuthors = JSON.parse(authorsProp);
                if (Array.isArray(parsedJsonAuthors)) {
                    parsedJsonAuthors.forEach((authItem) => {
                        if (typeof authItem === "string") {
                            // Skip blank entries so no author with an empty name is emitted.
                            const trimmedName = authItem.trim();
                            if (trimmedName) {
                                parsedAuthors.push({ name: trimmedName });
                            }
                        }
                        else if (typeof authItem === "object" &&
                            authItem !== null &&
                            (authItem.name || authItem.Name)) {
                            // If it's an object with a name property, treat as a raw author object
                            processRawAuthor(authItem);
                        }
                    });
                    // The string was a well-formed JSON array, so its content is authoritative
                    // even when it produced no authors. Falling through to the delimiter split
                    // would turn the JSON text itself (e.g. "[]") into a bogus author name.
                    return parsedAuthors;
                }
            }
        }
        catch (e) {
            logger.debug(`Failed to parse Authors string as JSON: ${authorsProp.substring(0, 100)}`, requestContextService.createRequestContext({
                operation: "parseESummaryAuthorsFromString",
                input: authorsProp.substring(0, 100),
                error: e instanceof Error ? e.message : String(e),
            }));
        }
        // Fallback: split string by common delimiters
        authorsProp
            .split(/[,;]/)
            .map((namePart) => namePart.trim())
            .filter((namePart) => namePart)
            .forEach((namePart) => parsedAuthors.push({ name: namePart }));
    }
    return parsedAuthors.filter((author) => author.name);
}
/**
 * Parses a single ESummary DocumentSummary (newer XML format) into a raw summary object.
 * Internal helper function.
 *
 * @param docSummary - One DocumentSummary object; the id is read from its `@_uid` attribute.
 * @returns An object with `pmid`, `title`, `authors` (formatted string), `source`, `doi`,
 *   `rawPubDate` and `rawEPubDate`. `pmid` is "" when the record carries no uid, so callers
 *   can discard such records with a simple truthiness check.
 */
function parseSingleDocumentSummary(docSummary) {
    const rawUid = docSummary["@_uid"];
    // String(undefined) would yield the truthy text "undefined"; map a missing id to "".
    const pmid = rawUid === undefined || rawUid === null ? "" : String(rawUid);
    const authorsArray = parseESummaryAuthorsFromDocumentSummary(docSummary);
    let doiValue = getText(docSummary.DOI, undefined);
    if (!doiValue) {
        // No direct DOI element: look for an ArticleIds entry whose idtype is "doi".
        const articleIdsProp = docSummary.ArticleIds;
        if (articleIdsProp) {
            const idsArray = Array.isArray(articleIdsProp)
                ? articleIdsProp
                : ensureArray(articleIdsProp.ArticleId);
            const doiEntry = idsArray.find((id) => id?.idtype === "doi");
            if (doiEntry) {
                doiValue = getText(doiEntry.value, undefined);
            }
        }
    }
    return {
        pmid,
        title: getText(docSummary.Title, undefined),
        authors: formatESummaryAuthors(authorsArray),
        // First non-empty of Source, FullJournalName, SO.
        source: getText(docSummary.Source, undefined) ||
            getText(docSummary.FullJournalName, undefined) ||
            getText(docSummary.SO, undefined) ||
            undefined,
        doi: doiValue,
        rawPubDate: getText(docSummary.PubDate, undefined),
        rawEPubDate: getText(docSummary.EPubDate, undefined),
    };
}
/**
 * Parses a single ESummary DocSum (older XML item-based format) into a raw summary object.
 * Internal helper function.
 *
 * Field values are looked up among the DocSum's `Item` entries by their `_Name` / `_Type`
 * attributes; items whose `_Type` is "ERROR" are ignored.
 * NOTE(review): attribute keys here use a `_` prefix while the newer-format parser reads
 * `@_uid` — confirm both match the XML parser's attribute prefix configuration.
 *
 * @param docSum - One DocSum object; the id is read from its `Id` property.
 * @returns An object with `pmid`, `title`, `authors` (formatted string), `source`, `doi`,
 *   `rawPubDate` and `rawEPubDate`. `pmid` is "" when the record carries no Id, so callers
 *   can discard such records with a simple truthiness check.
 */
function parseSingleDocSumOldXml(docSum) {
    const rawId = docSum.Id;
    // String(undefined) would yield the truthy text "undefined"; map a missing id to "".
    const pmid = rawId === undefined || rawId === null ? "" : String(rawId);
    const items = ensureArray(docSum.Item);
    // Returns the text of the first matching item, trying each candidate name in order.
    const getItemValue = (name, type) => {
        const namesToTry = ensureArray(name);
        for (const n of namesToTry) {
            const item = items.find((i) => i._Name === n &&
                (type ? i._Type === type : true) &&
                i._Type !== "ERROR");
            if (item) {
                const textVal = getText(item);
                if (textVal !== undefined)
                    return String(textVal);
            }
        }
        return undefined;
    };
    const isAuthorItem = (i) => i._Name === "Author" && i._Type === "String";
    const toAuthor = (a) => ({ name: getText(a, "") });
    const getAuthorList = () => {
        const authorListItem = items.find((i) => i._Name === "AuthorList" && i._Type === "List");
        if (authorListItem && authorListItem.Item) {
            return ensureArray(authorListItem.Item).filter(isAuthorItem).map(toAuthor);
        }
        // Fallback for authors directly under DocSum items
        return items.filter(isAuthorItem).map(toAuthor);
    };
    const authorsArray = getAuthorList();
    let doiFromItems = getItemValue("DOI", "String");
    if (!doiFromItems) {
        // No direct DOI item: look inside the ArticleIds list for a doi entry.
        const articleIdsItem = items.find((i) => i._Name === "ArticleIds" && i._Type === "List");
        if (articleIdsItem && articleIdsItem.Item) {
            const ids = ensureArray(articleIdsItem.Item);
            const doiIdItem = ids.find((id) => getAttribute(id, "idtype") === "doi" ||
                id._Name === "doi");
            if (doiIdItem) {
                doiFromItems = getText(doiIdItem);
            }
        }
    }
    return {
        pmid,
        title: getItemValue("Title", "String"),
        authors: formatESummaryAuthors(authorsArray),
        source: getItemValue(["Source", "FullJournalName", "SO"], "String"),
        doi: doiFromItems,
        rawPubDate: getItemValue(["PubDate", "ArticleDate"], "Date"),
        rawEPubDate: getItemValue("EPubDate", "Date"),
    };
}
/**
 * Turns a parsed ESummary result into an array of brief summary objects.
 * Supports the newer `DocumentSummarySet.DocumentSummary` layout and, failing that,
 * the older `DocSum` layout. Records without a pmid are dropped, and the raw
 * publication / e-publication dates are standardized one record at a time.
 * @param eSummaryResult - The parsed XML object from ESummary (eSummaryResult part).
 * @param context - Request context for logging and date standardization; created when omitted.
 * @returns A promise resolving to the parsed brief summaries; an empty array when the
 *   input is missing, carries an `ERROR` property, or contains no recognized records.
 */
export async function extractBriefSummaries(eSummaryResult, context) {
    if (!eSummaryResult) {
        return [];
    }
    const opContext = context
        ? context
        : requestContextService.createRequestContext({
            operation: "extractBriefSummariesInternal",
        });
    const reportedError = eSummaryResult.ERROR;
    if (reportedError) {
        logger.warning("ESummary result contains an error", {
            ...opContext,
            errorDetails: reportedError,
        });
        return [];
    }
    const hasPmid = (summary) => summary.pmid;
    const newFormatDocs = eSummaryResult.DocumentSummarySet?.DocumentSummary;
    let rawSummaries = [];
    if (newFormatDocs) {
        rawSummaries = ensureArray(newFormatDocs)
            .map(parseSingleDocumentSummary)
            .filter(hasPmid);
    }
    else if (eSummaryResult.DocSum) {
        rawSummaries = ensureArray(eSummaryResult.DocSum)
            .map(parseSingleDocSumOldXml)
            .filter(hasPmid);
    }
    const briefSummaries = [];
    // Dates are standardized sequentially, record by record.
    for (const { pmid, title, authors, source, doi, rawPubDate, rawEPubDate } of rawSummaries) {
        const pubDate = await standardizeESummaryDate(rawPubDate, opContext);
        const epubDate = await standardizeESummaryDate(rawEPubDate, opContext);
        briefSummaries.push({ pmid, title, authors, source, doi, pubDate, epubDate });
    }
    return briefSummaries;
}