@cyanheads/pubmed-mcp-server
Version:
Search PubMed/Europe PMC, fetch articles and full text (PMC/EPMC/Unpaywall), citations, MeSH terms via MCP. STDIO or Streamable HTTP.
500 lines • 16.8 kB
JavaScript
/**
* @fileoverview Hand-rolled citation formatters for PubMed articles.
* Supports APA 7th, MLA 9th, BibTeX, and RIS formats.
* Pure TypeScript, zero dependencies, Workers-compatible.
* @module src/services/ncbi/formatting/citation-formatter
*/
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Resolve the year to print for an article.
 *
 * Lookup order:
 *  1. `journalInfo.publicationDate.year`, when truthy;
 *  2. the `year` of the first `articleDates` entry that has a truthy `year`;
 *  3. the literal placeholder `'n.d.'` ("no date").
 *
 * @param article Parsed article record.
 * @returns The year value as stored on the record, or `'n.d.'`.
 */
function getYear(article) {
  const fromJournal = article.journalInfo?.publicationDate?.year;
  if (!fromJournal) {
    // No journal-level year: fall back to the first dated entry, if any.
    const datedEntry = article.articleDates?.find((entry) => entry.year);
    return datedEntry?.year ?? 'n.d.';
  }
  return fromJournal;
}
/**
 * Split a pages string like "45-67" into start and end components.
 *
 * The separator may be a hyphen, an en-dash, or an em-dash; both sides are
 * trimmed. Only the first two segments are considered.
 *
 * A truncated end is expanded to an absolute page number by borrowing the
 * leading characters of the start page ("737-8" gives end "738", "1639-41"
 * gives end "1641", "S12-5" gives end "S15"). The expansion is applied only
 * when the end is made of digits and the characters it replaces in the start
 * are digits too. Without that restriction a non-numeric range such as the
 * roman-numeral "xii-xv" would be rewritten to the bogus end "xxv".
 *
 * @param pages Raw pages string; may be empty or undefined.
 * @returns `{ start, end }`, `{ start }` when there is no usable end, or `{}`
 *   when there is no usable start.
 */
function splitPages(pages) {
  if (!pages) {
    return {};
  }
  const [start, rawEnd] = pages.split(/[-\u2013\u2014]/).map((segment) => segment.trim());
  if (!start) {
    return {};
  }
  if (!rawEnd) {
    return { start };
  }

  const allDigits = /^\d+$/;
  const isTruncatedEnd =
    rawEnd.length < start.length &&
    allDigits.test(rawEnd) &&
    allDigits.test(start.slice(-rawEnd.length));
  const end = isTruncatedEnd ? `${start.slice(0, -rawEnd.length)}${rawEnd}` : rawEnd;
  return { start, end };
}
/**
 * Flatten text onto a single line: every run of whitespace (spaces, tabs,
 * newlines) becomes one space, and leading/trailing whitespace is removed.
 * Used for abstracts, since a blank line inside a RIS value can be read as
 * the end of the record by strict parsers.
 *
 * @param text Text to flatten.
 * @returns The single-line form of `text`.
 */
function collapseWhitespace(text) {
  const singleSpaced = text.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Lookup table from a PubMed `PublicationType` label to the BibTeX entry type
 * emitted for it. Labels not listed here fall back to `article`.
 */
const BIBTEX_ENTRY_TYPE = {
  'Book': 'book',
  'Book Chapter': 'inbook',
  'Preprint': 'misc',
};
/**
 * Lookup table from a PubMed `PublicationType` label to the RIS reference
 * type (`TY` value) emitted for it. Labels not listed here fall back to `JOUR`.
 */
const RIS_REFERENCE_TYPE = {
  'Book': 'BOOK',
  'Book Chapter': 'CHAP',
  'Preprint': 'GEN',
};
/**
 * Return the mapped value for the first entry of `types` that has one.
 *
 * Only the map's own properties count. A plain-object lookup would otherwise
 * resolve inherited members, so a type labelled "toString" or "constructor"
 * would yield a function instead of the fallback.
 *
 * @param types Candidate labels in priority order; may be empty or undefined.
 * @param map Plain object from label to output value.
 * @param fallback Value returned when no label has a truthy own mapping.
 * @returns The first truthy mapped value, or `fallback`.
 */
function firstMappedType(types, map, fallback) {
  if (!types?.length) {
    return fallback;
  }
  for (const type of types) {
    if (!Object.hasOwn(map, type)) {
      continue;
    }
    const mapped = map[type];
    if (mapped) {
      return mapped;
    }
  }
  return fallback;
}
/**
 * Make a string safe to embed in a BibTeX/LaTeX field value.
 *
 * Characters handled: \ & % $ # _ { } ~ ^
 *  - backslash, tilde and caret are replaced by their named text macros;
 *  - every other listed character is prefixed with a backslash.
 *
 * @param text Raw field text.
 * @returns The escaped text.
 */
function escapeBibtex(text) {
  // Characters that need a named macro rather than a simple backslash prefix.
  const namedMacros = new Map([
    ['\\', '\\textbackslash{}'],
    ['~', '\\textasciitilde{}'],
    ['^', '\\textasciicircum{}'],
  ]);
  return text.replace(
    /[\\&%$#_{}~^]/g,
    (special) => namedMacros.get(special) ?? `\\${special}`,
  );
}
// ---------------------------------------------------------------------------
// Author formatters
// ---------------------------------------------------------------------------
/**
 * Render one author the APA way: `Last, F. M.`
 *
 * - A collective (group) author is returned verbatim.
 * - Initials come from `author.initials` when that field is not null or
 *   undefined; otherwise they are derived from the first character of each
 *   whitespace- or hyphen-separated part of `author.firstName`.
 * - Only letters (Unicode-aware, so Á, Ö, É survive) are kept from the
 *   initials; each becomes `X.` and they are joined with spaces.
 * - With no initials only the last name is returned; with no last name only
 *   the formatted initials are returned.
 *
 * @param author Author record.
 * @returns The APA-style name; may be an empty string.
 */
function formatAuthorApa(author) {
  const { collectiveName, lastName, firstName } = author;
  if (collectiveName) {
    return collectiveName;
  }

  const surname = lastName ?? '';

  let rawInitials = author.initials;
  if (rawInitials == null) {
    const givenParts = firstName?.split(/[\s-]+/).filter(Boolean);
    rawInitials = givenParts?.map((part) => `${part[0]}.`).join(' ');
  }
  if (!rawInitials) {
    return surname;
  }

  // Drop dots, spaces and anything else that is not a letter, then re-dot.
  const letters = [...rawInitials.replace(/[^\p{L}]/gu, '')];
  const dotted = letters.map((letter) => `${letter}.`).join(' ');
  return surname ? `${surname}, ${dotted}` : dotted;
}
/**
 * Build the APA 7th edition author list from individual author records.
 *
 * - no authors: empty string
 * - 1 author: the formatted name
 * - 2 to 20 authors: names joined with `, ` and `, & ` before the last one
 * - 21 or more: the first 19 names, then `, ... `, then the final name
 *
 * @param authors Array of author records.
 * @returns The formatted author list.
 */
function formatAuthorsApa(authors) {
  const names = authors.map(formatAuthorApa);
  const count = names.length;

  if (count === 0) {
    return '';
  }
  if (count === 1) {
    return names[0] ?? '';
  }

  const finalName = names.at(-1);
  if (count > 20) {
    // Long lists are abbreviated: 19 names, an ellipsis, and the last name.
    const leading = names.slice(0, 19).join(', ');
    return `${leading}, ... ${finalName}`;
  }

  const leading = names.slice(0, count - 1).join(', ');
  return `${leading}, & ${finalName}`;
}
/**
 * Render one author the MLA way.
 *
 * - A collective (group) author is returned verbatim.
 * - With both names present, the first listed author is inverted
 *   (`Last, First Middle`) and any later author is in natural order
 *   (`First Middle Last`).
 * - With only one of the two names present, that name is returned alone;
 *   with neither, the result is an empty string.
 *
 * @param author Author record.
 * @param isFirst Whether this is the first author in the list.
 * @returns The MLA-style name; may be an empty string.
 */
function formatAuthorMla(author, isFirst) {
  if (author.collectiveName) {
    return author.collectiveName;
  }
  const family = author.lastName ?? '';
  const given = author.firstName ?? '';
  if (family && given) {
    return isFirst ? `${family}, ${given}` : `${given} ${family}`;
  }
  // At most one of the two is non-empty here.
  return family || given;
}
/**
 * Build the MLA 9th edition author list.
 *
 * - no first author: empty string
 * - 1 author: `Last, First`
 * - 2 authors: `Last, First, and First Last`
 * - 3 or more: `Last, First, et al.`
 *
 * @param authors Array of author records.
 * @returns The formatted author list (no trailing period is added here,
 *   apart from the one in `et al.`).
 */
function formatAuthorsMla(authors) {
  const lead = authors[0];
  if (!lead) {
    return '';
  }
  const leadName = formatAuthorMla(lead, true);

  switch (authors.length) {
    case 1:
      return leadName;
    case 2: {
      const partner = authors[1];
      return partner ? `${leadName}, and ${formatAuthorMla(partner, false)}` : leadName;
    }
    default:
      return `${leadName}, et al.`;
  }
}
/**
 * Render one author for a BibTeX `author` field.
 *
 * - Collective author: the escaped name wrapped in braces, `{Group Name}`.
 * - Both names: `{Last}, First` (the last name is braced, the first is not).
 * - Last name only: `{Last}`.
 * - First name only: the escaped first name, unbraced.
 * - Neither: an empty string.
 *
 * @param author Author record.
 * @returns The BibTeX author fragment; may be an empty string.
 */
function formatAuthorBibtex(author) {
  if (author.collectiveName) {
    return `{${escapeBibtex(author.collectiveName)}}`;
  }
  const family = author.lastName ? escapeBibtex(author.lastName) : '';
  const given = author.firstName ? escapeBibtex(author.firstName) : '';
  if (family && given) {
    return `{${family}}, ${given}`;
  }
  if (family) {
    return `{${family}}`;
  }
  return given;
}
// ---------------------------------------------------------------------------
// APA 7th Edition
// ---------------------------------------------------------------------------
/**
 * Format a PubMed article as an APA 7th edition citation.
 *
 * Pattern (segments are joined with single spaces; absent ones are skipped):
 * ```
 * Authors. (Year). Title. *Journal*, *Volume*(Issue), Pages. https://doi.org/DOI
 * ```
 *
 * Details:
 * - The author segment always ends with a period (one is appended for
 *   collective names, which do not end in an initial).
 * - The year segment is always present; it reads `(n.d.).` when no year is
 *   available.
 * - A trailing period on the title is normalised to exactly one. A title that
 *   already ends in `?` or `!` keeps that mark and gets no extra period.
 * - The issue is only shown together with a volume.
 * - The DOI URL is not followed by a period.
 *
 * @param article Parsed article record.
 * @returns The citation as a single line of Markdown-flavoured text.
 */
export function formatApa(article) {
  const parts = [];

  // Authors
  const authorStr = article.authors?.length ? formatAuthorsApa(article.authors) : '';
  if (authorStr) {
    parts.push(authorStr.endsWith('.') ? authorStr : `${authorStr}.`);
  }

  // Year
  parts.push(`(${getYear(article)}).`);

  // Title: used as-is from PubMed (sentence case assumed)
  if (article.title) {
    const title = article.title.replace(/\.\s*$/, '').trimEnd();
    // A closing question or exclamation mark already terminates the element.
    parts.push(/[?!]$/.test(title) ? title : `${title}.`);
  }

  // Journal, volume, issue, pages
  const journal = article.journalInfo;
  if (journal?.title) {
    let journalPart = `*${journal.title}*`;
    if (journal.volume) {
      journalPart += `, *${journal.volume}*`;
      if (journal.issue) {
        journalPart += `(${journal.issue})`;
      }
    }
    if (journal.pages) {
      journalPart += `, ${journal.pages}`;
    }
    parts.push(`${journalPart}.`);
  }

  // DOI
  if (article.doi) {
    parts.push(`https://doi.org/${article.doi}`);
  }

  return parts.join(' ');
}
// ---------------------------------------------------------------------------
// MLA 9th Edition
// ---------------------------------------------------------------------------
/**
 * Format a PubMed article as an MLA 9th edition citation.
 *
 * Pattern (segments are joined with single spaces; absent ones are skipped):
 * ```
 * Last, First, et al. "Title." *Journal*, vol. 12, no. 3, 2024, pp. 45-67. https://doi.org/DOI.
 * ```
 *
 * Details:
 * - The author segment always ends with a period.
 * - The title is quoted with the closing period inside the quotes. A title
 *   that already ends in `?` or `!` keeps that mark and gets no extra period.
 * - Volume, issue, year and pages are only emitted when the journal title is
 *   present; the year is omitted when none is available.
 * - Pages are introduced by `pp.` when the value contains a hyphen, en-dash
 *   or em-dash (a range), and by `p.` otherwise.
 *
 * @param article Parsed article record.
 * @returns The citation as a single line of Markdown-flavoured text.
 */
export function formatMla(article) {
  const parts = [];

  // Authors
  const authorStr = article.authors?.length ? formatAuthorsMla(article.authors) : '';
  if (authorStr) {
    parts.push(authorStr.endsWith('.') ? authorStr : `${authorStr}.`);
  }

  // Title in quotes
  if (article.title) {
    const title = article.title.replace(/\.\s*$/, '').trimEnd();
    // A closing question or exclamation mark replaces the period.
    parts.push(/[?!]$/.test(title) ? `"${title}"` : `"${title}."`);
  }

  // Journal and publication details
  const journal = article.journalInfo;
  if (journal?.title) {
    const detailParts = [`*${journal.title}*`];
    if (journal.volume) {
      detailParts.push(`vol. ${journal.volume}`);
    }
    if (journal.issue) {
      detailParts.push(`no. ${journal.issue}`);
    }
    const year = getYear(article);
    if (year !== 'n.d.') {
      detailParts.push(year);
    }
    if (journal.pages) {
      // "p." for a single page, "pp." for a range
      const isRange = /[-\u2013\u2014]/.test(journal.pages);
      detailParts.push(`${isRange ? 'pp.' : 'p.'} ${journal.pages}`);
    }
    parts.push(`${detailParts.join(', ')}.`);
  }

  // DOI
  if (article.doi) {
    parts.push(`https://doi.org/${article.doi}.`);
  }

  return parts.join(' ');
}
// ---------------------------------------------------------------------------
// BibTeX
// ---------------------------------------------------------------------------
/**
 * Format a PubMed article as a BibTeX entry.
 *
 * ```bibtex
 * @article{pmid12345678,
 *   author = {Last, First and Last, First},
 *   title  = {Article Title},
 *   ...
 * }
 * ```
 *
 * - The citation key is `pmid` followed by the article's PMID.
 * - The entry type is looked up from the article's publication types and
 *   defaults to `article`.
 * - Fields are emitted in a fixed order: author, title, journal, year,
 *   volume, number, pages, issn, doi, pmid, pmcid, keywords. Only `pmid` is
 *   unconditional.
 * - The title loses a trailing period and is wrapped in an extra pair of
 *   braces. `doi`, `pmid` and `pmcid` are written without escaping.
 * - `keywords` is the de-duplicated union of article keywords and MeSH
 *   descriptor names, comma-separated.
 * - Field names are right-padded so that the `=` signs line up.
 *
 * @param article Parsed article record.
 * @returns The BibTeX entry as a multi-line string.
 */
export function formatBibtex(article) {
  const citeKey = `pmid${article.pmid}`;
  const entryType = firstMappedType(article.publicationTypes, BIBTEX_ENTRY_TYPE, 'article');

  const fields = [];
  const put = (name, value) => {
    fields.push([name, value]);
  };
  // Adds the field, escaped, only when a value is present.
  const putEscaped = (name, value) => {
    if (value) {
      put(name, escapeBibtex(value));
    }
  };

  if (article.authors?.length) {
    const joinedAuthors = article.authors
      .map(formatAuthorBibtex)
      .filter(Boolean)
      .join(' and ');
    if (joinedAuthors) {
      put('author', joinedAuthors);
    }
  }

  if (article.title) {
    const bareTitle = article.title.replace(/\.\s*$/, '');
    put('title', `{${escapeBibtex(bareTitle)}}`);
  }

  const journal = article.journalInfo;
  putEscaped('journal', journal?.title);

  const year = getYear(article);
  if (year !== 'n.d.') {
    put('year', year);
  }

  putEscaped('volume', journal?.volume);
  putEscaped('number', journal?.issue);
  putEscaped('pages', journal?.pages);
  putEscaped('issn', journal?.issn ?? journal?.eIssn);

  if (article.doi) {
    put('doi', article.doi);
  }
  put('pmid', article.pmid);
  if (article.pmcId) {
    put('pmcid', article.pmcId);
  }

  // A Set keeps first-seen order while dropping duplicates.
  const keywords = new Set(article.keywords ?? []);
  for (const { descriptorName } of article.meshTerms ?? []) {
    if (descriptorName) {
      keywords.add(descriptorName);
    }
  }
  if (keywords.size > 0) {
    put('keywords', escapeBibtex([...keywords].join(', ')));
  }

  const nameWidth = Math.max(...fields.map(([name]) => name.length));
  const body = fields
    .map(([name, value]) => ` ${name.padEnd(nameWidth)} = {${value}}`)
    .join(',\n');
  return `@${entryType}{${citeKey},\n${body}\n}`;
}
// ---------------------------------------------------------------------------
// RIS
// ---------------------------------------------------------------------------
/**
 * Format a PubMed article as a RIS record.
 *
 * Every line has the RIS layout: a two-character tag, two spaces, a hyphen,
 * one space, then the value (for example `TY  - JOUR`). The record is closed
 * by an `ER` line with an empty value, so that line ends in a space.
 *
 * Emitted tags, in order: TY, AU (one per author), TI, JF, JO, PY, VL, IS,
 * SP, EP, SN, DO, AN, UR (PubMed, then PMC when a PMC id exists), KW (one per
 * keyword or MeSH descriptor, de-duplicated), AB, ER. Tags whose value is
 * empty are skipped, except TY, the PubMed UR line and ER, which are always
 * written.
 *
 * Authors are written as `Last, First`. An author with only one of the two
 * names is written as that name alone, so a missing last name never produces
 * a value starting with a comma.
 *
 * @param article Parsed article record.
 * @returns The RIS record as a newline-separated string.
 */
export function formatRis(article) {
  const lines = [];
  // padEnd(4) yields the tag followed by the two spaces the format requires.
  const emit = (code, value) => {
    lines.push(`${code.padEnd(4)}- ${value}`);
  };
  const tag = (code, value) => {
    if (value) {
      emit(code, value);
    }
  };

  // Type of reference, mapped from PubMed publication types
  emit('TY', firstMappedType(article.publicationTypes, RIS_REFERENCE_TYPE, 'JOUR'));

  // Authors: one AU tag per author
  for (const author of article.authors ?? []) {
    if (author.collectiveName) {
      tag('AU', author.collectiveName);
      continue;
    }
    const last = author.lastName ?? '';
    const first = author.firstName ?? '';
    tag('AU', last && first ? `${last}, ${first}` : last || first);
  }

  // Title
  tag('TI', article.title);

  // Journal (full title and ISO abbreviation)
  const journal = article.journalInfo;
  tag('JF', journal?.title);
  tag('JO', journal?.isoAbbreviation);

  // Year
  const year = getYear(article);
  if (year !== 'n.d.') {
    tag('PY', year);
  }

  // Volume & issue
  tag('VL', journal?.volume);
  tag('IS', journal?.issue);

  // Pages, split into start/end
  if (journal?.pages) {
    const { start, end } = splitPages(journal.pages);
    tag('SP', start);
    tag('EP', end);
  }

  // ISSN: prefer print, fall back to electronic
  tag('SN', journal?.issn ?? journal?.eIssn);

  // DOI (bare, without URL prefix)
  tag('DO', article.doi);

  // Accession number (PMID)
  tag('AN', article.pmid);

  // PubMed URL, then PMC URL when available
  emit('UR', `https://pubmed.ncbi.nlm.nih.gov/${article.pmid}/`);
  if (article.pmcId) {
    emit('UR', `https://pmc.ncbi.nlm.nih.gov/articles/${article.pmcId}/`);
  }

  // Keywords: article keywords merged with MeSH descriptor names
  const keywordSet = new Set(article.keywords ?? []);
  for (const mesh of article.meshTerms ?? []) {
    if (mesh.descriptorName) {
      keywordSet.add(mesh.descriptorName);
    }
  }
  for (const keyword of keywordSet) {
    tag('KW', keyword);
  }

  // Abstract, flattened so embedded blank lines cannot end the record early
  if (article.abstractText) {
    tag('AB', collapseWhitespace(article.abstractText));
  }

  // End of record
  emit('ER', '');
  return lines.join('\n');
}
// ---------------------------------------------------------------------------
// Dispatchers
// ---------------------------------------------------------------------------
/**
 * Format a single article in the requested citation style.
 *
 * @param article Parsed article record.
 * @param style One of `'apa'`, `'mla'`, `'bibtex'`, `'ris'`.
 * @returns The formatted citation string.
 * @throws {Error} When `style` is not one of the supported values. Without
 *   this guard an unknown style would fall through the switch and silently
 *   yield `undefined`.
 */
export function formatCitation(article, style) {
  switch (style) {
    case 'apa':
      return formatApa(article);
    case 'mla':
      return formatMla(article);
    case 'bibtex':
      return formatBibtex(article);
    case 'ris':
      return formatRis(article);
    default:
      throw new Error(`Unsupported citation style: ${String(style)}`);
  }
}
/**
 * Format one article in several citation styles at once.
 *
 * Styles are processed in iteration order; a style listed more than once is
 * formatted each time and the last result is kept.
 *
 * @param article Parsed article record.
 * @param styles Iterable of style names.
 * @returns A plain object mapping each requested style name to its citation.
 */
export function formatCitations(article, styles) {
  const byStyle = {};
  for (const styleName of styles) {
    const rendered = formatCitation(article, styleName);
    byStyle[styleName] = rendered;
  }
  return byStyle;
}
//# sourceMappingURL=citation-formatter.js.map