@cyanheads/pubmed-mcp-server
Version:
Search PubMed/Europe PMC, fetch articles and full text (PMC/EPMC/Unpaywall), citations, MeSH terms via MCP. STDIO or Streamable HTTP.
358 lines • 16.3 kB
JavaScript
/**
* @fileoverview High-level service for interacting with NCBI E-utilities.
* Orchestrates the API client, request queue, and response handler to provide
* typed methods for each E-utility endpoint. Uses init/accessor pattern.
* @module src/services/ncbi/ncbi-service
*/
import { internalError, JsonRpcErrorCode, McpError, serializationError, timeout, validationError, } from '@cyanheads/mcp-ts-core/errors';
import { logger, requestContextService } from '@cyanheads/mcp-ts-core/utils';
import { getServerConfig } from '../../config/server-config.js';
import { recoveryFor } from '../../services/error-contracts.js';
import { NcbiApiClient } from './api-client.js';
import { NcbiRequestQueue } from './request-queue.js';
import { NcbiResponseHandler } from './response-handler.js';
import { NCBI_PMC_IDCONV_URL, } from './types.js';
/**
 * Per-idType expected-format hints surfaced when PMC ID Converter rejects a
 * batch with HTTP 400. Informational only — NCBI's API is the authority on
 * what's accepted, so a stale hint here only weakens the error message, never
 * blocks a valid request.
 *
 * The table is a frozen, prototype-less object: looking up an arbitrary
 * caller-supplied idType (e.g. "constructor" or "toString") yields `undefined`
 * instead of an inherited `Object.prototype` member, and the shared constant
 * cannot be mutated at runtime.
 */
const ID_CONVERT_FORMAT_HINTS = Object.freeze(Object.assign(Object.create(null), {
    pmid: 'numeric digits, e.g. "23193287"',
    pmcid: '"PMC" + digits, e.g. "PMC3531190"',
    doi: 'starts with "10.", e.g. "10.1093/nar/gks1195"',
}));
/**
 * Marker error used as the abort reason when the service-level deadline
 * timer fires, so the deadline can be told apart from other failures.
 */
class NcbiDeadlineExceeded extends Error {
    // Installed as an own property right after `super()` returns.
    name = 'NcbiDeadlineExceeded';
    /**
     * @param deadlineMs The deadline, in milliseconds, that was exceeded;
     *   embedded verbatim in the error message.
     */
    constructor(deadlineMs) {
        const message = `NCBI request deadline (${deadlineMs}ms) exceeded`;
        super(message);
    }
}
/**
 * Promise-based delay that can be cut short by an AbortSignal.
 *
 * - No signal: resolves after `ms`.
 * - Signal already aborted: rejects right away with `signal.reason`.
 * - Otherwise: whichever happens first wins — the timer (resolve) or the
 *   abort event (reject with `signal.reason`). The loser is torn down so no
 *   timer or listener is left behind.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional AbortSignal that interrupts the delay.
 * @returns A promise that resolves with `undefined` once the delay elapses.
 */
function abortableSleep(ms, signal) {
    if (signal?.aborted) {
        return Promise.reject(signal.reason);
    }
    return new Promise((resolve, reject) => {
        if (!signal) {
            setTimeout(resolve, ms);
            return;
        }
        let handle;
        const cancel = () => {
            // Abort won the race: stop the pending timer before rejecting.
            clearTimeout(handle);
            reject(signal.reason);
        };
        handle = setTimeout(() => {
            // Timer won the race: detach the abort listener before resolving.
            signal.removeEventListener('abort', cancel);
            resolve();
        }, ms);
        signal.addEventListener('abort', cancel, { once: true });
    });
}
/**
 * Facade over NCBI's E-utility suite. Each public method corresponds to a
 * single E-utility endpoint (plus the PMC ID Converter). Every call runs
 * through the same pipeline: service-level deadline → request queue →
 * retry loop → API client (→ response handler for E-utility endpoints).
 */
export class NcbiService {
    apiClient;
    queue;
    responseHandler;
    maxRetries;
    totalDeadlineMs;
    /**
     * @param apiClient Client used via `makeRequest` / `makeExternalRequest`.
     * @param queue Queue used via `enqueue(task, label, params, signal)`.
     * @param responseHandler Handler used via `parseAndHandleResponse(text, endpoint, options)`.
     * @param maxRetries Retries allowed after the first attempt (total attempts = maxRetries + 1).
     * @param totalDeadlineMs Service-level deadline, in milliseconds, applied to each request.
     */
    constructor(apiClient, queue, responseHandler, maxRetries, totalDeadlineMs) {
        this.apiClient = apiClient;
        this.queue = queue;
        this.responseHandler = responseHandler;
        this.maxRetries = maxRetries;
        this.totalDeadlineMs = totalDeadlineMs;
    }
    /**
     * Run an ESearch query and normalise the parsed result.
     *
     * @param params ESearch query parameters, forwarded to the API client.
     * @param options Optional `{ signal }` for caller-side cancellation.
     * @returns Counts as integers (0 when unparsable), `idList` as an array of
     *   strings, plus `queryKey` / `webEnv` / `errorList` / `warningList` only
     *   when present in the response.
     */
    async eSearch(params, options) {
        const response = await this.performRequest('esearch', params, {
            retmode: 'xml',
            ...(options?.signal && { signal: options.signal }),
        });
        const esResult = response.eSearchResult;
        // A single-hit result may surface `Id` as a scalar rather than a
        // one-element array; normalise so `idList` is always an array.
        const rawIds = esResult.IdList?.Id ?? [];
        const idList = (Array.isArray(rawIds) ? rawIds : [rawIds]).map(String);
        return {
            count: Number.parseInt(esResult.Count, 10) || 0,
            retmax: Number.parseInt(esResult.RetMax, 10) || 0,
            retstart: Number.parseInt(esResult.RetStart, 10) || 0,
            ...(esResult.QueryKey !== undefined && { queryKey: esResult.QueryKey }),
            ...(esResult.WebEnv !== undefined && { webEnv: esResult.WebEnv }),
            idList,
            queryTranslation: esResult.QueryTranslation,
            ...(esResult.ErrorList !== undefined && { errorList: esResult.ErrorList }),
            ...(esResult.WarningList !== undefined && { warningList: esResult.WarningList }),
        };
    }
    /**
     * Run an ESummary request. JSON is used only when the caller asks for
     * `version: '2.0'` together with `retmode: 'json'`; every other
     * combination is handled as XML.
     *
     * @returns The `eSummaryResult` member of the parsed response.
     */
    async eSummary(params, options) {
        const retmode = params.version === '2.0' && params.retmode === 'json' ? 'json' : 'xml';
        const response = await this.performRequest('esummary', params, {
            retmode,
            ...(options?.signal && { signal: options.signal }),
        });
        return response.eSummaryResult;
    }
    /**
     * Run an EFetch request.
     *
     * POST is used when the caller requests it or when `params.id` is a
     * comma-separated string with more than 200 entries.
     *
     * @param params EFetch query parameters.
     * @param options Request options; `retmode` falls back to `'xml'` when it
     *   is not supplied — including when the caller passes an options object
     *   that only carries other fields such as `signal` or `usePost`.
     */
    eFetch(params, options = { retmode: 'xml' }) {
        const usePost = options.usePost || (typeof params.id === 'string' && params.id.split(',').length > 200);
        return this.performRequest('efetch', params, {
            ...options,
            // The default parameter above only applies when `options` is
            // omitted entirely, so re-apply the XML default here.
            retmode: options.retmode ?? 'xml',
            usePost,
        });
    }
    /** Run an ELink request (XML) and return the parsed response. */
    eLink(params, options) {
        return this.performRequest('elink', params, {
            retmode: 'xml',
            ...(options?.signal && { signal: options.signal }),
        });
    }
    /**
     * Run an ESpell request and report the suggested correction.
     *
     * @returns `{ original, corrected, hasSuggestion }`. `corrected` falls
     *   back to `original` when NCBI offers no correction; `hasSuggestion` is
     *   true only for a non-empty correction that differs from the original.
     */
    async eSpell(params, options) {
        const response = await this.performRequest('espell', params, {
            retmode: 'xml',
            ...(options?.signal && { signal: options.signal }),
        });
        const spellResult = response.eSpellResult;
        const original = spellResult.Query ?? params.term ?? '';
        const corrected = spellResult.CorrectedQuery ?? '';
        const hasSuggestion = corrected.length > 0 && corrected !== original;
        logger.debug('ESpell result parsed.', requestContextService.createRequestContext({
            operation: 'NcbiESpell',
            original,
            corrected,
            hasSuggestion,
        }));
        return {
            original,
            corrected: corrected || original,
            hasSuggestion,
        };
    }
    /** Run an EInfo request (XML) and return the parsed response. */
    eInfo(params, options) {
        return this.performRequest('einfo', params, {
            retmode: 'xml',
            ...(options?.signal && { signal: options.signal }),
        });
    }
    /**
     * Look up PMIDs from partial citation strings via NCBI ECitMatch.
     * Each citation can include journal, year, volume, first page, and author name.
     *
     * Citations are serialised as `journal|year|volume|firstPage|authorName|key|`
     * lines joined with `\r`. Each non-empty response line is split on `|`;
     * field 5 is read back as the key and field 6 as the outcome.
     *
     * @returns One entry per response line with `status` of `'matched'`
     *   (outcome is all digits), `'ambiguous'` (outcome starts with
     *   `AMBIGUOUS`; candidate PMIDs are extracted when listed), or
     *   `'not_found'` (anything else).
     */
    async eCitMatch(citations, options) {
        const bdata = citations
            .map((c) => `${c.journal ?? ''}|${c.year ?? ''}|${c.volume ?? ''}|${c.firstPage ?? ''}|${c.authorName ?? ''}|${c.key}|`)
            .join('\r');
        // The query parameter says retmode=xml while the handler option is
        // 'text', so the raw response body is returned for line parsing.
        // NOTE(review): presumably ECitMatch only accepts retmode=xml yet
        // answers with pipe-delimited text — confirm against the NCBI docs.
        const text = await this.performRequest('ecitmatch.cgi', { db: 'pubmed', retmode: 'xml', bdata }, { retmode: 'text', ...(options?.signal && { signal: options.signal }) });
        return text
            .split(/[\r\n]+/)
            .filter((line) => line.trim().length > 0)
            .map((line) => {
                const parts = line.split('|');
                const key = parts[5]?.trim() ?? '';
                const rawOutcome = parts[6]?.trim() ?? '';
                if (/^\d+$/.test(rawOutcome)) {
                    return { key, matched: true, pmid: rawOutcome, status: 'matched' };
                }
                if (rawOutcome.startsWith('AMBIGUOUS')) {
                    const csv = /^AMBIGUOUS\s+([\d,\s]+)/.exec(rawOutcome)?.[1];
                    const candidatePmids = csv
                        ? csv
                            .split(',')
                            .map((p) => p.trim())
                            .filter((p) => /^\d+$/.test(p))
                        : undefined;
                    return {
                        key,
                        matched: false,
                        pmid: null,
                        status: 'ambiguous',
                        detail: rawOutcome,
                        ...(candidatePmids?.length && { candidatePmids }),
                    };
                }
                return {
                    key,
                    matched: false,
                    pmid: null,
                    status: 'not_found',
                    ...(rawOutcome && { detail: rawOutcome }),
                };
            });
    }
    /**
     * Convert between article identifiers (DOI, PMID, PMCID) using the PMC ID Converter API.
     * Accepts up to 200 IDs in a single request. Only works for articles in PMC.
     *
     * @param ids Identifiers to convert; sent as one comma-joined `ids` parameter.
     * @param idtype Optional identifier type, forwarded as `idtype` and used to
     *   pick a format hint for the error message.
     * @param options Optional `{ signal }` for caller-side cancellation.
     * @returns The `records` array of the JSON response, or `[]` when absent.
     * @throws A validation error when the upstream call fails with
     *   `InvalidParams`; a serialization error when the body is not valid
     *   JSON; any other upstream error is rethrown unchanged.
     */
    async idConvert(ids, idtype, options) {
        const params = {
            ids: ids.join(','),
            format: 'json',
            ...(idtype && { idtype }),
        };
        let text;
        try {
            text = await this.runWithDeadline((signal) => this.queue.enqueue(() => this.withRetry(() => this.apiClient.makeExternalRequest(NCBI_PMC_IDCONV_URL, params, signal), 'idconv', signal), 'idconv', params, signal), options?.signal);
        }
        catch (error) {
            // PMC ID Converter returns 400 (InvalidParams) for malformed inputs and
            // leaks the upstream HTML/text body into `data.body`. Rewrite to a typed
            // validation error with idType-specific guidance and drop the leaky body.
            if (error instanceof McpError && error.code === JsonRpcErrorCode.InvalidParams) {
                // Own-property lookup only: an unexpected idtype such as
                // "constructor" must not resolve to an inherited member.
                const hintKey = idtype ?? '';
                const hint = Object.hasOwn(ID_CONVERT_FORMAT_HINTS, hintKey)
                    ? ID_CONVERT_FORMAT_HINTS[hintKey]
                    : undefined;
                const message = hint
                    ? `PMC ID Converter rejected one or more inputs as malformed (idType="${idtype}"). Expected: ${hint}.`
                    : `PMC ID Converter rejected the input as malformed (idType="${idtype ?? 'unspecified'}").`;
                throw validationError(message, { idType: idtype, idCount: ids.length }, { cause: error });
            }
            throw error;
        }
        let parsed;
        try {
            parsed = JSON.parse(text);
        }
        catch (error) {
            throw serializationError('Failed to parse ID Converter JSON response.', {
                reason: 'ncbi_invalid_response',
                responseSnippet: text.substring(0, 200),
                ...recoveryFor('ncbi_invalid_response'),
            }, { cause: error });
        }
        // `parsed` can legitimately be `null` (valid JSON), hence the `?.`.
        return parsed?.records ?? [];
    }
    /** Error codes that are transient and worth retrying with backoff. */
    static RETRYABLE_CODES = new Set([
        JsonRpcErrorCode.ServiceUnavailable,
        JsonRpcErrorCode.Timeout,
        JsonRpcErrorCode.RateLimited,
    ]);
    /**
     * Cap on the exponential base delay per retry (prevents exponential
     * explosion at high retry counts). Jitter is applied after this cap, so an
     * individual sleep can reach up to 1.25× this value.
     */
    static MAX_BACKOFF_MS = 30_000;
    /**
     * Wraps a task with a service-level deadline. Passes the task a combined
     * AbortSignal (internal deadline OR'd with the caller's signal, if any)
     * that the task must forward to both the HTTP call and any backoff sleep so
     * cancellation interrupts the full retry chain — not just the next attempt.
     *
     * @param task Function receiving the combined signal and returning a promise.
     * @param callerSignal Optional caller-supplied AbortSignal.
     * @returns Whatever `task` resolves to.
     * @throws A timeout error (reason `ncbi_deadline_exceeded`) when the task
     *   rejects with the deadline sentinel; any other rejection is rethrown as-is.
     */
    async runWithDeadline(task, callerSignal) {
        const deadlineController = new AbortController();
        const deadlineTimer = setTimeout(() => deadlineController.abort(new NcbiDeadlineExceeded(this.totalDeadlineMs)), this.totalDeadlineMs);
        const signal = callerSignal
            ? AbortSignal.any([deadlineController.signal, callerSignal])
            : deadlineController.signal;
        try {
            return await task(signal);
        }
        catch (error) {
            if (error instanceof NcbiDeadlineExceeded) {
                throw timeout(error.message, {
                    reason: 'ncbi_deadline_exceeded',
                    deadlineMs: this.totalDeadlineMs,
                    ...recoveryFor('ncbi_deadline_exceeded'),
                }, { cause: error });
            }
            throw error;
        }
        finally {
            // Always release the timer so a finished request leaves nothing pending.
            clearTimeout(deadlineTimer);
        }
    }
    /**
     * Retry wrapper for transient NCBI errors (ServiceUnavailable, Timeout, RateLimited).
     * Non-transient McpErrors and unexpected plain Errors fail immediately.
     * Uses capped exponential backoff with ±25% jitter. Backoff sleep is abortable via
     * `signal`, so deadline expiration or caller cancel short-circuits the chain.
     *
     * @param execute Function performing one attempt; returns a promise.
     * @param label Endpoint label used in log messages and error data.
     * @param signal Optional AbortSignal; when aborted, `signal.reason` is thrown.
     * @returns The first successful result of `execute`.
     */
    async withRetry(execute, label, signal) {
        // Normalise the configured retry budget to a non-negative integer so a
        // negative, fractional, or missing value still yields at least one
        // attempt instead of skipping the loop and reporting a bogus failure.
        const maxRetries = Math.max(0, Math.floor(this.maxRetries)) || 0;
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            if (signal?.aborted)
                throw signal.reason;
            try {
                return await execute();
            }
            catch (error) {
                // An abort observed during the attempt takes precedence over
                // whatever error the attempt itself produced.
                if (signal?.aborted)
                    throw signal.reason;
                if (!(error instanceof McpError)) {
                    throw error;
                }
                if (!NcbiService.RETRYABLE_CODES.has(error.code)) {
                    throw error;
                }
                if (attempt < maxRetries) {
                    const baseDelay = Math.min(1000 * 2 ** attempt, NcbiService.MAX_BACKOFF_MS);
                    const jitter = baseDelay * (0.75 + 0.5 * Math.random()); // ±25%
                    const retryDelay = Math.round(jitter);
                    logger.warning(`NCBI request to ${label} failed. Retrying (${attempt + 1}/${maxRetries}) in ${retryDelay}ms.`, requestContextService.createRequestContext({
                        operation: 'NcbiRetry',
                        endpoint: label,
                        attempt: attempt + 1,
                        retryDelay,
                    }));
                    await abortableSleep(retryDelay, signal);
                    continue;
                }
                const attempts = maxRetries + 1;
                const msg = error instanceof Error ? error.message : String(error);
                // Tag transient ServiceUnavailable retries-exhausted with `ncbi_unreachable` so
                // tool callers can switch on a stable reason. Other retryable codes (Timeout,
                // RateLimited) keep their original code with no reason — `ncbi_deadline_exceeded`
                // and `queue_full` are stamped at their own throw sites.
                const reason = error.code === JsonRpcErrorCode.ServiceUnavailable ? 'ncbi_unreachable' : undefined;
                throw new McpError(error.code, `${msg} (failed after ${attempts} attempts)`, {
                    ...(reason && { reason, ...recoveryFor(reason) }),
                    endpoint: label,
                    attempts,
                }, { cause: error });
            }
        }
        // Defensive fallback: every loop iteration above returns or throws.
        throw internalError('Request failed after all retries.', {
            reason: 'ncbi_unreachable',
            endpoint: label,
            ...recoveryFor('ncbi_unreachable'),
        });
    }
    /**
     * Runs a request under a service-level deadline that bounds queue wait time
     * + retry chain + HTTP execution. The deadline is constructed *outside* the
     * queue, so time spent waiting in a backlog counts against the same budget
     * and cannot extend it.
     *
     * The combined deadline+caller signal is threaded into the queue (cancels a
     * still-waiting task), the retry chain (cancels pending backoff sleeps),
     * and the HTTP fetch (cancels wedged requests).
     *
     * @param endpoint E-utility endpoint name; also used as queue/retry label.
     * @param params Query parameters forwarded to the API client.
     * @param options Request options; forwarded to the API client (with the
     *   combined signal substituted) and to the response handler.
     */
    performRequest(endpoint, params, options) {
        return this.runWithDeadline((signal) => this.queue.enqueue(() => this.withRetry(async () => {
            const text = await this.apiClient.makeRequest(endpoint, params, {
                ...options,
                signal,
            });
            return this.responseHandler.parseAndHandleResponse(text, endpoint, options);
        }, endpoint, signal), endpoint, params, signal), options?.signal);
    }
}
// ─── Init / Accessor ────────────────────────────────────────────────────────
// Module-level slot holding the single NcbiService instance.
let _service;
/**
 * Build the NCBI service from server configuration and store it in the
 * module-level slot. Call from `setup()` in createApp.
 *
 * Reads the server config, wires up an API client, request queue, and
 * response handler, and logs a summary of the effective settings.
 */
export function initNcbiService() {
    const config = getServerConfig();
    const { toolIdentifier, timeoutMs, apiKey, adminEmail, requestDelayMs, maxConcurrent, maxRetries, totalDeadlineMs, } = config;
    // Optional credentials are only attached when they are set.
    const clientOptions = { toolIdentifier, timeoutMs };
    if (apiKey) {
        clientOptions.apiKey = apiKey;
    }
    if (adminEmail) {
        clientOptions.adminEmail = adminEmail;
    }
    _service = new NcbiService(new NcbiApiClient(clientOptions), new NcbiRequestQueue(requestDelayMs, maxConcurrent), new NcbiResponseHandler(), maxRetries, totalDeadlineMs);
    const logContext = requestContextService.createRequestContext({
        operation: 'NcbiInit',
        toolIdentifier,
        hasApiKey: Boolean(apiKey),
        requestDelayMs,
        maxConcurrent,
        totalDeadlineMs,
    });
    logger.info('NCBI service initialized.', logContext);
}
/**
 * Accessor for the module-level NCBI service instance.
 *
 * @returns The instance stored in `_service`.
 * @throws {Error} When `_service` has not been set yet.
 */
export function getNcbiService() {
    if (_service) {
        return _service;
    }
    throw new Error('NCBI service not initialized. Call initNcbiService() first.');
}
//# sourceMappingURL=ncbi-service.js.map