UNPKG

paper-search-mcp-nodejs

Version:

A Node.js MCP server for searching and downloading academic papers from multiple sources, including arXiv, PubMed, bioRxiv, Web of Science, and more.

491 lines 18.3 kB
/**
 * Web of Science API integration module.
 * Supports the Web of Science Starter API and the Web of Science Researcher API.
 */
import axios from 'axios';
import { PaperFactory } from '../models/Paper.js';
import { PaperSource } from './PaperSource.js';
import { escapeQueryValue, validateQueryComplexity } from '../utils/SecurityUtils.js';
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
import { logDebug, logWarn } from '../utils/Logger.js';

/**
 * Matches a WoS field tag immediately followed by `=` (e.g. `TS=`, `DO=`).
 * Supported field tags: TI, IS, SO, VL, PG, CS, PY, FPY, DOP, AU, AI, UT, DO, DT, PMID, OG, TS, SUR.
 * The leading `\b` prevents ordinary words that merely end in a tag
 * (e.g. "hits=" -> "HITS=" contains "TS=") from being mistaken for a fielded query.
 */
const WOS_FIELD_TAG_PATTERN = /\b(?:TS|TI|AU|SO|PY|DO|IS|VL|PG|CS|DT|PMID|FPY|DOP|AI|UT|OG|SUR)=/i;

/**
 * Paper source backed by the Clarivate Web of Science "wos-starter" REST API.
 *
 * Requests are sent to `${baseUrl}/wos-starter/${apiVersion}`. When a request fails with a
 * network error, a 404 or a 5xx status, it is retried once against the other API version
 * (v2 <-> v1).
 */
export class WebOfScienceSearcher extends PaperSource {
    /** Base URL of the API version currently in use. */
    apiUrl;
    /** API version currently in use ('v1' or 'v2' unless configured otherwise). */
    apiVersion;
    /** True once the current failure sequence has already switched to the fallback version. */
    fallbackAttempted = false;
    /** Version every new request should try first. */
    preferredVersion;

    /**
     * @param {string} [apiKey] - Web of Science API key (sent as the `X-ApiKey` header).
     * @param {string} [apiVersion] - Preferred API version.
     */
    constructor(apiKey, apiVersion) {
        super('webofscience', 'https://api.clarivate.com/apis', apiKey);
        // Priority: constructor param > env var > default 'v2'
        this.preferredVersion = apiVersion || process.env.WOS_API_VERSION || 'v2';
        this.apiVersion = this.preferredVersion;
        this.apiUrl = `${this.baseUrl}/wos-starter/${this.apiVersion}`;
        logDebug(`WoS API URL: ${this.apiUrl} (preferred: ${this.preferredVersion})`);
    }

    /**
     * Switch to the fallback API version (v2 -> v1, anything else -> v2).
     *
     * @returns {boolean} true if the version was switched, false if a fallback was already attempted.
     */
    switchToFallbackVersion() {
        if (this.fallbackAttempted) {
            return false; // Already tried fallback
        }
        const fallbackVersion = this.apiVersion === 'v2' ? 'v1' : 'v2';
        logWarn(`WoS API ${this.apiVersion} failed, switching to ${fallbackVersion}`);
        this.apiVersion = fallbackVersion;
        this.apiUrl = `${this.baseUrl}/wos-starter/${this.apiVersion}`;
        this.fallbackAttempted = true;
        return true;
    }

    /**
     * Return to the preferred API version if the instance is currently on the fallback version.
     *
     * Called after every successful request and after a failed fallback retry, so that the
     * next request always starts with the preferred version and may fall back again.
     */
    resetFallbackState() {
        if (this.fallbackAttempted && this.apiVersion !== this.preferredVersion) {
            this.fallbackAttempted = false;
            this.apiVersion = this.preferredVersion;
            this.apiUrl = `${this.baseUrl}/wos-starter/${this.apiVersion}`;
        }
    }

    /**
     * Describe what this source supports.
     *
     * @returns {object} Capability flags and the list of supported search options.
     */
    getCapabilities() {
        return {
            search: true,
            download: false,
            fullText: false,
            citations: true,
            requiresApiKey: true,
            supportedOptions: ['maxResults', 'year', 'author', 'journal', 'sortBy', 'sortOrder']
        };
    }

    /**
     * Get the UIDs of the documents referenced by a paper.
     *
     * @param {string} uid - Web of Science UID of the paper.
     * @returns {Promise<string[]>} Referenced UIDs; empty when no API key is set or the request fails.
     */
    async getReferenceIds(uid) {
        if (!this.apiKey) {
            return [];
        }
        try {
            const response = await this.makeApiRequest(`/documents/${uid}/references`, {
                method: 'GET',
                params: { db: 'WOS', limit: 50 }
            });
            const hits = response.data?.hits || [];
            return hits.map((hit) => hit.uid).filter(Boolean);
        }
        catch (error) {
            // Best effort: log the failure and report no references.
            logDebug(`Error getting reference IDs for UT ${uid}:`, error);
            return [];
        }
    }

    /**
     * Get the UIDs of the documents that cite a paper.
     *
     * @param {string} uid - Web of Science UID of the paper.
     * @returns {Promise<string[]>} Citing UIDs; empty when no API key is set or the request fails.
     */
    async getCitationIds(uid) {
        if (!this.apiKey) {
            return [];
        }
        try {
            const response = await this.makeApiRequest(`/documents/${uid}/citing`, {
                method: 'GET',
                params: { db: 'WOS', limit: 100 }
            });
            const hits = response.data?.hits || [];
            return hits.map((hit) => hit.uid).filter(Boolean);
        }
        catch (error) {
            // Best effort: log the failure and report no citations.
            logDebug(`Error getting citation IDs for UT ${uid}:`, error);
            return [];
        }
    }

    /**
     * Get a paper together with its reference and citation UID lists.
     *
     * @param {string} uid - A DOI (recognised by the presence of '/') or a Web of Science UID.
     * @returns {Promise<object|null>} The paper with `references` and `extra.citationIds` filled in,
     *   or null when nothing is found or an error occurs.
     */
    async getPaperWithCitations(uid) {
        try {
            const query = uid.includes('/') ? `DO="${uid}"` : `UT="${uid}"`;
            const results = await this.search(query, { maxResults: 1 });
            if (results.length === 0) {
                return null;
            }
            const paper = results[0];
            const paperUid = paper.extra?.uid;
            if (paperUid) {
                // The two lookups are independent, so run them in parallel.
                const [refIds, citIds] = await Promise.all([
                    this.getReferenceIds(paperUid),
                    this.getCitationIds(paperUid)
                ]);
                paper.references = refIds;
                paper.extra = { ...paper.extra, citationIds: citIds };
            }
            return paper;
        }
        catch (error) {
            logDebug('Error getting paper with citations:', error);
            return null;
        }
    }

    /**
     * Search Web of Science papers.
     *
     * @param {string} query - Free-text query, or a query already containing WoS field tags.
     * @param {object} [options] - Search options (see buildSearchQuery / buildWosQuery).
     * @returns {Promise<object[]>} Parsed papers.
     * @throws {Error} When no API key is configured.
     */
    async search(query, options = {}) {
        if (!this.apiKey) {
            throw new Error('Web of Science API key is required');
        }
        try {
            const searchParams = this.buildSearchQuery(query, options);
            const response = await this.makeApiRequest('/documents', {
                method: 'GET',
                params: searchParams
            });
            return this.parseSearchResponse(response.data);
        }
        catch (error) {
            // NOTE(review): handleHttpError is inherited and presumably always throws;
            // if it ever returns, this method resolves to undefined - confirm in PaperSource.
            this.handleHttpError(error, 'search');
        }
    }

    /**
     * Web of Science does not support direct PDF download.
     *
     * @throws {Error} Always.
     */
    async downloadPdf(paperId, options) {
        throw new Error('Web of Science does not support direct PDF download. Please use the DOI or URL to access the paper through the publisher.');
    }

    /**
     * Web of Science does not provide full-text content.
     *
     * @throws {Error} Always.
     */
    async readPaper(paperId, options) {
        throw new Error('Web of Science does not provide full-text content. Only bibliographic metadata and abstracts are available.');
    }

    /**
     * Look up a paper by DOI.
     *
     * @param {string} doi - DOI to search for.
     * @returns {Promise<object|null>} The first matching paper, or null when none is found or the search fails.
     */
    async getPaperByDoi(doi) {
        try {
            const query = `DO="${doi}"`;
            const results = await this.search(query, { maxResults: 1 });
            return results.length > 0 ? results[0] : null;
        }
        catch (error) {
            logDebug('Error getting paper by DOI from Web of Science:', error);
            return null;
        }
    }

    /**
     * Get the citation count of a paper.
     *
     * @param {string} paperId - Web of Science UID of the paper.
     * @returns {Promise<number>} The citation count, or 0 when it is missing, not numeric,
     *   or the request fails.
     * @throws {Error} When no API key is configured.
     */
    async getCitationCount(paperId) {
        if (!this.apiKey) {
            throw new Error('Web of Science API key is required');
        }
        try {
            const response = await this.makeApiRequest(`/documents/${paperId}`, {
                method: 'GET'
            });
            const record = response.data?.Data?.[0];
            const citationData = record?.dynamic_data?.citation_related?.tc_list?.silo_tc;
            const count = Number.parseInt(citationData?.local_count, 10);
            // A missing or malformed count must not leak NaN to callers.
            return Number.isNaN(count) ? 0 : count;
        }
        catch (error) {
            logDebug('Error getting citation count:', error);
            return 0;
        }
    }

    /**
     * Build the query-string parameters for a `/documents` search.
     *
     * @param {string} query - User query.
     * @param {object} options - Search options (`databases`, `maxResults`, `sortBy`, `sortOrder`,
     *   plus the filters consumed by buildWosQuery).
     * @returns {object} Request parameters (`q`, `db`, `limit`, `page`, optional sort fields).
     */
    buildSearchQuery(query, options) {
        // Build the WoS query string - supports multiple topics and complex queries
        const formattedQuery = this.buildWosQuery(query, options);
        const params = {
            q: formattedQuery,
            db: options.databases?.join(',') || 'WOS',
            limit: Math.min(options.maxResults || 10, 100), // capped at 100 records per request
            page: 1
        };
        if (options.sortBy) {
            params.sortField = this.mapSortField(options.sortBy);
            if (options.sortOrder) {
                params.sortOrder = options.sortOrder.toUpperCase(); // upper-cased: ASC or DESC
            }
        }
        return params;
    }

    /**
     * Build a WoS-syntax query string from the user query and the filter options.
     *
     * A query that already contains a WoS field tag is used verbatim (not escaped); any other
     * query is escaped and wrapped in `TS=(...)` (topic search). Filters are joined with ` AND `.
     *
     * @param {string} query - User query.
     * @param {object} options - Filters: `year`, `author`, `journal`, `issn`, `volume`, `page`,
     *   `issue`, `documentTypes`, `pmid`, `doi`.
     * @returns {string} The combined query string.
     * @throws {Error} When the query fails the complexity validation.
     */
    buildWosQuery(query, options) {
        const queryParts = [];
        // Validate query complexity first
        const complexityCheck = validateQueryComplexity(query, {
            maxLength: 1000,
            maxBooleanOperators: 10
        });
        if (!complexityCheck.valid) {
            throw new Error(complexityCheck.error);
        }
        // Topic search - supports multiple keywords
        if (query && query.trim()) {
            if (WOS_FIELD_TAG_PATTERN.test(query)) {
                // The user supplied a fielded query; use it as-is (no escaping)
                queryParts.push(query);
            }
            else {
                // Simple query: search the TS (Topic) field
                const escapedQuery = escapeQueryValue(query, 'wos');
                queryParts.push(`TS=(${escapedQuery})`);
            }
        }
        // Year filter
        if (options.year) {
            // Accept numeric years as well as strings.
            const year = String(options.year);
            if (year.includes('-')) {
                // Year range, e.g. "2020-2023"
                const [startYear, endYear] = year.split('-');
                queryParts.push(`PY=(${startYear.trim()}-${endYear.trim()})`);
            }
            else {
                // Single year
                queryParts.push(`PY=${year}`);
            }
        }
        // Author filter
        if (options.author) {
            const escapedAuthor = escapeQueryValue(options.author, 'wos');
            queryParts.push(`AU=(${escapedAuthor})`);
        }
        // Journal filter
        if (options.journal) {
            const escapedJournal = escapeQueryValue(options.journal, 'wos');
            queryParts.push(`SO=(${escapedJournal})`);
        }
        // ISSN/ISBN filter (IS field tag)
        if (options.issn) {
            queryParts.push(`IS=${options.issn}`);
        }
        // Volume filter (VL field tag)
        if (options.volume) {
            queryParts.push(`VL=${options.volume}`);
        }
        // Page filter (PG field tag)
        if (options.page) {
            queryParts.push(`PG=${options.page}`);
        }
        // Issue filter (CS field tag)
        if (options.issue) {
            queryParts.push(`CS=${options.issue}`);
        }
        // Document type filter (DT field tag)
        if (options.documentTypes && options.documentTypes.length > 0) {
            const dtQuery = options.documentTypes.map(dt => `"${dt}"`).join(' OR ');
            queryParts.push(`DT=(${dtQuery})`);
        }
        // PubMed ID filter (PMID field tag)
        if (options.pmid) {
            queryParts.push(`PMID=${options.pmid}`);
        }
        // DOI filter (DO field tag)
        if (options.doi) {
            queryParts.push(`DO="${options.doi}"`);
        }
        // Join all parts with AND
        return queryParts.join(' AND ');
    }

    /**
     * Strip double quotes and parentheses from a query fragment and trim it.
     *
     * @param {string} query - Raw fragment.
     * @returns {string} The cleaned fragment, or '' for a falsy input.
     */
    escapeWosQuery(query) {
        if (!query) {
            return '';
        }
        return query
            .replace(/"/g, '') // remove quotes
            .replace(/[\(\)]/g, '') // remove parentheses
            .trim();
    }

    /**
     * Map a generic sort key to the field code sent as `sortField`.
     *
     * @param {string} sortBy - One of 'relevance', 'date', 'citations', 'title', 'author', 'journal'
     *   (case-insensitive).
     * @returns {string} The mapped field code; 'relevance' for unknown keys.
     */
    mapSortField(sortBy) {
        // NOTE(review): confirm these codes against the Starter API's documented sortField values.
        const fieldMap = {
            'relevance': 'relevance',
            'date': 'PD', // Publication Date
            'citations': 'TC', // Times Cited
            'title': 'TI', // Title
            'author': 'AU', // Author
            'journal': 'SO' // Source (Journal)
        };
        return fieldMap[sortBy.toLowerCase()] || 'relevance';
    }

    /**
     * Convert a search response body into papers.
     *
     * @param {object} data - Response body; expected to carry a `hits` array.
     * @returns {object[]} Parsed papers; records that fail to parse are dropped.
     */
    parseSearchResponse(data) {
        // Optional chaining also covers an empty (undefined/null) response body.
        if (!Array.isArray(data?.hits)) {
            logDebug('WoS: No hits found in response or hits is not an array');
            return [];
        }
        if (process.env.NODE_ENV === 'development') {
            logDebug(`WoS: Found ${data.hits.length} hits out of ${data.metadata?.total || 0} total`);
        }
        return data.hits.map(record => this.parseWoSRecord(record))
            .filter(paper => paper !== null);
    }

    /**
     * Convert a single WoS record into a paper.
     *
     * @param {object} record - One element of the response `hits` array.
     * @returns {object|null} The paper built by PaperFactory, or null when parsing throws.
     */
    parseWoSRecord(record) {
        try {
            // Basic information
            const title = record.title || 'No title available';
            const authors = record.names?.authors?.map(author => author.displayName) || [];
            const abstractText = record.abstract || '';
            // Publication information; only the year is known, so the date is pinned to January 1st
            const year = record.source?.publishYear;
            const publishedDate = year ? new Date(year, 0, 1) : null;
            const journal = record.source?.sourceTitle || '';
            // DOI
            const doi = record.identifiers?.doi || '';
            // Citation count
            const citationCount = record.citations?.[0]?.citingArticlesCount || 0;
            // Keywords
            const keywords = record.keywords?.authorKeywords || [];
            // Full-record URL
            const wosUrl = `https://www.webofscience.com/wos/woscc/full-record/${record.uid}`;
            return PaperFactory.create({
                paperId: record.uid,
                title: this.cleanText(title),
                authors: authors,
                abstract: this.cleanText(abstractText),
                doi: doi,
                publishedDate: publishedDate,
                pdfUrl: '', // no direct PDF link is taken from the record
                url: wosUrl,
                source: 'webofscience',
                categories: record.types || [],
                keywords: keywords,
                citationCount: citationCount,
                journal: journal,
                volume: record.source?.volume || undefined,
                issue: record.source?.issue || undefined,
                pages: record.source?.pages || undefined,
                year: year,
                extra: {
                    uid: record.uid,
                    doctype: record.types?.[0],
                    sourceTypes: record.sourceTypes
                }
            });
        }
        catch (error) {
            logDebug('Error parsing WoS record:', error);
            logDebug('Record data:', record);
            return null;
        }
    }

    /**
     * Format the page range found in `pubInfo.page` (`@begin` / `@end` keys).
     *
     * @param {object} pubInfo - Object that may carry a `page` entry.
     * @returns {string|undefined} "begin-end", just the begin page, or undefined when unavailable.
     */
    extractPages(pubInfo) {
        if (!pubInfo?.page) {
            return undefined;
        }
        const beginPage = pubInfo.page['@begin'];
        const endPage = pubInfo.page['@end'];
        if (beginPage && endPage) {
            return `${beginPage}-${endPage}`;
        }
        else if (beginPage) {
            return beginPage;
        }
        return undefined;
    }

    /**
     * Send an API request, with one automatic retry on the other API version.
     *
     * @param {string} endpoint - Path appended to the current `apiUrl`.
     * @param {object} config - axios request config (`method`, `params`, optional `headers`).
     * @param {boolean} [isRetry=false] - True when this call is the fallback retry.
     * @returns {Promise<object>} The axios response.
     * @throws The error of the last failed attempt.
     */
    async makeApiRequest(endpoint, config, isRetry = false) {
        const url = `${this.apiUrl}${endpoint}`;
        const requestConfig = {
            ...config,
            headers: {
                'X-ApiKey': this.apiKey,
                'Content-Type': 'application/json',
                'User-Agent': USER_AGENT,
                ...config.headers
            },
            timeout: TIMEOUTS.DEFAULT
        };
        // Debug logs only in development to avoid noisy stderr in CI/production
        if (process.env.NODE_ENV === 'development') {
            logDebug(`WoS API Request: ${config.method} ${url} (version: ${this.apiVersion})`);
            logDebug('WoS Request params:', config.params);
        }
        try {
            const response = await axios(url, requestConfig);
            if (process.env.NODE_ENV === 'development') {
                logDebug(`WoS API Response: ${response.status} ${response.statusText}`);
                // JSON.stringify returns undefined for an undefined body; default to '' so that
                // logging cannot throw and be mistaken for a network failure below.
                const preview = JSON.stringify(response.data, null, 2) ?? '';
                logDebug('WoS Response data preview:', preview.substring(0, 500));
            }
            // Reset fallback state on success
            this.resetFallbackState();
            return response;
        }
        catch (error) {
            const status = error.response?.status;
            if (process.env.NODE_ENV === 'development') {
                logDebug(`WoS API Error (${this.apiVersion}):`, {
                    status,
                    statusText: error.response?.statusText,
                    data: error.response?.data,
                    config: {
                        url: error.config?.url,
                        method: error.config?.method,
                        params: error.config?.params
                    }
                });
            }
            // Try fallback version for connection/server errors (not auth errors)
            // 404, 500, 502, 503, 504, or network errors trigger fallback
            const shouldFallback = !isRetry && (!status || // Network error
                status === 404 || // Not found (version mismatch)
                status >= 500 // Server errors
            );
            if (shouldFallback && this.switchToFallbackVersion()) {
                logDebug(`Retrying with WoS API ${this.apiVersion}...`);
                return this.makeApiRequest(endpoint, config, true);
            }
            if (isRetry) {
                // The fallback version failed too. Go back to the preferred version so the
                // instance is not left pinned to the fallback with no way to switch again.
                this.resetFallbackState();
            }
            throw error;
        }
    }

    /**
     * Check the API key by running a minimal search.
     *
     * @returns {Promise<boolean>} false when no key is set or the search fails with 401/403;
     *   true otherwise (other failures are treated as "key may be valid").
     */
    async validateApiKey() {
        if (!this.apiKey) {
            return false;
        }
        try {
            await this.search('test', { maxResults: 1 });
            return true;
        }
        catch (error) {
            // An invalid API key usually yields 401 or 403
            if (error.response?.status === 401 || error.response?.status === 403) {
                return false;
            }
            // Other errors may be network problems; assume the key may be valid
            return true;
        }
    }
}
//# sourceMappingURL=WebOfScienceSearcher.js.map