UNPKG

paper-search-mcp-nodejs

Version:

A Node.js MCP server for searching and downloading academic papers from multiple sources, including arXiv, PubMed, bioRxiv, Web of Science, and more.

188 lines 7.26 kB
/** * bioRxiv API集成模块 * 支持bioRxiv和medRxiv预印本论文搜索 */ import axios from 'axios'; import * as fs from 'fs'; import * as path from 'path'; import { PaperFactory } from '../models/Paper.js'; import { PaperSource } from './PaperSource.js'; export class BioRxivSearcher extends PaperSource { serverType; constructor(serverType = 'biorxiv') { super(serverType, `https://api.biorxiv.org/details/${serverType}`); this.serverType = serverType; } getCapabilities() { return { search: true, download: true, fullText: true, citations: false, requiresApiKey: false, supportedOptions: ['maxResults', 'days', 'category'] }; } /** * 搜索bioRxiv/medRxiv论文 */ async search(query, options = {}) { try { // 计算日期范围 const days = options.days || 30; const endDate = new Date().toISOString().split('T')[0]; const startDate = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString().split('T')[0]; // 构建搜索URL const searchUrl = `${this.baseUrl}/${startDate}/${endDate}`; const params = { cursor: 0 }; // 添加分类过滤 if (query && query !== '*') { // 将查询转换为分类格式 const category = query.toLowerCase().replace(/\s+/g, '_'); params.category = category; } console.error(`🔍 ${this.serverType} API Request: GET ${searchUrl}`); console.error(`📋 ${this.serverType} Request params:`, params); const response = await axios.get(searchUrl, { params, timeout: 30000, headers: { 'User-Agent': 'Paper-Search-MCP/1.0 (Academic Research Tool)' } }); console.error(`✅ ${this.serverType} API Response: ${response.status} ${response.statusText}`); const papers = this.parseSearchResponse(response.data, query, options); console.error(`📄 ${this.serverType} Parsed ${papers.length} papers`); return papers.slice(0, options.maxResults || 10); } catch (error) { console.error(`❌ ${this.serverType} Search Error:`, error.message); this.handleHttpError(error, 'search'); } } /** * 下载PDF文件 */ async downloadPdf(paperId, options = {}) { try { const savePath = options.savePath || './downloads'; // 构建PDF URL const pdfUrl = `https://www.${this.serverType}.org/content/${paperId}v1.full.pdf`; // 确保保存目录存在 if (!fs.existsSync(savePath)) { fs.mkdirSync(savePath, { recursive: true }); } const filename = `${paperId.replace(/\//g, '_')}.pdf`; const filePath = path.join(savePath, filename); // 检查文件是否已存在 if (fs.existsSync(filePath) && !options.overwrite) { return filePath; } const response = await axios.get(pdfUrl, { responseType: 'stream', timeout: 60000, headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } }); const writer = fs.createWriteStream(filePath); response.data.pipe(writer); return new Promise((resolve, reject) => { writer.on('finish', () => resolve(filePath)); writer.on('error', reject); }); } catch (error) { this.handleHttpError(error, 'download PDF'); } } /** * 读取论文全文内容 */ async readPaper(paperId, options = {}) { try { const savePath = options.savePath || './downloads'; const filePath = path.join(savePath, `${paperId.replace(/\//g, '_')}.pdf`); // 如果PDF不存在,先下载 if (!fs.existsSync(filePath)) { await this.downloadPdf(paperId, options); } return `PDF file downloaded at: ${filePath}. Full text extraction requires additional PDF parsing implementation.`; } catch (error) { this.handleHttpError(error, 'read paper'); } } /** * 解析搜索响应 */ parseSearchResponse(data, query, options) { if (!data.collection || !Array.isArray(data.collection)) { return []; } // 如果有查询词,进行文本匹配过滤 let filteredCollection = data.collection; if (query && query !== '*' && query.trim()) { const queryLower = query.toLowerCase(); filteredCollection = data.collection.filter(item => item.title.toLowerCase().includes(queryLower) || item.abstract.toLowerCase().includes(queryLower) || item.authors.toLowerCase().includes(queryLower) || item.category.toLowerCase().includes(queryLower)); } return filteredCollection.map(item => this.parseBioRxivPaper(item)) .filter(paper => paper !== null); } /** * 解析单个bioRxiv论文 */ parseBioRxivPaper(item) { try { // 解析作者 const authors = item.authors.split(';').map(author => author.trim()); // 解析日期 const publishedDate = this.parseDate(item.date); const year = publishedDate?.getFullYear(); // 构建URL const paperUrl = `https://www.${this.serverType}.org/content/${item.doi}v${item.version}`; const pdfUrl = `https://www.${this.serverType}.org/content/${item.doi}v${item.version}.full.pdf`; return PaperFactory.create({ paperId: item.doi, title: this.cleanText(item.title), authors: authors, abstract: this.cleanText(item.abstract), doi: item.doi, publishedDate: publishedDate, pdfUrl: pdfUrl, url: paperUrl, source: this.serverType, categories: [item.category], keywords: [], citationCount: 0, year: year, extra: { version: item.version, type: item.type, license: item.license, server: item.server, corresponding_author: item.author_corresponding, corresponding_institution: item.author_corresponding_institution } }); } catch (error) { console.error(`Error parsing ${this.serverType} paper:`, error); return null; } } } /** * medRxiv搜索器 - 继承自BioRxivSearcher */ export class MedRxivSearcher extends BioRxivSearcher { constructor() { super('medrxiv'); } } //# sourceMappingURL=BioRxivSearcher.js.map