paper-search-mcp-nodejs
Version:
A Node.js MCP server for searching and downloading academic papers from multiple sources, including arXiv, PubMed, bioRxiv, Web of Science, and more.
188 lines • 7.26 kB
JavaScript
/**
* bioRxiv API integration module
* Supports searching bioRxiv and medRxiv preprint papers
*/
import axios from 'axios';
import * as fs from 'fs';
import * as path from 'path';
import { PaperFactory } from '../models/Paper.js';
import { PaperSource } from './PaperSource.js';
/**
 * Paper source for the bioRxiv / medRxiv preprint servers.
 *
 * Listing requests go to `https://api.biorxiv.org/details/<server>/<from>/<to>`;
 * free-text matching is then done locally on the returned records.
 */
export class BioRxivSearcher extends PaperSource {
  /** Server name used in API and content URLs ('biorxiv' or 'medrxiv'). */
  serverType;

  /**
   * @param {string} [serverType='biorxiv'] - Preprint server to query.
   */
  constructor(serverType = 'biorxiv') {
    super(serverType, `https://api.biorxiv.org/details/${serverType}`);
    this.serverType = serverType;
  }

  /**
   * Describe what this source supports.
   * @returns {object} Capability flags and the list of supported search options.
   */
  getCapabilities() {
    return {
      search: true,
      download: true,
      fullText: true,
      citations: false,
      requiresApiKey: false,
      supportedOptions: ['maxResults', 'days', 'category']
    };
  }

  /**
   * Search recent bioRxiv/medRxiv preprints.
   *
   * The API is asked for papers posted in the last `options.days` days, optionally
   * restricted to one category, and the result is filtered locally by `query`.
   *
   * @param {string} query - Free-text query; '*' or an empty/blank string means "no text filter".
   * @param {object} [options]
   * @param {number} [options.days=30] - Look-back window in days (non-positive or non-numeric values fall back to 30).
   * @param {string} [options.category] - Category to request from the API. When omitted, the
   *   query itself is sent as the category (the historical behavior of this method).
   * @param {number} [options.maxResults=10] - Maximum number of papers returned.
   * @returns {Promise<Array>} Parsed papers, at most `maxResults` of them.
   */
  async search(query, options = {}) {
    try {
      // Date range: a missing, non-numeric or non-positive `days` falls back to 30.
      const requestedDays = Number(options.days);
      const days = Number.isFinite(requestedDays) && requestedDays > 0 ? requestedDays : 30;
      const endDate = new Date().toISOString().split('T')[0];
      const startDate = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString().split('T')[0];
      // NOTE(review): this.baseUrl is presumably set by the PaperSource constructor — confirm.
      const searchUrl = `${this.baseUrl}/${startDate}/${endDate}`;
      const params = {
        cursor: 0
      };
      // Category filter: an explicit options.category wins; otherwise the
      // (trimmed) query is reused so existing callers keep their behavior.
      // Blank queries and '*' never produce a category.
      const term = typeof query === 'string' ? query.trim() : '';
      const explicitCategory = typeof options.category === 'string' ? options.category.trim() : '';
      const categorySource = explicitCategory || (term !== '*' ? term : '');
      if (categorySource) {
        params.category = categorySource.toLowerCase().replace(/\s+/g, '_');
      }
      console.error(`🔍 ${this.serverType} API Request: GET ${searchUrl}`);
      console.error(`📋 ${this.serverType} Request params:`, params);
      const response = await axios.get(searchUrl, {
        params,
        timeout: 30000,
        headers: {
          'User-Agent': 'Paper-Search-MCP/1.0 (Academic Research Tool)'
        }
      });
      console.error(`✅ ${this.serverType} API Response: ${response.status} ${response.statusText}`);
      const papers = this.parseSearchResponse(response.data, query, options);
      console.error(`📄 ${this.serverType} Parsed ${papers.length} papers`);
      return papers.slice(0, options.maxResults || 10);
    }
    catch (error) {
      console.error(`❌ ${this.serverType} Search Error:`, error.message);
      // NOTE(review): handleHttpError is inherited; presumably it throws — confirm.
      this.handleHttpError(error, 'search');
    }
  }

  /**
   * Download a paper's PDF into `options.savePath`.
   *
   * The file is streamed to `<name>.pdf.part` and renamed only after the write
   * stream has closed, so an interrupted download can never be mistaken for a
   * cached PDF by a later call.
   *
   * @param {string} paperId - Paper identifier (the DOI as produced by search). A trailing
   *   `v<number>` is used as the version; otherwise version 1 is requested.
   * @param {object} [options]
   * @param {string} [options.savePath='./downloads'] - Target directory (created if missing).
   * @param {boolean} [options.overwrite] - Re-download even if the file already exists.
   * @returns {Promise<string>} Path of the saved (or already present) PDF.
   */
  async downloadPdf(paperId, options = {}) {
    try {
      const savePath = options.savePath || './downloads';
      // Do not append "v1" when the caller already supplied a versioned id.
      const versionedId = /v\d+$/.test(paperId) ? paperId : `${paperId}v1`;
      const pdfUrl = `https://www.${this.serverType}.org/content/${versionedId}.full.pdf`;
      // recursive: true makes this a no-op when the directory already exists.
      fs.mkdirSync(savePath, { recursive: true });
      const filename = `${paperId.replace(/\//g, '_')}.pdf`;
      const filePath = path.join(savePath, filename);
      // Reuse a previously downloaded file unless told otherwise.
      if (fs.existsSync(filePath) && !options.overwrite) {
        return filePath;
      }
      const response = await axios.get(pdfUrl, {
        responseType: 'stream',
        timeout: 60000,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
      });
      const partialPath = `${filePath}.part`;
      try {
        // Awaited inside the try so stream failures reach handleHttpError below.
        await new Promise((resolve, reject) => {
          const writer = fs.createWriteStream(partialPath);
          const fail = (streamError) => {
            response.data.destroy();
            writer.destroy();
            reject(streamError);
          };
          // Errors on the source are not forwarded by pipe(); watch both ends.
          response.data.on('error', fail);
          writer.on('error', fail);
          // 'close' fires after the descriptor is released, which makes the
          // rename below safe; after a failure this resolve is a no-op.
          writer.on('close', resolve);
          response.data.pipe(writer);
        });
        fs.renameSync(partialPath, filePath);
      }
      catch (streamError) {
        // Best-effort cleanup of the partial file; the original error matters more.
        fs.unlink(partialPath, () => { });
        throw streamError;
      }
      return filePath;
    }
    catch (error) {
      this.handleHttpError(error, 'download PDF');
    }
  }

  /**
   * Make sure the paper's PDF is available locally and report where it is.
   *
   * No text extraction is performed; the returned string only states the
   * location of the PDF.
   *
   * @param {string} paperId - Paper identifier, as accepted by downloadPdf.
   * @param {object} [options] - Same options as downloadPdf.
   * @returns {Promise<string>} Human-readable message containing the PDF path.
   */
  async readPaper(paperId, options = {}) {
    try {
      const savePath = options.savePath || './downloads';
      const filePath = path.join(savePath, `${paperId.replace(/\//g, '_')}.pdf`);
      // Download first when the PDF is not on disk yet.
      if (!fs.existsSync(filePath)) {
        await this.downloadPdf(paperId, options);
      }
      return `PDF file downloaded at: ${filePath}. Full text extraction requires additional PDF parsing implementation.`;
    }
    catch (error) {
      this.handleHttpError(error, 'read paper');
    }
  }

  /**
   * Convert an API response into papers, keeping only records that match `query`.
   *
   * A record matches when the trimmed, lower-cased query is a substring of its
   * title, abstract, authors or category. Missing fields are treated as empty
   * text rather than aborting the whole search.
   *
   * @param {object} data - Response body; papers are read from `data.collection`.
   * @param {string} query - Free-text query; '*' or blank disables filtering.
   * @param {object} options - Search options (currently unused here).
   * @returns {Array} Parsed papers; records that fail to parse are dropped.
   */
  parseSearchResponse(data, query, options) {
    const collection = data?.collection;
    if (!Array.isArray(collection)) {
      return [];
    }
    const needle = typeof query === 'string' ? query.trim().toLowerCase() : '';
    let candidates = collection;
    if (needle && needle !== '*') {
      const searchableFields = ['title', 'abstract', 'authors', 'category'];
      candidates = collection.filter(item => searchableFields.some(field => String(item?.[field] ?? '').toLowerCase().includes(needle)));
    }
    return candidates.map(item => this.parseBioRxivPaper(item))
      .filter(paper => paper !== null);
  }

  /**
   * Build a paper from one record of the API's `collection` array.
   *
   * @param {object} item - Raw API record (doi, title, authors, abstract, date, version, ...).
   * @returns {object|null} The created paper, or null when the record cannot be parsed.
   */
  parseBioRxivPaper(item) {
    try {
      // Authors arrive as one ';'-separated string; drop empty fragments.
      const authors = String(item.authors ?? '')
        .split(';')
        .map(author => author.trim())
        .filter(Boolean);
      const publishedDate = this.parseDate(item.date);
      const year = publishedDate?.getFullYear();
      // Fall back to version 1 so the URLs never contain "vundefined".
      const version = item.version ?? 1;
      const paperUrl = `https://www.${this.serverType}.org/content/${item.doi}v${version}`;
      const pdfUrl = `https://www.${this.serverType}.org/content/${item.doi}v${version}.full.pdf`;
      return PaperFactory.create({
        paperId: item.doi,
        title: this.cleanText(item.title),
        authors: authors,
        abstract: this.cleanText(item.abstract),
        doi: item.doi,
        publishedDate: publishedDate,
        pdfUrl: pdfUrl,
        url: paperUrl,
        source: this.serverType,
        categories: item.category ? [item.category] : [],
        keywords: [],
        citationCount: 0,
        year: year,
        extra: {
          version: item.version,
          type: item.type,
          license: item.license,
          server: item.server,
          corresponding_author: item.author_corresponding,
          corresponding_institution: item.author_corresponding_institution
        }
      });
    }
    catch (error) {
      console.error(`Error parsing ${this.serverType} paper:`, error);
      return null;
    }
  }
}
/**
 * medRxiv searcher — a BioRxivSearcher bound to the 'medrxiv' server.
 */
export class MedRxivSearcher extends BioRxivSearcher {
  constructor() {
    // The server name passed to the parent is the only difference from the base class.
    const server = 'medrxiv';
    super(server);
  }
}
//# sourceMappingURL=BioRxivSearcher.js.map