paper-search-mcp-nodejs
Version:
A Node.js MCP server for searching and downloading academic papers from multiple sources, including arXiv, PubMed, bioRxiv, Web of Science, and more.
330 lines • 12.1 kB
JavaScript
/**
* Web of Science API集成模块
* 支持 Web of Science Starter API 和 Web of Science Researcher API
*/
import axios from 'axios';
import { PaperFactory } from '../models/Paper.js';
import { PaperSource } from './PaperSource.js';
export class WebOfScienceSearcher extends PaperSource {
apiUrl;
apiVersion;
constructor(apiKey, apiVersion = 'v1') {
super('webofscience', 'https://api.clarivate.com/apis', apiKey);
this.apiVersion = apiVersion;
this.apiUrl = `${this.baseUrl}/wos-starter/${this.apiVersion}`;
// 只在开发模式下输出调试信息
if (process.env.NODE_ENV === 'development') {
console.error(`🔧 WoS API URL: ${this.apiUrl}`);
}
}
getCapabilities() {
return {
search: true,
download: false, // WoS 通常不提供直接PDF下载
fullText: false, // 通常只有元数据
citations: true,
requiresApiKey: true,
supportedOptions: ['maxResults', 'year', 'author', 'journal', 'sortBy', 'sortOrder']
};
}
/**
* 搜索Web of Science论文
*/
async search(query, options = {}) {
if (!this.apiKey) {
throw new Error('Web of Science API key is required');
}
try {
const searchParams = this.buildSearchQuery(query, options);
const response = await this.makeApiRequest('/documents', {
method: 'GET',
params: searchParams
});
return this.parseSearchResponse(response.data);
}
catch (error) {
this.handleHttpError(error, 'search');
}
}
/**
* Web of Science 通常不支持直接PDF下载
*/
async downloadPdf(paperId, options) {
throw new Error('Web of Science does not support direct PDF download. Please use the DOI or URL to access the paper through the publisher.');
}
/**
* Web of Science 通常不提供全文内容
*/
async readPaper(paperId, options) {
throw new Error('Web of Science does not provide full-text content. Only bibliographic metadata and abstracts are available.');
}
/**
* 根据DOI获取论文详细信息
*/
async getPaperByDoi(doi) {
try {
const query = `DO="${doi}"`;
const results = await this.search(query, { maxResults: 1 });
return results.length > 0 ? results[0] : null;
}
catch (error) {
console.error('Error getting paper by DOI from Web of Science:', error);
return null;
}
}
/**
* 获取论文被引统计
*/
async getCitationCount(paperId) {
if (!this.apiKey) {
throw new Error('Web of Science API key is required');
}
try {
const response = await this.makeApiRequest(`/documents/${paperId}`, {
method: 'GET'
});
const record = response.data?.Data?.[0];
const citationData = record?.dynamic_data?.citation_related?.tc_list?.silo_tc;
return citationData ? parseInt(citationData.local_count, 10) : 0;
}
catch (error) {
console.error('Error getting citation count:', error);
return 0;
}
}
/**
* 构建搜索查询参数
*/
buildSearchQuery(query, options) {
// 构建WOS查询字符串 - 支持多主题和复杂查询
let formattedQuery = this.buildWosQuery(query, options);
const params = {
q: formattedQuery,
db: options.databases?.join(',') || 'WOS',
limit: Math.min(options.maxResults || 10, 100), // WOS API限制最大100条
page: 1
};
// 添加排序参数 - 使用正确的API参数名
if (options.sortBy) {
const sortField = this.mapSortField(options.sortBy);
params.sortBy = sortField; // 修正参数名从sortField到sortBy
// 添加排序顺序
if (options.sortOrder) {
params.sortOrder = options.sortOrder.toUpperCase(); // API要求大写: ASC 或 DESC
}
}
return params;
}
/**
* 构建WOS格式的查询字符串
*/
buildWosQuery(query, options) {
const queryParts = [];
// 处理主题搜索 - 支持多个关键词
if (query && query.trim()) {
// 转义特殊字符并处理多主题搜索
const escapedQuery = this.escapeWosQuery(query);
// 检查是否已经包含WOS字段标签
if (escapedQuery.includes('=')) {
// 用户提供了带字段标签的查询
queryParts.push(escapedQuery);
}
else {
// 简单查询,使用TS(Topic)字段
queryParts.push(`TS=(${escapedQuery})`);
}
}
// 添加年份过滤
if (options.year) {
if (options.year.includes('-')) {
// 年份范围 "2020-2023"
const [startYear, endYear] = options.year.split('-');
queryParts.push(`PY=(${startYear.trim()}-${endYear.trim()})`);
}
else {
// 单个年份
queryParts.push(`PY=${options.year}`);
}
}
// 添加作者过滤
if (options.author) {
const escapedAuthor = this.escapeWosQuery(options.author);
queryParts.push(`AU=(${escapedAuthor})`);
}
// 添加期刊过滤
if (options.journal) {
const escapedJournal = this.escapeWosQuery(options.journal);
queryParts.push(`SO=(${escapedJournal})`);
}
// 用AND连接所有查询部分
return queryParts.join(' AND ');
}
/**
* 转义WOS查询中的特殊字符
*/
escapeWosQuery(query) {
if (!query)
return '';
// 移除多余的引号和转义特殊字符
return query
.replace(/"/g, '') // 移除引号
.replace(/[\(\)]/g, '') // 移除括号(API会自动添加)
.trim();
}
/**
* 映射排序字段到WOS API格式
*/
mapSortField(sortBy) {
const fieldMap = {
'relevance': 'relevance',
'date': 'PD', // Publication Date - 更准确的日期排序字段
'citations': 'TC', // Times Cited
'title': 'TI', // Title
'author': 'AU', // Author
'journal': 'SO' // Source (Journal)
};
return fieldMap[sortBy.toLowerCase()] || 'relevance';
}
/**
* 解析搜索响应
*/
parseSearchResponse(data) {
if (!data.hits || !Array.isArray(data.hits)) {
console.error('❌ WoS: No hits found in response or hits is not an array');
return [];
}
console.error(`📊 WoS: Found ${data.hits.length} hits out of ${data.metadata?.total || 0} total`);
return data.hits.map(record => this.parseWoSRecord(record))
.filter(paper => paper !== null);
}
/**
* 解析单个WoS记录
*/
parseWoSRecord(record) {
try {
// 提取基本信息
const title = record.title || 'No title available';
const authors = record.names?.authors?.map(author => author.displayName) || [];
const abstractText = record.abstract || '';
// 提取出版信息
const year = record.source?.publishYear;
const publishedDate = year ? new Date(year, 0, 1) : null;
const journal = record.source?.sourceTitle || '';
// 提取DOI
const doi = record.identifiers?.doi || '';
// 提取被引次数
const citationCount = record.citations?.[0]?.citingArticlesCount || 0;
// 提取关键词
const keywords = record.keywords?.authorKeywords || [];
// 构建URL
const wosUrl = `https://www.webofscience.com/wos/woscc/full-record/${record.uid}`;
return PaperFactory.create({
paperId: record.uid,
title: this.cleanText(title),
authors: authors,
abstract: this.cleanText(abstractText),
doi: doi,
publishedDate: publishedDate,
pdfUrl: '', // WoS通常不提供直接PDF链接
url: wosUrl,
source: 'webofscience',
categories: record.types || [],
keywords: keywords,
citationCount: citationCount,
journal: journal,
volume: record.source?.volume || undefined,
issue: record.source?.issue || undefined,
pages: record.source?.pages || undefined,
year: year,
extra: {
uid: record.uid,
doctype: record.types?.[0],
sourceTypes: record.sourceTypes
}
});
}
catch (error) {
console.error('Error parsing WoS record:', error);
console.error('Record data:', record);
return null;
}
}
/**
* 提取页码信息
*/
extractPages(pubInfo) {
if (!pubInfo?.page)
return undefined;
const beginPage = pubInfo.page['@begin'];
const endPage = pubInfo.page['@end'];
if (beginPage && endPage) {
return `${beginPage}-${endPage}`;
}
else if (beginPage) {
return beginPage;
}
return undefined;
}
/**
* 发起API请求
*/
async makeApiRequest(endpoint, config) {
const url = `${this.apiUrl}${endpoint}`;
const requestConfig = {
...config,
headers: {
'X-ApiKey': this.apiKey,
'Content-Type': 'application/json',
'User-Agent': 'Paper-Search-MCP/1.0 (Academic Research Tool)',
...config.headers
},
timeout: 30000
};
// 调试日志 - 只在开发模式或详细日志模式下输出
if (process.env.NODE_ENV === 'development' || process.env.WOS_VERBOSE_LOGGING === 'true') {
console.error(`🔍 WoS API Request: ${config.method} ${url}`);
console.error(`📋 WoS Request params:`, config.params);
}
try {
const response = await axios(url, requestConfig);
if (process.env.NODE_ENV === 'development' || process.env.WOS_VERBOSE_LOGGING === 'true') {
console.error(`✅ WoS API Response: ${response.status} ${response.statusText}`);
console.error(`📄 WoS Response data preview:`, JSON.stringify(response.data, null, 2).substring(0, 500));
}
return response;
}
catch (error) {
console.error(`❌ WoS API Error:`, {
status: error.response?.status,
statusText: error.response?.statusText,
data: error.response?.data,
config: {
url: error.config?.url,
method: error.config?.method,
params: error.config?.params
}
});
throw error;
}
}
/**
* 验证API密钥
*/
async validateApiKey() {
if (!this.apiKey)
return false;
try {
await this.search('test', { maxResults: 1 });
return true;
}
catch (error) {
// API密钥无效通常返回401或403
if (error.response?.status === 401 || error.response?.status === 403) {
return false;
}
// 其他错误可能是网络问题,认为密钥可能有效
return true;
}
}
}
//# sourceMappingURL=WebOfScienceSearcher.js.map