paper-search-mcp-nodejs
Version:
A Node.js MCP server for searching and downloading academic papers from multiple sources, including arXiv, PubMed, bioRxiv, Web of Science, and more.
211 lines • 8.21 kB
JavaScript
/**
* Wiley TDM (Text and Data Mining) API Searcher
*
* Documentation: https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining
* API Endpoints:
* - Search API: https://api.wiley.com/onlinelibrary/tdm/v1/articles
*
* Required: Wiley TDM Token (CR-TDM-Token header)
* Get token from: https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining
*/
import axios from 'axios';
import { PaperSource } from './PaperSource.js';
import { PaperFactory } from '../models/Paper.js';
import { RateLimiter } from '../utils/RateLimiter.js';
export class WileySearcher extends PaperSource {
client;
rateLimiter;
constructor(tdmToken) {
super('wiley', 'https://api.wiley.com/onlinelibrary/tdm/v1', tdmToken);
this.client = axios.create({
baseURL: 'https://api.wiley.com/onlinelibrary/tdm/v1',
headers: {
'Accept': 'application/json',
...(tdmToken ? { 'CR-TDM-Token': tdmToken } : {})
}
});
// Wiley rate limits:
// Conservative estimate: 100 requests per hour
this.rateLimiter = new RateLimiter({
requestsPerSecond: 0.028, // ~100 per hour
burstCapacity: 3
});
}
async search(query, options = {}) {
const customOptions = options;
if (!this.apiKey) {
throw new Error('Wiley TDM token is required');
}
const maxResults = Math.min(options.maxResults || 10, 100);
const papers = [];
try {
// Build search filters
const filters = [];
// Add query
filters.push(`title:${query} OR abstract:${query}`);
// Add author filter
if (options.author) {
filters.push(`author:${options.author}`);
}
// Add journal filter
if (options.journal) {
filters.push(`container-title:"${options.journal}"`);
}
// Add year filter
if (options.year) {
if (options.year.includes('-')) {
const [startYear, endYear] = options.year.split('-');
filters.push(`published:${startYear}-01-01:${endYear || '*'}-12-31`);
}
else {
filters.push(`published:${options.year}-01-01:${options.year}-12-31`);
}
}
// Add subject filter
if (customOptions.subject) {
filters.push(`subject:"${customOptions.subject}"`);
}
// Add open access filter
if (customOptions.openAccess) {
filters.push('license:*');
}
await this.rateLimiter.waitForPermission();
const response = await this.client.get('/articles', {
params: {
filter: filters.join(' AND '),
rows: maxResults,
offset: 0
}
});
if (response.data.items) {
for (const article of response.data.items) {
const paper = this.parseArticle(article);
if (paper) {
papers.push(paper);
}
}
}
return papers;
}
catch (error) {
console.error('Wiley search error:', error.message);
if (error.response?.status === 401) {
throw new Error('Invalid or missing Wiley TDM token');
}
if (error.response?.status === 429) {
throw new Error('Wiley rate limit exceeded. Please try again later.');
}
throw error;
}
}
parseArticle(article) {
try {
// Extract authors
let authors = '';
if (article.authors && article.authors.length > 0) {
authors = article.authors
.map(a => `${a.given} ${a.family}`.trim())
.join(', ');
}
// Extract publication date
let publishedDate = '';
const dateData = article.published || article['published-print'] || article['published-online'];
if (dateData && dateData['date-parts'] && dateData['date-parts'][0]) {
const [year, month, day] = dateData['date-parts'][0];
publishedDate = `${year}${month ? `-${String(month).padStart(2, '0')}` : ''}${day ? `-${String(day).padStart(2, '0')}` : ''}`;
}
// Extract PDF URL if available
let pdfUrl;
if (article.link) {
const pdfLink = article.link.find(l => l['content-type'] === 'application/pdf' ||
l['content-type'] === 'unspecified' && l.URL.includes('.pdf'));
if (pdfLink) {
pdfUrl = pdfLink.URL;
}
}
// Construct paper URL
const paperUrl = article.URL || (article.doi ? `https://doi.org/${article.doi}` : undefined);
return PaperFactory.create({
paperId: article.doi || '',
title: article.title || '',
authors: authors ? authors.split(', ') : [],
abstract: article.abstract || '',
doi: article.doi,
publishedDate: publishedDate ? new Date(publishedDate) : null,
pdfUrl: pdfUrl,
url: paperUrl,
source: 'Wiley',
journal: article['container-title'],
volume: article.volume,
issue: article.issue,
pages: article.page,
extra: {
publisher: article.publisher,
type: article.type,
subjects: article.subject,
issn: article.ISSN,
licenses: article.license
}
});
}
catch (error) {
console.error('Error parsing Wiley article:', error);
return null;
}
}
async downloadPdf(doi, options = {}) {
// Search for the paper first
const papers = await this.search(doi, { maxResults: 1 });
if (papers.length === 0) {
throw new Error('Paper not found');
}
const paper = papers[0];
if (!paper.pdfUrl) {
throw new Error('PDF not available for this paper (may require institutional access)');
}
// Download PDF
const fs = await import('fs');
const path = await import('path');
const savePath = options.savePath || './downloads';
if (!fs.existsSync(savePath)) {
fs.mkdirSync(savePath, { recursive: true });
}
const fileName = `${doi.replace(/[\/\\:*?"<>|]/g, '_')}.pdf`;
const filePath = path.join(savePath, fileName);
try {
const response = await axios.get(paper.pdfUrl, {
responseType: 'stream',
headers: {
'CR-TDM-Token': this.apiKey
}
});
const writer = fs.createWriteStream(filePath);
response.data.pipe(writer);
return new Promise((resolve, reject) => {
writer.on('finish', () => resolve(filePath));
writer.on('error', reject);
});
}
catch (error) {
throw new Error(`Failed to download PDF: ${error.message}`);
}
}
getCapabilities() {
return {
search: true,
download: true, // With TDM token
fullText: false,
citations: false,
requiresApiKey: true,
supportedOptions: ['maxResults', 'year', 'author', 'journal']
};
}
async readPaper(paperId, options = {}) {
const papers = await this.search(paperId, { maxResults: 1 });
if (papers.length === 0) {
throw new Error('Paper not found');
}
return papers[0].abstract || 'Abstract not available';
}
}
//# sourceMappingURL=WileySearcher.js.map