UNPKG

paper-search-mcp-nodejs

Version:

A Node.js MCP server for searching and downloading academic papers from multiple sources, including arXiv, PubMed, bioRxiv, Web of Science, and more.

340 lines 13.9 kB
/**
 * Scopus (Elsevier) Searcher
 *
 * Documentation: https://dev.elsevier.com/documentation/SCOPUSSearchAPI.wadl
 * API Endpoints:
 * - Search API: https://api.elsevier.com/content/search/scopus
 * - Abstract API: https://api.elsevier.com/content/abstract/scopus_id/
 *
 * Required API Key: Yes (X-ELS-APIKey header or apikey parameter)
 * Get API key from: https://dev.elsevier.com/apikey/manage
 *
 * Scopus is the largest abstract and citation database of peer-reviewed literature
 */
import axios from 'axios';
import { PaperSource } from './PaperSource.js';
import { PaperFactory } from '../models/Paper.js';
import { RateLimiter } from '../utils/RateLimiter.js';
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
import { logDebug } from '../utils/Logger.js';

/** Short document-type codes accepted by `search()` and the label placed inside DOCTYPE(). */
const DOC_TYPE_LABELS = new Map([
    ['ar', 'Article'],
    ['cp', 'Conference Paper'],
    ['re', 'Review'],
    ['bk', 'Book'],
    ['ch', 'Book Chapter']
]);

/** Wraps a value in an array; `null`/`undefined` become an empty array. */
function toArray(value) {
    if (value == null) {
        return [];
    }
    return Array.isArray(value) ? value : [value];
}

/**
 * Reads the text of a response node. Objects are read through `$` (the key the
 * rest of this file uses) with `#text` as a fallback; primitives are stringified.
 * Returns `undefined` when there is no text.
 */
function textOf(node) {
    if (node == null) {
        return undefined;
    }
    if (typeof node === 'object') {
        const text = node.$ ?? node['#text'];
        return text == null ? undefined : String(text);
    }
    return String(node);
}

/** Parses a base-10 integer, returning `undefined` for missing or non-numeric input. */
function parseCount(raw) {
    if (raw == null || raw === '') {
        return undefined;
    }
    const parsed = Number.parseInt(String(raw), 10);
    return Number.isNaN(parsed) ? undefined : parsed;
}

/** Converts a date string to a `Date`, returning `null` when absent or unparseable. */
function parseDate(raw) {
    if (!raw) {
        return null;
    }
    const date = new Date(raw);
    return Number.isNaN(date.getTime()) ? null : date;
}

/**
 * Reduces an identifier to the bare id expected by the scopus_id endpoints by
 * stripping a leading "SCOPUS_ID:" (the `dc:identifier` form) or "2-s2.0-"
 * (presumably the EID form — `search()` returns the EID as `paperId`).
 */
function normalizeScopusId(id) {
    return String(id ?? '')
        .trim()
        .replace(/^SCOPUS_ID:/i, '')
        .replace(/^2-s2\.0-/i, '');
}

/** Builds the ` AND PUBYEAR ...` clause(s) for a single year or a "start-end" range. */
function buildYearClause(year) {
    const text = String(year).trim();
    if (!text.includes('-')) {
        return ` AND PUBYEAR = ${text}`;
    }
    const [startText, endText] = text.split('-');
    const startYear = Number.parseInt(startText, 10);
    const endYear = Number.parseInt(endText, 10);
    let clause = '';
    // Open-ended or malformed bounds are skipped rather than emitted as NaN.
    if (Number.isFinite(startYear)) {
        clause += ` AND PUBYEAR > ${startYear - 1}`;
    }
    if (Number.isFinite(endYear)) {
        clause += ` AND PUBYEAR < ${endYear + 1}`;
    }
    return clause;
}

/** Formats one author node as "given surname", falling back to its indexed name. */
function formatAuthorName(author) {
    if (author == null || typeof author !== 'object') {
        return '';
    }
    const preferred = author['preferred-name'] ?? author;
    const fullName = [preferred['ce:given-name'], preferred['ce:surname']]
        .filter(Boolean)
        .join(' ');
    return fullName || preferred['ce:indexed-name'] || author['ce:indexed-name'] || '';
}

/**
 * Extracts creator names from a `dc:creator` value. Accepts a plain string, an
 * array of text nodes, or an object wrapping an `author` list
 * (NOTE(review): the wrapped form is assumed — confirm against a live response).
 */
function creatorNames(creator) {
    if (creator == null) {
        return [];
    }
    if (typeof creator === 'string') {
        return creator ? [creator] : [];
    }
    const nodes = Array.isArray(creator) ? creator : toArray(creator.author ?? creator);
    return nodes.map((node) => textOf(node) ?? formatAuthorName(node)).filter(Boolean);
}

/** Request config for direct (non-client) Elsevier calls, mirroring the shared client's timeout and headers. */
function directRequestConfig(apiKey, params) {
    return {
        params,
        timeout: TIMEOUTS.DEFAULT,
        headers: {
            'Accept': 'application/json',
            'User-Agent': USER_AGENT,
            'X-ELS-APIKey': apiKey
        }
    };
}

/**
 * Paper source backed by the Elsevier Scopus APIs.
 *
 * Two keys are supported: `elsevierApiKey` (constructor `apiKey` or the
 * ELSEVIER_API_KEY environment variable) and `searchApiKey` (constructor
 * `searchApiKey`, SCOPUS_SEARCH_API_KEY, or the Elsevier key as a fallback).
 * The shared axios client sends `searchApiKey`; the reference and citation
 * lookups send `elsevierApiKey`.
 */
export class ScopusSearcher extends PaperSource {
    client;
    rateLimiter;
    searchApiKey;
    elsevierApiKey;

    /**
     * @param {string} [apiKey] - Elsevier API key.
     * @param {string} [searchApiKey] - Optional dedicated key for the Search API.
     */
    constructor(apiKey, searchApiKey) {
        super('scopus', 'https://api.elsevier.com', apiKey);
        // Support two API keys: one for search, one for other operations
        this.elsevierApiKey = apiKey || process.env.ELSEVIER_API_KEY;
        this.searchApiKey = searchApiKey || process.env.SCOPUS_SEARCH_API_KEY || this.elsevierApiKey;
        this.client = axios.create({
            baseURL: 'https://api.elsevier.com',
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                'Accept': 'application/json',
                'User-Agent': USER_AGENT,
                ...(this.searchApiKey ? { 'X-ELS-APIKey': this.searchApiKey } : {})
            }
        });
        // Scopus rate limits (same as Elsevier):
        // - Without key: 20 requests per minute
        // - With key: 10 requests per second (600 per minute)
        const requestsPerSecond = this.searchApiKey ? 10 : 0.33;
        this.rateLimiter = new RateLimiter({
            requestsPerSecond,
            burstCapacity: this.searchApiKey ? 20 : 5
        });
    }

    /**
     * Searches Scopus by title/abstract/keywords with optional filters.
     *
     * @param {string} query - Text placed inside TITLE-ABS-KEY().
     * @param {object} [options] - Supports maxResults (capped at 25), author,
     *   journal, affiliation, subject, year ("YYYY" or "YYYY-YYYY"), openAccess
     *   and documentType ('ar' | 'cp' | 're' | 'bk' | 'ch').
     * @returns {Promise<Array>} Parsed papers.
     * @throws {Error} When no API key is configured.
     */
    async search(query, options = {}) {
        // The client authenticates with searchApiKey, which may come from the
        // environment, so accept either key rather than only this.apiKey.
        if (!this.searchApiKey && !this.apiKey) {
            throw new Error('Scopus API key is required');
        }
        const maxResults = Math.min(options.maxResults || 10, 25); // Scopus max is 25 per request
        const papers = [];
        try {
            // Build Scopus search query
            let searchQuery = `TITLE-ABS-KEY(${query})`;
            if (options.author) {
                searchQuery += ` AND AUTHOR(${options.author})`;
            }
            if (options.journal) {
                searchQuery += ` AND SRCTITLE(${options.journal})`;
            }
            if (options.affiliation) {
                searchQuery += ` AND AFFIL(${options.affiliation})`;
            }
            if (options.subject) {
                searchQuery += ` AND SUBJAREA(${options.subject})`;
            }
            if (options.year) {
                searchQuery += buildYearClause(options.year);
            }
            if (options.openAccess) {
                searchQuery += ' AND OPENACCESS(1)';
            }
            if (options.documentType) {
                // Unknown codes are passed through instead of producing DOCTYPE(undefined).
                const docType = DOC_TYPE_LABELS.get(options.documentType) ?? options.documentType;
                searchQuery += ` AND DOCTYPE(${docType})`;
            }
            await this.rateLimiter.waitForPermission();
            const response = await this.client.get('/content/search/scopus', {
                params: {
                    query: searchQuery,
                    count: maxResults,
                    start: 0,
                    view: 'COMPLETE',
                    field: 'dc:identifier,dc:title,dc:creator,prism:publicationName,prism:coverDate,prism:doi,prism:url,prism:volume,prism:issueIdentifier,prism:pageRange,citedby-count,dc:description,authkeywords,author,affiliation,openaccess,eid'
                }
            });
            const entries = toArray(response.data?.['search-results']?.entry);
            for (const entry of entries) {
                // Skip placeholder entries that carry an `error` field instead of a
                // document (presumably how an empty result set is reported — confirm).
                if (!entry || entry.error) {
                    continue;
                }
                const paper = await this.parseEntry(entry);
                if (paper) {
                    papers.push(paper);
                }
            }
            return papers;
        }
        catch (error) {
            // NOTE(review): presumably handleHttpError rethrows; if it ever returns,
            // this method resolves to undefined — verify against PaperSource.
            this.handleHttpError(error, 'search');
        }
    }

    /**
     * Converts one Search API entry into a paper.
     *
     * @param {object} entry - Raw entry from `search-results.entry`.
     * @returns {Promise<object|null>} The paper, or `null` if parsing throws.
     */
    async parseEntry(entry) {
        try {
            // Keep names as an array; joining and re-splitting on ', ' breaks
            // names that themselves contain a comma.
            const listedAuthors = toArray(entry.author)
                .map((author) => author?.authname)
                .filter(Boolean);
            const authors = listedAuthors.length > 0
                ? listedAuthors
                : creatorNames(entry['dc:creator']);
            const affiliations = toArray(entry.affiliation)
                .map((affiliation) => affiliation?.affilname)
                .filter(Boolean);
            const doi = entry['prism:doi'];
            const paperUrl = entry['prism:url'] || (doi ? `https://doi.org/${doi}` : undefined);
            // authkeywords is a '|'-separated string
            const keywords = typeof entry.authkeywords === 'string'
                ? entry.authkeywords.split('|').map((keyword) => keyword.trim()).filter(Boolean)
                : [];
            return PaperFactory.create({
                paperId: entry.eid || entry['dc:identifier'] || '',
                title: entry['dc:title'] || '',
                authors,
                abstract: '', // Abstract not included in search results, need separate API call
                doi,
                publishedDate: parseDate(entry['prism:coverDate']),
                url: paperUrl,
                source: 'scopus',
                journal: entry['prism:publicationName'],
                volume: entry['prism:volume'],
                issue: entry['prism:issueIdentifier'],
                pages: entry['prism:pageRange'],
                citationCount: parseCount(entry['citedby-count']),
                keywords,
                extra: {
                    scopusId: entry['dc:identifier'],
                    eid: entry.eid,
                    affiliations,
                    documentType: entry.subtypeDescription,
                    issn: entry['prism:issn'],
                    eIssn: entry['prism:eIssn'],
                    openAccess: String(entry.openaccess) === '1' || entry.openaccessFlag === true
                }
            });
        }
        catch (error) {
            logDebug('Error parsing Scopus entry:', error);
            return null;
        }
    }

    /**
     * Retrieves the full abstract record for one document.
     *
     * @param {string} scopusId - Scopus id; "SCOPUS_ID:" and "2-s2.0-" prefixes are tolerated.
     * @returns {Promise<object|null>} The paper, or `null` when not found or on error.
     * @throws {Error} When no API key is configured.
     */
    async getAbstract(scopusId) {
        if (!this.searchApiKey && !this.apiKey) {
            throw new Error('Scopus API key is required');
        }
        try {
            await this.rateLimiter.waitForPermission();
            const idSegment = encodeURIComponent(normalizeScopusId(scopusId));
            const response = await this.client.get(`/content/abstract/scopus_id/${idSegment}`, {
                params: { view: 'FULL' }
            });
            const record = response.data?.['abstracts-retrieval-response'];
            const coredata = record?.coredata;
            if (!coredata) {
                return null;
            }
            // Prefer the detailed author list; fall back to dc:creator.
            let authors = toArray(record.authors?.author).map(formatAuthorName).filter(Boolean);
            if (authors.length === 0) {
                authors = creatorNames(coredata['dc:creator']);
            }
            // Extract subjects/keywords
            const keywords = toArray(record.subject?.subject)
                .map((subject) => textOf(subject))
                .filter(Boolean);
            const doi = coredata['prism:doi'];
            return PaperFactory.create({
                paperId: scopusId,
                title: coredata['dc:title'] || '',
                authors,
                abstract: coredata['dc:description'] || '',
                doi,
                publishedDate: parseDate(coredata['prism:coverDate']),
                url: doi ? `https://doi.org/${doi}` : undefined,
                source: 'scopus',
                journal: coredata['prism:publicationName'],
                volume: coredata['prism:volume'],
                issue: coredata['prism:issueIdentifier'],
                pages: coredata['prism:pageRange'],
                citationCount: parseCount(coredata['citedby-count']),
                keywords,
                extra: {
                    scopusId: coredata['dc:identifier'],
                    eid: coredata.eid,
                    pubmedId: coredata['pubmed-id'],
                    issn: coredata['prism:issn']
                }
            });
        }
        catch (error) {
            logDebug('Scopus abstract retrieval error:', error?.message ?? error);
            return null;
        }
    }

    /**
     * Describes what this source supports.
     * NOTE(review): search() also honours affiliation, subject, openAccess and
     * documentType, which are not listed in supportedOptions.
     */
    getCapabilities() {
        return {
            search: true,
            download: false,
            fullText: false,
            citations: true,
            requiresApiKey: true,
            supportedOptions: ['maxResults', 'year', 'author', 'journal']
        };
    }

    /**
     * PDF download is not supported for Scopus.
     * @throws {Error} Always.
     */
    async downloadPdf(paperId, options = {}) {
        throw new Error('PDF download requires institutional access for Scopus');
    }

    /**
     * Returns the abstract text of a paper.
     *
     * @param {string} paperId - Identifier passed to getAbstract().
     * @returns {Promise<string>} The abstract, or 'Abstract not available'.
     * @throws {Error} When the paper cannot be retrieved.
     */
    async readPaper(paperId, options = {}) {
        const paper = await this.getAbstract(paperId);
        if (!paper) {
            throw new Error('Paper not found');
        }
        return paper.abstract || 'Abstract not available';
    }

    /**
     * Gets the Scopus IDs of the works this paper references.
     *
     * @param {string} scopusId - Scopus id of the citing paper.
     * @returns {Promise<string[]>} Reference ids; empty without a key or on error.
     */
    async getReferenceIds(scopusId) {
        if (!this.elsevierApiKey) {
            return [];
        }
        try {
            await this.rateLimiter.waitForPermission();
            const idSegment = encodeURIComponent(normalizeScopusId(scopusId));
            const response = await axios.get(
                `https://api.elsevier.com/content/abstract/scopus_id/${idSegment}`,
                directRequestConfig(this.elsevierApiKey, { view: 'REF' })
            );
            const bibliography = response.data?.['abstracts-retrieval-response']?.item?.bibrecord?.tail?.bibliography;
            const refIds = [];
            for (const reference of toArray(bibliography?.reference)) {
                // itemid may be a single node or a list; take the first one with text.
                const itemIds = toArray(reference?.['ref-info']?.['refd-itemidlist']?.itemid);
                const refScopusId = itemIds.map((item) => textOf(item)).find(Boolean);
                if (refScopusId) {
                    refIds.push(refScopusId);
                }
            }
            return refIds;
        }
        catch (error) {
            logDebug(`Error getting reference IDs for Scopus ID ${scopusId}:`, error);
            return [];
        }
    }

    /**
     * Gets the Scopus IDs of the works that cite this paper.
     *
     * @param {string} scopusId - Scopus id of the cited paper.
     * @returns {Promise<Array>} Citing ids; empty without a key or on error.
     */
    async getCitationIds(scopusId) {
        if (!this.elsevierApiKey) {
            return [];
        }
        try {
            await this.rateLimiter.waitForPermission();
            const response = await axios.get(
                'https://api.elsevier.com/content/abstract/citations',
                directRequestConfig(this.elsevierApiKey, { scopus_id: normalizeScopusId(scopusId) })
            );
            const citeInfo = response.data?.['abstract-citations-response']?.citeInfoMatrix?.citeInfo;
            const citIds = [];
            for (const cite of toArray(citeInfo)) {
                const citeScopusId = cite?.['scopus-id'];
                if (citeScopusId) {
                    citIds.push(citeScopusId);
                }
            }
            return citIds;
        }
        catch (error) {
            logDebug(`Error getting citation IDs for Scopus ID ${scopusId}:`, error);
            return [];
        }
    }

    /**
     * Gets paper details together with its reference and citation ID lists.
     *
     * @param {string} paperId - Identifier passed to getAbstract().
     * @returns {Promise<object|null>} Paper with `references` and
     *   `extra.citationIds` populated, or `null` when unavailable.
     */
    async getPaperWithCitations(paperId) {
        try {
            const paper = await this.getAbstract(paperId);
            if (!paper) {
                return null;
            }
            const scopusId = normalizeScopusId(paper.extra?.scopusId) || normalizeScopusId(paperId);
            const [refIds, citIds] = await Promise.all([
                this.getReferenceIds(scopusId),
                this.getCitationIds(scopusId)
            ]);
            paper.references = refIds;
            paper.extra = {
                ...paper.extra,
                citationIds: citIds
            };
            return paper;
        }
        catch (error) {
            logDebug('Error getting paper with citations:', error);
            return null;
        }
    }
}
//# sourceMappingURL=ScopusSearcher.js.map