UNPKG

aiwg

Version:

Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platforms.

243 lines 8.38 kB
/**
 * Discovery service for unified research paper search
 *
 * @module research/services/discovery
 */
import { SemanticScholarClient } from '../clients/semantic-scholar.js';
import { CrossRefClient } from '../clients/crossref.js';
import { ArxivClient } from '../clients/arxiv.js';
import { CacheManager } from '../cache/manager.js';

/** Weight of the title term-match fraction in the relevance score. */
const TITLE_WEIGHT = 0.5;
/** Weight of the abstract term-match fraction in the relevance score. */
const ABSTRACT_WEIGHT = 0.3;
/** Weight of the log-scaled citation bonus in the relevance score. */
const CITATION_WEIGHT = 0.2;

/**
 * Split a query into lower-cased, non-empty terms.
 *
 * Empty terms are dropped on purpose: `''` is a substring of every string,
 * so leading/trailing whitespace (or an empty query) would otherwise count
 * as a match against every paper.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Lower-cased terms; empty when the query has no words.
 */
function tokenizeQuery(query) {
  return String(query ?? '')
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 0);
}

/**
 * Fraction of `terms` that occur as substrings of `text` (case-insensitive).
 *
 * @param {string|undefined|null} text - Text to search in.
 * @param {string[]} terms - Lower-cased terms.
 * @returns {number} Value in [0, 1]; 0 when text or terms are missing.
 */
function matchFraction(text, terms) {
  if (!text || terms.length === 0) {
    return 0;
  }
  const haystack = String(text).toLowerCase();
  const hits = terms.filter((term) => haystack.includes(term)).length;
  return hits / terms.length;
}

/**
 * Normalize a title for comparison: lower-case, strip non-word characters,
 * collapse whitespace.
 *
 * @param {string|undefined|null} title
 * @returns {string} Normalized title, possibly empty.
 */
function normalizeTitle(title) {
  return String(title ?? '')
    .toLowerCase()
    .replace(/[^\w\s]/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Build the deduplication key for a paper: DOI first, then arXiv ID, then
 * normalized title.
 *
 * @param {object} paper
 * @returns {string|null} Key, or null when the paper has no usable identity.
 */
function dedupKey(paper) {
  if (paper.doi) {
    return `doi:${String(paper.doi).toLowerCase()}`;
  }
  if (paper.arxivId) {
    return `arxiv:${String(paper.arxivId).toLowerCase()}`;
  }
  const normalizedTitle = normalizeTitle(paper.title);
  return normalizedTitle ? `title:${normalizedTitle}` : null;
}

/**
 * Run a search against one client, degrading to an empty list on failure so
 * one unavailable API does not fail the combined search.
 *
 * @param {{search: Function}} client - Client exposing `search(query, {limit, offset})`.
 * @param {string} label - Human-readable client name used in the warning.
 * @param {string} query
 * @param {number} limit
 * @param {number} offset
 * @returns {Promise<object[]>} The `papers` array of the response, or `[]`.
 */
async function searchClient(client, label, query, limit, offset) {
  try {
    const result = await client.search(query, { limit, offset });
    // A response without a `papers` array must not break the caller's spread.
    return Array.isArray(result?.papers) ? result.papers : [];
  } catch (error) {
    console.warn(`${label} search failed:`, error);
    return [];
  }
}

/**
 * Discovery service for unified search across API clients
 */
export class DiscoveryService {
  semanticScholar;
  crossref;
  arxiv;
  cache;

  /**
   * @param {object} [config] - Optional pre-built collaborators; any that is
   *   missing is replaced by a default instance.
   * @param {object} [config.semanticScholar]
   * @param {object} [config.crossref]
   * @param {object} [config.arxiv]
   * @param {object} [config.cache]
   */
  constructor(config = {}) {
    this.semanticScholar = config.semanticScholar || new SemanticScholarClient();
    this.crossref = config.crossref || new CrossRefClient();
    this.arxiv = config.arxiv || new ArxivClient();
    this.cache = config.cache || new CacheManager();
  }

  /**
   * Search across all configured API clients, then filter, deduplicate,
   * rank and truncate the merged results.
   *
   * @param {string} query - Search query.
   * @param {object} [options]
   * @param {number} [options.limit=10] - Passed to each client and used as
   *   the maximum size of the merged result.
   * @param {number} [options.offset=0] - Passed to each client.
   * @param {boolean} [options.useCache=true] - Read/write the result cache.
   * @param {number} [options.minYear] - Keep papers with `year >= minYear`.
   * @param {number} [options.maxYear] - Keep papers with `year <= maxYear`.
   * @param {number} [options.relevanceThreshold=0.0] - Minimum relevance score.
   * @param {boolean} [options.deduplicate=true] - Remove duplicate papers.
   * @returns {Promise<object[]>} Papers ordered by descending relevance.
   */
  async search(query, options = {}) {
    const {
      limit = 10,
      offset = 0,
      useCache = true,
      minYear,
      maxYear,
      relevanceThreshold = 0.0,
      deduplicate = true,
    } = options;

    // Every option that changes the result must be part of the key;
    // otherwise a result cached for one threshold/dedup setting would be
    // served for another.
    const cacheKey = useCache
      ? this.cache.generateKey('discovery:search', {
          query,
          limit,
          offset,
          minYear,
          maxYear,
          relevanceThreshold,
          deduplicate,
        })
      : null;

    if (useCache) {
      const cached = await this.cache.get(cacheKey);
      if (cached) {
        return cached;
      }
    }

    // Search all APIs in parallel
    const settled = await Promise.allSettled([
      this.searchSemanticScholar(query, limit, offset),
      this.searchCrossRef(query, limit, offset),
      this.searchArxiv(query, limit, offset),
    ]);

    // Collect successful results (Semantic Scholar, CrossRef, arXiv order)
    const allPapers = settled.flatMap((outcome) =>
      outcome.status === 'fulfilled' && Array.isArray(outcome.value) ? outcome.value : []
    );

    // Filter by year range. A paper without a `year` fails both comparisons
    // and is therefore excluded whenever a bound is supplied.
    let filtered = allPapers;
    if (minYear !== undefined) {
      filtered = filtered.filter((p) => p.year >= minYear);
    }
    if (maxYear !== undefined) {
      filtered = filtered.filter((p) => p.year <= maxYear);
    }

    // Deduplicate by DOI/arXiv ID/title
    if (deduplicate) {
      filtered = this.deduplicatePapers(filtered);
    }

    // Score each paper once, drop those under the threshold, then rank.
    // The sort is stable, so ties keep their collection order.
    const results = filtered
      .map((paper) => ({ paper, relevance: this.calculateRelevance(paper, query) }))
      .filter((entry) => entry.relevance >= relevanceThreshold)
      .sort((a, b) => b.relevance - a.relevance)
      .slice(0, limit)
      .map((entry) => entry.paper);

    if (useCache) {
      // NOTE(review): the third argument is kept as 'semantic-scholar' even
      // though results are merged from several sources - confirm what the
      // cache manager uses this value for.
      await this.cache.set(cacheKey, results, 'semantic-scholar');
    }
    return results;
  }

  /**
   * Analyze gaps in research corpus
   *
   * Currently a placeholder: the argument is ignored and a fixed report
   * structure with generic recommendations is returned.
   *
   * @param {string[]} _corpusRefIds - Unused.
   * @returns {Promise<object>} Report with empty findings and static recommendations.
   */
  async analyzeGaps(_corpusRefIds) {
    // For now, return a basic report structure
    // In a full implementation, this would analyze corpus metadata
    return {
      underrepresentedTopics: [],
      yearGaps: [],
      sourceTypeDistribution: {},
      recommendations: [
        'Corpus gap analysis requires corpus metadata',
        'Consider adding more recent publications',
        'Balance source types (journal vs conference vs preprint)',
      ],
    };
  }

  /**
   * Follow citation network starting from a paper
   *
   * The traversal is currently a stub, so the returned list is always empty.
   *
   * @param {string} paperId - Starting paper.
   * @param {number} [depth=1] - Maximum traversal depth.
   * @returns {Promise<object[]>} Collected papers.
   */
  async followCitations(paperId, depth = 1) {
    const results = [];
    const visited = new Set();
    await this.followCitationsRecursive(paperId, depth, visited, results);
    return results;
  }

  /**
   * Search Semantic Scholar; returns `[]` (after a warning) on failure.
   */
  async searchSemanticScholar(query, limit, offset) {
    return searchClient(this.semanticScholar, 'Semantic Scholar', query, limit, offset);
  }

  /**
   * Search CrossRef; returns `[]` (after a warning) on failure.
   */
  async searchCrossRef(query, limit, offset) {
    return searchClient(this.crossref, 'CrossRef', query, limit, offset);
  }

  /**
   * Search arXiv; returns `[]` (after a warning) on failure.
   */
  async searchArxiv(query, limit, offset) {
    return searchClient(this.arxiv, 'arXiv', query, limit, offset);
  }

  /**
   * Deduplicate papers by DOI, arXiv ID, or normalized title.
   *
   * The first paper seen for a key wins and input order is preserved.
   * Papers with no DOI, no arXiv ID and no usable title cannot be compared
   * and are all kept.
   *
   * @param {object[]} papers
   * @returns {object[]} Papers with duplicates removed.
   */
  deduplicatePapers(papers) {
    const seenKeys = new Set();
    const unique = [];
    for (const paper of papers) {
      const key = dedupKey(paper);
      if (key !== null) {
        if (seenKeys.has(key)) {
          continue;
        }
        seenKeys.add(key);
      }
      unique.push(paper);
    }
    return unique;
  }

  /**
   * Rank papers by relevance to query (highest first). Does not mutate the
   * input array.
   *
   * @param {object[]} papers
   * @param {string} query
   * @returns {object[]} New array ordered by descending relevance.
   */
  rankByRelevance(papers, query) {
    return papers
      .map((paper) => ({ paper, relevance: this.calculateRelevance(paper, query) }))
      .sort((a, b) => b.relevance - a.relevance)
      .map((entry) => entry.paper);
  }

  /**
   * Calculate relevance score (0-1) for a paper.
   *
   * Score = 0.5 * fraction of query terms found in the title
   *       + 0.3 * fraction of query terms found in the abstract
   *       + 0.2 * log-scaled citation bonus, capped at 1.
   *
   * @param {object} paper - Reads `title`, `abstract` and `citationCount`.
   * @param {string} query
   * @returns {number} Score in [0, 1].
   */
  calculateRelevance(paper, query) {
    const queryTerms = tokenizeQuery(query);

    let score = matchFraction(paper.title, queryTerms) * TITLE_WEIGHT;
    score += matchFraction(paper.abstract, queryTerms) * ABSTRACT_WEIGHT;

    // Only positive counts contribute: log10 of a non-positive value would
    // yield NaN or -Infinity and poison the ranking.
    if (paper.citationCount > 0) {
      // Log scale; the bonus reaches its cap at roughly 1000 citations.
      const citationScore = Math.log10(paper.citationCount + 1) / 3;
      score += Math.min(citationScore, 1.0) * CITATION_WEIGHT;
    }

    return Math.min(score, 1.0);
  }

  /**
   * Recursively follow citations
   *
   * Stub: marks the paper as visited and returns without adding results.
   *
   * @param {string} paperId
   * @param {number} remainingDepth - Traversal stops when this is <= 0.
   * @param {Set<string>} visited - Paper IDs already processed.
   * @param {object[]} _results - Accumulator; unused by the stub.
   */
  async followCitationsRecursive(paperId, remainingDepth, visited, _results) {
    if (remainingDepth <= 0 || visited.has(paperId)) {
      return;
    }
    visited.add(paperId);
    // For now, just stub - full implementation would query Semantic Scholar
    // citation graph API endpoint
    // This would require additional API calls to get references/citations
  }
}
//# sourceMappingURL=discovery.js.map