UNPKG

@access-mcp/allocations

Version:

MCP server for ACCESS-CI Allocations and Research Projects API

1,090 lines 103 kB
/**
 * Builds the allocations server by forwarding three string literals to the
 * BaseAccessServer constructor.
 *
 * NOTE(review): the three arguments look like server name, version and base
 * URL (other methods read `this.baseURL`) — confirm against BaseAccessServer.
 */
constructor() {
  super("access-allocations", "0.3.0", "https://allocations.access-ci.org");
  // Set up periodic cache cleanup - TEMPORARILY DISABLED FOR DEBUGGING
  // While this timer is disabled, the only eviction visible in this file is
  // the per-page expiry check performed on read in getCachedProjects().
  // setInterval(() => {
  //   this.cleanupExpiredCache();
  // }, 10 * 60 * 1000); // Clean up every 10 minutes
}
{ type: "number", description: "The project ID number", }, }, required: ["project_id"], }, }, { name: "list_projects_by_field", description: "List projects by field of science", inputSchema: { type: "object", properties: { field_of_science: { type: "string", description: "Field of science (e.g., 'Computer Science', 'Physics', 'Chemistry')", }, limit: { type: "number", description: "Maximum number of results to return (default: 20)", default: 20, }, }, required: ["field_of_science"], }, }, { name: "list_projects_by_resource", description: "Find projects using specific computational resources", inputSchema: { type: "object", properties: { resource_name: { type: "string", description: "Resource name (e.g., 'NCSA Delta GPU', 'Purdue Anvil', 'ACCESS Credits')", }, limit: { type: "number", description: "Maximum number of results to return (default: 20)", default: 20, }, }, required: ["resource_name"], }, }, { name: "get_allocation_statistics", description: "Get statistics about resource allocations and research trends", inputSchema: { type: "object", properties: { pages_to_analyze: { type: "number", description: "Number of pages to analyze for statistics (default: 5, max: 20)", default: 5, }, }, required: [], }, }, { name: "find_similar_projects", description: "Find projects with similar research focus using advanced semantic matching", inputSchema: { type: "object", properties: { project_id: { type: "number", description: "Reference project ID to find similar projects", }, keywords: { type: "string", description: "Keywords or research terms to find similar projects (alternative to project_id)", }, similarity_threshold: { type: "number", description: "Minimum similarity score as decimal (0.0-1.0). Convert percentages: 80% = 0.8, 70% = 0.7, 50% = 0.5. 
Default: 0.3", default: 0.3, minimum: 0.0, maximum: 1.0 }, include_same_field: { type: "boolean", description: "Whether to prioritize projects in the same field of science (default: true)", default: true }, show_similarity_scores: { type: "boolean", description: "Whether to display similarity scores in results (default: true)", default: true }, limit: { type: "number", description: "Maximum number of similar projects to return (default: 10, max: 50)", default: 10, }, }, required: [], }, }, { name: "analyze_project_funding", description: "Analyze ACCESS project funding by cross-referencing with NSF awards data", inputSchema: { type: "object", properties: { project_id: { type: "number", description: "ACCESS project ID to analyze for NSF funding connections", }, }, required: ["project_id"], }, }, { name: "find_funded_projects", description: "Find ACCESS projects that have corresponding NSF funding", inputSchema: { type: "object", properties: { pi_name: { type: "string", description: "Principal investigator name to search for funded projects", }, institution_name: { type: "string", description: "Institution name to search for funded projects", }, field_of_science: { type: "string", description: "Field of science to filter results", }, limit: { type: "number", description: "Maximum number of results to return (default: 10)", default: 10, }, }, required: [], }, }, { name: "institutional_funding_profile", description: "Generate comprehensive funding profile for an institution combining ACCESS allocations and NSF awards", inputSchema: { type: "object", properties: { institution_name: { type: "string", description: "Institution name to analyze", }, limit: { type: "number", description: "Maximum number of projects to analyze per source (default: 20)", default: 20, }, }, required: ["institution_name"], }, } ]; } getResources() { return [ { uri: "accessci://allocations", name: "ACCESS-CI Research Projects and Allocations", description: "Current research projects, allocations, 
and resource utilization data", mimeType: "application/json", }, ]; } async handleToolCall(request) { const { name, arguments: args } = request.params; try { switch (name) { case "search_projects": return await this.searchProjects(args.query, args.field_of_science, args.allocation_type, args.limit, args.date_range, args.min_allocation, args.sort_by); case "get_project_details": return await this.getProjectDetails(args.project_id); case "list_projects_by_field": return await this.listProjectsByField(args.field_of_science, args.limit); case "list_projects_by_resource": return await this.listProjectsByResource(args.resource_name, args.limit); case "get_allocation_statistics": return await this.getAllocationStatistics(args.pages_to_analyze || 5); case "find_similar_projects": return await this.findSimilarProjects(args.project_id, args.keywords, args.limit, args.similarity_threshold, args.include_same_field, args.show_similarity_scores); case "analyze_project_funding": return await this.analyzeProjectFunding(args.project_id); case "find_funded_projects": return await this.findFundedProjects(args.pi_name, args.institution_name, args.field_of_science, args.limit); case "institutional_funding_profile": return await this.institutionalFundingProfile(args.institution_name, args.limit); default: throw new Error(`Unknown tool: ${name}`); } } catch (error) { return { content: [ { type: "text", text: `Error: ${handleApiError(error)}`, }, ], }; } } async handleResourceRead(request) { const { uri } = request.params; if (uri === "accessci://allocations") { try { const data = await this.fetchProjects(1); return { contents: [ { uri, mimeType: "application/json", text: JSON.stringify(data, null, 2), }, ], }; } catch (error) { throw new Error(`Failed to fetch allocations data: ${handleApiError(error)}`); } } throw new Error(`Unknown resource: ${uri}`); } // Core API methods async fetchProjects(page = 1) { // Check cache first const cachedData = this.getCachedProjects(page); if 
(cachedData) { return cachedData; } const url = `${this.baseURL}/current-projects.json?page=${page}`; try { const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP ${response.status}: ${response.statusText}`); } const data = await response.json(); // Cache the result this.cacheProjects(page, data); return data; } catch (error) { throw new Error(`Failed to fetch projects: ${error instanceof Error ? error.message : String(error)}`); } } getCachedProjects(page) { const cached = this.projectCache.get(page); const timestamp = this.cacheTimestamps.get(page); if (cached && timestamp && (Date.now() - timestamp) < this.CACHE_TTL) { return cached; } // Clean up expired cache if (timestamp && (Date.now() - timestamp) >= this.CACHE_TTL) { this.projectCache.delete(page); this.cacheTimestamps.delete(page); } return null; } cacheProjects(page, data) { this.projectCache.set(page, data); this.cacheTimestamps.set(page, Date.now()); } async fetchMultiplePages(pages, maxConcurrent = 5) { const results = []; // Process pages in batches to avoid overwhelming the server for (let i = 0; i < pages.length; i += maxConcurrent) { const batch = pages.slice(i, i + maxConcurrent); const promises = batch.map(page => this.fetchProjects(page)); try { const batchResults = await Promise.all(promises); batchResults.forEach(data => { results.push(...data.projects); }); } catch (error) { // Log error but continue with other batches console.warn(`Error fetching batch starting at page ${batch[0]}:`, error); } } return results; } async searchProjects(query, fieldOfScience, allocationType, limit = 20, dateRange, minAllocation, sortBy = 'relevance') { // Input validation if (!query || query.trim().length === 0) { throw new Error("Search query cannot be empty"); } if (limit > 100) limit = 100; // Cap at 100 // Parse advanced search query const searchTerms = this.parseAdvancedQuery(query); // Use parallel fetching for better performance const maxPages = Math.min(15, limit > 50 ? 
20 : 15); const pagesToFetch = Array.from({ length: maxPages }, (_, i) => i + 1); // Fetch first page to get total pages available const firstPageData = await this.fetchProjects(1); const totalPages = Math.min(firstPageData.pages, maxPages); const actualPages = Array.from({ length: totalPages }, (_, i) => i + 1); // Fetch all pages in parallel const allProjects = await this.fetchMultiplePages(actualPages); // Apply filters let filteredProjects = allProjects.filter(project => { // Date range filter if (dateRange) { const projectStart = new Date(project.beginDate); const projectEnd = new Date(project.endDate); if (dateRange.start_date) { const filterStart = new Date(dateRange.start_date); if (projectEnd < filterStart) return false; } if (dateRange.end_date) { const filterEnd = new Date(dateRange.end_date); if (projectStart > filterEnd) return false; } } // Minimum allocation filter if (minAllocation) { const totalAllocation = project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0); if (totalAllocation < minAllocation) return false; } return true; }); // Score and filter projects based on search terms const scoredResults = filteredProjects .map(project => ({ project, score: this.calculateAdvancedSearchScore(project, searchTerms, fieldOfScience, allocationType) })) .filter(item => item.score > 0); // Apply sorting const sortedResults = this.applySorting(scoredResults, sortBy).slice(0, limit); // Format results with enhanced metadata const searchSummary = this.buildSearchSummary(query, fieldOfScience, allocationType, dateRange, minAllocation, sortBy, scoredResults.length, sortedResults.length); return { content: [ { type: "text", text: this.formatAdvancedSearchResults(sortedResults, searchSummary), }, ], }; } // Parse advanced search query with operators parseAdvancedQuery(query) { const result = { andTerms: [], orTerms: [], notTerms: [], exactPhrases: [], regularTerms: [] }; // Extract exact phrases first (quoted strings) const phraseRegex = /"([^"]*)"/g; 
let match; let queryWithoutPhrases = query; while ((match = phraseRegex.exec(query)) !== null) { result.exactPhrases.push(match[1]); queryWithoutPhrases = queryWithoutPhrases.replace(match[0], ''); } // Parse remaining query for operators const tokens = queryWithoutPhrases.split(/\s+/).filter(token => token.length > 0); let i = 0; while (i < tokens.length) { const token = tokens[i]; if (token.toUpperCase() === 'AND' && i + 1 < tokens.length) { result.andTerms.push(tokens[i + 1]); i += 2; } else if (token.toUpperCase() === 'OR' && i + 1 < tokens.length) { result.orTerms.push(tokens[i + 1]); i += 2; } else if (token.toUpperCase() === 'NOT' && i + 1 < tokens.length) { result.notTerms.push(tokens[i + 1]); i += 2; } else if (!['AND', 'OR', 'NOT'].includes(token.toUpperCase())) { result.regularTerms.push(token); i++; } else { i++; } } return result; } // Enhanced search scoring with advanced query support calculateAdvancedSearchScore(project, searchTerms, fieldOfScience, allocationType) { let score = 0; // Field of science filter (required match) if (fieldOfScience && !project.fos.toLowerCase().includes(fieldOfScience.toLowerCase())) { return 0; } // Allocation type filter (required match) if (allocationType && !project.allocationType.toLowerCase().includes(allocationType.toLowerCase())) { return 0; } const projectText = (project.abstract + ' ' + project.requestTitle + ' ' + project.pi).toLowerCase(); const titleText = project.requestTitle.toLowerCase(); // Handle NOT terms first (exclusions) for (const notTerm of searchTerms.notTerms) { if (projectText.includes(notTerm.toLowerCase())) { return 0; // Exclude if any NOT term is found } } // Exact phrases (highest weight) for (const phrase of searchTerms.exactPhrases) { if (projectText.includes(phrase.toLowerCase())) { score += titleText.includes(phrase.toLowerCase()) ? 
5 : 3; } } // AND terms (all must be present) if (searchTerms.andTerms.length > 0) { const andMatches = searchTerms.andTerms.filter(term => projectText.includes(term.toLowerCase())); if (andMatches.length === searchTerms.andTerms.length) { score += 2 * andMatches.length; } else { return 0; // All AND terms must match } } // OR terms (any can be present) if (searchTerms.orTerms.length > 0) { const orMatches = searchTerms.orTerms.filter(term => projectText.includes(term.toLowerCase())); score += orMatches.length * 1.5; } // Regular terms for (const term of searchTerms.regularTerms) { if (!this.isStopWord(term) && term.length > 2) { const termLower = term.toLowerCase(); if (titleText.includes(termLower)) score += 3; else if (project.pi.toLowerCase().includes(termLower)) score += 2; else if (project.fos.toLowerCase().includes(termLower)) score += 1.5; else if (project.abstract.toLowerCase().includes(termLower)) score += 1; else if (project.piInstitution.toLowerCase().includes(termLower)) score += 0.5; } } return Math.min(score, 20); // Cap at reasonable maximum } // Apply sorting to search results applySorting(scoredResults, sortBy) { switch (sortBy) { case 'date_desc': return scoredResults.sort((a, b) => new Date(b.project.beginDate).getTime() - new Date(a.project.beginDate).getTime()); case 'date_asc': return scoredResults.sort((a, b) => new Date(a.project.beginDate).getTime() - new Date(b.project.beginDate).getTime()); case 'allocation_desc': return scoredResults.sort((a, b) => { const aTotal = a.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0); const bTotal = b.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0); return bTotal - aTotal; }); case 'allocation_asc': return scoredResults.sort((a, b) => { const aTotal = a.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0); const bTotal = b.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0); return aTotal - bTotal; }); case 'pi_name': return 
scoredResults.sort((a, b) => a.project.pi.localeCompare(b.project.pi)); case 'relevance': default: return scoredResults.sort((a, b) => b.score - a.score); } } // Build search summary with metadata buildSearchSummary(query, fieldOfScience, allocationType, dateRange, minAllocation, sortBy, totalMatches, returnedResults) { let summary = `**Advanced Search Results**\n`; summary += `• **Query:** ${query}\n`; if (fieldOfScience) summary += `• **Field:** ${fieldOfScience}\n`; if (allocationType) summary += `• **Allocation Type:** ${allocationType}\n`; if (dateRange?.start_date || dateRange?.end_date) { summary += `• **Date Range:** ${dateRange.start_date || 'any'} to ${dateRange.end_date || 'any'}\n`; } if (minAllocation) summary += `• **Min Allocation:** ${minAllocation.toLocaleString()}\n`; if (sortBy && sortBy !== 'relevance') summary += `• **Sorted By:** ${sortBy.replace('_', ' ')}\n`; summary += `• **Results:** ${returnedResults} of ${totalMatches} matches\n`; return summary; } // Enhanced formatting for advanced search results formatAdvancedSearchResults(scoredResults, searchSummary) { if (scoredResults.length === 0) { return `${searchSummary}\n\nNo projects found matching the search criteria.\n\n**Search Tips:**\n• Try broader terms or different operators\n• Use quotes for exact phrases: "machine learning"\n• Use AND/OR/NOT operators: "AI AND physics"\n• Check spelling and try synonyms`; } let result = `${searchSummary}\n\n`; scoredResults.forEach(({ project, score }, index) => { result += `**${index + 1}. 
${project.requestTitle}** `; if (score > 0) result += `(relevance: ${score.toFixed(1)})\n`; else result += `\n`; result += `• **PI:** ${project.pi} (${project.piInstitution})\n`; result += `• **Field:** ${project.fos}\n`; result += `• **Type:** ${project.allocationType}\n`; result += `• **Period:** ${project.beginDate} to ${project.endDate}\n`; result += `• **Project ID:** ${project.projectId}\n`; if (project.resources.length > 0) { const resourceSummaries = project.resources.map(r => { const allocation = this.formatAllocation(r.allocation || 0, r.units, r.resourceName); return allocation ? `${r.resourceName} (${allocation})` : r.resourceName; }); result += `• **Resources:** ${resourceSummaries.join(', ')}\n`; } // Show first 150 characters of abstract const abstractPreview = project.abstract.length > 150 ? project.abstract.substring(0, 150) + '...' : project.abstract; result += `• **Abstract:** ${abstractPreview}\n\n`; }); return result; } calculateSearchScore(project, query, fieldOfScience, allocationType) { let score = 0; const queryLower = query.toLowerCase(); // Field of science filter (required match) if (fieldOfScience && !project.fos.toLowerCase().includes(fieldOfScience.toLowerCase())) { return 0; } // Allocation type filter (required match) if (allocationType && !project.allocationType.toLowerCase().includes(allocationType.toLowerCase())) { return 0; } // Basic query matching with scoring const titleMatch = project.requestTitle.toLowerCase().includes(queryLower); const abstractMatch = project.abstract.toLowerCase().includes(queryLower); const piMatch = project.pi.toLowerCase().includes(queryLower); const institutionMatch = project.piInstitution.toLowerCase().includes(queryLower); const fosMatch = project.fos.toLowerCase().includes(queryLower); // Weighted scoring if (titleMatch) score += 3; if (piMatch) score += 2; if (fosMatch) score += 1.5; if (institutionMatch) score += 1; if (abstractMatch) score += 0.5; // Exact matches get bonus points if 
(project.requestTitle.toLowerCase() === queryLower) score += 5; if (project.pi.toLowerCase() === queryLower) score += 3; return score; } projectMatchesQuery(project, query) { const searchTerms = query.toLowerCase(); return (project.requestTitle.toLowerCase().includes(searchTerms) || project.abstract.toLowerCase().includes(searchTerms) || project.pi.toLowerCase().includes(searchTerms) || project.piInstitution.toLowerCase().includes(searchTerms) || project.fos.toLowerCase().includes(searchTerms)); } async getProjectDetails(projectId) { // Input validation if (!projectId || typeof projectId !== 'number' || projectId <= 0) { throw new Error("Project ID must be a positive number"); } // Search through pages to find the specific project let currentPage = 1; const maxPages = 20; while (currentPage <= maxPages) { const data = await this.fetchProjects(currentPage); const project = data.projects.find(p => p.projectId === projectId); if (project) { return { content: [ { type: "text", text: this.formatSingleProject(project), }, ], }; } currentPage++; if (currentPage > data.pages) break; } return { content: [ { type: "text", text: `Project with ID ${projectId} not found in current allocations.`, }, ], }; } async listProjectsByField(fieldOfScience, limit = 20) { // Input validation if (!fieldOfScience || typeof fieldOfScience !== 'string' || fieldOfScience.trim().length === 0) { throw new Error("Field of science must be a non-empty string"); } if (limit < 1 || limit > 200) { throw new Error("Limit must be between 1 and 200"); } const results = []; let currentPage = 1; const maxPages = 10; while (results.length < limit && currentPage <= maxPages) { const data = await this.fetchProjects(currentPage); for (const project of data.projects) { if (results.length >= limit) break; if (project.fos.toLowerCase().includes(fieldOfScience.toLowerCase())) { results.push(project); } } currentPage++; if (currentPage > data.pages) break; } return { content: [ { type: "text", text: 
this.formatProjectResults(results, `Projects in ${fieldOfScience}`), }, ], }; } async listProjectsByResource(resourceName, limit = 20) { // Input validation if (!resourceName || typeof resourceName !== 'string' || resourceName.trim().length === 0) { throw new Error("Resource name must be a non-empty string"); } if (limit < 1 || limit > 200) { throw new Error("Limit must be between 1 and 200"); } const results = []; let currentPage = 1; const maxPages = 10; while (results.length < limit && currentPage <= maxPages) { const data = await this.fetchProjects(currentPage); for (const project of data.projects) { if (results.length >= limit) break; const hasResource = project.resources.some(resource => resource.resourceName.toLowerCase().includes(resourceName.toLowerCase())); if (hasResource) { results.push(project); } } currentPage++; if (currentPage > data.pages) break; } return { content: [ { type: "text", text: this.formatProjectResults(results, `Projects using ${resourceName}`), }, ], }; } async getAllocationStatistics(pagesToAnalyze = 5) { // Input validation if (pagesToAnalyze < 1 || pagesToAnalyze > 20) { throw new Error("Pages to analyze must be between 1 and 20"); } const projects = []; const fieldsMap = new Map(); const resourcesMap = new Map(); const institutionsMap = new Map(); const allocationTypesMap = new Map(); // Collect data from multiple pages using parallel fetching for better performance const pagesToFetch = Array.from({ length: Math.min(pagesToAnalyze, 20) }, (_, i) => i + 1); const allProjects = await this.fetchMultiplePages(pagesToFetch); projects.push(...allProjects); // Update statistics for (const project of allProjects) { fieldsMap.set(project.fos, (fieldsMap.get(project.fos) || 0) + 1); institutionsMap.set(project.piInstitution, (institutionsMap.get(project.piInstitution) || 0) + 1); allocationTypesMap.set(project.allocationType, (allocationTypesMap.get(project.allocationType) || 0) + 1); for (const resource of project.resources) { 
resourcesMap.set(resource.resourceName, (resourcesMap.get(resource.resourceName) || 0) + 1); } } // Format statistics const topFields = Array.from(fieldsMap.entries()) .sort((a, b) => b[1] - a[1]) .slice(0, 10); const topResources = Array.from(resourcesMap.entries()) .sort((a, b) => b[1] - a[1]) .slice(0, 10); const topInstitutions = Array.from(institutionsMap.entries()) .sort((a, b) => b[1] - a[1]) .slice(0, 10); const allocationTypes = Array.from(allocationTypesMap.entries()) .sort((a, b) => b[1] - a[1]); let statsText = `📊 **ACCESS-CI Allocation Statistics**\n`; statsText += `*(Analysis of ${projects.length} projects from ${pagesToAnalyze} pages)*\n\n`; statsText += `**🔬 Top Fields of Science:**\n`; topFields.forEach(([field, count], i) => { statsText += `${i + 1}. ${field}: ${count} projects\n`; }); statsText += `\n**💻 Most Requested Resources:**\n`; topResources.forEach(([resource, count], i) => { statsText += `${i + 1}. ${resource}: ${count} projects\n`; }); statsText += `\n**🏛️ Top Institutions:**\n`; topInstitutions.forEach(([institution, count], i) => { statsText += `${i + 1}. 
${institution}: ${count} projects\n`; }); statsText += `\n**📈 Allocation Types:**\n`; allocationTypes.forEach(([type, count]) => { statsText += `• ${type}: ${count} projects\n`; }); return { content: [ { type: "text", text: statsText, }, ], }; } async findSimilarProjects(projectId, keywords, limit = 10, similarityThreshold = 0.3, includeSameField = true, showSimilarityScores = true) { let referenceProject = null; let searchTerms = ""; let referenceField = ""; // Input validation if (limit > 50) limit = 50; if (similarityThreshold < 0) similarityThreshold = 0; if (similarityThreshold > 1) similarityThreshold = 1; // Get reference project if projectId provided if (projectId) { let currentPage = 1; const maxPages = 20; while (currentPage <= maxPages && !referenceProject) { const data = await this.fetchProjects(currentPage); referenceProject = data.projects.find(p => p.projectId === projectId) || null; currentPage++; if (currentPage > data.pages) break; } if (!referenceProject) { return { content: [ { type: "text", text: `Project with ID ${projectId} not found in current allocations database.`, }, ], }; } // Extract sophisticated search terms from reference project searchTerms = this.extractKeyTermsFromProject(referenceProject); referenceField = referenceProject.fos; } else if (keywords) { searchTerms = keywords; referenceField = ""; // No specific field for keyword searches } else { return { content: [ { type: "text", text: "Please provide either a project_id or keywords to find similar projects.", }, ], }; } // Fetch projects for similarity analysis const maxPages = 15; const actualPages = Array.from({ length: maxPages }, (_, i) => i + 1); const allProjects = await this.fetchMultiplePages(actualPages); // Calculate similarity scores for all projects const scoredResults = allProjects .filter(project => !referenceProject || project.projectId !== referenceProject.projectId) // Exclude reference project .map(project => ({ project, similarity: 
this.calculateAdvancedSimilarity(project, searchTerms, referenceField, includeSameField) })) .filter(item => item.similarity >= similarityThreshold) .sort((a, b) => b.similarity - a.similarity) .slice(0, limit); // Build comprehensive result const header = referenceProject ? `🔍 **Projects Similar to "${referenceProject.requestTitle}"**` : `🔍 **Projects Similar to Keywords: "${keywords}"**`; let result = `${header}\n\n`; // Reference project info if (referenceProject) { result += `**🎯 Reference Project:**\n`; result += `• **ID:** ${referenceProject.projectId}\n`; result += `• **PI:** ${referenceProject.pi} (${referenceProject.piInstitution})\n`; result += `• **Field:** ${referenceProject.fos}\n`; result += `• **Resources:** ${this.summarizeResources(referenceProject.resources)}\n\n`; } // Search parameters result += `**⚙️ Search Parameters:**\n`; result += `• **Similarity Threshold:** ${(similarityThreshold * 100).toFixed(0)}%\n`; result += `• **Field Priority:** ${includeSameField ? 'Same field preferred' : 'All fields equal'}\n`; result += `• **Results Found:** ${scoredResults.length}${scoredResults.length >= limit ? 
'+' : ''}\n`; if (referenceField) result += `• **Reference Field:** ${referenceField}\n`; result += `\n`; if (scoredResults.length === 0) { result += `**No similar projects found above ${(similarityThreshold * 100).toFixed(0)}% threshold.**\n\n`; result += `**💡 Try adjusting parameters:**\n`; result += `• Lower similarity threshold (e.g., 0.2 or 0.1)\n`; result += `• Broader keywords or different terms\n`; result += `• Disable field prioritization for cross-disciplinary search\n`; return { content: [ { type: "text", text: result, }, ], }; } // Group similar projects by similarity ranges const highSimilarity = scoredResults.filter(r => r.similarity >= 0.7); const mediumSimilarity = scoredResults.filter(r => r.similarity >= 0.4 && r.similarity < 0.7); const lowSimilarity = scoredResults.filter(r => r.similarity < 0.4); if (highSimilarity.length > 0) { result += `**🎯 High Similarity (70%+ match):**\n`; highSimilarity.forEach((item, index) => { result += this.formatSimilarProject(item.project, item.similarity, index + 1, showSimilarityScores); }); result += `\n`; } if (mediumSimilarity.length > 0) { result += `**🔍 Moderate Similarity (40-70% match):**\n`; mediumSimilarity.forEach((item, index) => { result += this.formatSimilarProject(item.project, item.similarity, index + 1, showSimilarityScores); }); result += `\n`; } if (lowSimilarity.length > 0 && showSimilarityScores) { result += `**📋 Lower Similarity (${(similarityThreshold * 100).toFixed(0)}-40% match):**\n`; lowSimilarity.forEach((item, index) => { result += this.formatSimilarProject(item.project, item.similarity, index + 1, showSimilarityScores); }); result += `\n`; } // Analysis insights result += `**📊 Similarity Analysis:**\n`; const fieldMatches = scoredResults.filter(r => r.project.fos === referenceField).length; if (referenceField) { result += `• **Same Field Matches:** ${fieldMatches}/${scoredResults.length} (${Math.round(fieldMatches / scoredResults.length * 100)}%)\n`; } const avgSimilarity = 
scoredResults.reduce((sum, r) => sum + r.similarity, 0) / scoredResults.length; result += `• **Average Similarity:** ${(avgSimilarity * 100).toFixed(1)}%\n`; const institutionDiversity = new Set(scoredResults.map(r => r.project.piInstitution)).size; result += `• **Institution Diversity:** ${institutionDiversity} different institutions\n`; result += `• **Potential Collaborations:** High similarity indicates shared research interests\n`; return { content: [ { type: "text", text: result, }, ], }; } // Extract sophisticated key terms from a project extractKeyTermsFromProject(project) { // Combine title, abstract, and field for comprehensive term extraction const titleWords = project.requestTitle.toLowerCase().split(/\s+/); const abstractWords = project.abstract.toLowerCase().split(/\s+/); const fieldWords = project.fos.toLowerCase().split(/\s+/); // Weight terms: title (high), field (medium), abstract (medium) const termFrequency = new Map(); // Title terms get higher weight titleWords.forEach(word => { if (word.length > 3 && !this.isStopWord(word)) { termFrequency.set(word, (termFrequency.get(word) || 0) + 3); } }); // Field terms get medium-high weight fieldWords.forEach(word => { if (word.length > 3 && !this.isStopWord(word)) { termFrequency.set(word, (termFrequency.get(word) || 0) + 2); } }); // Abstract terms - focus on first 50 words (usually most relevant) abstractWords.slice(0, 50).forEach(word => { if (word.length > 3 && !this.isStopWord(word)) { termFrequency.set(word, (termFrequency.get(word) || 0) + 1); } }); // Return top weighted terms return Array.from(termFrequency.entries()) .sort((a, b) => b[1] - a[1]) .slice(0, 10) .map(([word]) => word) .join(' '); } // Advanced similarity calculation with multiple factors calculateAdvancedSimilarity(project, searchTerms, referenceField, includeSameField) { let similarity = 0; // Field of science similarity (high weight if same field) if (referenceField && includeSameField) { if (project.fos.toLowerCase() === 
referenceField.toLowerCase()) { similarity += 0.4; // 40% boost for same field } else if (project.fos.toLowerCase().includes(referenceField.toLowerCase()) || referenceField.toLowerCase().includes(project.fos.toLowerCase())) { similarity += 0.2; // 20% boost for related fields } } // Text similarity analysis const projectText = (project.requestTitle + ' ' + project.abstract + ' ' + project.fos).toLowerCase(); const searchWords = searchTerms.toLowerCase() .split(/\s+/) .filter(word => word.length > 3 && !this.isStopWord(word)); if (searchWords.length === 0) return similarity; // Term matching with position-based weighting const titleText = project.requestTitle.toLowerCase(); const abstractText = project.abstract.toLowerCase(); let titleMatches = 0; let abstractMatches = 0; let totalTerms = searchWords.length; searchWords.forEach(term => { if (titleText.includes(term)) { titleMatches++; similarity += 0.15; // Title matches are very valuable } else if (abstractText.includes(term)) { abstractMatches++; similarity += 0.05; // Abstract matches are good } }); // Bonus for multiple term clusters const termCoverage = (titleMatches + abstractMatches) / totalTerms; if (termCoverage > 0.5) { similarity += 0.1 * termCoverage; // Bonus for good term coverage } // Resource type similarity (same computational needs might indicate similar research) // This is more sophisticated than basic keyword matching const resourceSimilarity = this.calculateResourceSimilarity(project, searchTerms); similarity += resourceSimilarity * 0.1; // PI institution clustering (same institution might indicate similar research environment) // This is a weak signal but can be useful for collaboration discovery return Math.min(similarity, 1.0); // Cap at 1.0 } // Calculate resource-based similarity calculateResourceSimilarity(project, searchTerms) { // Check if resource needs align with search context const resourceTypes = project.resources.map(r => r.resourceName.toLowerCase()); const searchLower = 
searchTerms.toLowerCase(); let resourceScore = 0; // GPU resources for AI/ML research if ((searchLower.includes('machine') || searchLower.includes('neural') || searchLower.includes('deep')) && resourceTypes.some(r => r.includes('gpu'))) { resourceScore += 0.5; } // HPC resources for simulation/modeling if ((searchLower.includes('simulation') || searchLower.includes('modeling') || searchLower.includes('computational')) && resourceTypes.some(r => r.includes('cpu') || r.includes('core'))) { resourceScore += 0.3; } // Storage for data-intensive research if ((searchLower.includes('data') || searchLower.includes('analysis') || searchLower.includes('dataset')) && resourceTypes.some(r => r.includes('storage'))) { resourceScore += 0.2; } return Math.min(resourceScore, 1.0); } // Format individual similar project with optional similarity score formatSimilarProject(project, similarity, index, showScore) { let result = `${index}. **${project.requestTitle}**`; if (showScore) { result += ` (${(similarity * 100).toFixed(1)}% similar)`; } result += `\n`; result += ` • **PI:** ${project.pi} (${project.piInstitution})\n`; result += ` • **Field:** ${project.fos}\n`; result += ` • **ID:** ${project.projectId}\n`; if (project.resources.length > 0) { const resources = this.summarizeResources(project.resources); result += ` • **Resources:** ${resources}\n`; } // Show first 100 characters of abstract for context const abstractPreview = project.abstract.length > 100 ? project.abstract.substring(0, 100) + '...' 
: project.abstract; result += ` • **Focus:** ${abstractPreview}\n\n`; return result; } calculateProjectSimilarity(project, searchTerms, referenceField) { let score = 0; // Field of science match (high importance) if (referenceField && project.fos.toLowerCase() === referenceField.toLowerCase()) { score += 0.6; } else if (referenceField && project.fos.toLowerCase().includes(referenceField.toLowerCase())) { score += 0.3; } // Enhanced text similarity with weighted scoring const projectText = (project.abstract + ' ' + project.requestTitle).toLowerCase(); const titleText = project.requestTitle