// @access-mcp/allocations
// Version:
// MCP server for ACCESS-CI Allocations and Research Projects API
// 1,090 lines • 103 kB
// JavaScript
import { BaseAccessServer, handleApiError } from "@access-mcp/shared";
export class AllocationsServer extends BaseAccessServer {
// Cached API responses, keyed by page number (written by cacheProjects, read by getCachedProjects).
projectCache = new Map();
// Date.now() value recorded when each page was cached, keyed by the same page number.
cacheTimestamps = new Map();
// Maximum age, in milliseconds, at which a cached page is still served.
CACHE_TTL = 5 * 60 * 1000; // 5 minutes
/**
 * Creates the allocations server.
 *
 * The three values handed to BaseAccessServer are presumably the server name,
 * its version and the API base URL (fetchProjects reads `this.baseURL`) —
 * confirm against BaseAccessServer.
 */
constructor() {
super("access-allocations", "0.3.0", "https://allocations.access-ci.org");
// Set up periodic cache cleanup - TEMPORARILY DISABLED FOR DEBUGGING
// NOTE(review): while this stays disabled, the only eviction visible in this
// part of the file is the lazy one in getCachedProjects(), which drops an
// expired page when that same page is requested again.
// setInterval(() => {
// this.cleanupExpiredCache();
// }, 10 * 60 * 1000); // Clean up every 10 minutes
}
getTools() {
return [
{
name: "search_projects",
description: "Advanced search for ACCESS-CI research projects with operators, filters, and sorting",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Search query supporting operators: 'term1 AND term2', 'term1 OR term2', 'term1 NOT term2', exact phrases with quotes",
},
field_of_science: {
type: "string",
description: "Filter by field of science (e.g., 'Computer Science', 'Physics')",
},
allocation_type: {
type: "string",
description: "Filter by allocation type (e.g., 'Discover', 'Explore', 'Accelerate')",
},
date_range: {
type: "object",
description: "Filter by project date range",
properties: {
start_date: {
type: "string",
description: "Start date in YYYY-MM-DD format"
},
end_date: {
type: "string",
description: "End date in YYYY-MM-DD format"
}
}
},
min_allocation: {
type: "number",
description: "Minimum allocation amount filter"
},
sort_by: {
type: "string",
description: "Sort results by: 'relevance', 'date_desc', 'date_asc', 'allocation_desc', 'allocation_asc', 'pi_name'",
enum: ["relevance", "date_desc", "date_asc", "allocation_desc", "allocation_asc", "pi_name"],
default: "relevance"
},
limit: {
type: "number",
description: "Maximum number of results to return (default: 20, max: 100)",
default: 20,
},
},
required: ["query"],
},
},
{
name: "get_project_details",
description: "Get detailed information about a specific research project",
inputSchema: {
type: "object",
properties: {
project_id: {
type: "number",
description: "The project ID number",
},
},
required: ["project_id"],
},
},
{
name: "list_projects_by_field",
description: "List projects by field of science",
inputSchema: {
type: "object",
properties: {
field_of_science: {
type: "string",
description: "Field of science (e.g., 'Computer Science', 'Physics', 'Chemistry')",
},
limit: {
type: "number",
description: "Maximum number of results to return (default: 20)",
default: 20,
},
},
required: ["field_of_science"],
},
},
{
name: "list_projects_by_resource",
description: "Find projects using specific computational resources",
inputSchema: {
type: "object",
properties: {
resource_name: {
type: "string",
description: "Resource name (e.g., 'NCSA Delta GPU', 'Purdue Anvil', 'ACCESS Credits')",
},
limit: {
type: "number",
description: "Maximum number of results to return (default: 20)",
default: 20,
},
},
required: ["resource_name"],
},
},
{
name: "get_allocation_statistics",
description: "Get statistics about resource allocations and research trends",
inputSchema: {
type: "object",
properties: {
pages_to_analyze: {
type: "number",
description: "Number of pages to analyze for statistics (default: 5, max: 20)",
default: 5,
},
},
required: [],
},
},
{
name: "find_similar_projects",
description: "Find projects with similar research focus using advanced semantic matching",
inputSchema: {
type: "object",
properties: {
project_id: {
type: "number",
description: "Reference project ID to find similar projects",
},
keywords: {
type: "string",
description: "Keywords or research terms to find similar projects (alternative to project_id)",
},
similarity_threshold: {
type: "number",
description: "Minimum similarity score as decimal (0.0-1.0). Convert percentages: 80% = 0.8, 70% = 0.7, 50% = 0.5. Default: 0.3",
default: 0.3,
minimum: 0.0,
maximum: 1.0
},
include_same_field: {
type: "boolean",
description: "Whether to prioritize projects in the same field of science (default: true)",
default: true
},
show_similarity_scores: {
type: "boolean",
description: "Whether to display similarity scores in results (default: true)",
default: true
},
limit: {
type: "number",
description: "Maximum number of similar projects to return (default: 10, max: 50)",
default: 10,
},
},
required: [],
},
},
{
name: "analyze_project_funding",
description: "Analyze ACCESS project funding by cross-referencing with NSF awards data",
inputSchema: {
type: "object",
properties: {
project_id: {
type: "number",
description: "ACCESS project ID to analyze for NSF funding connections",
},
},
required: ["project_id"],
},
},
{
name: "find_funded_projects",
description: "Find ACCESS projects that have corresponding NSF funding",
inputSchema: {
type: "object",
properties: {
pi_name: {
type: "string",
description: "Principal investigator name to search for funded projects",
},
institution_name: {
type: "string",
description: "Institution name to search for funded projects",
},
field_of_science: {
type: "string",
description: "Field of science to filter results",
},
limit: {
type: "number",
description: "Maximum number of results to return (default: 10)",
default: 10,
},
},
required: [],
},
},
{
name: "institutional_funding_profile",
description: "Generate comprehensive funding profile for an institution combining ACCESS allocations and NSF awards",
inputSchema: {
type: "object",
properties: {
institution_name: {
type: "string",
description: "Institution name to analyze",
},
limit: {
type: "number",
description: "Maximum number of projects to analyze per source (default: 20)",
default: 20,
},
},
required: ["institution_name"],
},
}
];
}
getResources() {
return [
{
uri: "accessci://allocations",
name: "ACCESS-CI Research Projects and Allocations",
description: "Current research projects, allocations, and resource utilization data",
mimeType: "application/json",
},
];
}
async handleToolCall(request) {
const { name, arguments: args } = request.params;
try {
switch (name) {
case "search_projects":
return await this.searchProjects(args.query, args.field_of_science, args.allocation_type, args.limit, args.date_range, args.min_allocation, args.sort_by);
case "get_project_details":
return await this.getProjectDetails(args.project_id);
case "list_projects_by_field":
return await this.listProjectsByField(args.field_of_science, args.limit);
case "list_projects_by_resource":
return await this.listProjectsByResource(args.resource_name, args.limit);
case "get_allocation_statistics":
return await this.getAllocationStatistics(args.pages_to_analyze || 5);
case "find_similar_projects":
return await this.findSimilarProjects(args.project_id, args.keywords, args.limit, args.similarity_threshold, args.include_same_field, args.show_similarity_scores);
case "analyze_project_funding":
return await this.analyzeProjectFunding(args.project_id);
case "find_funded_projects":
return await this.findFundedProjects(args.pi_name, args.institution_name, args.field_of_science, args.limit);
case "institutional_funding_profile":
return await this.institutionalFundingProfile(args.institution_name, args.limit);
default:
throw new Error(`Unknown tool: ${name}`);
}
}
catch (error) {
return {
content: [
{
type: "text",
text: `Error: ${handleApiError(error)}`,
},
],
};
}
}
async handleResourceRead(request) {
const { uri } = request.params;
if (uri === "accessci://allocations") {
try {
const data = await this.fetchProjects(1);
return {
contents: [
{
uri,
mimeType: "application/json",
text: JSON.stringify(data, null, 2),
},
],
};
}
catch (error) {
throw new Error(`Failed to fetch allocations data: ${handleApiError(error)}`);
}
}
throw new Error(`Unknown resource: ${uri}`);
}
// Core API methods
async fetchProjects(page = 1) {
// Check cache first
const cachedData = this.getCachedProjects(page);
if (cachedData) {
return cachedData;
}
const url = `${this.baseURL}/current-projects.json?page=${page}`;
try {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const data = await response.json();
// Cache the result
this.cacheProjects(page, data);
return data;
}
catch (error) {
throw new Error(`Failed to fetch projects: ${error instanceof Error ? error.message : String(error)}`);
}
}
getCachedProjects(page) {
const cached = this.projectCache.get(page);
const timestamp = this.cacheTimestamps.get(page);
if (cached && timestamp && (Date.now() - timestamp) < this.CACHE_TTL) {
return cached;
}
// Clean up expired cache
if (timestamp && (Date.now() - timestamp) >= this.CACHE_TTL) {
this.projectCache.delete(page);
this.cacheTimestamps.delete(page);
}
return null;
}
cacheProjects(page, data) {
this.projectCache.set(page, data);
this.cacheTimestamps.set(page, Date.now());
}
async fetchMultiplePages(pages, maxConcurrent = 5) {
const results = [];
// Process pages in batches to avoid overwhelming the server
for (let i = 0; i < pages.length; i += maxConcurrent) {
const batch = pages.slice(i, i + maxConcurrent);
const promises = batch.map(page => this.fetchProjects(page));
try {
const batchResults = await Promise.all(promises);
batchResults.forEach(data => {
results.push(...data.projects);
});
}
catch (error) {
// Log error but continue with other batches
console.warn(`Error fetching batch starting at page ${batch[0]}:`, error);
}
}
return results;
}
async searchProjects(query, fieldOfScience, allocationType, limit = 20, dateRange, minAllocation, sortBy = 'relevance') {
// Input validation
if (!query || query.trim().length === 0) {
throw new Error("Search query cannot be empty");
}
if (limit > 100)
limit = 100; // Cap at 100
// Parse advanced search query
const searchTerms = this.parseAdvancedQuery(query);
// Use parallel fetching for better performance
const maxPages = Math.min(15, limit > 50 ? 20 : 15);
const pagesToFetch = Array.from({ length: maxPages }, (_, i) => i + 1);
// Fetch first page to get total pages available
const firstPageData = await this.fetchProjects(1);
const totalPages = Math.min(firstPageData.pages, maxPages);
const actualPages = Array.from({ length: totalPages }, (_, i) => i + 1);
// Fetch all pages in parallel
const allProjects = await this.fetchMultiplePages(actualPages);
// Apply filters
let filteredProjects = allProjects.filter(project => {
// Date range filter
if (dateRange) {
const projectStart = new Date(project.beginDate);
const projectEnd = new Date(project.endDate);
if (dateRange.start_date) {
const filterStart = new Date(dateRange.start_date);
if (projectEnd < filterStart)
return false;
}
if (dateRange.end_date) {
const filterEnd = new Date(dateRange.end_date);
if (projectStart > filterEnd)
return false;
}
}
// Minimum allocation filter
if (minAllocation) {
const totalAllocation = project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0);
if (totalAllocation < minAllocation)
return false;
}
return true;
});
// Score and filter projects based on search terms
const scoredResults = filteredProjects
.map(project => ({
project,
score: this.calculateAdvancedSearchScore(project, searchTerms, fieldOfScience, allocationType)
}))
.filter(item => item.score > 0);
// Apply sorting
const sortedResults = this.applySorting(scoredResults, sortBy).slice(0, limit);
// Format results with enhanced metadata
const searchSummary = this.buildSearchSummary(query, fieldOfScience, allocationType, dateRange, minAllocation, sortBy, scoredResults.length, sortedResults.length);
return {
content: [
{
type: "text",
text: this.formatAdvancedSearchResults(sortedResults, searchSummary),
},
],
};
}
// Parse advanced search query with operators
parseAdvancedQuery(query) {
const result = {
andTerms: [],
orTerms: [],
notTerms: [],
exactPhrases: [],
regularTerms: []
};
// Extract exact phrases first (quoted strings)
const phraseRegex = /"([^"]*)"/g;
let match;
let queryWithoutPhrases = query;
while ((match = phraseRegex.exec(query)) !== null) {
result.exactPhrases.push(match[1]);
queryWithoutPhrases = queryWithoutPhrases.replace(match[0], '');
}
// Parse remaining query for operators
const tokens = queryWithoutPhrases.split(/\s+/).filter(token => token.length > 0);
let i = 0;
while (i < tokens.length) {
const token = tokens[i];
if (token.toUpperCase() === 'AND' && i + 1 < tokens.length) {
result.andTerms.push(tokens[i + 1]);
i += 2;
}
else if (token.toUpperCase() === 'OR' && i + 1 < tokens.length) {
result.orTerms.push(tokens[i + 1]);
i += 2;
}
else if (token.toUpperCase() === 'NOT' && i + 1 < tokens.length) {
result.notTerms.push(tokens[i + 1]);
i += 2;
}
else if (!['AND', 'OR', 'NOT'].includes(token.toUpperCase())) {
result.regularTerms.push(token);
i++;
}
else {
i++;
}
}
return result;
}
// Enhanced search scoring with advanced query support
calculateAdvancedSearchScore(project, searchTerms, fieldOfScience, allocationType) {
let score = 0;
// Field of science filter (required match)
if (fieldOfScience && !project.fos.toLowerCase().includes(fieldOfScience.toLowerCase())) {
return 0;
}
// Allocation type filter (required match)
if (allocationType && !project.allocationType.toLowerCase().includes(allocationType.toLowerCase())) {
return 0;
}
const projectText = (project.abstract + ' ' + project.requestTitle + ' ' + project.pi).toLowerCase();
const titleText = project.requestTitle.toLowerCase();
// Handle NOT terms first (exclusions)
for (const notTerm of searchTerms.notTerms) {
if (projectText.includes(notTerm.toLowerCase())) {
return 0; // Exclude if any NOT term is found
}
}
// Exact phrases (highest weight)
for (const phrase of searchTerms.exactPhrases) {
if (projectText.includes(phrase.toLowerCase())) {
score += titleText.includes(phrase.toLowerCase()) ? 5 : 3;
}
}
// AND terms (all must be present)
if (searchTerms.andTerms.length > 0) {
const andMatches = searchTerms.andTerms.filter(term => projectText.includes(term.toLowerCase()));
if (andMatches.length === searchTerms.andTerms.length) {
score += 2 * andMatches.length;
}
else {
return 0; // All AND terms must match
}
}
// OR terms (any can be present)
if (searchTerms.orTerms.length > 0) {
const orMatches = searchTerms.orTerms.filter(term => projectText.includes(term.toLowerCase()));
score += orMatches.length * 1.5;
}
// Regular terms
for (const term of searchTerms.regularTerms) {
if (!this.isStopWord(term) && term.length > 2) {
const termLower = term.toLowerCase();
if (titleText.includes(termLower))
score += 3;
else if (project.pi.toLowerCase().includes(termLower))
score += 2;
else if (project.fos.toLowerCase().includes(termLower))
score += 1.5;
else if (project.abstract.toLowerCase().includes(termLower))
score += 1;
else if (project.piInstitution.toLowerCase().includes(termLower))
score += 0.5;
}
}
return Math.min(score, 20); // Cap at reasonable maximum
}
// Apply sorting to search results
applySorting(scoredResults, sortBy) {
switch (sortBy) {
case 'date_desc':
return scoredResults.sort((a, b) => new Date(b.project.beginDate).getTime() - new Date(a.project.beginDate).getTime());
case 'date_asc':
return scoredResults.sort((a, b) => new Date(a.project.beginDate).getTime() - new Date(b.project.beginDate).getTime());
case 'allocation_desc':
return scoredResults.sort((a, b) => {
const aTotal = a.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0);
const bTotal = b.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0);
return bTotal - aTotal;
});
case 'allocation_asc':
return scoredResults.sort((a, b) => {
const aTotal = a.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0);
const bTotal = b.project.resources.reduce((sum, r) => sum + (r.allocation || 0), 0);
return aTotal - bTotal;
});
case 'pi_name':
return scoredResults.sort((a, b) => a.project.pi.localeCompare(b.project.pi));
case 'relevance':
default:
return scoredResults.sort((a, b) => b.score - a.score);
}
}
// Build search summary with metadata
buildSearchSummary(query, fieldOfScience, allocationType, dateRange, minAllocation, sortBy, totalMatches, returnedResults) {
let summary = `**Advanced Search Results**\n`;
summary += `• **Query:** ${query}\n`;
if (fieldOfScience)
summary += `• **Field:** ${fieldOfScience}\n`;
if (allocationType)
summary += `• **Allocation Type:** ${allocationType}\n`;
if (dateRange?.start_date || dateRange?.end_date) {
summary += `• **Date Range:** ${dateRange.start_date || 'any'} to ${dateRange.end_date || 'any'}\n`;
}
if (minAllocation)
summary += `• **Min Allocation:** ${minAllocation.toLocaleString()}\n`;
if (sortBy && sortBy !== 'relevance')
summary += `• **Sorted By:** ${sortBy.replace('_', ' ')}\n`;
summary += `• **Results:** ${returnedResults} of ${totalMatches} matches\n`;
return summary;
}
// Enhanced formatting for advanced search results
formatAdvancedSearchResults(scoredResults, searchSummary) {
if (scoredResults.length === 0) {
return `${searchSummary}\n\nNo projects found matching the search criteria.\n\n**Search Tips:**\n• Try broader terms or different operators\n• Use quotes for exact phrases: "machine learning"\n• Use AND/OR/NOT operators: "AI AND physics"\n• Check spelling and try synonyms`;
}
let result = `${searchSummary}\n\n`;
scoredResults.forEach(({ project, score }, index) => {
result += `**${index + 1}. ${project.requestTitle}** `;
if (score > 0)
result += `(relevance: ${score.toFixed(1)})\n`;
else
result += `\n`;
result += `• **PI:** ${project.pi} (${project.piInstitution})\n`;
result += `• **Field:** ${project.fos}\n`;
result += `• **Type:** ${project.allocationType}\n`;
result += `• **Period:** ${project.beginDate} to ${project.endDate}\n`;
result += `• **Project ID:** ${project.projectId}\n`;
if (project.resources.length > 0) {
const resourceSummaries = project.resources.map(r => {
const allocation = this.formatAllocation(r.allocation || 0, r.units, r.resourceName);
return allocation ? `${r.resourceName} (${allocation})` : r.resourceName;
});
result += `• **Resources:** ${resourceSummaries.join(', ')}\n`;
}
// Show first 150 characters of abstract
const abstractPreview = project.abstract.length > 150
? project.abstract.substring(0, 150) + '...'
: project.abstract;
result += `• **Abstract:** ${abstractPreview}\n\n`;
});
return result;
}
calculateSearchScore(project, query, fieldOfScience, allocationType) {
let score = 0;
const queryLower = query.toLowerCase();
// Field of science filter (required match)
if (fieldOfScience && !project.fos.toLowerCase().includes(fieldOfScience.toLowerCase())) {
return 0;
}
// Allocation type filter (required match)
if (allocationType && !project.allocationType.toLowerCase().includes(allocationType.toLowerCase())) {
return 0;
}
// Basic query matching with scoring
const titleMatch = project.requestTitle.toLowerCase().includes(queryLower);
const abstractMatch = project.abstract.toLowerCase().includes(queryLower);
const piMatch = project.pi.toLowerCase().includes(queryLower);
const institutionMatch = project.piInstitution.toLowerCase().includes(queryLower);
const fosMatch = project.fos.toLowerCase().includes(queryLower);
// Weighted scoring
if (titleMatch)
score += 3;
if (piMatch)
score += 2;
if (fosMatch)
score += 1.5;
if (institutionMatch)
score += 1;
if (abstractMatch)
score += 0.5;
// Exact matches get bonus points
if (project.requestTitle.toLowerCase() === queryLower)
score += 5;
if (project.pi.toLowerCase() === queryLower)
score += 3;
return score;
}
projectMatchesQuery(project, query) {
const searchTerms = query.toLowerCase();
return (project.requestTitle.toLowerCase().includes(searchTerms) ||
project.abstract.toLowerCase().includes(searchTerms) ||
project.pi.toLowerCase().includes(searchTerms) ||
project.piInstitution.toLowerCase().includes(searchTerms) ||
project.fos.toLowerCase().includes(searchTerms));
}
async getProjectDetails(projectId) {
// Input validation
if (!projectId || typeof projectId !== 'number' || projectId <= 0) {
throw new Error("Project ID must be a positive number");
}
// Search through pages to find the specific project
let currentPage = 1;
const maxPages = 20;
while (currentPage <= maxPages) {
const data = await this.fetchProjects(currentPage);
const project = data.projects.find(p => p.projectId === projectId);
if (project) {
return {
content: [
{
type: "text",
text: this.formatSingleProject(project),
},
],
};
}
currentPage++;
if (currentPage > data.pages)
break;
}
return {
content: [
{
type: "text",
text: `Project with ID ${projectId} not found in current allocations.`,
},
],
};
}
async listProjectsByField(fieldOfScience, limit = 20) {
// Input validation
if (!fieldOfScience || typeof fieldOfScience !== 'string' || fieldOfScience.trim().length === 0) {
throw new Error("Field of science must be a non-empty string");
}
if (limit < 1 || limit > 200) {
throw new Error("Limit must be between 1 and 200");
}
const results = [];
let currentPage = 1;
const maxPages = 10;
while (results.length < limit && currentPage <= maxPages) {
const data = await this.fetchProjects(currentPage);
for (const project of data.projects) {
if (results.length >= limit)
break;
if (project.fos.toLowerCase().includes(fieldOfScience.toLowerCase())) {
results.push(project);
}
}
currentPage++;
if (currentPage > data.pages)
break;
}
return {
content: [
{
type: "text",
text: this.formatProjectResults(results, `Projects in ${fieldOfScience}`),
},
],
};
}
async listProjectsByResource(resourceName, limit = 20) {
// Input validation
if (!resourceName || typeof resourceName !== 'string' || resourceName.trim().length === 0) {
throw new Error("Resource name must be a non-empty string");
}
if (limit < 1 || limit > 200) {
throw new Error("Limit must be between 1 and 200");
}
const results = [];
let currentPage = 1;
const maxPages = 10;
while (results.length < limit && currentPage <= maxPages) {
const data = await this.fetchProjects(currentPage);
for (const project of data.projects) {
if (results.length >= limit)
break;
const hasResource = project.resources.some(resource => resource.resourceName.toLowerCase().includes(resourceName.toLowerCase()));
if (hasResource) {
results.push(project);
}
}
currentPage++;
if (currentPage > data.pages)
break;
}
return {
content: [
{
type: "text",
text: this.formatProjectResults(results, `Projects using ${resourceName}`),
},
],
};
}
async getAllocationStatistics(pagesToAnalyze = 5) {
// Input validation
if (pagesToAnalyze < 1 || pagesToAnalyze > 20) {
throw new Error("Pages to analyze must be between 1 and 20");
}
const projects = [];
const fieldsMap = new Map();
const resourcesMap = new Map();
const institutionsMap = new Map();
const allocationTypesMap = new Map();
// Collect data from multiple pages using parallel fetching for better performance
const pagesToFetch = Array.from({ length: Math.min(pagesToAnalyze, 20) }, (_, i) => i + 1);
const allProjects = await this.fetchMultiplePages(pagesToFetch);
projects.push(...allProjects);
// Update statistics
for (const project of allProjects) {
fieldsMap.set(project.fos, (fieldsMap.get(project.fos) || 0) + 1);
institutionsMap.set(project.piInstitution, (institutionsMap.get(project.piInstitution) || 0) + 1);
allocationTypesMap.set(project.allocationType, (allocationTypesMap.get(project.allocationType) || 0) + 1);
for (const resource of project.resources) {
resourcesMap.set(resource.resourceName, (resourcesMap.get(resource.resourceName) || 0) + 1);
}
}
// Format statistics
const topFields = Array.from(fieldsMap.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 10);
const topResources = Array.from(resourcesMap.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 10);
const topInstitutions = Array.from(institutionsMap.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 10);
const allocationTypes = Array.from(allocationTypesMap.entries())
.sort((a, b) => b[1] - a[1]);
let statsText = `📊 **ACCESS-CI Allocation Statistics**\n`;
statsText += `*(Analysis of ${projects.length} projects from ${pagesToAnalyze} pages)*\n\n`;
statsText += `**🔬 Top Fields of Science:**\n`;
topFields.forEach(([field, count], i) => {
statsText += `${i + 1}. ${field}: ${count} projects\n`;
});
statsText += `\n**💻 Most Requested Resources:**\n`;
topResources.forEach(([resource, count], i) => {
statsText += `${i + 1}. ${resource}: ${count} projects\n`;
});
statsText += `\n**🏛️ Top Institutions:**\n`;
topInstitutions.forEach(([institution, count], i) => {
statsText += `${i + 1}. ${institution}: ${count} projects\n`;
});
statsText += `\n**📈 Allocation Types:**\n`;
allocationTypes.forEach(([type, count]) => {
statsText += `• ${type}: ${count} projects\n`;
});
return {
content: [
{
type: "text",
text: statsText,
},
],
};
}
async findSimilarProjects(projectId, keywords, limit = 10, similarityThreshold = 0.3, includeSameField = true, showSimilarityScores = true) {
let referenceProject = null;
let searchTerms = "";
let referenceField = "";
// Input validation
if (limit > 50)
limit = 50;
if (similarityThreshold < 0)
similarityThreshold = 0;
if (similarityThreshold > 1)
similarityThreshold = 1;
// Get reference project if projectId provided
if (projectId) {
let currentPage = 1;
const maxPages = 20;
while (currentPage <= maxPages && !referenceProject) {
const data = await this.fetchProjects(currentPage);
referenceProject = data.projects.find(p => p.projectId === projectId) || null;
currentPage++;
if (currentPage > data.pages)
break;
}
if (!referenceProject) {
return {
content: [
{
type: "text",
text: `Project with ID ${projectId} not found in current allocations database.`,
},
],
};
}
// Extract sophisticated search terms from reference project
searchTerms = this.extractKeyTermsFromProject(referenceProject);
referenceField = referenceProject.fos;
}
else if (keywords) {
searchTerms = keywords;
referenceField = ""; // No specific field for keyword searches
}
else {
return {
content: [
{
type: "text",
text: "Please provide either a project_id or keywords to find similar projects.",
},
],
};
}
// Fetch projects for similarity analysis
const maxPages = 15;
const actualPages = Array.from({ length: maxPages }, (_, i) => i + 1);
const allProjects = await this.fetchMultiplePages(actualPages);
// Calculate similarity scores for all projects
const scoredResults = allProjects
.filter(project => !referenceProject || project.projectId !== referenceProject.projectId) // Exclude reference project
.map(project => ({
project,
similarity: this.calculateAdvancedSimilarity(project, searchTerms, referenceField, includeSameField)
}))
.filter(item => item.similarity >= similarityThreshold)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, limit);
// Build comprehensive result
const header = referenceProject
? `🔍 **Projects Similar to "${referenceProject.requestTitle}"**`
: `🔍 **Projects Similar to Keywords: "${keywords}"**`;
let result = `${header}\n\n`;
// Reference project info
if (referenceProject) {
result += `**🎯 Reference Project:**\n`;
result += `• **ID:** ${referenceProject.projectId}\n`;
result += `• **PI:** ${referenceProject.pi} (${referenceProject.piInstitution})\n`;
result += `• **Field:** ${referenceProject.fos}\n`;
result += `• **Resources:** ${this.summarizeResources(referenceProject.resources)}\n\n`;
}
// Search parameters
result += `**⚙️ Search Parameters:**\n`;
result += `• **Similarity Threshold:** ${(similarityThreshold * 100).toFixed(0)}%\n`;
result += `• **Field Priority:** ${includeSameField ? 'Same field preferred' : 'All fields equal'}\n`;
result += `• **Results Found:** ${scoredResults.length}${scoredResults.length >= limit ? '+' : ''}\n`;
if (referenceField)
result += `• **Reference Field:** ${referenceField}\n`;
result += `\n`;
if (scoredResults.length === 0) {
result += `**No similar projects found above ${(similarityThreshold * 100).toFixed(0)}% threshold.**\n\n`;
result += `**💡 Try adjusting parameters:**\n`;
result += `• Lower similarity threshold (e.g., 0.2 or 0.1)\n`;
result += `• Broader keywords or different terms\n`;
result += `• Disable field prioritization for cross-disciplinary search\n`;
return {
content: [
{
type: "text",
text: result,
},
],
};
}
// Group similar projects by similarity ranges
const highSimilarity = scoredResults.filter(r => r.similarity >= 0.7);
const mediumSimilarity = scoredResults.filter(r => r.similarity >= 0.4 && r.similarity < 0.7);
const lowSimilarity = scoredResults.filter(r => r.similarity < 0.4);
if (highSimilarity.length > 0) {
result += `**🎯 High Similarity (70%+ match):**\n`;
highSimilarity.forEach((item, index) => {
result += this.formatSimilarProject(item.project, item.similarity, index + 1, showSimilarityScores);
});
result += `\n`;
}
if (mediumSimilarity.length > 0) {
result += `**🔍 Moderate Similarity (40-70% match):**\n`;
mediumSimilarity.forEach((item, index) => {
result += this.formatSimilarProject(item.project, item.similarity, index + 1, showSimilarityScores);
});
result += `\n`;
}
if (lowSimilarity.length > 0 && showSimilarityScores) {
result += `**📋 Lower Similarity (${(similarityThreshold * 100).toFixed(0)}-40% match):**\n`;
lowSimilarity.forEach((item, index) => {
result += this.formatSimilarProject(item.project, item.similarity, index + 1, showSimilarityScores);
});
result += `\n`;
}
// Analysis insights
result += `**📊 Similarity Analysis:**\n`;
const fieldMatches = scoredResults.filter(r => r.project.fos === referenceField).length;
if (referenceField) {
result += `• **Same Field Matches:** ${fieldMatches}/${scoredResults.length} (${Math.round(fieldMatches / scoredResults.length * 100)}%)\n`;
}
const avgSimilarity = scoredResults.reduce((sum, r) => sum + r.similarity, 0) / scoredResults.length;
result += `• **Average Similarity:** ${(avgSimilarity * 100).toFixed(1)}%\n`;
const institutionDiversity = new Set(scoredResults.map(r => r.project.piInstitution)).size;
result += `• **Institution Diversity:** ${institutionDiversity} different institutions\n`;
result += `• **Potential Collaborations:** High similarity indicates shared research interests\n`;
return {
content: [
{
type: "text",
text: result,
},
],
};
}
// Extract sophisticated key terms from a project
extractKeyTermsFromProject(project) {
// Combine title, abstract, and field for comprehensive term extraction
const titleWords = project.requestTitle.toLowerCase().split(/\s+/);
const abstractWords = project.abstract.toLowerCase().split(/\s+/);
const fieldWords = project.fos.toLowerCase().split(/\s+/);
// Weight terms: title (high), field (medium), abstract (medium)
const termFrequency = new Map();
// Title terms get higher weight
titleWords.forEach(word => {
if (word.length > 3 && !this.isStopWord(word)) {
termFrequency.set(word, (termFrequency.get(word) || 0) + 3);
}
});
// Field terms get medium-high weight
fieldWords.forEach(word => {
if (word.length > 3 && !this.isStopWord(word)) {
termFrequency.set(word, (termFrequency.get(word) || 0) + 2);
}
});
// Abstract terms - focus on first 50 words (usually most relevant)
abstractWords.slice(0, 50).forEach(word => {
if (word.length > 3 && !this.isStopWord(word)) {
termFrequency.set(word, (termFrequency.get(word) || 0) + 1);
}
});
// Return top weighted terms
return Array.from(termFrequency.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 10)
.map(([word]) => word)
.join(' ');
}
// Advanced similarity calculation with multiple factors
calculateAdvancedSimilarity(project, searchTerms, referenceField, includeSameField) {
let similarity = 0;
// Field of science similarity (high weight if same field)
if (referenceField && includeSameField) {
if (project.fos.toLowerCase() === referenceField.toLowerCase()) {
similarity += 0.4; // 40% boost for same field
}
else if (project.fos.toLowerCase().includes(referenceField.toLowerCase()) ||
referenceField.toLowerCase().includes(project.fos.toLowerCase())) {
similarity += 0.2; // 20% boost for related fields
}
}
// Text similarity analysis
const projectText = (project.requestTitle + ' ' + project.abstract + ' ' + project.fos).toLowerCase();
const searchWords = searchTerms.toLowerCase()
.split(/\s+/)
.filter(word => word.length > 3 && !this.isStopWord(word));
if (searchWords.length === 0)
return similarity;
// Term matching with position-based weighting
const titleText = project.requestTitle.toLowerCase();
const abstractText = project.abstract.toLowerCase();
let titleMatches = 0;
let abstractMatches = 0;
let totalTerms = searchWords.length;
searchWords.forEach(term => {
if (titleText.includes(term)) {
titleMatches++;
similarity += 0.15; // Title matches are very valuable
}
else if (abstractText.includes(term)) {
abstractMatches++;
similarity += 0.05; // Abstract matches are good
}
});
// Bonus for multiple term clusters
const termCoverage = (titleMatches + abstractMatches) / totalTerms;
if (termCoverage > 0.5) {
similarity += 0.1 * termCoverage; // Bonus for good term coverage
}
// Resource type similarity (same computational needs might indicate similar research)
// This is more sophisticated than basic keyword matching
const resourceSimilarity = this.calculateResourceSimilarity(project, searchTerms);
similarity += resourceSimilarity * 0.1;
// PI institution clustering (same institution might indicate similar research environment)
// This is a weak signal but can be useful for collaboration discovery
return Math.min(similarity, 1.0); // Cap at 1.0
}
// Calculate resource-based similarity
calculateResourceSimilarity(project, searchTerms) {
// Check if resource needs align with search context
const resourceTypes = project.resources.map(r => r.resourceName.toLowerCase());
const searchLower = searchTerms.toLowerCase();
let resourceScore = 0;
// GPU resources for AI/ML research
if ((searchLower.includes('machine') || searchLower.includes('neural') || searchLower.includes('deep')) &&
resourceTypes.some(r => r.includes('gpu'))) {
resourceScore += 0.5;
}
// HPC resources for simulation/modeling
if ((searchLower.includes('simulation') || searchLower.includes('modeling') || searchLower.includes('computational')) &&
resourceTypes.some(r => r.includes('cpu') || r.includes('core'))) {
resourceScore += 0.3;
}
// Storage for data-intensive research
if ((searchLower.includes('data') || searchLower.includes('analysis') || searchLower.includes('dataset')) &&
resourceTypes.some(r => r.includes('storage'))) {
resourceScore += 0.2;
}
return Math.min(resourceScore, 1.0);
}
// Format individual similar project with optional similarity score
formatSimilarProject(project, similarity, index, showScore) {
let result = `${index}. **${project.requestTitle}**`;
if (showScore) {
result += ` (${(similarity * 100).toFixed(1)}% similar)`;
}
result += `\n`;
result += ` • **PI:** ${project.pi} (${project.piInstitution})\n`;
result += ` • **Field:** ${project.fos}\n`;
result += ` • **ID:** ${project.projectId}\n`;
if (project.resources.length > 0) {
const resources = this.summarizeResources(project.resources);
result += ` • **Resources:** ${resources}\n`;
}
// Show first 100 characters of abstract for context
const abstractPreview = project.abstract.length > 100
? project.abstract.substring(0, 100) + '...'
: project.abstract;
result += ` • **Focus:** ${abstractPreview}\n\n`;
return result;
}
calculateProjectSimilarity(project, searchTerms, referenceField) {
let score = 0;
// Field of science match (high importance)
if (referenceField && project.fos.toLowerCase() === referenceField.toLowerCase()) {
score += 0.6;
}
else if (referenceField && project.fos.toLowerCase().includes(referenceField.toLowerCase())) {
score += 0.3;
}
// Enhanced text similarity with weighted scoring
const projectText = (project.abstract + ' ' + project.requestTitle).toLowerCase();
const titleText = project.requestTitle