UNPKG

@access-mcp/shared

Version:

Shared utilities for ACCESS-CI MCP servers

628 lines (627 loc) 22.4 kB
/**
 * Shared taxonomies and reference data for ACCESS-CI MCP servers
 * These provide context to AI assistants about the ACCESS-CI ecosystem
 */

/**
 * NSF Field of Science classification with ACCESS-CI context
 * Based on NSF's formal classification system with added context about
 * typical resource usage patterns and software requirements
 *
 * Keyed by field name. Each entry carries a display name, description,
 * search keywords, typical resources, common software, and an
 * `allocation_range` object with `min`, `max` and `typical` values.
 */
export const FIELDS_OF_SCIENCE = {
  "Computer Science": {
    name: "Computer Science",
    description:
      "Research in algorithms, AI/ML, data science, cybersecurity, and high-performance computing",
    keywords: [
      "machine learning",
      "artificial intelligence",
      "deep learning",
      "neural networks",
      "data science",
      "algorithms",
      "distributed systems",
      "parallel computing",
      "HPC",
      "cybersecurity",
      "computer vision",
      "natural language processing",
      "NLP",
    ],
    typical_resources: ["GPU", "high memory", "fast storage", "high-speed networking"],
    common_software: [
      "TensorFlow",
      "PyTorch",
      "scikit-learn",
      "Python",
      "CUDA",
      "Jupyter",
      "NumPy",
      "Pandas",
    ],
    allocation_range: {
      min: 50000,
      max: 1000000,
      typical: 250000,
    },
  },
  "Biological Sciences": {
    name: "Biological Sciences",
    description:
      "Research in genomics, proteomics, structural biology, systems biology, and bioinformatics",
    keywords: [
      "genomics",
      "proteomics",
      "bioinformatics",
      "structural biology",
      "molecular dynamics",
      "sequence analysis",
      "protein folding",
      "phylogenetics",
      "systems biology",
      "metagenomics",
    ],
    typical_resources: ["high throughput", "large storage", "CPU clusters", "high memory"],
    common_software: [
      "BLAST",
      "GROMACS",
      "AMBER",
      "NAMD",
      "Rosetta",
      "Bowtie",
      "SAMtools",
      "BioPython",
    ],
    allocation_range: {
      min: 100000,
      max: 800000,
      typical: 300000,
    },
  },
  Physics: {
    name: "Physics",
    description:
      "Research in high energy physics, astrophysics, condensed matter, and computational physics",
    keywords: [
      "quantum mechanics",
      "particle physics",
      "astrophysics",
      "cosmology",
      "condensed matter",
      "computational physics",
      "molecular dynamics",
      "lattice QCD",
      "gravitational waves",
    ],
    typical_resources: ["CPU clusters", "high memory", "GPU for simulations", "large storage"],
    common_software: [
      "LAMMPS",
      "Quantum ESPRESSO",
      "VASP",
      "GROMACS",
      "ROOT",
      "Geant4",
      "MATLAB",
      "Mathematica",
    ],
    allocation_range: {
      min: 100000,
      max: 2000000,
      typical: 500000,
    },
  },
  Chemistry: {
    name: "Chemistry",
    description: "Research in computational chemistry, molecular modeling, and materials science",
    keywords: [
      "molecular dynamics",
      "quantum chemistry",
      "computational chemistry",
      "materials science",
      "drug discovery",
      "reaction mechanisms",
      "DFT",
      "ab initio",
    ],
    typical_resources: ["CPU clusters", "high memory", "GPU acceleration", "fast storage"],
    common_software: ["Gaussian", "GAMESS", "NWChem", "ORCA", "AMBER", "NAMD", "LAMMPS", "VMD"],
    allocation_range: {
      min: 75000,
      max: 1000000,
      typical: 300000,
    },
  },
  Engineering: {
    name: "Engineering",
    description:
      "Research in computational engineering, CFD, structural analysis, and design optimization",
    keywords: [
      "computational fluid dynamics",
      "CFD",
      "finite element analysis",
      "FEA",
      "structural analysis",
      "optimization",
      "CAD",
      "mechanical engineering",
      "aerospace",
    ],
    typical_resources: ["CPU clusters", "high memory", "GPU for visualization", "parallel I/O"],
    common_software: ["ANSYS", "OpenFOAM", "COMSOL", "ABAQUS", "LS-DYNA", "SU2", "ParaView"],
    allocation_range: {
      min: 100000,
      max: 1500000,
      typical: 400000,
    },
  },
  "Earth Sciences": {
    name: "Earth Sciences",
    description:
      "Research in climate modeling, atmospheric science, geophysics, and environmental science",
    keywords: [
      "climate modeling",
      "atmospheric science",
      "weather prediction",
      "oceanography",
      "geophysics",
      "seismology",
      "remote sensing",
      "environmental science",
    ],
    typical_resources: ["large storage", "CPU clusters", "high I/O", "data analytics"],
    common_software: ["WRF", "CESM", "NCAR", "netCDF", "GDAL", "Python", "R", "MATLAB"],
    allocation_range: {
      min: 150000,
      max: 2000000,
      typical: 600000,
    },
  },
  "Mathematics and Statistics": {
    name: "Mathematics and Statistics",
    description:
      "Research in numerical analysis, optimization, data analytics, and statistical modeling",
    keywords: [
      "numerical analysis",
      "optimization",
      "linear algebra",
      "statistics",
      "data analytics",
      "monte carlo",
      "stochastic processes",
      "computational mathematics",
    ],
    typical_resources: ["CPU clusters", "high memory", "GPU for linear algebra"],
    common_software: ["MATLAB", "R", "Python", "Julia", "Mathematica", "SAS", "SPSS", "Octave"],
    allocation_range: {
      min: 50000,
      max: 500000,
      typical: 150000,
    },
  },
  "Social Sciences": {
    name: "Social Sciences",
    description:
      "Research in economics, sociology, political science, and computational social science",
    keywords: [
      "econometrics",
      "agent-based modeling",
      "network analysis",
      "social networks",
      "political science",
      "computational social science",
      "survey analysis",
    ],
    typical_resources: ["data analytics", "storage", "CPU clusters for simulations"],
    common_software: ["R", "Python", "Stata", "SPSS", "NetLogo", "Gephi", "Julia"],
    allocation_range: {
      min: 25000,
      max: 300000,
      typical: 100000,
    },
  },
  "Astronomy and Astrophysics": {
    name: "Astronomy and Astrophysics",
    description: "Research in observational astronomy, cosmological simulations, and data analysis",
    keywords: [
      "cosmology",
      "galaxy formation",
      "stellar evolution",
      "exoplanets",
      "gravitational waves",
      "radio astronomy",
      "optical astronomy",
      "simulation",
    ],
    typical_resources: ["large storage", "CPU clusters", "GPU for visualization", "data pipelines"],
    common_software: ["Gadget", "FLASH", "Enzo", "Athena", "IRAF", "DS9", "Python", "astropy"],
    allocation_range: {
      min: 150000,
      max: 2500000,
      typical: 700000,
    },
  },
};

/**
 * ACCESS allocation tiers, keyed by short tier name
 * Each entry carries a display name, description, typical duration,
 * a `credit_range` with `min`/`max`, example use cases and eligibility text
 */
export const ALLOCATION_TYPES = {
  Discover: {
    name: "Discover ACCESS Credits",
    description:
      "Small allocations for exploring ACCESS resources and conducting preliminary research",
    typical_duration: "12 months",
    credit_range: {
      min: 1000,
      max: 400000,
    },
    use_cases: [
      "Preliminary research and feasibility studies",
      "Code development and testing",
      "Learning ACCESS systems",
      "Small-scale computational experiments",
      "Proof-of-concept work",
    ],
    eligibility: "All researchers at US institutions",
  },
  Explore: {
    name: "Explore ACCESS Credits",
    description: "Medium allocations for established research projects",
    typical_duration: "12 months",
    credit_range: {
      min: 400000,
      max: 1500000,
    },
    use_cases: [
      "Ongoing research projects",
      "Production computations",
      "Data analysis and processing",
      "Multi-parameter studies",
      "Medium-scale simulations",
    ],
    eligibility: "Researchers with demonstrated need beyond Discover level",
  },
  Accelerate: {
    name: "Accelerate ACCESS Credits",
    description: "Large allocations for significant computational research",
    typical_duration: "12 months",
    credit_range: {
      min: 1500000,
      max: 10000000,
    },
    use_cases: [
      "Large-scale simulations",
      "Major research initiatives",
      "High-throughput computing campaigns",
      "Big data analytics",
      "Multi-year projects",
    ],
    eligibility: "Well-established research programs with significant computational needs",
  },
  Maximize: {
    name: "Maximize ACCESS Credits",
    description: "Very large allocations for exceptional computational research with broad impact",
    typical_duration: "12 months",
    credit_range: {
      min: 10000000,
      max: 50000000,
    },
    use_cases: [
      "Grand challenge problems",
      "Transformative research",
      "National-scale computing campaigns",
      "Major scientific breakthroughs",
      "Leadership-class computing",
    ],
    eligibility:
      "Exceptional projects with demonstrated transformative potential and broad impact",
  },
};

/**
 * Broad categories of resources, keyed by short category name
 * Each entry carries a display name, description, typical use cases
 * and key features
 */
export const RESOURCE_TYPES = {
  CPU: {
    name: "CPU Compute",
    description: "General-purpose computing with standard processors",
    typical_use_cases: [
      "Serial and parallel applications",
      "General scientific computing",
      "Data processing",
      "Simulations",
    ],
    key_features: ["High core counts", "Good memory bandwidth", "MPI support", "Long-running jobs"],
  },
  GPU: {
    name: "GPU Accelerated",
    description: "Systems with graphics processing units for accelerated computing",
    typical_use_cases: [
      "Machine learning and AI",
      "Deep learning",
      "Molecular dynamics",
      "Image processing",
      "CFD simulations",
    ],
    key_features: ["CUDA/ROCm support", "High memory bandwidth", "Tensor cores", "Fast training"],
  },
  "High Memory": {
    name: "High Memory Systems",
    description: "Systems with large RAM for memory-intensive applications",
    typical_use_cases: [
      "Large datasets in memory",
      "Genome assembly",
      "In-memory databases",
      "Large-scale graph analysis",
    ],
    key_features: ["1TB+ memory per node", "Fast memory access", "Large working sets"],
  },
  Storage: {
    name: "Storage Resources",
    description: "High-capacity storage systems for data-intensive research",
    typical_use_cases: [
      "Large datasets",
      "Data archival",
      "Intermediate results",
      "Collaborative data sharing",
    ],
    key_features: [
      "Petabyte-scale capacity",
      "High-speed I/O",
      "Data management tools",
      "Backup and archival",
    ],
  },
  Cloud: {
    name: "Cloud Computing",
    description: "Flexible cloud-based computing resources",
    typical_use_cases: [
      "Web services",
      "Containers and microservices",
      "Science gateways",
      "Interactive computing",
      "Elastic workloads",
    ],
    key_features: ["On-demand resources", "Virtual machines", "Container support", "API access"],
  },
};

/**
 * Own-property test used by the lookup helpers below.
 * Plain bracket access also finds inherited members (e.g. "constructor",
 * "toString", "__proto__"), which must never be returned as table entries.
 *
 * @param {object} table - Lookup table to inspect
 * @param {*} key - Candidate key (coerced to a property key)
 * @returns {boolean} True only when `key` is defined directly on `table`
 */
function hasOwnKey(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key);
}

/**
 * Get field of science by name or partial match
 *
 * Resolution order:
 *   1. exact key match
 *   2. case-insensitive exact match (after trimming whitespace)
 *   3. case-insensitive partial match, where the key contains the query or
 *      the query contains the key; the first hit in declaration order wins
 *
 * @param {string} fieldName - Field name or fragment to look up
 * @returns {object|null} Matching FIELDS_OF_SCIENCE entry, or null when the
 *   input is not a string, is blank, or matches nothing
 */
export function getFieldOfScience(fieldName) {
  if (typeof fieldName !== "string") {
    return null;
  }

  // Exact match
  if (hasOwnKey(FIELDS_OF_SCIENCE, fieldName)) {
    return FIELDS_OF_SCIENCE[fieldName];
  }

  // A blank query would "match" every key (every string includes ""),
  // so reject it up front instead of returning the first field.
  const query = fieldName.trim().toLowerCase();
  if (query === "") {
    return null;
  }

  const entries = Object.entries(FIELDS_OF_SCIENCE);

  // Case-insensitive exact match. This must run before the partial pass,
  // otherwise "astronomy and astrophysics" resolves to Physics because the
  // query contains the earlier key "physics".
  for (const [key, value] of entries) {
    if (key.toLowerCase() === query) {
      return value;
    }
  }

  // Case-insensitive partial match
  for (const [key, value] of entries) {
    const lowerKey = key.toLowerCase();
    if (lowerKey.includes(query) || query.includes(lowerKey)) {
      return value;
    }
  }

  return null;
}

/**
 * Get all field names
 *
 * @returns {string[]} Keys of FIELDS_OF_SCIENCE in declaration order
 */
export function getFieldNames() {
  return Object.keys(FIELDS_OF_SCIENCE);
}

/**
 * Get allocation type by name
 *
 * Tries an exact key match first, then a case-insensitive match on the
 * trimmed name. No partial matching is performed.
 *
 * @param {string} typeName - Allocation tier name, e.g. "Discover"
 * @returns {object|null} Matching ALLOCATION_TYPES entry, or null when the
 *   input is not a string or matches nothing
 */
export function getAllocationType(typeName) {
  if (typeof typeName !== "string") {
    return null;
  }

  // Exact match
  if (hasOwnKey(ALLOCATION_TYPES, typeName)) {
    return ALLOCATION_TYPES[typeName];
  }

  // Case-insensitive exact match
  const query = typeName.trim().toLowerCase();
  for (const [key, value] of Object.entries(ALLOCATION_TYPES)) {
    if (key.toLowerCase() === query) {
      return value;
    }
  }

  return null;
}

/**
 * ACCESS-CI Feature Codes
 * These numeric codes identify capabilities and characteristics of ACCESS resources
 * Derived from the ACCESS Operations API resource catalog
 */
export const ACCESS_FEATURE_CODES = {
  // Core resource capabilities
  100: "GPU Computing",
  101: "High Memory Computing",
  102: "High Performance Storage",
  103: "High Throughput Computing",
  104: "Large Scale Computing",
  // Access and interface types
  134: "Cloud Computing Platform",
  135: "Container Support",
  136: "Virtual Machine Support",
  137: "Science Gateway Resource", // Often excluded from general listings
  138: "Interactive Computing",
  139: "ACCESS Allocated Resource", // Requires ACCESS allocation
  // Specialized capabilities
  140: "Data Transfer Node",
  141: "Visualization Capabilities",
  142: "GPU Acceleration",
  143: "AI/ML Optimized",
  144: "Quantum Computing",
  // Network and I/O
  145: "High-Speed Networking",
  146: "Parallel I/O",
  147: "Data Staging",
  // Software and environment
  148: "Specialized Software Stack",
  149: "Custom Environments",
  150: "Jupyter Support",
  151: "RStudio Support",
  // Note: This is a partial mapping based on observed feature IDs
  // Complete documentation may be available from ACCESS Operations team
};

/**
 * Get feature name by code
 *
 * @param {number|string} featureCode - Feature code (numbers and numeric
 *   strings address the same entry)
 * @returns {string} The mapped feature name, or "Unknown Feature (<code>)"
 *   when the code is not in ACCESS_FEATURE_CODES
 */
export function getFeatureName(featureCode) {
  // Own-property check so inherited names such as "toString" fall through
  // to the unknown-feature string instead of returning a function.
  if (hasOwnKey(ACCESS_FEATURE_CODES, featureCode)) {
    return ACCESS_FEATURE_CODES[featureCode];
  }
  return `Unknown Feature (${featureCode})`;
}

/**
 * Get feature names for an array of codes
 *
 * @param {Array<number|string>} featureCodes - Feature codes to translate
 * @returns {string[]} One name per input code, in input order; an empty
 *   array when `featureCodes` is null or undefined
 */
export function getFeatureNames(featureCodes) {
  if (featureCodes == null) {
    return [];
  }
  return featureCodes.map((code) => getFeatureName(code));
}

/**
 * Descriptions of individual ACCESS systems, keyed by system name
 * Not every entry defines every property (for example `gpu_types`,
 * `max_memory_per_node` and `storage_capacity` are absent on some systems),
 * so consumers should treat those properties as optional
 */
export const ACCESS_SYSTEMS = {
  Delta: {
    name: "Delta",
    organization: "NCSA (University of Illinois)",
    description: "GPU-focused system with NVIDIA A100 and A40 GPUs",
    strengths: ["AI/ML", "Deep Learning", "GPU Computing", "Large Models"],
    gpu_types: ["NVIDIA A100 (40GB/80GB)", "NVIDIA A40"],
    max_memory_per_node: "256 GB (CPU nodes), 2 TB (large memory)",
    storage_capacity: "Large parallel filesystem",
    user_interfaces: ["SSH", "Open OnDemand", "Jupyter"],
    ideal_for: [
      "Training large language models",
      "Deep learning research",
      "GPU-accelerated simulations",
      "Computer vision",
    ],
    experience_level: ["Intermediate", "Advanced"],
  },
  "Bridges-2": {
    name: "Bridges-2",
    organization: "PSC (Pittsburgh Supercomputing Center)",
    description: "Versatile system with CPU, GPU, and high-memory nodes",
    strengths: ["General Purpose", "High Memory", "GPU Computing", "Visualization"],
    gpu_types: ["NVIDIA V100", "NVIDIA A100", "NVIDIA A40"],
    max_memory_per_node: "4 TB Extreme Memory nodes",
    storage_capacity: "15 PB",
    user_interfaces: ["SSH", "Open OnDemand", "Jupyter", "RStudio", "VS Code"],
    ideal_for: [
      "Genome assembly",
      "Large-scale data analytics",
      "Mixed workloads (CPU+GPU)",
      "Memory-intensive applications",
    ],
    experience_level: ["Beginner", "Intermediate", "Advanced"],
  },
  Anvil: {
    name: "Anvil",
    organization: "Purdue University",
    description: "Composable subsystem architecture with flexible resource allocation",
    strengths: ["Composable Architecture", "Flexible Resources", "CPU Computing"],
    max_memory_per_node: "256 GB (standard), 1 TB (large memory)",
    storage_capacity: "Multi-PB",
    user_interfaces: ["SSH", "Open OnDemand", "Jupyter"],
    ideal_for: [
      "Parallel CPU applications",
      "Genomics workflows",
      "Data science",
      "Gateway hosting",
    ],
    experience_level: ["Beginner", "Intermediate", "Advanced"],
  },
  Expanse: {
    name: "Expanse",
    organization: "SDSC (San Diego Supercomputing Center)",
    description: "Balanced CPU and GPU computing with excellent storage",
    strengths: ["Data-Intensive Computing", "GPU Computing", "CPU Computing"],
    gpu_types: ["NVIDIA V100"],
    max_memory_per_node: "2 TB",
    storage_capacity: "7 PB parallel storage",
    user_interfaces: ["SSH", "Jupyter", "RStudio"],
    ideal_for: [
      "Data analytics at scale",
      "Bioinformatics",
      "Machine learning",
      "Simulation science",
    ],
    experience_level: ["Intermediate", "Advanced"],
  },
  Stampede3: {
    name: "Stampede3",
    organization: "TACC (Texas Advanced Computing Center)",
    description: "Leadership-class supercomputer with NVIDIA Grace-Hopper",
    strengths: ["Leadership Computing", "AI/ML", "Large-Scale Simulation"],
    gpu_types: ["NVIDIA Grace Hopper Superchip"],
    max_memory_per_node: "480 GB (Grace-Hopper)",
    user_interfaces: ["SSH", "TACC Portal"],
    ideal_for: [
      "Grand challenge problems",
      "Large-scale AI training",
      "Extreme-scale simulations",
      "Leadership-class workloads",
    ],
    experience_level: ["Advanced", "Expert"],
  },
  Jetstream2: {
    name: "Jetstream2",
    organization: "Indiana University",
    description: "Cloud computing platform for interactive science and gateways",
    strengths: ["Cloud Computing", "Interactive Computing", "Science Gateways", "Flexibility"],
    gpu_types: ["NVIDIA A100"],
    user_interfaces: ["Exosphere (web)", "OpenStack API", "Jupyter", "RStudio"],
    ideal_for: [
      "Science gateways",
      "Web services",
      "Interactive analysis",
      "Classroom use",
      "Container-based workflows",
    ],
    experience_level: ["Beginner", "Intermediate"],
  },
  "Open Science Grid": {
    name: "Open Science Grid",
    organization: "OSG Consortium",
    description: "High-throughput computing for distributed workflows",
    strengths: ["High-Throughput Computing", "Distributed Computing", "Workflows"],
    user_interfaces: ["HTCondor", "APIs"],
    ideal_for: [
      "High-throughput workflows",
      "Parameter sweeps",
      "Embarrassingly parallel tasks",
      "Large-scale ensembles",
    ],
    experience_level: ["Intermediate", "Advanced"],
  },
};

/**
 * Memory requirements guide
 * Keyed by a human-readable memory band; each entry lists typical uses and
 * the systems recommended for that band
 */
export const MEMORY_REQUIREMENTS = {
  "< 64 GB": {
    description: "Standard memory for most applications",
    typical_uses: ["Serial applications", "Small datasets", "Code development"],
    recommended_systems: ["Anvil", "Bridges-2", "Expanse"],
  },
  "64-256 GB": {
    description: "Medium memory for moderate-scale applications",
    typical_uses: ["Parallel applications", "Medium datasets", "Ensemble runs"],
    recommended_systems: ["Delta", "Anvil", "Bridges-2", "Expanse"],
  },
  "256-512 GB": {
    description: "High memory for large-scale applications",
    typical_uses: ["Large genomic assemblies", "Big data analytics", "In-memory databases"],
    recommended_systems: ["Bridges-2", "Anvil"],
  },
  "> 512 GB": {
    description: "Extreme memory for the largest problems",
    typical_uses: ["Whole genome assembly", "Extreme-scale graph analytics", "Very large matrices"],
    recommended_systems: ["Bridges-2 (up to 4TB)"],
  },
};

/**
 * GPU selection guide based on use case
 * Keyed by workload category; each entry names a recommended GPU, the
 * systems offering it, a minimum per-GPU memory figure and free-form notes
 */
export const GPU_SELECTION_GUIDE = {
  "Large Language Models": {
    recommended_gpu: "NVIDIA A100 80GB or Grace-Hopper",
    recommended_systems: ["Delta", "Stampede3"],
    min_memory: "80 GB per GPU",
    notes: "Multi-GPU required for models >10B parameters",
  },
  "Computer Vision": {
    recommended_gpu: "NVIDIA A100 40GB or V100",
    recommended_systems: ["Delta", "Bridges-2", "Expanse"],
    min_memory: "16-40 GB per GPU",
    notes: "A100 recommended for training, V100 sufficient for inference",
  },
  "Molecular Dynamics": {
    recommended_gpu: "NVIDIA A100 or V100",
    recommended_systems: ["Delta", "Bridges-2", "Expanse"],
    min_memory: "16-40 GB per GPU",
    notes: "Double-precision performance important",
  },
  "General AI/ML": {
    recommended_gpu: "NVIDIA V100 or A100",
    recommended_systems: ["Delta", "Bridges-2", "Expanse", "Jetstream2"],
    min_memory: "16-40 GB per GPU",
    notes: "V100 good for most workloads, A100 for larger models",
  },
};