cuda-wasm
Version:
High-performance CUDA to WebAssembly/WebGPU transpiler with Rust safety - Run GPU kernels in browsers and Node.js
172 lines (139 loc) • 5.44 kB
JavaScript
// CUDA Parser - Parses CUDA code and extracts kernel information
const fs = require('fs');
/**
 * Lightweight, regex-based CUDA source parser.
 *
 * Extracts `__global__` kernels and `__device__` functions (name, return
 * type, parameters, body text) and offers a heuristic kernel analysis.
 * It is not a full C++ parser: declarations whose parameter list contains
 * parentheses (e.g. function pointers) are not recognised.
 */
class CudaParser {
  constructor() {
    this.kernels = [];
    // Kept for API compatibility; parse() never populates this list.
    this.globalFunctions = [];
    this.deviceFunctions = [];
  }

  /**
   * Parses CUDA source text and records every kernel and device function.
   *
   * Comments and string/char literals are blanked out before declarations
   * are searched, so commented-out code is ignored; function bodies are
   * still returned verbatim from the original text.
   *
   * @param {string} cudaCode - CUDA source; null/undefined is treated as empty.
   * @returns {{kernels: object[], deviceFunctions: object[], globalFunctions: object[]}}
   */
  parse(cudaCode) {
    const source = cudaCode == null ? '' : String(cudaCode);
    const masked = this.maskNonCode(source);

    // Fresh regex literals per call: /g regexes carry lastIndex state.
    // The optional group accepts `__global__ void __launch_bounds__(..) name(..)`.
    const kernelRegex =
      /__global__\s+((?:[\w:]+[\s*&]+)*)(?:__launch_bounds__\s*\([^)]*\)\s*)?(\w+)\s*\(([^)]*)\)\s*\{/g;
    const deviceRegex = /__device__\s+((?:[\w:]+[\s*&]+)*)(\w+)\s*\(([^)]*)\)\s*\{/g;

    this.globalFunctions = [];
    this.kernels = this.collectFunctions(source, masked, kernelRegex).map((fn) => ({
      name: fn.name,
      returnType: fn.returnType,
      parameters: fn.parameters,
      body: fn.body,
      attributes: this.extractAttributes(masked, fn.index),
    }));
    this.deviceFunctions = this.collectFunctions(source, masked, deviceRegex).map((fn) => ({
      name: fn.name,
      returnType: fn.returnType,
      parameters: fn.parameters,
      body: fn.body,
    }));

    return {
      kernels: this.kernels,
      deviceFunctions: this.deviceFunctions,
      globalFunctions: this.globalFunctions,
    };
  }

  /**
   * Runs a declaration regex over the masked source and builds one record
   * per match. Capture groups must be: 1 = return type tokens, 2 = name,
   * 3 = raw parameter list.
   *
   * @param {string} source - Original source (bodies are sliced from here).
   * @param {string} masked - Same-length copy with comments/literals blanked.
   * @param {RegExp} declarationRegex - Global regex matching up to the opening `{`.
   * @returns {object[]} Records with index, name, returnType, parameters, body.
   */
  collectFunctions(source, masked, declarationRegex) {
    const functions = [];
    let match;
    while ((match = declarationRegex.exec(masked)) !== null) {
      functions.push({
        index: match.index,
        name: match[2],
        returnType: this.normalizeReturnType(match[1]),
        parameters: this.parseParameters(match[3]),
        body: this.extractBody(source, match.index + match[0].length),
      });
    }
    return functions;
  }

  /**
   * Turns the tokens found between the CUDA qualifier and the function name
   * into a return type, dropping function specifiers that are not part of
   * the type. Falls back to 'void' when nothing remains.
   *
   * @param {string} rawType - Text captured before the function name.
   * @returns {string} Return type with single-space separated tokens.
   */
  normalizeReturnType(rawType) {
    const specifiers = [
      '__host__',
      '__device__',
      '__forceinline__',
      '__noinline__',
      '__inline__',
      'inline',
      'static',
      'constexpr',
    ];
    const tokens = (rawType || '')
      .trim()
      .split(/\s+/)
      .filter((token) => token !== '' && !specifiers.includes(token));
    return tokens.length > 0 ? tokens.join(' ') : 'void';
  }

  /**
   * Parses a raw parameter list into `{ type, name }` records.
   *
   * Handles pointer/reference declarators attached to the name (`float *a`),
   * array suffixes (`float out[]` -> type `float[]`), default values (ignored)
   * and commas nested inside template arguments. Parameters without a name
   * (including a lone `void`) are skipped.
   *
   * @param {string} paramStr - Text between the parentheses of a declaration.
   * @returns {{type: string, name: string}[]}
   */
  parseParameters(paramStr) {
    if (typeof paramStr !== 'string' || !paramStr.trim()) return [];

    const params = [];
    for (const part of this.splitTopLevel(paramStr)) {
      // Anything after '=' is a default value, not part of the declarator.
      const declaration = part.split('=')[0].trim();
      if (!declaration) continue;

      // type (lazy) + trailing identifier + optional array suffixes.
      const match = declaration.match(/^(.+?)\s*\b(\w+)\s*((?:\[[^\]]*\]\s*)*)$/);
      if (!match) continue;

      const arraySuffix = match[3].replace(/\s+/g, '');
      params.push({
        type: match[1].trim() + arraySuffix,
        name: match[2],
      });
    }
    return params;
  }

  /**
   * Splits a comma-separated list, ignoring commas nested inside
   * `<>`, `()`, `[]` or `{}`.
   *
   * @param {string} list - Comma-separated text.
   * @returns {string[]} Untrimmed segments.
   */
  splitTopLevel(list) {
    const parts = [];
    let depth = 0;
    let current = '';
    for (const ch of list) {
      if ('<([{'.includes(ch)) {
        depth++;
      } else if ('>)]}'.includes(ch)) {
        depth = Math.max(0, depth - 1);
      }
      if (ch === ',' && depth === 0) {
        parts.push(current);
        current = '';
      } else {
        current += ch;
      }
    }
    parts.push(current);
    return parts;
  }

  /**
   * If a comment or a string/char literal starts at `index`, returns the
   * index just past it; otherwise returns `index` unchanged.
   *
   * A quote literal is abandoned at an unescaped newline so a stray
   * apostrophe cannot swallow the rest of the file, and a `'` directly after
   * a word character is not treated as a literal (C++ digit separators such
   * as `1'000`).
   *
   * @param {string} code - Source text.
   * @param {number} index - Position to inspect.
   * @returns {number} Index after the skipped span, or `index`.
   */
  skipNonCode(code, index) {
    const ch = code[index];
    const next = code[index + 1];

    if (ch === '/' && next === '/') {
      const eol = code.indexOf('\n', index);
      return eol === -1 ? code.length : eol;
    }
    if (ch === '/' && next === '*') {
      const close = code.indexOf('*/', index + 2);
      return close === -1 ? code.length : close + 2;
    }

    const afterWordChar = index > 0 && /\w/.test(code[index - 1]);
    if (ch === '"' || (ch === "'" && !afterWordChar)) {
      let k = index + 1;
      while (k < code.length) {
        const c = code[k];
        if (c === '\\') {
          k += 2; // skip the escaped character
        } else if (c === ch) {
          return k + 1;
        } else if (c === '\n') {
          return k;
        } else {
          k++;
        }
      }
      return code.length;
    }
    return index;
  }

  /**
   * Returns a copy of `code` of identical length in which comments and
   * string/char literals are replaced by spaces (newlines are kept), so
   * match indices remain valid against the original text.
   *
   * @param {string} code - Source text.
   * @returns {string} Masked text.
   */
  maskNonCode(code) {
    const pieces = [];
    let i = 0;
    while (i < code.length) {
      const end = this.skipNonCode(code, i);
      if (end === i) {
        pieces.push(code[i]);
        i++;
      } else {
        pieces.push(code.slice(i, end).replace(/[^\n]/g, ' '));
        i = end;
      }
    }
    return pieces.join('');
  }

  /**
   * Extracts the text of a brace-delimited body.
   *
   * Braces inside comments and string/char literals are not counted. If the
   * closing brace is never found, everything up to the end of `code` is
   * returned.
   *
   * @param {string} code - Source text.
   * @param {number} startIdx - Index just after the body's opening `{`.
   * @returns {string} Trimmed body text without the enclosing braces.
   */
  extractBody(code, startIdx) {
    let depth = 1;
    let i = startIdx;
    while (i < code.length) {
      const skipTo = this.skipNonCode(code, i);
      if (skipTo > i) {
        i = skipTo;
        continue;
      }
      const ch = code[i];
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) break;
      }
      i++;
    }
    return code.slice(startIdx, i).trim();
  }

  /**
   * Collects kernel attributes; currently only `__launch_bounds__` with
   * numeric arguments.
   *
   * The search covers the declaration head (from `kernelStart` to its opening
   * brace) plus up to 200 characters before it, cut at the previous `}` or
   * `;` so an earlier kernel's launch bounds are not attributed to this one.
   *
   * @param {string} code - Source text.
   * @param {number} kernelStart - Index of the kernel's `__global__` keyword.
   * @returns {object[]} Zero or one `launch_bounds` attribute record.
   */
  extractAttributes(code, kernelStart) {
    const attributes = [];
    const launchBoundsRegex = /__launch_bounds__\s*\(\s*(\d+)\s*(?:,\s*(\d+)\s*)?\)/;

    const lookBehind = code.substring(Math.max(0, kernelStart - 200), kernelStart);
    const boundary = Math.max(lookBehind.lastIndexOf('}'), lookBehind.lastIndexOf(';'));
    const prefix = lookBehind.substring(boundary + 1);

    const bodyStart = code.indexOf('{', kernelStart);
    const head = bodyStart === -1 ? '' : code.substring(kernelStart, bodyStart);

    const launchMatch = (prefix + head).match(launchBoundsRegex);
    if (launchMatch) {
      attributes.push({
        type: 'launch_bounds',
        maxThreads: Number.parseInt(launchMatch[1], 10),
        minBlocks: launchMatch[2] ? Number.parseInt(launchMatch[2], 10) : null,
      });
    }
    return attributes;
  }

  /**
   * Produces a heuristic analysis of a parsed kernel.
   *
   * All checks are simple text matches on the kernel body with comments and
   * string literals blanked out; the numeric fields are fixed placeholder
   * values, not measurements.
   *
   * @param {{name: string, body: string}} kernel - A kernel record from parse().
   * @returns {object} Analysis with complexity, memoryPattern and suggestions.
   */
  analyzeKernel(kernel) {
    const rawBody = typeof kernel.body === 'string' ? kernel.body : '';
    const body = this.maskNonCode(rawBody);
    const usesSharedMemory = body.includes('__shared__');

    const analysis = {
      name: kernel.name,
      complexity: 'low',
      memoryPattern: 'unknown',
      threadUtilization: 0,
      sharedMemoryUsage: 0,
      registerUsage: 0,
      suggestions: [],
    };

    // Any use of the x thread/block index is taken as a coalesced pattern.
    if (body.includes('threadIdx.x') || body.includes('blockIdx.x')) {
      analysis.memoryPattern = 'coalesced';
      analysis.threadUtilization = 80;
    }

    if (usesSharedMemory) {
      analysis.sharedMemoryUsage = 1024; // placeholder
      analysis.suggestions.push('Shared memory detected - ensure bank conflicts are minimized');
    }

    // Word boundaries keep identifiers such as `wait_for(` from counting.
    const loopCount = (body.match(/\b(?:for|while)\s*\(/g) || []).length;
    const ifCount = (body.match(/\bif\s*\(/g) || []).length;
    if (loopCount > 2 || ifCount > 3) {
      analysis.complexity = 'high';
    } else if (loopCount > 0 || ifCount > 0) {
      analysis.complexity = 'medium';
    }

    if (usesSharedMemory && !/__syncthreads\s*\(/.test(body)) {
      analysis.suggestions.push('Consider adding __syncthreads() when using shared memory');
    }

    // `new` must be a whole word so names like `newValue` are not flagged.
    if (body.includes('malloc') || /\bnew\b/.test(body)) {
      analysis.suggestions.push('Dynamic memory allocation in kernels can hurt performance');
    }

    return analysis;
  }
}
// Expose the CudaParser class as this CommonJS module's sole export.
module.exports = CudaParser;