/**
* WebGPU Compute Engine - high-performance tensor operations with GPU acceleration.
* Part of greed.js, which runs Python libraries in the browser with WebGPU
* acceleration (PyTorch, NumPy, and more). Refactored from a monolithic
* implementation for better modularity and performance.
*/
import EventEmitter from '../../core/event-emitter.js';
import BufferManager from './buffer-manager.js';
import PipelineCache from './pipeline-cache.js';
import logger from '../../utils/logger.js';
class WebGPUComputeEngine extends EventEmitter {
constructor(options = {}) {
super();
this.config = {
powerPreference: options.powerPreference || 'high-performance',
enableProfiling: options.enableProfiling !== false,
maxBufferSize: options.maxBufferSize || 256 * 1024 * 1024, // 256MB
workgroupSize: options.workgroupSize || [64, 1, 1],
enableValidation: options.enableValidation !== false,
...options
};
// Core WebGPU resources
this.adapter = null;
this.device = null;
this.isInitialized = false;
// Modular components
this.bufferManager = null;
this.pipelineCache = null;
// Feature support
this.supportedFeatures = new Set();
this.limits = null;
// Performance tracking
this.stats = {
computeOperations: 0,
totalExecutionTime: 0,
averageExecutionTime: 0,
memoryUsage: 0,
lastOperationTime: 0
};
}
/**
* Initialize WebGPU device and components
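*
* Resolves to true on success and to false (with initFailureReason set) on
* failure, rather than throwing. A minimal sketch of the startup flow:
* @example
* const engine = new WebGPUComputeEngine({ powerPreference: 'low-power' });
* if (!(await engine.initialize())) {
*   console.warn('GPU unavailable:', engine.initFailureReason);
* }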
*/
async initialize() {
if (this.isInitialized) {
return true;
}
try {
this.emit('init:start');
// Check WebGPU support
if (!navigator.gpu) {
throw new Error('WebGPU not supported in this browser');
}
// Request adapter
this.adapter = await navigator.gpu.requestAdapter({
powerPreference: this.config.powerPreference
});
if (!this.adapter) {
throw new Error('Failed to get WebGPU adapter');
}
// Get device features and limits
this.supportedFeatures = this.adapter.features;
this.limits = this.adapter.limits;
this.emit('init:adapter', {
features: Array.from(this.supportedFeatures),
limits: this.limits
});
// Request device with required features
const deviceDescriptor = {
requiredFeatures: [],
requiredLimits: {}
};
// Add optional features if supported
if (this.supportedFeatures.has('timestamp-query')) {
deviceDescriptor.requiredFeatures.push('timestamp-query');
}
this.device = await this.adapter.requestDevice(deviceDescriptor);
// Set up comprehensive error handling
this.device.addEventListener('uncapturederror', (event) => {
const error = event.error;
this.emit('device:error', { error, type: 'uncaptured', timestamp: Date.now() });
logger.error('WebGPU uncaptured error:', {
type: error.constructor.name,
message: error.message,
stack: error.stack
});
// Attempt recovery where the error type allows it
this._handleDeviceError(error);
});
// Initialize modular components
this.bufferManager = new BufferManager(this.device, {
maxBufferSize: this.config.maxBufferSize,
enablePooling: true,
maxPoolSize: 100
});
this.pipelineCache = new PipelineCache(this.device, {
maxCacheSize: 50,
enableWarmup: true,
shaderOptimization: 'balanced'
});
// Set up event forwarding
this._setupEventForwarding();
// Warm up commonly used pipelines before first use
await this.pipelineCache.warmup();
this.isInitialized = true;
this.emit('init:complete', {
device: this.device,
features: Array.from(this.supportedFeatures)
});
return true;
} catch (error) {
this.emit('init:error', { error, timestamp: Date.now() });
logger.error('WebGPU initialization failed:', {
type: error.constructor.name,
message: error.message,
stack: error.stack,
config: this.config
});
// Set failure state for debugging
this.isInitialized = false;
this.initFailureReason = error.message;
return false;
}
}
/**
* Execute tensor operation on GPU
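*
* A minimal sketch; shapes are read from an optional `.shape` property on
* the typed arrays, and 'matmul' is one of the operations sized in
* _calculateOutputSize():
* @example
* const a = Object.assign(new Float32Array(6), { shape: [2, 3] });
* const b = Object.assign(new Float32Array(12), { shape: [3, 4] });
* const c = await engine.execute('matmul', [a, b]); // 2×4 = 8 elements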
*/
async execute(operation, tensors, options = {}) {
if (!this.isInitialized) {
throw new Error('WebGPU compute engine not initialized');
}
const startTime = performance.now();
this.emit('compute:start', { operation, options });
try {
// Validate inputs
this._validateOperation(operation, tensors, options);
// Get optimal workgroup size for this operation
const tensorArray = Array.isArray(tensors) ? tensors : [tensors];
const optimalWorkgroupSize = this.pipelineCache.getOptimalWorkgroupSize(
operation,
tensorArray[0].shape || [tensorArray[0].length],
this.limits
);
// Get compute pipeline
const pipeline = await this.pipelineCache.get(operation, {
workgroupSize: options.workgroupSize || optimalWorkgroupSize,
dataType: options.dataType || 'f32',
inputCount: tensorArray.length,
outputCount: options.outputCount || 1,
...options
});
// Prepare buffers
const buffers = await this._prepareBuffers(tensors, operation, options);
// Create bind group
const bindGroup = this._createBindGroup(pipeline, buffers, options);
// Execute compute pass
const result = await this._executeComputePass(pipeline, bindGroup, buffers, options);
// Update statistics
const executionTime = performance.now() - startTime;
this._updateStats(operation, executionTime, buffers);
this.emit('compute:complete', {
operation,
executionTime,
resultSize: result.length
});
return result;
} catch (error) {
const executionTime = performance.now() - startTime;
// Enhanced error handling with context
const errorContext = {
operation,
error: {
type: error.constructor.name,
message: error.message,
stack: error.stack
},
executionTime,
tensors: Array.isArray(tensors) ? tensors.length : 1,
options,
deviceStable: this.deviceStable ?? true,
timestamp: Date.now()
};
this.emit('compute:error', errorContext);
logger.error('WebGPU compute operation failed:', errorContext);
// Attempt recovery for specific error types
if (error.message.includes('out of memory') || error.constructor.name === 'GPUOutOfMemoryError') {
logger.warn('GPU memory exhausted, attempting emergency cleanup');
await this.bufferManager.emergencyCleanup();
this.emit('recovery:memory', { operation, timestamp: Date.now() });
}
throw error;
}
}
/**
* Execute batch of operations efficiently
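*
* Illustrative only (`logits` stands in for a caller-supplied Float32Array);
* each entry mirrors the execute() signature:
* @example
* const [probs, total] = await engine.executeBatch([
*   { operation: 'softmax', tensors: logits },
*   { operation: 'sum', tensors: logits, options: { keepDim: false } }
* ], { parallel: true, maxConcurrency: 2 });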
*/
async executeBatch(operations, options = {}) {
const { parallel = false, maxConcurrency = 4 } = options;
if (parallel) {
// Execute operations in parallel with concurrency limit
const semaphore = new Semaphore(maxConcurrency);
const promises = operations.map(async (op) => {
await semaphore.acquire();
try {
return await this.execute(op.operation, op.tensors, op.options);
} finally {
semaphore.release();
}
});
return Promise.all(promises);
} else {
// Execute operations sequentially
const results = [];
for (const op of operations) {
const result = await this.execute(op.operation, op.tensors, op.options);
results.push(result);
}
return results;
}
}
/**
* Copy tensor data to GPU buffer
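*
* A minimal sketch; the default usage flags are STORAGE | COPY_DST:
* @example
* const buf = await engine.uploadTensor(new Float32Array([1, 2, 3]));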
*/
async uploadTensor(data, options = {}) {
const { usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST } = options;
return this.bufferManager.createMappedBuffer(data, usage);
}
/**
* Download tensor data from GPU buffer
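*
* Note that `size` is an element count, not a byte length (32-bit floats
* are assumed throughout). A sketch, where `gpuBuffer` is a caller-held
* GPUBuffer:
* @example
* const out = await engine.downloadTensor(gpuBuffer, 1024); // Float32Array(1024)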
*/
async downloadTensor(buffer, size, options = {}) {
const { format = Float32Array } = options;
// Create staging buffer for readback
const stagingBuffer = this.bufferManager.allocate(
size * 4, // Assuming 32-bit floats
GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
);
try {
// Copy GPU buffer to staging buffer
const encoder = this.device.createCommandEncoder();
encoder.copyBufferToBuffer(buffer, 0, stagingBuffer, 0, size * 4);
this.device.queue.submit([encoder.finish()]);
// Map and read data
await stagingBuffer.mapAsync(GPUMapMode.READ);
const mappedRange = stagingBuffer.getMappedRange();
const result = new format(mappedRange.slice());
stagingBuffer.unmap();
return result;
} finally {
this.bufferManager.release(stagingBuffer, { forceDestroy: true });
}
}
/**
* Get engine statistics
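*
* @example
* const { computeOperations, averageExecutionTime } = engine.getStats();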
*/
getStats() {
return {
...this.stats,
bufferStats: this.bufferManager?.getStats() || {},
pipelineStats: this.pipelineCache?.getStats() || {},
deviceLimits: this.limits,
supportedFeatures: Array.from(this.supportedFeatures || [])
};
}
/**
* Cleanup all resources
*/
async cleanup() {
this.emit('cleanup:start');
try {
if (this.bufferManager) {
await this.bufferManager.cleanup();
this.bufferManager = null;
}
if (this.pipelineCache) {
this.pipelineCache.cleanup();
this.pipelineCache = null;
}
if (this.device) {
this.device.destroy();
this.device = null;
}
this.adapter = null;
this.isInitialized = false;
this.emit('cleanup:complete');
} catch (error) {
this.emit('cleanup:error', { error });
throw error;
}
}
// Private methods
_validateOperation(operation, tensors, options) {
if (!operation || typeof operation !== 'string') {
throw new Error('Operation must be a non-empty string');
}
if (!tensors) {
throw new Error('Tensors parameter is required');
}
const tensorArray = Array.isArray(tensors) ? tensors : [tensors];
for (const tensor of tensorArray) {
if (!tensor || (!ArrayBuffer.isView(tensor) && !(tensor instanceof ArrayBuffer))) {
throw new Error('All tensors must be typed arrays or ArrayBuffers');
}
}
}
async _prepareBuffers(tensors, operation, options) {
const tensorArray = Array.isArray(tensors) ? tensors : [tensors];
const buffers = {
inputs: [],
output: null,
params: null
};
// Upload input tensors
for (const tensor of tensorArray) {
const buffer = await this.uploadTensor(tensor);
buffers.inputs.push(buffer);
}
// Create output buffer
const outputSize = this._calculateOutputSize(operation, tensorArray, options);
buffers.output = this.bufferManager.allocate(
outputSize * 4,
GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
);
// Create uniform parameter buffer from the pipeline cache's per-operation params
const params = this.pipelineCache.generateOperationParams(operation, tensorArray, options);
buffers.params = await this.uploadTensor(params, {
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
});
return buffers;
}
_createBindGroup(pipeline, buffers, options) {
const entries = [];
// Add input buffers
for (let i = 0; i < buffers.inputs.length; i++) {
entries.push({
binding: i,
resource: { buffer: buffers.inputs[i] }
});
}
// Add output buffer
entries.push({
binding: buffers.inputs.length,
resource: { buffer: buffers.output }
});
// Add parameter buffer
entries.push({
binding: buffers.inputs.length + 1,
resource: { buffer: buffers.params }
});
return this.device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries
});
}
async _executeComputePass(pipeline, bindGroup, buffers, options) {
const encoder = this.device.createCommandEncoder();
const computePass = encoder.beginComputePass();
computePass.setPipeline(pipeline);
computePass.setBindGroup(0, bindGroup);
// Calculate dispatch size
const workgroupSize = options.workgroupSize || this.config.workgroupSize;
const outputSize = buffers.output.size / 4; // Assuming 32-bit floats
const dispatchSize = Math.ceil(outputSize / workgroupSize[0]);
computePass.dispatchWorkgroups(dispatchSize, 1, 1);
computePass.end();
this.device.queue.submit([encoder.finish()]);
// Wait for completion
await this.device.queue.onSubmittedWorkDone();
// Download result
return this.downloadTensor(buffers.output, outputSize);
}
_calculateOutputSize(operation, tensors, options) {
if (options.outputSize) {
return options.outputSize;
}
const firstTensor = tensors[0];
const getElementCount = (tensor) => {
return ArrayBuffer.isView(tensor) ? tensor.length : tensor.byteLength / 4;
};
// Operation-specific output size calculation
switch (operation) {
case 'matmul': {
// A(M,K) × B(K,N) = C(M,N)
const M = tensors[0].shape?.[0] || Math.sqrt(getElementCount(tensors[0]));
const N = tensors[1].shape?.[1] || Math.sqrt(getElementCount(tensors[1]));
return M * N;
}
case 'bmm': {
// Batch matrix multiply: A(B,M,K) × B(B,K,N) = C(B,M,N)
const B = tensors[0].shape?.[0] || 1;
const bM = tensors[0].shape?.[1] || Math.sqrt(getElementCount(tensors[0]) / B);
const bN = tensors[1].shape?.[2] || Math.sqrt(getElementCount(tensors[1]) / B);
return B * bM * bN;
}
case 'conv2d': {
// Simplified - assumes 'same' padding and stride = 1
const inHeight = tensors[0].shape?.[2] || 28;
const inWidth = tensors[0].shape?.[3] || 28;
const outChannels = tensors[1].shape?.[0] || 32;
const batchSize = tensors[0].shape?.[0] || 1;
return batchSize * outChannels * inHeight * inWidth;
}
case 'transpose':
return getElementCount(firstTensor);
case 'sum':
case 'mean':
// Reductions collapse to a single value; keepDim preserves the input element count
return options.keepDim ? getElementCount(firstTensor) : 1;
case 'softmax':
return getElementCount(firstTensor);
case 'maxpool2d':
case 'avgpool2d': {
// Simplified pooling calculation (assumes no padding)
const poolKernel = options.kernelSize || 2;
const poolStride = options.stride || poolKernel;
const poolInH = tensors[0].shape?.[2] || 28;
const poolInW = tensors[0].shape?.[3] || 28;
const poolOutH = Math.floor((poolInH - poolKernel) / poolStride) + 1;
const poolOutW = Math.floor((poolInW - poolKernel) / poolStride) + 1;
const poolChannels = tensors[0].shape?.[1] || 1;
const poolBatch = tensors[0].shape?.[0] || 1;
return poolBatch * poolChannels * poolOutH * poolOutW;
}
default:
// Element-wise operations preserve input size
return getElementCount(firstTensor);
}
}
_updateStats(operation, executionTime, buffers) {
this.stats.computeOperations++;
this.stats.totalExecutionTime += executionTime;
this.stats.averageExecutionTime = this.stats.totalExecutionTime / this.stats.computeOperations;
this.stats.lastOperationTime = executionTime;
const bufferMemory = buffers.inputs.reduce((sum, buf) => sum + buf.size, 0) + buffers.output.size;
this.stats.memoryUsage = Math.max(this.stats.memoryUsage, bufferMemory);
}
_setupEventForwarding() {
// Forward buffer manager events
this.bufferManager.on('buffer:created', (data) => this.emit('buffer:created', data));
this.bufferManager.on('buffer:destroyed', (data) => this.emit('buffer:destroyed', data));
this.bufferManager.on('gc:complete', (data) => this.emit('buffer:gc', data));
// Forward pipeline cache events
this.pipelineCache.on('cache:miss', (data) => this.emit('pipeline:miss', data));
this.pipelineCache.on('pipeline:compiled', (data) => this.emit('pipeline:compiled', data));
this.pipelineCache.on('warmup:complete', (data) => this.emit('pipeline:warmup', data));
}
/**
* Handle device errors with recovery attempts
*/
_handleDeviceError(error) {
const errorType = error.constructor.name;
switch (errorType) {
case 'GPUOutOfMemoryError':
logger.warn('GPU out of memory, attempting buffer cleanup');
this.bufferManager.emergencyCleanup();
this.emit('recovery:attempt', { type: 'memory-cleanup', timestamp: Date.now() });
break;
case 'GPUInternalError':
logger.warn('GPU internal error, marking device as potentially unstable');
this.deviceStable = false;
this.emit('device:unstable', { reason: 'internal-error', timestamp: Date.now() });
break;
case 'GPUValidationError':
logger.warn('GPU validation error, this may indicate shader or pipeline issues');
this.emit('validation:error', { error, timestamp: Date.now() });
break;
default:
logger.warn('Unknown GPU error type:', errorType);
this.emit('error:unknown', { error, timestamp: Date.now() });
}
}
/**
* Get comprehensive error diagnostics
*/
getErrorDiagnostics() {
return {
isInitialized: this.isInitialized,
deviceStable: this.deviceStable ?? true,
initFailureReason: this.initFailureReason || null,
bufferStats: this.bufferManager?.getStats() || null,
pipelineStats: this.pipelineCache?.getStats() || null,
supportedFeatures: Array.from(this.supportedFeatures || []),
timestamp: Date.now()
};
}
}
// Simple semaphore for concurrency control
class Semaphore {
constructor(max) {
this.max = max;
this.current = 0;
this.queue = [];
}
async acquire() {
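// Grant a slot immediately if under the limit; otherwise queue the resolver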
return new Promise((resolve) => {
if (this.current < this.max) {
this.current++;
resolve();
} else {
this.queue.push(resolve);
}
});
}
release() {
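// Free the slot, then hand it straight to the next waiter, if any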
this.current--;
if (this.queue.length > 0) {
const resolve = this.queue.shift();
this.current++;
resolve();
}
}
}
export default WebGPUComputeEngine;
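// Illustrative end-to-end flow (assumes a WebGPU-capable browser, an
// element-wise 'add' kernel in the pipeline cache, and an illustrative
// import path):
//
//   import WebGPUComputeEngine from './compute-engine.js';
//
//   const engine = new WebGPUComputeEngine();
//   if (await engine.initialize()) {
//     const a = new Float32Array([1, 2, 3]);
//     const b = new Float32Array([4, 5, 6]);
//     const sum = await engine.execute('add', [a, b]); // element-wise: [5, 7, 9]
//     console.log(engine.getStats().averageExecutionTime);
//     await engine.cleanup();
//   }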