greed.js
Lightweight, private alternative to Colab. Run PyTorch & NumPy in the browser with GPU acceleration (8.8x speedup). Fast, secure, runs locally.
/**
* PyTorch Runtime Polyfill - Extracted from main thread for better performance
* Provides PyTorch-compatible API with WebGPU acceleration support
*/
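// Hypothetical usage sketch (assumes a Pyodide-style Python runtime object named
// `pyodide`, which is not part of this module):
//   const polyfillSource = createPyTorchPolyfill();
//   await pyodide.runPythonAsync(polyfillSource);  // install the torch-like API into Python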
/**
* Initialize PyTorch polyfill in Python runtime
*/
export function createPyTorchPolyfill() {
return `
# WebGPU-enabled PyTorch polyfill setup
import numpy as np
import sys
# Global gradient tracking state
_grad_enabled = True
def is_grad_enabled():
"""Check if gradient computation is currently enabled"""
global _grad_enabled
return _grad_enabled
def set_grad_enabled(mode):
"""Enable or disable gradient computation globally"""
global _grad_enabled
prev = _grad_enabled
_grad_enabled = mode
return prev
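# Usage sketch (illustrative only, not executed):
#   prev = set_grad_enabled(False)   # e.g. disable autograd for inference
#   ...                              # tensors created here end up with requires_grad=False
#   set_grad_enabled(prev)           # restore the previous mode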
class WebGPUDevice:
def __init__(self, device_type, **kwargs):
self.type = device_type
def __str__(self):
return self.type
def __repr__(self):
return f"device(type='{self.type}')"
class WebGPUTensor:
def __init__(self, data, device='cpu', dtype='float32', requires_grad=False, _force_webgpu=False, _internal=False, **kwargs):
if isinstance(data, (list, tuple)):
self._data = np.array(data, dtype=dtype)
elif isinstance(data, np.ndarray):
self._data = data.astype(dtype)
else:
self._data = np.array(data, dtype=dtype)
# Determine actual device based on tensor size and WebGPU availability
self._original_device = device
self._force_webgpu = _force_webgpu
# WebGPU auto-detection with recursion prevention
# Only auto-detect for user-facing tensor creation (not internal operations)
if device == 'webgpu' or _internal:
# Explicitly requested webgpu or internal operation - use as-is
self.device = device if isinstance(device, str) else str(device)
elif _force_webgpu or (device in ['cuda', 'gpu']):
# Map CUDA/GPU requests to WebGPU
self.device = 'webgpu'
elif device == 'cpu':
# Explicitly requested CPU - respect that
self.device = device
else:
# Auto-detect for user-facing tensor creation
if self._should_use_webgpu(self._data):
self.device = 'webgpu'
else:
self.device = device if isinstance(device, str) else str(device)
self.dtype = dtype
# Only enable gradient tracking if globally enabled and explicitly requested
self.requires_grad = requires_grad and is_grad_enabled()
self.shape = self._data.shape
self.ndim = self._data.ndim
self.grad = None
self.grad_fn = None
# GPU ACCELERATION: Allocate GPU buffer if on webgpu device
self._gpu_buffer_id = None
# Debug: Check each condition
device_str = str(self.device)
print(f"[GPU DEBUG] device={self.device}, device_str={device_str}, is_webgpu={device_str == 'webgpu'}, has_allocate={'__webgpu_allocate__' in globals()}, _internal={_internal}")
if device_str == 'webgpu' and '__webgpu_allocate__' in globals() and not _internal:
try:
# Allocate buffer on GPU for faster operations
print(f"[GPU] Calling __webgpu_allocate__ with shape={list(self.shape)}, dtype={self.dtype}")
self._gpu_buffer_id = __webgpu_allocate__(
self._data.tolist(), # Convert numpy to Python list for JS
list(self.shape),
self.dtype
)
print(f"[GPU] Allocated buffer ID: {self._gpu_buffer_id}")
except Exception as e:
# Log error and fallback to CPU if GPU allocation fails
print(f"[GPU] Allocation failed: {str(e)}")
import traceback
traceback.print_exc()
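# Device resolution examples (illustrative):
#   WebGPUTensor([1.0, 2.0, 3.0])                -> stays on 'cpu' (the explicit default)
#   WebGPUTensor(np.zeros(4096), device='cuda')  -> mapped to 'webgpu' and GPU-allocated when available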
@property
def data(self):
"""Return data wrapped with PyTorch-like methods"""
return TensorDataWrapper(self._data, self)
@property
def is_cpu(self):
"""Check if tensor is on CPU"""
return self.device == 'cpu'
@property
def is_cuda(self):
"""Check if tensor is on CUDA (WebGPU in our case)"""
return self.device in ['cuda', 'webgpu', 'gpu']
@property
def T(self):
"""Transpose property - returns transposed view for 2D tensors"""
if self.ndim == 2:
return self.transpose(0, 1)
elif self.ndim == 1:
# For 1D tensors, T returns the tensor unchanged (like PyTorch)
return self
else:
raise RuntimeError(f"T property expects a 1D or 2D tensor, but got {self.ndim}D")
def is_contiguous(self):
"""Check if tensor is contiguous in memory.
In our simplified implementation, tensors are always contiguous."""
return True
def contiguous(self):
"""Return a contiguous tensor.
In our implementation, tensors are always contiguous, so return self."""
return self
def size(self, dim=None):
"""Return the size of the tensor or a specific dimension"""
if dim is None:
return self.shape
else:
if dim < 0:
dim = self.ndim + dim
if dim >= self.ndim or dim < 0:
raise IndexError(f"Dimension out of range (expected to be in range of [{-self.ndim}, {self.ndim-1}], but got {dim})")
return self.shape[dim]
def numel(self):
"""Return the total number of elements in the tensor"""
return self._data.size
def dim(self):
"""Return the number of dimensions of the tensor (method call)"""
return self.ndim
# For compatibility: some code might access .dim without calling it
# We already have the dim() method above, but this helps with edge cases
# In real PyTorch, dim is ONLY a method, never an attribute
def _should_use_webgpu(self, data):
\"\"\"Determine if WebGPU should be used based on tensor characteristics\"\"\"
try:
# Use WebGPU for tensors with more than 1000 elements for optimal performance
# Smaller tensors are faster on CPU due to GPU overhead
if hasattr(data, 'size'):
return data.size >= 1000
elif hasattr(data, '__len__'):
# For nested structures, estimate total size
def estimate_size(obj):
if hasattr(obj, '__len__'):
return len(obj) * estimate_size(obj[0] if len(obj) > 0 else 1)
return 1
return estimate_size(data) >= 1000
return False
except:
return False
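# Example (illustrative): a 32x32 tensor (1024 elements) clears the threshold and is
# routed to WebGPU, while a 10x10 tensor (100 elements) stays on CPU.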
def _sync_from_gpu(self):
"""Sync GPU data to CPU when needed (lazy sync)"""
if hasattr(self, '_gpu_only') and self._gpu_only and self._gpu_buffer_id is not None:
try:
if '__webgpu_read__' in globals():
# Read from GPU
result_data_flat = __webgpu_read__(self._gpu_buffer_id)
# Coerce to ndarray in case __webgpu_read__ returns a plain sequence
self._data = np.asarray(result_data_flat).reshape(self.shape)
self._gpu_only = False # Data now synced
except Exception as e:
print(f"[GPU] Sync from GPU failed: {str(e)}")
def numpy(self):
self._sync_from_gpu() # Sync if GPU-only
return self._data
def tolist(self):
self._sync_from_gpu() # Sync if GPU-only
return self._data.tolist()
def __str__(self):
"""String representation of tensor"""
self._sync_from_gpu() # Sync if GPU-only
result = f"tensor({self._data.tolist()}, requires_grad={self.requires_grad})"
return result
def item(self):
"""Return the value of this tensor as a standard Python number"""
if self._data.size == 1:
value = self._data.item()
# Ensure we return proper Python types that can be used as indices
if self.dtype in ['int32', 'int64', 'long']:
return int(value)
elif self.dtype in ['float32', 'float64', 'double']:
return float(value)
else:
# For other types, try to convert appropriately
if isinstance(value, (int, np.integer)):
return int(value)
elif isinstance(value, (float, np.floating)):
return float(value)
else:
return value
else:
raise ValueError("only one element tensors can be converted to Python scalars")
def __format__(self, format_spec):
"""Support for f-string formatting"""
if self._data.size == 1:
return format(self._data.item(), format_spec)
else:
return format(str(self), format_spec)
def view(self, *shape):
"""Reshape tensor maintaining data"""
if len(shape) == 1 and isinstance(shape[0], (list, tuple)):
shape = shape[0]
# Handle -1 for automatic size calculation
if -1 in shape:
total_size = self._data.size
known_size = 1
unknown_idx = -1
for i, s in enumerate(shape):
if s == -1:
unknown_idx = i
else:
known_size *= s
if unknown_idx != -1:
shape = list(shape)
shape[unknown_idx] = total_size // known_size
shape = tuple(shape)
reshaped_data = self._data.reshape(shape)
return WebGPUTensor(reshaped_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
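# Example (illustrative): a tensor of shape (2, 3, 4) viewed as (2, -1) has the -1
# resolved to 24 // 2 = 12, giving shape (2, 12).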
def reshape(self, *shape):
return self.view(*shape)
def transpose(self, dim0, dim1):
transposed_data = np.swapaxes(self._data, dim0, dim1)
return WebGPUTensor(transposed_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def t(self):
"""Transpose a 2D tensor (shorthand for transpose(0, 1))"""
if self.ndim != 2:
raise RuntimeError(f"t() expects a 2D tensor, but got {self.ndim}D tensor")
return self.transpose(0, 1)
def unsqueeze(self, dim):
"""Add a dimension of size 1"""
new_shape = list(self._data.shape)
if dim < 0:
dim = len(new_shape) + dim + 1
new_shape.insert(dim, 1)
reshaped_data = self._data.reshape(new_shape)
return WebGPUTensor(reshaped_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def flatten(self, start_dim=0, end_dim=-1):
"""Flatten tensor dimensions"""
if end_dim == -1:
end_dim = self._data.ndim - 1
shape = list(self._data.shape)
flattened_size = 1
for i in range(start_dim, end_dim + 1):
flattened_size *= shape[i]
new_shape = shape[:start_dim] + [flattened_size] + shape[end_dim + 1:]
flattened_data = self._data.reshape(new_shape)
return WebGPUTensor(flattened_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def squeeze(self, dim=None):
"""Remove dimensions of size 1"""
if dim is None:
# Remove all dimensions of size 1
squeezed_data = np.squeeze(self._data)
else:
# Remove specific dimension if it has size 1
if dim < 0:
dim = self._data.ndim + dim
if self._data.shape[dim] != 1:
return self # No change if dimension is not size 1
squeezed_data = np.squeeze(self._data, axis=dim)
return WebGPUTensor(squeezed_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def clone(self):
"""Create a copy of the tensor with gradient tracking preserved.
The cloned tensor shares no storage with the original tensor but preserves
requires_grad and creates a new node in the computation graph if applicable.
"""
cloned_data = self._data.copy()
cloned_tensor = WebGPUTensor(
cloned_data,
device=self.device,
dtype=self.dtype,
requires_grad=self.requires_grad
)
# If original tensor has gradient tracking, set up backward function for clone
if self.requires_grad:
def clone_backward(grad_output):
# Gradient flows back to original tensor unchanged
if self.grad is None:
self.grad = grad_output
else:
self.grad._data += grad_output._data
cloned_tensor._backward_fn = clone_backward
cloned_tensor._inputs = [self]
return cloned_tensor
def detach(self):
"""Create a copy of the tensor that is detached from the computation graph.
The detached tensor will never require gradient and breaks the gradient flow.
Returns a new tensor with the same data but requires_grad=False.
"""
detached_data = self._data.copy()
detached_tensor = WebGPUTensor(
detached_data,
device=self.device,
dtype=self.dtype,
requires_grad=False,  # Always False for detached tensors
_internal=True)
return detached_tensor
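# Example (illustrative): y = x.detach() carries the same values as x but
# requires_grad is False, so later operations on y never touch x.grad.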
def sum(self, dim=None, keepdim=False):
if dim is None:
result_data = np.sum(self._data)
else:
result_data = np.sum(self._data, axis=dim, keepdims=keepdim)
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad)
# Set up autograd for sum
if result.requires_grad:
result.grad_fn = 'SumBackward'
result._inputs = [self]
def sum_backward(grad):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient of sum: broadcast the gradient back to input shape
if dim is None:
# Sum over all dimensions - broadcast gradient to all elements
self.grad._data += grad._data * np.ones_like(self._data)
else:
# Sum over specific dimension - broadcast along that dimension
grad_data = grad._data if hasattr(grad, '_data') else grad
if not keepdim:
# Need to add the dimension back for broadcasting
grad_data = np.expand_dims(grad_data, axis=dim)
self.grad._data += np.broadcast_to(grad_data, self._data.shape)
result._backward_fn = sum_backward
return result
def mean(self, dim=None, keepdim=False):
if dim is None:
result_data = np.mean(self._data)
else:
result_data = np.mean(self._data, axis=dim, keepdims=keepdim)
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad)
# Set up autograd for mean
if result.requires_grad:
result.grad_fn = 'MeanBackward'
result._inputs = [self]
def mean_backward(grad):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient of mean: broadcast and divide by number of elements
if dim is None:
# Mean over all dimensions
n = self._data.size
self.grad._data += (grad._data / n) * np.ones_like(self._data)
else:
# Mean over specific dimension
grad_data = grad._data if hasattr(grad, '_data') else grad
if not keepdim:
grad_data = np.expand_dims(grad_data, axis=dim)
n = self._data.shape[dim]
self.grad._data += np.broadcast_to(grad_data / n, self._data.shape)
result._backward_fn = mean_backward
return result
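# Gradient note (illustrative): mean over all n elements sends grad / n to every input,
# e.g. a 4-element tensor receives 0.25 * grad per element in the backward pass.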
def std(self, dim=None, keepdim=False, unbiased=True):
"""Compute standard deviation"""
if dim is None:
result_data = np.std(self._data, ddof=1 if unbiased else 0)
else:
result_data = np.std(self._data, axis=dim, keepdims=keepdim, ddof=1 if unbiased else 0)
return WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def var(self, dim=None, keepdim=False, unbiased=True):
"""Compute variance"""
if dim is None:
result_data = np.var(self._data, ddof=1 if unbiased else 0)
else:
result_data = np.var(self._data, axis=dim, keepdims=keepdim, ddof=1 if unbiased else 0)
return WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def tanh(self):
"""Hyperbolic tangent activation - tensor method"""
result_data = np.tanh(self._data)
return WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def abs(self):
"""Absolute value - tensor method"""
result_data = np.abs(self._data)
return WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def all(self):
"""Test if all elements are True"""
result_data = np.all(self._data)
return result_data
def max(self, dim=None, keepdim=False):
"""Maximum values along a dimension"""
if dim is None:
result_data = np.max(self._data)
return WebGPUTensor([result_data], device="webgpu", dtype=self.dtype, _internal=True)
else:
max_values = np.max(self._data, axis=dim, keepdims=keepdim)
max_indices = np.argmax(self._data, axis=dim)
if keepdim:
max_indices = np.expand_dims(max_indices, axis=dim)
values_tensor = WebGPUTensor(max_values, device="webgpu", dtype=self.dtype, _internal=True)
indices_tensor = WebGPUTensor(max_indices, device="webgpu", dtype='int64', _internal=True)
return values_tensor, indices_tensor
def min(self, dim=None, keepdim=False):
"""Minimum values along a dimension"""
if dim is None:
result_data = np.min(self._data)
return WebGPUTensor([result_data], device="webgpu", dtype=self.dtype, _internal=True)
else:
min_values = np.min(self._data, axis=dim, keepdims=keepdim)
min_indices = np.argmin(self._data, axis=dim)
if keepdim:
min_indices = np.expand_dims(min_indices, axis=dim)
values_tensor = WebGPUTensor(min_values, device="webgpu", dtype=self.dtype, _internal=True)
indices_tensor = WebGPUTensor(min_indices, device="webgpu", dtype='int64', _internal=True)
return values_tensor, indices_tensor
def argmax(self, dim=None, keepdim=False):
"""Indices of maximum values along a dimension"""
if dim is None:
result_data = np.argmax(self._data)
return WebGPUTensor([result_data], device="webgpu", dtype='int64', _internal=True)
else:
result_data = np.argmax(self._data, axis=dim)
if keepdim:
result_data = np.expand_dims(result_data, axis=dim)
return WebGPUTensor(result_data, device="webgpu", dtype='int64', _internal=True)
def argmin(self, dim=None, keepdim=False):
"""Indices of minimum values along a dimension"""
if dim is None:
result_data = np.argmin(self._data)
return WebGPUTensor([result_data], device="webgpu", dtype='int64', _internal=True)
else:
result_data = np.argmin(self._data, axis=dim)
if keepdim:
result_data = np.expand_dims(result_data, axis=dim)
return WebGPUTensor(result_data, device="webgpu", dtype='int64', _internal=True)
def to(self, device):
new_device = WebGPUDevice(device) if isinstance(device, str) else device
# Don't use _internal=True to allow GPU buffer allocation when moving to GPU
return WebGPUTensor(self._data.copy(), device=new_device, dtype=self.dtype, requires_grad=self.requires_grad)
def cpu(self):
return self.to('cpu')
def cuda(self):
return self.to('webgpu') # Map CUDA to WebGPU
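# Usage sketch (illustrative only, not executed):
#   t_gpu = t.cuda()     # 'cuda' is transparently mapped to the 'webgpu' device
#   t_cpu = t_gpu.cpu()  # copies the data back to a CPU-resident tensor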
def float(self):
"""Convert tensor to float32 dtype"""
return WebGPUTensor(self._data.copy(), device=self.device, dtype='float32', requires_grad=self.requires_grad, _internal=True)
def double(self):
"""Convert tensor to float64 dtype"""
return WebGPUTensor(self._data.copy(), device=self.device, dtype='float64', requires_grad=self.requires_grad, _internal=True)
def int(self):
"""Convert tensor to int32 dtype"""
return WebGPUTensor(self._data.copy(), device=self.device, dtype='int32', requires_grad=self.requires_grad, _internal=True)
def long(self):
"""Convert tensor to int64 dtype"""
return WebGPUTensor(self._data.copy(), device=self.device, dtype='int64', requires_grad=self.requires_grad, _internal=True)
def type_as(self, other):
"""Convert this tensor to the same dtype as other tensor"""
if isinstance(other, WebGPUTensor):
target_dtype = other.dtype
else:
# If other is not a tensor, assume it's float32
target_dtype = 'float32'
return WebGPUTensor(self._data.copy(), device=self.device, dtype=target_dtype, requires_grad=self.requires_grad, _internal=True)
def __getitem__(self, key):
"""Support tensor slicing like X[:, 0] and advanced indexing"""
# Handle advanced indexing with tensor indices
if isinstance(key, tuple):
# Convert WebGPUTensor indices to numpy arrays
converted_key = []
for k in key:
if isinstance(k, WebGPUTensor):
# Convert tensor to numpy array for indexing
converted_key.append(k._data.astype(np.int64))
else:
converted_key.append(k)
key = tuple(converted_key)
# Multi-dimensional indexing
indexed_data = self._data.reshape(self.shape)[key]
elif isinstance(key, WebGPUTensor):
# Single tensor index
indices = key._data.astype(np.int64)
indexed_data = self._data.reshape(self.shape)[indices]
# Like PyTorch, indexing with a tensor index preserves at least one dimension
if indexed_data.ndim == 0:
indexed_data = np.array([indexed_data])
else:
# Single dimension indexing (slice, int, etc.)
indexed_data = self._data.reshape(self.shape)[key]
return WebGPUTensor(indexed_data, device=self.device, dtype=self.dtype, requires_grad=self.requires_grad, _internal=True)
def backward(self, gradient=None, retain_graph=False, create_graph=False):
"""Backward propagation through the computation graph"""
if not self.requires_grad:
return
# Check if graph still exists
if not hasattr(self, '_backward_fn') or self._backward_fn is None:
if hasattr(self, 'grad_fn') and self.grad_fn is not None:
raise RuntimeError("Trying to backward through the graph a second time (or directly access a leaf Variable that doesn't require grad). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved variables after calling backward.")
if gradient is None:
if self._data.size != 1:
raise RuntimeError("grad can be implicitly created only for scalar outputs")
gradient = WebGPUTensor(np.ones_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Topological sort for DAG-based backward pass
visited = set()
topo_order = []
def build_topo(node):
if id(node) in visited or not isinstance(node, WebGPUTensor):
return
visited.add(id(node))
if hasattr(node, '_inputs'):
for inp in node._inputs:
build_topo(inp)
topo_order.append(node)
build_topo(self)
# Initialize gradient for the output
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Handle gradient parameter properly
if isinstance(gradient, WebGPUTensor):
self.grad._data = gradient._data.copy()
elif hasattr(gradient, '_data'):
self.grad._data = gradient._data.copy()
else:
self.grad._data = np.array(gradient)
# Backward pass in reverse topological order
for node in reversed(topo_order):
if hasattr(node, '_backward_fn') and node._backward_fn and node.grad is not None:
# Call hooks on the gradient before propagating
grad_to_propagate = node._call_hooks(node.grad) if hasattr(node, '_call_hooks') else node.grad
# Pass create_graph flag to backward function if it accepts it
try:
node._backward_fn(grad_to_propagate, create_graph=create_graph)
except TypeError:
# Fallback for backward functions that don't accept create_graph
node._backward_fn(grad_to_propagate)
# Clean up graph if not retaining
if not retain_graph:
for node in topo_order:
if hasattr(node, '_backward_fn'):
node._backward_fn = None
if hasattr(node, '_inputs'):
node._inputs = []
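# Usage sketch (illustrative only, not executed):
#   x = WebGPUTensor([2.0, 3.0], requires_grad=True)
#   y = (x * x).sum()   # builds a small graph: MulBackward -> SumBackward
#   y.backward()        # x.grad is now 2 * x, i.e. [4.0, 6.0]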
def zero_(self):
"""Zero out the tensor data in-place"""
self._data.fill(0)
return self
def retain_grad(self):
"""Enable gradient retention for non-leaf tensors"""
self._retain_grad = True
return self
def register_hook(self, hook):
"""Register a backward hook on the tensor.
The hook will be called every time a gradient with respect to the tensor is computed.
The hook should have the following signature:
hook(grad) -> Tensor or None
Args:
hook: A function that takes a gradient tensor and optionally returns a modified gradient
Returns:
A handle that can be used to remove the hook by calling handle.remove()
"""
if not hasattr(self, '_hooks'):
self._hooks = []
# Store the hook
self._hooks.append(hook)
# Create a handle for removing the hook
class HookHandle:
def __init__(self, tensor, hook_fn, **kwargs):
self.tensor = tensor
self.hook_fn = hook_fn
def remove(self):
if hasattr(self.tensor, '_hooks') and self.hook_fn in self.tensor._hooks:
self.tensor._hooks.remove(self.hook_fn)
return HookHandle(self, hook)
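# Usage sketch (illustrative only, not executed):
#   handle = t.register_hook(lambda g: g * 2)  # double every gradient reaching t
#   ...                                        # run backward passes
#   handle.remove()                            # detach the hook again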
def _call_hooks(self, grad):
"""Call all registered hooks on the gradient"""
if not hasattr(self, '_hooks') or not self._hooks:
return grad
for hook in self._hooks:
new_grad = hook(grad)
if new_grad is not None:
grad = new_grad
return grad
def __repr__(self):
return f"tensor({self._data}, device='{self.device}', dtype='{self.dtype}')"
def __float__(self):
"""Convert single-element tensor to Python float"""
if self._data.size == 1:
return float(self._data.item())
else:
raise TypeError(f"only single-element tensors can be converted to Python scalars")
def __int__(self):
"""Convert single-element tensor to Python int"""
if self._data.size == 1:
return int(self._data.item())
else:
raise TypeError(f"only single-element tensors can be converted to Python scalars")
def __len__(self):
"""Return the length of the first dimension"""
if self.ndim == 0:
raise TypeError("len() of unsized object")
return self.shape[0]
def __setitem__(self, key, value):
"""Support tensor item assignment like tensor[indices] = value"""
if isinstance(value, WebGPUTensor):
value_data = value._data
else:
value_data = value
if isinstance(key, WebGPUTensor):
# Convert WebGPUTensor indices to numpy array
indices = key._data.astype(int)
self._data[indices] = value_data
else:
self._data[key] = value_data
def eq(self, other):
"""Element-wise equality comparison (returns tensor)"""
if isinstance(other, WebGPUTensor):
result_data = self._data == other._data
else:
result_data = self._data == other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __eq__(self, other):
"""Element-wise equality comparison (returns tensor like PyTorch)"""
if isinstance(other, WebGPUTensor):
result_data = self._data == other._data
else:
result_data = self._data == other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __ne__(self, other):
"""Element-wise not-equal comparison"""
if isinstance(other, WebGPUTensor):
result_data = self._data != other._data
else:
result_data = self._data != other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __gt__(self, other):
"""Element-wise greater than comparison"""
if isinstance(other, WebGPUTensor):
result_data = self._data > other._data
else:
result_data = self._data > other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __lt__(self, other):
"""Element-wise less than comparison"""
if isinstance(other, WebGPUTensor):
result_data = self._data < other._data
else:
result_data = self._data < other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __ge__(self, other):
"""Element-wise greater than or equal comparison"""
if isinstance(other, WebGPUTensor):
result_data = self._data >= other._data
else:
result_data = self._data >= other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __le__(self, other):
"""Element-wise less than or equal comparison"""
if isinstance(other, WebGPUTensor):
result_data = self._data <= other._data
else:
result_data = self._data <= other
return WebGPUTensor(result_data, device="webgpu", dtype='bool', _internal=True)
def __hash__(self):
"""Make tensor hashable for use as dictionary keys.
Uses object identity (id) so each tensor instance is unique."""
return id(self)
# Masked operations
def masked_fill(self, mask, value):
"""Fill elements of self tensor with value where mask is True.
Args:
mask: Boolean tensor with same shape as self
value: Value to fill
Returns:
New tensor with masked elements filled
"""
if isinstance(mask, WebGPUTensor):
mask_data = mask._data
else:
mask_data = np.array(mask)
# Ensure mask is boolean type
mask_data = mask_data.astype(bool)
# Create a copy of the data
result_data = self._data.copy()
# Fill masked positions
result_data[mask_data] = value
result = WebGPUTensor(
result_data,
device=self.device,
dtype=self.dtype,
requires_grad=self.requires_grad
)
# Set up backward function if gradient tracking is enabled
if self.requires_grad:
def masked_fill_backward(grad_output):
# Gradient flows through non-masked elements only
grad_input = grad_output._data.copy()
grad_input[mask_data] = 0 # Zero out gradients for masked positions
grad_tensor = WebGPUTensor(grad_input, device="webgpu", dtype=self.dtype, _internal=True)
if self.grad is None:
self.grad = grad_tensor
else:
self.grad._data += grad_tensor._data
result._backward_fn = masked_fill_backward
result._inputs = [self]
return result
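# Example (illustrative): attention masking typically looks like
#   scores.masked_fill(mask == 0, float('-inf'))
# so that masked positions vanish after a subsequent softmax.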
def masked_fill_(self, mask, value):
"""In-place version of masked_fill"""
if isinstance(mask, WebGPUTensor):
mask_data = mask._data
else:
mask_data = np.array(mask)
# Ensure mask is boolean type
mask_data = mask_data.astype(bool)
self._data[mask_data] = value
return self
# Arithmetic operators
def __add__(self, other):
if isinstance(other, WebGPUTensor):
result_data = self._data + other._data
else:
result_data = self._data + other
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype,
requires_grad=self.requires_grad or (isinstance(other, WebGPUTensor) and other.requires_grad))
# Set up autograd
if result.requires_grad:
result.grad_fn = 'AddBackward'
result._inputs = []
if self.requires_grad:
result._inputs.append(self)
if isinstance(other, WebGPUTensor) and other.requires_grad:
result._inputs.append(other)
def add_backward(grad, create_graph=False):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Handle broadcasting: reduce gradient to match original shape
grad_data = grad._data
# Sum out added dims and reduce broadcast dims
ndims_added = grad_data.ndim - self._data.ndim
for i in range(ndims_added):
grad_data = grad_data.sum(axis=0)
# Reduce dimensions that were broadcast
for i in range(grad_data.ndim):
if self._data.shape[i] == 1 and grad_data.shape[i] > 1:
grad_data = np.sum(grad_data, axis=i, keepdims=True)
self.grad._data += grad_data
if create_graph:
self.grad.requires_grad = True
self.grad.grad_fn = 'AddBackwardBackward'
if isinstance(other, WebGPUTensor) and other.requires_grad:
if other.grad is None:
other.grad = WebGPUTensor(np.zeros_like(other._data), device=other.device, dtype=other.dtype, _internal=True)
# Handle broadcasting for other
grad_data = grad._data
ndims_added = grad_data.ndim - other._data.ndim
for i in range(ndims_added):
grad_data = grad_data.sum(axis=0)
for i in range(grad_data.ndim):
if other._data.shape[i] == 1 and grad_data.shape[i] > 1:
grad_data = np.sum(grad_data, axis=i, keepdims=True)
other.grad._data += grad_data
if create_graph:
other.grad.requires_grad = True
result._backward_fn = add_backward
return result
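# Broadcasting note (illustrative): adding a (3, 1) tensor to a (3, 4) tensor gives a
# (3, 4) result; in add_backward the (3, 1) operand's gradient is summed over the
# broadcast axis (keepdims=True) so it is reduced back to shape (3, 1).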
def __sub__(self, other):
if isinstance(other, WebGPUTensor):
result_data = self._data - other._data
else:
result_data = self._data - other
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype,
requires_grad=self.requires_grad or (isinstance(other, WebGPUTensor) and other.requires_grad))
# Set up autograd
if result.requires_grad:
result.grad_fn = 'SubBackward'
result._inputs = []
if self.requires_grad:
result._inputs.append(self)
if isinstance(other, WebGPUTensor) and other.requires_grad:
result._inputs.append(other)
def sub_backward(grad):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient w.r.t. self: grad (unchanged) but handle broadcasting
grad_data = grad._data
ndims_added = grad_data.ndim - self._data.ndim
for i in range(ndims_added):
grad_data = grad_data.sum(axis=0)
for i in range(grad_data.ndim):
if self._data.shape[i] == 1 and grad_data.shape[i] > 1:
grad_data = np.sum(grad_data, axis=i, keepdims=True)
self.grad._data += grad_data
if isinstance(other, WebGPUTensor) and other.requires_grad:
if other.grad is None:
other.grad = WebGPUTensor(np.zeros_like(other._data), device=other.device, dtype=other.dtype, _internal=True)
# Gradient w.r.t. other: -grad (negated) and handle broadcasting
grad_data = grad._data
ndims_added = grad_data.ndim - other._data.ndim
for i in range(ndims_added):
grad_data = grad_data.sum(axis=0)
for i in range(grad_data.ndim):
if other._data.shape[i] == 1 and grad_data.shape[i] > 1:
grad_data = np.sum(grad_data, axis=i, keepdims=True)
other.grad._data -= grad_data
result._backward_fn = sub_backward
return result
def __mul__(self, other):
if isinstance(other, WebGPUTensor):
result_data = self._data * other._data
else:
result_data = self._data * other
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype,
requires_grad=self.requires_grad or (isinstance(other, WebGPUTensor) and other.requires_grad))
# Set up autograd
if result.requires_grad:
result.grad_fn = 'MulBackward'
result._inputs = []
if self.requires_grad:
result._inputs.append(self)
if isinstance(other, WebGPUTensor) and other.requires_grad:
result._inputs.append(other)
def mul_backward(grad, create_graph=False):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient: grad * other
if create_graph:
grad_tensor = grad if isinstance(grad, WebGPUTensor) else WebGPUTensor(grad._data, device=self.device, dtype=self.dtype, _internal=True)
if isinstance(other, WebGPUTensor):
other_tensor = WebGPUTensor(other._data, device=self.device, dtype=self.dtype, requires_grad=True, _internal=True)
grad_self = grad_tensor * other_tensor
else:
grad_self = grad_tensor * other
self.grad._data += grad_self._data
self.grad.requires_grad = True
self.grad.grad_fn = 'MulBackwardBackward'
else:
if isinstance(other, WebGPUTensor):
self.grad._data += grad._data * other._data
else:
self.grad._data += grad._data * other
if isinstance(other, WebGPUTensor) and other.requires_grad:
if other.grad is None:
other.grad = WebGPUTensor(np.zeros_like(other._data), device=other.device, dtype=other.dtype, _internal=True)
# Gradient: grad * self
if create_graph:
grad_tensor = grad if isinstance(grad, WebGPUTensor) else WebGPUTensor(grad._data, device=self.device, dtype=self.dtype, _internal=True)
self_tensor = WebGPUTensor(self._data, device=self.device, dtype=self.dtype, requires_grad=True, _internal=True)
grad_other = grad_tensor * self_tensor
other.grad._data += grad_other._data
other.grad.requires_grad = True
else:
other.grad._data += grad._data * self._data
result._backward_fn = mul_backward
return result
def __truediv__(self, other):
if isinstance(other, WebGPUTensor):
result_data = self._data / other._data
else:
result_data = self._data / other
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype,
requires_grad=self.requires_grad or (isinstance(other, WebGPUTensor) and other.requires_grad))
# Set up autograd
if result.requires_grad:
result.grad_fn = 'DivBackward'
result._inputs = []
if self.requires_grad:
result._inputs.append(self)
if isinstance(other, WebGPUTensor) and other.requires_grad:
result._inputs.append(other)
def div_backward(grad):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient w.r.t. self: grad / other
if isinstance(other, WebGPUTensor):
self.grad._data += grad._data / other._data
else:
self.grad._data += grad._data / other
if isinstance(other, WebGPUTensor) and other.requires_grad:
if other.grad is None:
other.grad = WebGPUTensor(np.zeros_like(other._data), device=other.device, dtype=other.dtype, _internal=True)
# Gradient w.r.t. other: -grad * self / other^2
other.grad._data -= grad._data * self._data / (other._data ** 2)
result._backward_fn = div_backward
return result
def __pow__(self, other):
if isinstance(other, WebGPUTensor):
result_data = np.power(self._data, other._data)
else:
result_data = np.power(self._data, other)
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype,
requires_grad=self.requires_grad or (isinstance(other, WebGPUTensor) and other.requires_grad))
# Set up autograd
if result.requires_grad:
result.grad_fn = 'PowBackward'
result._inputs = []
if self.requires_grad:
result._inputs.append(self)
if isinstance(other, WebGPUTensor) and other.requires_grad:
result._inputs.append(other)
def pow_backward(grad, create_graph=False):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient w.r.t. base: grad * exponent * base^(exponent-1)
if create_graph:
# Use differentiable operations for higher-order gradients
grad_tensor = grad if isinstance(grad, WebGPUTensor) else WebGPUTensor(grad._data, device=self.device, dtype=self.dtype, _internal=True)
base_tensor = WebGPUTensor(self._data, device=self.device, dtype=self.dtype, requires_grad=True, _internal=True)
if isinstance(other, WebGPUTensor):
exponent_tensor = other
else:
exponent_tensor = WebGPUTensor(np.array(other), device=self.device, dtype=self.dtype, _internal=True)
# grad * exponent * base^(exponent-1)
grad_base = grad_tensor * exponent_tensor * (base_tensor ** (exponent_tensor - 1))
self.grad._data += grad_base._data
self.grad.requires_grad = True
self.grad.grad_fn = 'PowBackwardBackward'
else:
# Use NumPy for efficiency when not creating graph
if isinstance(other, WebGPUTensor):
self.grad._data += grad._data * other._data * np.power(self._data, other._data - 1)
else:
self.grad._data += grad._data * other * np.power(self._data, other - 1)
if isinstance(other, WebGPUTensor) and other.requires_grad:
if other.grad is None:
other.grad = WebGPUTensor(np.zeros_like(other._data), device=other.device, dtype=other.dtype, _internal=True)
# Gradient w.r.t. exponent: grad * log(base) * base^exponent
if create_graph:
grad_tensor = grad if isinstance(grad, WebGPUTensor) else WebGPUTensor(grad._data, device=self.device, dtype=self.dtype, _internal=True)
base_tensor = WebGPUTensor(self._data, device=self.device, dtype=self.dtype, _internal=True)
result_tensor = WebGPUTensor(result_data, device=self.device, dtype=self.dtype, _internal=True)
# grad * log(base) * base^exponent
grad_exp = grad_tensor * WebGPUTensor(np.log(self._data), device=self.device, dtype=self.dtype, _internal=True) * result_tensor
other.grad._data += grad_exp._data
other.grad.requires_grad = True
else:
other.grad._data += grad._data * np.log(self._data) * result_data
result._backward_fn = pow_backward
return result
def __neg__(self):
"""Unary negation operator (-tensor)"""
result_data = -self._data
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad)
# Set up autograd
if result.requires_grad:
result.grad_fn = 'NegBackward'
result._inputs = [self]
def neg_backward(grad):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dtype=self.dtype, _internal=True)
# Gradient of negation: -grad
self.grad._data -= grad._data
result._backward_fn = neg_backward
return result
def __pos__(self):
"""Unary positive operator (+tensor)"""
result_data = +self._data
result = WebGPUTensor(result_data, device="webgpu", dtype=self.dtype, requires_grad=self.requires_grad)
# Set up autograd
if result.requires_grad:
result.grad_fn = 'PosBackward'
result._inputs = [self]
def pos_backward(grad):
if self.requires_grad:
if self.grad is None:
self.grad = WebGPUTensor(np.zeros_like(self._data), device=self.device, dt