gradiatorjs
Version:
GradiatorJS is a lightweight, from-scratch autodiff engine and neural network library written in TypeScript. It features an automatic differentiation engine that builds a computation graph, enabling backpropagation through dynamic network architectures.
463 lines (385 loc) • 19.6 kB
text/typescript
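A minimal usage sketch of the ops in this file (a sketch only, assuming Val zero-initializes its data and grad buffers from the given shape; the library's graph-level backward() traversal is not shown here, so the per-node _backward closures are called by hand):
const a = new Val([2]); a.data = new Float64Array([1, 2]);
const b = new Val([2]); b.data = new Float64Array([3, 4]);
const c = mul(a, b); // forward: [3, 8]
const loss = sum(c); // forward: [11]
loss.grad[0] = 1; // seed dL/dloss
loss._backward(); // c.grad becomes [1, 1]
c._backward(); // a.grad becomes [3, 4], b.grad becomes [1, 2]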
import {assert, broadcast, reduceGradient} from './utils.js'
import { Val } from './val.js';
export function add(t1: Val|number, t2: Val|number) : Val {
const originalT1 = (t1 instanceof Val) ? t1 : new Val([], t1 as number);
const originalT2 = (t2 instanceof Val) ? t2 : new Val([], t2 as number);
let [t1_, t2_] = broadcast(originalT1, originalT2);
assert(t1_.dim === t2_.dim, ()=> `In addition: Both matrices must have the same dim. got t1_dim: ${t1_.dim} and t2_dim: ${t2_.dim}`)
assert(t1_.shape.every((dimension, index) => dimension == t2_.shape[index]), () => 'In addition: Both matrices must have the same shape')
let out = new Val(t1_.shape);
out.data = t1_.data.map((num: number, idx: number) => num + t2_.data[idx]); // Forward
out._prev = new Set([originalT1, originalT2]);
out._backward = () => {
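// gradients arrive w.r.t. the broadcast shapes t1_/t2_; reduceGradient sums them back down to each operand's original shape before accumulating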
const t1_grad = out.grad; // Backward
const t1_reduced_grad = reduceGradient(t1_grad, originalT1.shape, t1_.shape);
originalT1.grad = originalT1.grad.map((g, i) => g + (t1_reduced_grad[i] || 0));
const t2_grad = out.grad;
const t2_reduced_grad = reduceGradient(t2_grad, originalT2.shape, t2_.shape);
originalT2.grad = originalT2.grad.map((g, i) => g + (t2_reduced_grad[i] || 0));
};
return out;
}
export function sub(t1: Val|number, t2: Val|number) : Val {
const originalT1 = (t1 instanceof Val) ? t1 : new Val([], t1 as number);
const originalT2 = (t2 instanceof Val) ? t2 : new Val([], t2 as number);
let [t1_, t2_] = broadcast(originalT1, originalT2);
assert(t1_.dim === t2_.dim, ()=> `In subtraction: Both matrices must have the same dim. got t1_dim: ${t1_.dim} and t2_dim: ${t2_.dim}`)
assert(t1_.shape.every((dimension, index) => dimension == t2_.shape[index]), () => 'In subtraction: Both matrices must have the same shape')
let out = new Val(t1_.shape)
out.data = t1_.data.map((num: number, idx: number)=>num - t2_.data[idx])
out._prev = new Set([originalT1, originalT2])
out._backward = () => {
const t1_grad = out.grad;
const t1_reduced_grad = reduceGradient(t1_grad, originalT1.shape, t1_.shape);
originalT1.grad = originalT1.grad.map((g, i) => g + (t1_reduced_grad[i] || 0));
const t2_grad = out.grad.map((v) => -v);
const t2_reduced_grad = reduceGradient(t2_grad, originalT2.shape, t2_.shape);
originalT2.grad = originalT2.grad.map((g, i) => g + (t2_reduced_grad[i] || 0));
}
return out;
}
export function mul(t1: Val|number, t2: Val|number) : Val {
const originalT1 = (t1 instanceof Val) ? t1 : new Val([], t1 as number);
const originalT2 = (t2 instanceof Val) ? t2 : new Val([], t2 as number);
let [t1_, t2_] = broadcast(originalT1, originalT2);
assert(t1_.dim === t2_.dim, ()=> `In hadamard product: Both matrices must have the same dim. got t1_dim: ${t1_.dim} and t2_dim: ${t2_.dim}`)
assert(t1_.shape.every((dimension, index) => dimension == t2_.shape[index]), () => 'In hadamard product: Both matrices must have the same shape')
let out = new Val(t1_.shape);
out.data = t1_.data.map((num: number, idx: number)=> num*t2_.data[idx]);
out._prev = new Set([originalT1, originalT2]);
out._backward = () => {
const t1_grad = out.grad.map((og, i) => og * t2_.data[i]);
const t1_reduced_grad = reduceGradient(t1_grad, originalT1.shape, t1_.shape);
originalT1.grad = originalT1.grad.map((g, i) => g + (t1_reduced_grad[i] || 0));
const t2_grad = out.grad.map((og, i) => og * t1_.data[i]);
const t2_reduced_grad = reduceGradient(t2_grad, originalT2.shape, t2_.shape);
originalT2.grad = originalT2.grad.map((g, i) => g + (t2_reduced_grad[i] || 0));
};
return out;
}
export function dot(t1: Val, t2: Val) : Val {
assert((t1.dim === 1 || t1.dim === 2) && (t2.dim === 1 || t2.dim === 2), () => `In dot: Both inputs must be dim 1 or 2. Got dims ${t1.dim} and ${t2.dim}`);
if (t1.dim === 1 && t2.dim === 1) {
return sum(mul(t1, t2))
}
const t1_ = t1.dim === 1 ? t1.reshape([1, t1.shape[0]!]) : t1;
const t2_ = t2.dim === 1 ? t2.reshape([t2.shape[0]!, 1]) : t2;
assert(t1_.shape[1] === t2_.shape[0], ()=> `In dot: inner dimensions didn't match. Got shapes ${t1.shape} and ${t2.shape}`)
const out_shape = [t1_.shape[0]!, t2_.shape[1]!];
const out_size = out_shape[0]! * out_shape[1]!;
const res_data = new Float64Array(out_size);
for (let i=0; i<t1_.shape[0]!; i++) {
for (let j=0; j<t2_.shape[1]!; j++) {
let sum=0;
for(let k=0; k<t1_.shape[1]!; k++) {
sum += t1_.data[i*t1_.shape[1]! + k] * t2_.data[k*t2_.shape[1]! + j]
}
res_data[i * out_shape[1]! + j] = sum;
}
}
const out = new Val(out_shape);
out.data = res_data;
out._prev = new Set([t1, t2]);
out._backward = () => {
// dL/dt1 = dL/dout . T(t2_)
const gradT1_ = dot(out.gradVal(), t2_.T);
const grad_to_accum_t1 = t1.dim === 1 ? gradT1_.reshape(t1.shape) : gradT1_;
t1.grad = t1.grad.map((g, i) => g + grad_to_accum_t1.data[i]);
// dL/dt2 = T(t1_) . dL/dout
const gradT2_ = dot(t1_.T, out.gradVal());
const grad_to_accum_t2 = t2.dim === 1 ? gradT2_.reshape(t2.shape) : gradT2_;
t2.grad = t2.grad.map((g, i) => g + grad_to_accum_t2.data[i]);
}
if (t1.dim === 1 && t2.dim === 1) { // if both og inputs were 1D, out should also be scalar
return out.reshape([1]);
}
return out;
}
export function pow(t: Val, num: number) : Val {
let out = new Val(t.shape)
out.data = t.data.map((k:number) => k ** num)
out._prev = new Set([t])
out._backward = () => {
t.grad = t.grad.map((g, i) => g + (num * t.data[i]**(num-1)) * out.grad[i]!);
}
return out;
}
export function div(t: Val, num: number) : Val {
let out = new Val(t.shape)
out.data = t.data.map((k:number) => k / num)
out._prev = new Set([t])
out._backward = () => {
t.grad = t.grad.map((g, i) => g + (out.grad[i]! / num));
}
return out;
}
export function divElementWise(t1: Val, t2: Val) : Val {
assert(t1.dim === t2.dim, ()=> `In element wise division: Both matrices must have the same dim. got t1dim: ${t1.dim} and t2dim: ${t2.dim}`)
assert(t1.shape.every((dimension, index) => dimension == t2.shape[index]), () => 'In divElementWise: Both matrices must have the same shape')
assert(t2.data.every((k:number)=> k !== 0), ()=> "Division by zero error in element-wise division");
let out = new Val(t1.shape)
out.data = t1.data.map((num: number, idx: number)=>num / t2.data[idx])
out._prev = new Set([t1, t2])
out._backward = () => {
// dL/dt1 = dL/dout * d(t1/t2)/dt1 = dL/dout * (1/t2)
t1.grad = t1.grad.map((g, i) => g + (1/t2.data[i]) * out.grad[i]!);
// dL/dt2 = dL/dout * d(t1/t2)/dt2 = dL/dout * (-t1 / t2^2)
t2.grad = t2.grad.map((g, i) => g + (-t1.data[i]/t2.data[i]**2) * out.grad[i]!);
}
return out;
}
export function negate(t: Val) : Val {
let out = new Val(t.shape)
out.data = t.data.map((k:number) => -k)
out._prev = new Set([t])
out._backward = () => {
t.grad = t.grad.map((g, i) => g - out.grad[i]!);
}
return out;
}
export function abs(t: Val) : Val {
let out = new Val(t.shape)
out.data = t.data.map((k:number) => Math.abs(k))
out._prev = new Set([t])
out._backward = () => {
t.grad = t.grad.map((g, i) => g + (t.data[i] > 0 ? 1 : -1) * out.grad[i]!);
}
return out;
}
export function exp(t: Val) : Val {
let out = new Val(t.shape)
out.data = t.data.map((k:number) => Math.exp(k))
out._prev = new Set([t])
out._backward = () => {
t.grad = t.grad.map((g, i) => g + out.data[i] * out.grad[i]!);
}
return out;
}
export function log(t: Val) : Val {
assert(t.data.every((k:number)=>k>0), ()=> "Log input must be positive")
let out = new Val(t.shape)
out.data = t.data.map((k:number) => Math.log(k))
out._prev = new Set([t])
out._backward = () => {
t.grad = t.grad.map((g, i) => g + (1/t.data[i]) * out.grad[i]!);
}
return out;
}
export function sum(t: Val, axis?: number, keepdims = false): Val {
// Case 1: Sum all elements (no axis specified)
if (axis === undefined) {
const out = new Val(keepdims ? t.shape.map(() => 1) : [1]);
out.data[0] = t.data.reduce((a: number, c: number) => a + c, 0);
out._prev = new Set([t]);
out._backward = () => {
t.grad = t.grad.map(g => g + out.grad[0]!);
};
return out;
}
// Case 2: Sum along a specific axis
const new_shape = keepdims
? t.shape.map((dim, i) => (i === axis ? 1 : dim))
: t.shape.filter((_, i) => i !== axis); // drop only the reduced axis, keep pre-existing size-1 dims
const out = new Val(new_shape);
const stride = t.shape.slice(axis + 1).reduce((a, b) => a * b, 1);
// Forward pass (compute sums)
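// index mapping: split output index i into outer = floor(i/stride) and inner = i % stride, then re-insert the reduced axis at offset j*stride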
for (let i = 0; i < out.size; i++) {
let sum = 0;
for (let j = 0; j < t.shape[axis]!; j++) {
const idx = Math.floor(i / stride) * t.shape[axis]! * stride +
j * stride +
i % stride;
sum += t.data[idx];
}
out.data[i] = sum;
}
// Backward
out._prev = new Set([t]);
out._backward = () => {
for (let i = 0; i < out.size; i++) {
for (let j = 0; j < t.shape[axis]!; j++) {
const idx = Math.floor(i / stride) * t.shape[axis]! * stride +
j * stride +
i % stride;
t.grad[idx]! += out.grad[i]!;
}
}
};
return out;
}
export function mean(t: Val, axis?: number, keepdims = false) : Val {
const N = axis === undefined ? t.size : t.shape[axis];
const sum_val = sum(t, axis, keepdims);
const out = div(sum_val, N!);
// note: div already links out -> sum_val -> t, so no extra _prev wiring is needed here
return out;
}
// TODO: Need to optimize this yesterday
/**
* X: input : [batch_size, n_height, n_width, n_channels]
* F: filter : [c_outputchannels, filter_size, filter_size, n_channels]
* st: stride : default = 1
* pad: padding : default = 0
*/
export function conv2d(X: Val, F: Val, st: number=1, pad: number=0) {
assert(X.dim === 4, () => `conv2d: input X must be 4D`)
assert(F.dim === 4, () => `conv2d: filter F must be 4D`)
assert(F.shape[1] === F.shape[2], () => `conv2d: kernels must be square`)
assert(X.shape[3] === F.shape[3], () => `conv2d: Input channels (${X.shape[3]}) must match kernel input channels (${F.shape[3]})`)
assert(st > 0 && Number.isInteger(st), () => `conv2d: stride must be > 0`)
assert(pad >= 0 && Number.isInteger(pad), () => `conv2d: padding must be >= 0`)
const batch_size = X.shape[0]!;
const H = X.shape[1]!;
const W = X.shape[2]!;
const C_IN = X.shape[3]!;
const C_OUT = F.shape[0]!;
const FS = F.shape[1]!; // filter size
// output dims
const H_OUT = Math.floor((H - FS + 2*pad)/st) + 1;
const W_OUT = Math.floor((W - FS + 2*pad)/st) + 1;
if (H_OUT <= 0 || W_OUT <= 0) {
throw new Error(`Conv2d: invalid output dims. Check input, filter, stride or padding. H_OUT ${H_OUT} and W_OUT ${W_OUT}`)
}
const outShape = [batch_size, H_OUT, W_OUT, C_OUT] // output spatial dims, not input dims
const out = new Val(outShape);
for (let batch=0; batch<batch_size; batch++) { // iterating over batch
for (let h_=0; h_<H_OUT; h_++) { // iterating over output height
for (let w_=0; w_<W_OUT; w_++) { // iterating over output width
for (let c_out=0; c_out<C_OUT; c_out++) { // iterating over output channels (each kernel)
// input window start coordinates
const h_start = h_ * st - pad;
const w_start = w_ * st - pad;
let sum=0.0;
for (let f=0; f<FS*FS; f++) {
const fh = Math.floor(f/FS); // filter row
const fw = f%FS; // filter col
for (let c_in=0; c_in<C_IN; c_in++) { // iterating over filter channels
const h_in_idx = h_start + fh;
const w_in_idx = w_start + fw;
if (h_in_idx>=0 && h_in_idx<H && w_in_idx>=0 && w_in_idx<W) {
const x_idx = batch * (H*W*C_IN) + h_in_idx * (W*C_IN) + w_in_idx * C_IN + c_in;
const w_idx = c_out * (FS*FS*C_IN) + fh * (FS*C_IN) + fw * C_IN + c_in;
sum += X.data[x_idx] * F.data[w_idx];
}
}
}
const out_idx = batch * (H_OUT*W_OUT*C_OUT) + h_ * (W_OUT*C_OUT) + w_ * C_OUT + c_out;
out.data[out_idx] = sum;
}
}
}
}
out._prev = new Set([X, F]);
out._backward = () => {
const dL_dOUT = out.grad;
if (!X.grad || X.grad.length !== X.size) {
console.warn(`conv2d backward: init grad for input X (shape ${X.shape})`)
X.grad = new Float64Array(X.size).fill(0);
}
if (!F.grad || F.grad.length !== F.size) {
console.warn(`conv2d backward: init grad for weights F (shape ${F.shape})`)
F.grad = new Float64Array(F.size).fill(0);
}
for (let batch=0; batch<batch_size; batch++) { // iterating over batch
for (let h_=0; h_<H_OUT; h_++) { // iterating over output height
for (let w_=0; w_<W_OUT; w_++) { // iterating over output width
for (let c_out=0; c_out<C_OUT; c_out++) { // iterating over output channels (each kernel)
const out_grad_idx = batch * (H_OUT*W_OUT*C_OUT) + h_*(W_OUT*C_OUT) + w_*C_OUT + c_out;
const grad_val = dL_dOUT[out_grad_idx];
if (grad_val === 0) continue;
const h_start = h_ * st - pad;
const w_start = w_ * st - pad;
for (let f=0; f<FS*FS; f++) {
const fh = Math.floor(f/FS); // filter row
const fw = f%FS; // filter col
for (let c_in=0; c_in<C_IN; c_in++) { // iterating over filter channels
const h_in_idx = h_start + fh;
const w_in_idx = w_start + fw;
if (h_in_idx>=0 && h_in_idx<H && w_in_idx>=0 && w_in_idx<W) {
const x_idx = batch * (H*W*C_IN)
+ h_in_idx * (W*C_IN)
+ w_in_idx * C_IN
+ c_in;
const w_idx = c_out * (FS*FS*C_IN)
+ fh * (FS*C_IN)
+ fw * C_IN
+ c_in;
// dL/dW += dL/dOut * dOut/dW = dL/dOut * X
if (w_idx < F.grad.length) {
F.grad[w_idx]! += X.data[x_idx] * grad_val!;
}
// dL/dX += dL/dOut * dOut/dX = dL/dOut * W
if (x_idx < X.grad.length) {
X.grad[x_idx]! += F.data[w_idx] * grad_val!;
}
}
}
}
}
}
}
}
}
return out;
}
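/**
 * A minimal usage sketch (hypothetical shapes, not part of the library's tests):
 * one 5x5 RGB image convolved with two 3x3 kernels, stride 1, no padding.
 *
 *   const img = new Val([1, 5, 5, 3]);     // [batch, H, W, C_IN]
 *   const kernels = new Val([2, 3, 3, 3]); // [C_OUT, FS, FS, C_IN]
 *   const fmap = conv2d(img, kernels);     // H_OUT = W_OUT = (5 - 3)/1 + 1 = 3, so shape [1, 3, 3, 2]
 */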
export function maxPool2d(X: Val, pool_size: number, stride: number) {
assert(X.dim === 4, () => `maxPool2d input must be 4D (NHWC). Got ${X.dim}D.`);
assert(pool_size > 0 && Number.isInteger(pool_size), () => "pool_size must be a positive integer.");
assert(stride > 0 && Number.isInteger(stride), () => "stride must be a positive integer.");
const B = X.shape[0]!;
const H_in = X.shape[1]!;
const W_in = X.shape[2]!;
const C = X.shape[3]!;
const H_out = Math.floor((H_in-pool_size)/stride)+1;
const W_out = Math.floor((W_in-pool_size)/stride)+1;
if (H_out<=0 || W_out<=0) {
throw new Error(`maxPool2d results in non-positive output dimension. H_out=${H_out}, W_out=${W_out}. Input: ${H_in}x${W_in}, Pool: ${pool_size}, Stride: ${stride}`);
}
const outShape = [B, H_out, W_out, C];
const Y_data = new Float64Array(B*H_out*W_out*C);
const argmax_indices = new Int32Array(B*H_out*W_out*C); // signed, so the -1 sentinel check in backward actually works
for (let b=0; b<B; b++) {
for (let c=0; c<C; c++) {
for (let h_o=0; h_o<H_out; h_o++) {
for (let w_o=0; w_o<W_out; w_o++) {
const h_start = h_o*stride;
const w_start = w_o*stride;
let max_val = -Infinity;
let max_idx_flat = -1;
for (let ph=0; ph<pool_size; ph++) {
for (let pw=0; pw<pool_size; pw++) {
const h_curr = h_start+ph;
const w_curr = w_start+pw;
const x_idx_flat = b*(H_in*W_in*C) + h_curr*(W_in*C) + w_curr*C + c;
const val = X.data[x_idx_flat];
if (val>max_val) {
max_val = val;
max_idx_flat = x_idx_flat;
}
}
}
const y_idx_flat = b*(H_out*W_out*C) + h_o*(W_out*C) + w_o*C + c;
Y_data[y_idx_flat] = max_val;
argmax_indices[y_idx_flat] = max_idx_flat;
}
}
}
}
const Y = new Val(outShape);
Y.data = Y_data;
Y._prev = new Set([X]);
Y._backward = ()=>{
if (!X.grad || X.grad.length !== X.size) {
X.grad = new Float64Array(X.size).fill(0);
}
for (let i = 0; i < Y.grad.length; i++) {
// Y.grad[i] = dL/dY_flat[i]
// argmax_indices[i] is the flat index in X.data that contributed to Y_flat[i]
const x_idx_to_update = argmax_indices[i]!;
if (x_idx_to_update !== -1) {
X.grad[x_idx_to_update]! += Y.grad[i]!; // only add gradient to the max value's og position
}
}
};
return Y;
}
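/**
 * A minimal usage sketch (hypothetical shapes): 2x2 max pooling with stride 2
 * over a [1, 4, 4, 1] input gives H_out = W_out = (4 - 2)/2 + 1 = 2.
 *
 *   const x = new Val([1, 4, 4, 1]);
 *   const y = maxPool2d(x, 2, 2); // shape [1, 2, 2, 1]; backward routes each grad to the argmax position
 */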