gradiatorjs
GradiatorJS is a lightweight, from-scratch autodiff engine and neural network library written in TypeScript. It features an automatic differentiation engine that builds a computation graph, enabling backpropagation through dynamic network architectures.
var __defProp = Object.defineProperty;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// src/accuracy.ts
var accuracy_exports = {};
__export(accuracy_exports, {
calcBinaryAccuracy: () => calcBinaryAccuracy,
calcMultiClassAccuracy: () => calcMultiClassAccuracy
});
// src/utils.ts
var utils_exports = {};
__export(utils_exports, {
arraysEqual: () => arraysEqual,
assert: () => assert,
broadcast: () => broadcast,
calculateMinMax: () => calculateMinMax,
gaussianRandom: () => gaussianRandom,
reduceGradient: () => reduceGradient
});
function assert(expr, msg) {
if (!expr) {
throw new Error(typeof msg === "string" ? msg : msg());
}
}
// Standard normal sample via the Box-Muller transform.
function gaussianRandom(mean = 0, stdev = 1) {
const u = 1 - Math.random(); // shift to (0, 1] so Math.log(u) stays finite
const v = Math.random();
const z = Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
return z * stdev + mean;
}
function arraysEqual(a, b) {
if (a.length !== b.length) return false;
for (let i = 0; i < a.length; i++) {
if (a[i] !== b[i]) return false;
}
return true;
}
function broadcast(t1, t2) {
const v1 = t1 instanceof Val ? t1 : new Val([], t1);
const v2 = t2 instanceof Val ? t2 : new Val([], t2);
if (v1.shape.length === v2.shape.length && v1.shape.every((dim, i) => dim === v2.shape[i])) {
return [v1.clone(), v2.clone()];
}
let v1_out = v1.clone();
let v2_out = v2.clone();
let broadcast_occurred = false;
if (v1.size === 1 && v2.size > 1) {
const shape = v2.shape;
const data = new Float64Array(v2.size).fill(v1.data[0]);
v1_out = new Val(shape);
v1_out.data = data;
broadcast_occurred = true;
} else if (v2.size === 1 && v1.size > 1) {
const shape = v1.shape;
const data = new Float64Array(v1.size).fill(v2.data[0]);
v2_out = new Val(shape);
v2_out.data = data;
broadcast_occurred = true;
} else if (v1.size === 1 && v2.size === 1) {
if (v1.shape.length === 0 && v2.shape.length > 0) {
v2_out = new Val([], v2.data[0]);
broadcast_occurred = true;
} else if (v2.shape.length === 0 && v1.shape.length > 0) {
v1_out = new Val([], v1.data[0]);
broadcast_occurred = true;
}
} else if (!broadcast_occurred && v1.dim === 2 && v2.dim === 2) {
let broadcasted_data = null;
let target_shape = [];
let needs_v1_broadcast = false;
let needs_v2_broadcast = false;
if (v1.shape[0] === v2.shape[0] && v1.shape[1] === 1 && v2.shape[1] > 1) {
target_shape = v2.shape;
broadcasted_data = new Float64Array(v2.size);
for (let r = 0; r < target_shape[0]; r++) {
for (let c = 0; c < target_shape[1]; c++) {
broadcasted_data[r * target_shape[1] + c] = v1.data[r];
}
}
needs_v1_broadcast = true;
} else if (v1.shape[0] === v2.shape[0] && v2.shape[1] === 1 && v1.shape[1] > 1) {
target_shape = v1.shape;
broadcasted_data = new Float64Array(v1.size);
for (let r = 0; r < target_shape[0]; r++) {
for (let c = 0; c < target_shape[1]; c++) {
broadcasted_data[r * target_shape[1] + c] = v2.data[r];
}
}
needs_v2_broadcast = true;
} else if (v1.shape[1] === v2.shape[1] && v1.shape[0] === 1 && v2.shape[0] > 1) {
target_shape = v2.shape;
broadcasted_data = new Float64Array(v2.size);
for (let r = 0; r < target_shape[0]; r++) {
for (let c = 0; c < target_shape[1]; c++) {
broadcasted_data[r * target_shape[1] + c] = v1.data[c];
}
}
needs_v1_broadcast = true;
} else if (v1.shape[1] === v2.shape[1] && v2.shape[0] === 1 && v1.shape[0] > 1) {
target_shape = v1.shape;
broadcasted_data = new Float64Array(v1.size);
for (let r = 0; r < target_shape[0]; r++) {
for (let c = 0; c < target_shape[1]; c++) {
broadcasted_data[r * target_shape[1] + c] = v2.data[c];
}
}
needs_v2_broadcast = true;
}
if (needs_v1_broadcast && broadcasted_data) {
v1_out = new Val(target_shape);
v1_out.data = broadcasted_data;
broadcast_occurred = true;
} else if (needs_v2_broadcast && broadcasted_data) {
v2_out = new Val(target_shape);
v2_out.data = broadcasted_data;
broadcast_occurred = true;
}
}
if (v1.size > 1 && v2.size > 1 && (v1_out.shape.length !== v2_out.shape.length || !v1_out.shape.every((dim, i) => dim === v2_out.shape[i]))) {
assert(false, () => `Tensors could not be broadcast to compatible shapes. Original Shapes: ${v1.shape} and ${v2.shape}`);
}
return [v1_out, v2_out];
}
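// Example (illustrative sketch, not executed as part of the bundle): broadcast()
// handles scalar-to-tensor expansion and 2D row/column expansion, e.g. a [2,1]
// column against a [2,3] matrix:
//
//   const col = new Val([2, 1]);
//   col.data = [[1], [2]];
//   const mat = new Val([2, 3], 5);          // 2x3 filled with 5
//   const [c_, m_] = broadcast(col, mat);    // c_ becomes [2,3]: [1,1,1, 2,2,2]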
function reduceGradient(gradient, originalShape, broadcastedShape) {
if (originalShape.length === broadcastedShape.length && originalShape.every((dim, i) => dim === broadcastedShape[i])) {
return gradient;
}
const originalSize = originalShape.reduce((a, b) => a * b, 1);
if (originalShape.length === 0 || originalShape.length === 1) {
let sum2 = 0;
for (let i = 0; i < gradient.length; i++) {
sum2 += gradient[i];
}
return new Float64Array([sum2]);
}
const reducedGrad = new Float64Array(originalSize).fill(0);
if (originalShape.length === 2 && broadcastedShape.length === 2) {
const [origRows, origCols] = originalShape;
const [bcastRows, bcastCols] = broadcastedShape;
const reduceCols = origCols === 1 && bcastCols > 1;
const reduceRows = origRows === 1 && bcastRows > 1;
if (reduceCols && reduceRows) {
let sum2 = 0;
for (let i = 0; i < gradient.length; i++) {
sum2 += gradient[i];
}
if (reducedGrad.length === 1)
reducedGrad[0] = sum2;
} else if (reduceCols && !reduceRows) {
for (let r = 0; r < bcastRows; r++) {
let sum2 = 0;
for (let c = 0; c < bcastCols; c++) {
sum2 += gradient[r * bcastCols + c];
}
if (r < reducedGrad.length) reducedGrad[r] = sum2;
}
} else if (reduceRows && !reduceCols) {
for (let c = 0; c < bcastCols; c++) {
let sum2 = 0;
for (let r = 0; r < bcastRows; r++) {
sum2 += gradient[r * bcastCols + c];
}
if (c < reducedGrad.length) reducedGrad[c] = sum2;
}
} else {
if (gradient.length === reducedGrad.length) return gradient;
console.error(
`reduceGradient: Unhandled 2D case from ${broadcastedShape} to ${originalShape}`
);
}
return reducedGrad;
}
console.warn(
`reduceGradient: Unhandled broadcast reduction from ${broadcastedShape} to ${originalShape}. Returning zero gradient.`
);
return reducedGrad;
}
function calculateMinMax(data) {
if (!data || data.length === 0) {
return { minv: 0, maxv: 0, dv: 0 };
}
let minv = data[0];
let maxv = data[0];
for (let i = 1; i < data.length; i++) {
if (data[i] < minv) minv = data[i];
if (data[i] > maxv) maxv = data[i];
}
return { minv, maxv, dv: maxv - minv };
}
// src/val.ts
var Val = class _Val {
_data;
grad;
_backward;
_prev;
shape;
size;
constructor(shape, value) {
this.shape = shape;
this.size = this.calculateSizeFromShape(this.shape);
this._data = value ? this.filled(this.size, value) : this.zeros(this.size);
this.grad = new Float64Array(this.size);
this._backward = () => {
};
this._prev = /* @__PURE__ */ new Set();
}
backward() {
this.grad.fill(0);
if (this.size === 1) this.grad[0] = 1;
const topo = [];
const visited = /* @__PURE__ */ new Set();
const buildTopo = (v) => {
if (!visited.has(v)) {
visited.add(v);
v._prev.forEach(buildTopo);
topo.push(v);
}
};
buildTopo(this);
topo.reverse().forEach((v) => {
if (v._backward) v._backward();
});
}
set data(a) {
if (a instanceof Float64Array) {
this._data = new Float64Array(a);
return;
}
let calcShape = this.calculateShape(a);
let origShape = this.shape;
assert(
typeof origShape !== "undefined" && origShape.length !== 0 && (calcShape.length === origShape.length && calcShape.every((v, i) => v === origShape[i])),
() => `Shape of the matrix doesn't match the shape of Val. Shape of the matrix: ${calcShape} Shape of val: ${this.shape}`
);
this._data = this.createArr(a);
this.grad = new Float64Array(this.size);
}
createArr(a) {
if (typeof a === "number") {
let x = new Float64Array(1);
x[0] = a;
return x;
} else if (Array.isArray(a)) {
return Float64Array.from(this.flattenND(a));
} else {
return new Float64Array(0);
}
}
flattenND(a) {
if (typeof a === "number") {
return [a];
}
let res = [];
if (Array.isArray(a)) {
for (let e of a) {
res = res.concat(this.flattenND(e));
}
}
return res;
}
get data() {
return this._data;
}
calculateSizeFromShape(shape) {
if (shape.length === 0) return 1;
return shape.reduce((acc, dim) => acc * dim, 1);
}
calculateShape(x) {
const shape = [];
let current = x;
while (Array.isArray(current)) {
shape.push(current.length);
current = current[0];
}
return shape;
}
zeros(size) {
return new Float64Array(size);
}
get dim() {
return this.shape.length;
}
filled(size, value) {
let x = new Float64Array(size);
x.fill(value);
return x;
}
clone() {
let x = new _Val([...this.shape]);
x._data = Float64Array.from(this._data);
x._backward = this._backward;
x._prev = new Set(this._prev);
x.grad = Float64Array.from(this.grad);
return x;
}
get T() {
if (this.dim === 1) return this.clone();
assert(this.dim === 2, () => "transpose only supports 2D arrays");
let newShape = [this.shape[1], this.shape[0]];
let res = new _Val(newShape);
let x = new Float64Array(this.size);
let y = this.data;
for (let i = 0; i < this.shape[0]; i++) {
for (let j = 0; j < this.shape[1]; j++) {
x[j * this.shape[0] + i] = y[i * this.shape[1] + j];
}
}
res._data = x;
res._prev = /* @__PURE__ */ new Set([this]);
res._backward = () => {
for (let i = 0; i < this.shape[0]; i++) {
for (let j = 0; j < this.shape[1]; j++) {
this.grad[i * this.shape[1] + j] += res.grad[j * this.shape[0] + i];
}
}
};
return res;
}
reshape(newShape) {
const inferredShape = [...newShape];
const requiredSize = inferredShape.reduce((a, b) => a * b, 1);
assert(this.size == requiredSize, () => `Cannot reshape array: number of elements (${this.size}) does not match the required size (${requiredSize}) for shape ${inferredShape}`);
let result = new _Val(inferredShape);
result._data = Float64Array.from(this.data);
result._prev = /* @__PURE__ */ new Set([this]);
const inputVal = this;
result._backward = () => {
if (!inputVal.grad || inputVal.grad.length !== inputVal.size) {
console.warn(`Reshape backward: Initializing gradient for input tensor (shape ${inputVal.shape})`);
inputVal.grad = new Float64Array(inputVal.size).fill(0);
}
if (!result.grad || result.grad.length !== result.size) {
console.warn(`Reshape backward: Gradient for reshaped tensor (shape ${result.shape}) is missing or has wrong size (${result.grad?.length} vs ${result.size}). Skipping accumulation.`);
return;
}
for (let i = 0; i < inputVal.grad.length; i++) {
if (i < result.grad.length) {
inputVal.grad[i] += result.grad[i];
}
}
};
return result;
}
randn() {
let x = new _Val(this.shape);
for (let i = 0; i < this.size; i++) {
x.data[i] = gaussianRandom();
}
return x;
}
gradVal() {
const x = new _Val(this.shape);
x.data = Float64Array.from(this.grad);
return x;
}
};
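// Example (illustrative sketch, not executed as part of the bundle): a Val wraps a
// flat Float64Array plus shape metadata; backward() seeds the gradient of a scalar
// output and runs reverse-mode autodiff over the _prev graph in topological order:
//
//   const a = new Val([2, 2], 1);    // 2x2 filled with 1
//   const b = new Val([2, 2], 3);    // 2x2 filled with 3
//   const loss = sum(mul(a, b));     // scalar: 12
//   loss.backward();                 // a.grad -> all 3s, b.grad -> all 1s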
// src/accuracy.ts
function calcBinaryAccuracy(y_pred_val, y_true_val, threshold = 0.5) {
if (y_pred_val.size !== y_true_val.size) {
throw new Error(`Cannot calculate accuracy: prediction size (${y_pred_val.size}) doesn't match true label size (${y_true_val.size}). Shapes: pred ${y_pred_val.shape}, true ${y_true_val.shape}`);
}
const n_samples = y_pred_val.size;
if (n_samples === 0) {
console.log("Empty input");
return 100;
}
const y_pred = y_pred_val.data;
const y_true = y_true_val.data;
let correct_predictions = 0;
for (let i = 0; i < n_samples; i++) {
const predicted_class = y_pred[i] > threshold ? 1 : 0;
const true_label = Math.round(y_true[i]);
if (predicted_class === true_label) {
correct_predictions++;
}
}
const accuracy = correct_predictions / n_samples * 100;
return accuracy;
}
function calcMultiClassAccuracy(y_pred_val, y_true_val) {
if (y_pred_val.dim !== 2 || y_true_val.dim !== 2 || y_pred_val.shape[0] !== y_true_val.shape[0] || y_pred_val.shape[1] !== y_true_val.shape[1]) {
throw new Error(`Shape mismatch for multi-class accuracy. Pred: [${y_pred_val.shape.join(",")}], True: [${y_true_val.shape.join(",")}]`);
}
const batchSize = y_pred_val.shape[0];
const numClasses = y_pred_val.shape[1];
if (batchSize === 0) {
console.log("Empty input");
return 100;
}
const y_pred = y_pred_val.data;
const y_true = y_true_val.data;
let correct_predictions = 0;
for (let i = 0; i < batchSize; i++) {
const predOffset = i * numClasses;
const trueOffset = i * numClasses;
let maxProb = -1;
let predicted_class = -1;
for (let j = 0; j < numClasses; j++) {
let val = y_pred[predOffset + j];
if (val > maxProb) {
maxProb = val;
predicted_class = j;
}
}
let trueClass = -1;
for (let j = 0; j < numClasses; j++) {
if (y_true[trueOffset + j] === 1) {
trueClass = j;
break;
}
}
if (predicted_class === trueClass) {
correct_predictions++;
}
}
const accuracy = correct_predictions / batchSize * 100;
return accuracy;
}
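// Example (illustrative sketch, not executed as part of the bundle):
// calcMultiClassAccuracy expects probabilities and one-hot labels, both [batch, classes]:
//
//   const pred = new Val([2, 3]);
//   pred.data = [[0.1, 0.7, 0.2], [0.6, 0.3, 0.1]];
//   const truth = new Val([2, 3]);
//   truth.data = [[0, 1, 0], [0, 0, 1]];
//   calcMultiClassAccuracy(pred, truth);   // 50: sample 0 correct, sample 1 wrong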
// src/activations.ts
var activations_exports = {};
__export(activations_exports, {
relu: () => relu,
sigmoid: () => sigmoid,
softmax: () => softmax,
tanh: () => tanh
});
function relu(Z) {
let out = new Val(Z.shape);
out.data = Z.data.map((k) => Math.max(k, 0));
out._prev = /* @__PURE__ */ new Set([Z]);
out._backward = () => {
for (let i = 0; i < Z.size; ++i) {
Z.grad[i] += Z.data[i] > 0 ? out.grad[i] : 0;
}
};
return out;
}
function sigmoid(Z) {
let out = new Val(Z.shape);
out.data = Z.data.map((k) => 1 / (1 + Math.exp(-k)));
out._prev = /* @__PURE__ */ new Set([Z]);
out._backward = () => {
for (let i = 0; i < Z.size; ++i) {
Z.grad[i] += out.data[i] * (1 - out.data[i]) * out.grad[i];
}
};
return out;
}
function tanh(Z) {
let out = new Val(Z.shape);
out.data = Z.data.map((k) => Math.tanh(k));
out._prev = /* @__PURE__ */ new Set([Z]);
out._backward = () => {
for (let i = 0; i < Z.size; ++i) {
Z.grad[i] += (1 - out.data[i] * out.data[i]) * out.grad[i];
}
};
return out;
}
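// Note: softmax below is a shape-checked pass-through. The actual (numerically
// stable) softmax is fused into crossEntropyLoss_softmax further down, so this
// function only validates that the input is [Batch, Classes] and returns it as-is.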
function softmax(Z) {
assert(Z.dim === 2, () => `Softmax: input must be 2D ([Batch, Classes]). Got ${Z.dim}D.`);
return Z;
}
// src/layers.ts
var layers_exports = {};
__export(layers_exports, {
Conv: () => Conv,
Dense: () => Dense,
Flatten: () => Flatten,
MaxPool2D: () => MaxPool2D,
Module: () => Module
});
// src/ops.ts
var ops_exports = {};
__export(ops_exports, {
abs: () => abs,
add: () => add,
conv2d: () => conv2d,
div: () => div,
divElementWise: () => divElementWise,
dot: () => dot,
exp: () => exp,
log: () => log,
maxPool2d: () => maxPool2d,
mean: () => mean,
mul: () => mul,
negate: () => negate,
pow: () => pow,
sub: () => sub,
sum: () => sum
});
function add(t1, t2) {
const originalT1 = t1 instanceof Val ? t1 : new Val([], t1);
const originalT2 = t2 instanceof Val ? t2 : new Val([], t2);
let [t1_, t2_] = broadcast(originalT1, originalT2);
assert(t1_.dim === t2_.dim, () => `In addition: Both matrices must have the same dim. got t1_dim: ${t1_.dim} and t2_dim: ${t2_.dim}`);
assert(t1_.shape.every((dimension, index) => dimension == t2_.shape[index]), () => "In addition: Both matrices must have the same shape");
let out = new Val(t1_.shape);
out.data = t1_.data.map((num, idx) => num + t2_.data[idx]);
out._prev = /* @__PURE__ */ new Set([originalT1, originalT2]);
out._backward = () => {
const t1_grad = out.grad;
const t1_reduced_grad = reduceGradient(t1_grad, originalT1.shape, t1_.shape);
originalT1.grad = originalT1.grad.map((g, i) => g + (t1_reduced_grad[i] || 0));
const t2_grad = out.grad;
const t2_reduced_grad = reduceGradient(t2_grad, originalT2.shape, t2_.shape);
originalT2.grad = originalT2.grad.map((g, i) => g + (t2_reduced_grad[i] || 0));
};
return out;
}
function sub(t1, t2) {
const originalT1 = t1 instanceof Val ? t1 : new Val([], t1);
const originalT2 = t2 instanceof Val ? t2 : new Val([], t2);
let [t1_, t2_] = broadcast(originalT1, originalT2);
assert(t1_.dim === t2_.dim, () => `In subtraction: Both matrices must have the same dim. got t1_dim: ${t1_.dim} and t2_dim: ${t2_.dim}`);
assert(t1_.shape.every((dimension, index) => dimension == t2_.shape[index]), () => "In subtraction: Both matrices must have the same shape");
let out = new Val(t1_.shape);
out.data = t1_.data.map((num, idx) => num - t2_.data[idx]);
out._prev = /* @__PURE__ */ new Set([originalT1, originalT2]);
out._backward = () => {
const t1_grad = out.grad;
const t1_reduced_grad = reduceGradient(t1_grad, originalT1.shape, t1_.shape);
originalT1.grad = originalT1.grad.map((g, i) => g + (t1_reduced_grad[i] || 0));
const t2_grad = out.grad.map((v) => -v);
const t2_reduced_grad = reduceGradient(t2_grad, originalT2.shape, t2_.shape);
originalT2.grad = originalT2.grad.map((g, i) => g + (t2_reduced_grad[i] || 0));
};
return out;
}
function mul(t1, t2) {
const originalT1 = t1 instanceof Val ? t1 : new Val([], t1);
const originalT2 = t2 instanceof Val ? t2 : new Val([], t2);
let [t1_, t2_] = broadcast(originalT1, originalT2);
assert(t1_.dim === t2_.dim, () => `In hadamard product: Both matrices must have the same dim. got t1_dim: ${t1_.dim} and t2_dim: ${t2_.dim}`);
assert(t1_.shape.every((dimension, index) => dimension == t2_.shape[index]), () => "In hadamard product: Both matrices must have the same shape");
let out = new Val(t1_.shape);
out.data = t1_.data.map((num, idx) => num * t2_.data[idx]);
out._prev = /* @__PURE__ */ new Set([originalT1, originalT2]);
out._backward = () => {
const t1_grad = out.grad.map((og, i) => og * t2_.data[i]);
const t1_reduced_grad = reduceGradient(t1_grad, originalT1.shape, t1_.shape);
originalT1.grad = originalT1.grad.map((g, i) => g + (t1_reduced_grad[i] || 0));
const t2_grad = out.grad.map((og, i) => og * t1_.data[i]);
const t2_reduced_grad = reduceGradient(t2_grad, originalT2.shape, t2_.shape);
originalT2.grad = originalT2.grad.map((g, i) => g + (t2_reduced_grad[i] || 0));
};
return out;
}
function dot(t1, t2) {
assert((t1.dim === 1 || t1.dim === 2) && (t2.dim === 1 || t2.dim === 2), () => `In dot: Both inputs must be dim 1 or 2. Got dims ${t1.dim} and ${t2.dim}`);
if (t1.dim === 1 && t2.dim === 1) {
return sum(mul(t1, t2));
}
const t1_ = t1.dim === 1 ? t1.reshape([1, t1.shape[0]]) : t1;
const t2_ = t2.dim === 1 ? t2.reshape([t2.shape[0], 1]) : t2;
assert(t1_.shape[1] === t2_.shape[0], () => `In dot: inner dimensions didn't match. Shapes got: ${t1.shape} and ${t2.shape}`);
const out_shape = [t1_.shape[0], t2_.shape[1]];
const out_size = out_shape[0] * out_shape[1];
const res_data = new Float64Array(out_size);
for (let i = 0; i < t1_.shape[0]; i++) {
for (let j = 0; j < t2_.shape[1]; j++) {
let sum2 = 0;
for (let k = 0; k < t1_.shape[1]; k++) {
sum2 += t1_.data[i * t1_.shape[1] + k] * t2_.data[k * t2_.shape[1] + j];
}
res_data[i * out_shape[1] + j] = sum2;
}
}
const out = new Val(out_shape);
out.data = res_data;
out._prev = /* @__PURE__ */ new Set([t1, t2]);
out._backward = () => {
const gradT1_ = dot(out.gradVal(), t2_.T);
const grad_to_accum_t1 = t1.dim === 1 ? gradT1_.reshape(t1.shape) : gradT1_;
t1.grad = t1.grad.map((g, i) => g + grad_to_accum_t1.data[i]);
const gradT2_ = dot(t1_.T, out.gradVal());
const grad_to_accum_t2 = t2.dim === 1 ? gradT2_.reshape(t2.shape) : gradT2_;
t2.grad = t2.grad.map((g, i) => g + grad_to_accum_t2.data[i]);
};
// (the 1D·1D case already returned earlier via sum(mul(t1, t2)))
return out;
}
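// Example (illustrative sketch, not executed as part of the bundle): dot() is a 2D
// matmul; 1D inputs are promoted to row/column vectors first:
//
//   const A = new Val([2, 3], 1);    // 2x3 of ones
//   const x = new Val([3], 2);       // length-3 vector of 2s
//   dot(A, x);                       // [2,3]·[3,1] -> shape [2,1], each entry 6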
function pow(t, num) {
let out = new Val(t.shape);
out.data = t.data.map((k) => k ** num);
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
t.grad = t.grad.map((g, i) => g + num * t.data[i] ** (num - 1) * out.grad[i]);
};
return out;
}
function div(t, num) {
let out = new Val(t.shape);
out.data = t.data.map((k) => k / num);
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
t.grad = t.grad.map((g, i) => g + out.grad[i] / num);
};
return out;
}
function divElementWise(t1, t2) {
assert(t1.dim === t2.dim, () => `In element wise division: Both matrices must have the same dim. got t1dim: ${t1.dim} and t2dim: ${t2.dim}`);
assert(t1.shape.every((dimension, index) => dimension == t2.shape[index]), () => "In divElementWise: Both matrices must have the same shape");
assert(t2.data.every((k) => k !== 0), () => "Division by zero error in element-wise division");
let out = new Val(t1.shape);
out.data = t1.data.map((num, idx) => num / t2.data[idx]);
out._prev = /* @__PURE__ */ new Set([t1, t2]);
out._backward = () => {
t1.grad = t1.grad.map((g, i) => g + 1 / t2.data[i] * out.grad[i]);
t2.grad = t2.grad.map((g, i) => g + -t1.data[i] / t2.data[i] ** 2 * out.grad[i]);
};
return out;
}
function negate(t) {
let out = new Val(t.shape);
out.data = t.data.map((k) => -k);
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
t.grad = t.grad.map((g, i) => g - out.grad[i]);
};
return out;
}
function abs(t) {
let out = new Val(t.shape);
out.data = t.data.map((k) => Math.abs(k));
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
t.grad = t.grad.map((g, i) => g + (t.data[i] > 0 ? 1 : -1) * out.grad[i]);
};
return out;
}
function exp(t) {
let out = new Val(t.shape);
out.data = t.data.map((k) => Math.exp(k));
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
t.grad = t.grad.map((g, i) => g + out.data[i] * out.grad[i]);
};
return out;
}
function log(t) {
assert(t.data.every((k) => k > 0), () => "Log input must be positive");
let out = new Val(t.shape);
out.data = t.data.map((k) => Math.log(k));
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
t.grad = t.grad.map((g, i) => g + 1 / t.data[i] * out.grad[i]);
};
return out;
}
function sum(t, axis, keepdims = false) {
if (axis === void 0) {
const out2 = new Val(keepdims ? t.shape.map(() => 1) : [1]);
out2.data[0] = t.data.reduce((a, c) => a + c, 0);
out2._prev = /* @__PURE__ */ new Set([t]);
out2._backward = () => {
t.grad = t.grad.map((g) => g + out2.grad[0]);
};
return out2;
}
const new_shape = keepdims ? t.shape.map((dim, i) => i === axis ? 1 : dim) : t.shape.filter((_, i) => i !== axis);
const out = new Val(new_shape);
const stride = t.shape.slice(axis + 1).reduce((a, b) => a * b, 1);
for (let i = 0; i < out.size; i++) {
let sum2 = 0;
for (let j = 0; j < t.shape[axis]; j++) {
const idx = Math.floor(i / stride) * t.shape[axis] * stride + j * stride + i % stride;
sum2 += t.data[idx];
}
out.data[i] = sum2;
}
out._prev = /* @__PURE__ */ new Set([t]);
out._backward = () => {
for (let i = 0; i < out.size; i++) {
for (let j = 0; j < t.shape[axis]; j++) {
const idx = Math.floor(i / stride) * t.shape[axis] * stride + j * stride + i % stride;
t.grad[idx] += out.grad[i];
}
}
};
return out;
}
function mean(t, axis, keepdims = false) {
const N = axis === void 0 ? t.size : t.shape[axis];
const sum_val = sum(t, axis, keepdims);
// div() already wires the graph (out -> sum_val -> t); overwriting out._prev with
// new Set([t]) here would cut sum_val out of the topo sort and break backprop.
return div(sum_val, N);
}
function conv2d(X, F, st = 1, pad = 0) {
assert(X.dim === 4, () => `conv2d: input X must be 4D`);
assert(F.dim === 4, () => `conv2d: filter F must be 4D`);
assert(F.shape[1] === F.shape[2], () => `conv2d: kernels must be square`);
assert(X.shape[3] === F.shape[3], () => `conv2d: input channels (${X.shape[3]}) must match kernel input channels (${F.shape[3]})`);
assert(st > 0 && Number.isInteger(st), () => `conv2d: stride must be > 0`);
assert(pad >= 0 && Number.isInteger(pad), () => `conv2d: padding must be >= 0`);
const batch_size = X.shape[0];
const H = X.shape[1];
const W = X.shape[2];
const C_IN = X.shape[3];
const C_OUT = F.shape[0];
const FS = F.shape[1];
const H_OUT = Math.floor((H - FS + 2 * pad) / st) + 1;
const W_OUT = Math.floor((W - FS + 2 * pad) / st) + 1;
if (H_OUT <= 0 || W_OUT <= 0) {
throw new Error(`Conv2d: invalid output dims. Check input, filter, stride or padding. H_OUT ${H_OUT} and W_OUT ${W_OUT}`);
}
const outShape = [batch_size, H_OUT, W_OUT, C_OUT];
const out = new Val(outShape);
for (let batch = 0; batch < batch_size; batch++) {
for (let h_ = 0; h_ < H_OUT; h_++) {
for (let w_ = 0; w_ < W_OUT; w_++) {
for (let c_out = 0; c_out < C_OUT; c_out++) {
const h_start = h_ * st - pad;
const w_start = w_ * st - pad;
let sum2 = 0;
for (let f = 0; f < FS * FS; f++) {
const fh = Math.floor(f / FS);
const fw = f % FS;
for (let c_in = 0; c_in < C_IN; c_in++) {
const h_in_idx = h_start + fh;
const w_in_idx = w_start + fw;
if (h_in_idx >= 0 && h_in_idx < H && w_in_idx >= 0 && w_in_idx < W) {
const x_idx = batch * (H * W * C_IN) + h_in_idx * (W * C_IN) + w_in_idx * C_IN + c_in;
const w_idx = c_out * (FS * FS * C_IN) + fh * (FS * C_IN) + fw * C_IN + c_in;
sum2 += X.data[x_idx] * F.data[w_idx];
}
}
}
const out_idx = batch * (H_OUT * W_OUT * C_OUT) + h_ * (W_OUT * C_OUT) + w_ * C_OUT + c_out;
out.data[out_idx] = sum2;
}
}
}
}
out._prev = /* @__PURE__ */ new Set([X, F]);
out._backward = () => {
const dL_dOUT = out.grad;
if (!X.grad || X.grad.length !== X.size) {
console.warn(`conv2d backward: init grad for input X (shape ${X.shape})`);
X.grad = new Float64Array(X.size).fill(0);
}
if (!F.grad || F.grad.length !== F.size) {
console.warn(`conv2d backward: init grad for weights F(shape ${F.shape})`);
F.grad = new Float64Array(F.size).fill(0);
}
for (let batch = 0; batch < batch_size; batch++) {
for (let h_ = 0; h_ < H_OUT; h_++) {
for (let w_ = 0; w_ < W_OUT; w_++) {
for (let c_out = 0; c_out < C_OUT; c_out++) {
const out_grad_idx = batch * (H_OUT * W_OUT * C_OUT) + h_ * (W_OUT * C_OUT) + w_ * C_OUT + c_out;
const grad_val = dL_dOUT[out_grad_idx];
if (grad_val === 0) continue;
const h_start = h_ * st - pad;
const w_start = w_ * st - pad;
for (let f = 0; f < FS * FS; f++) {
const fh = Math.floor(f / FS);
const fw = f % FS;
for (let c_in = 0; c_in < C_IN; c_in++) {
const h_in_idx = h_start + fh;
const w_in_idx = w_start + fw;
if (h_in_idx >= 0 && h_in_idx < H && w_in_idx >= 0 && w_in_idx < W) {
const x_idx = batch * (H * W * C_IN) + h_in_idx * (W * C_IN) + w_in_idx * C_IN + c_in;
const w_idx = c_out * (FS * FS * C_IN) + fh * (FS * C_IN) + fw * C_IN + c_in;
if (w_idx < F.grad.length) {
F.grad[w_idx] += X.data[x_idx] * grad_val;
}
if (x_idx < X.grad.length) {
X.grad[x_idx] += F.data[w_idx] * grad_val;
}
}
}
}
}
}
}
}
};
return out;
}
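// Example (illustrative sketch, not executed as part of the bundle): output spatial
// dims follow floor((H - FS + 2*pad) / st) + 1. For a 28x28 single-channel input and
// eight 3x3 filters with stride 1 and no padding:
//
//   const X = new Val([1, 28, 28, 1]).randn();   // NHWC, batch of one
//   const F = new Val([8, 3, 3, 1]).randn();     // [C_OUT, FS, FS, C_IN]
//   conv2d(X, F);                                // -> shape [1, 26, 26, 8]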
function maxPool2d(X, pool_size, stride) {
assert(X.dim === 4, () => `maxPool2d input must be 4D (NHWC). Got ${X.dim}D.`);
assert(pool_size > 0 && Number.isInteger(pool_size), () => "pool_size must be a positive integer.");
assert(stride > 0 && Number.isInteger(stride), () => "stride must be a positive integer.");
const B = X.shape[0];
const H_in = X.shape[1];
const W_in = X.shape[2];
const C = X.shape[3];
const H_out = Math.floor((H_in - pool_size) / stride) + 1;
const W_out = Math.floor((W_in - pool_size) / stride) + 1;
if (H_out <= 0 || W_out <= 0) {
throw new Error(`maxPool2d results in non-positive output dimension. H_out=${H_out}, W_out=${W_out}. Input: ${H_in}x${W_in}, Pool: ${pool_size}, Stride: ${stride}`);
}
const outShape = [B, H_out, W_out, C];
const Y_data = new Float64Array(B * H_out * W_out * C);
const argmax_indices = new Uint32Array(B * H_out * W_out * C);
for (let b = 0; b < B; b++) {
for (let c = 0; c < C; c++) {
for (let h_o = 0; h_o < H_out; h_o++) {
for (let w_o = 0; w_o < W_out; w_o++) {
const h_start = h_o * stride;
const w_start = w_o * stride;
let max_val = -Infinity;
let max_idx_flat = -1;
for (let ph = 0; ph < pool_size; ph++) {
for (let pw = 0; pw < pool_size; pw++) {
const h_curr = h_start + ph;
const w_curr = w_start + pw;
const x_idx_flat = b * (H_in * W_in * C) + h_curr * (W_in * C) + w_curr * C + c;
const val = X.data[x_idx_flat];
if (val > max_val) {
max_val = val;
max_idx_flat = x_idx_flat;
}
}
}
const y_idx_flat = b * (H_out * W_out * C) + h_o * (W_out * C) + w_o * C + c;
Y_data[y_idx_flat] = max_val;
argmax_indices[y_idx_flat] = max_idx_flat;
}
}
}
}
const Y = new Val(outShape);
Y.data = Y_data;
Y._prev = /* @__PURE__ */ new Set([X]);
Y._backward = () => {
if (!X.grad || X.grad.length !== X.size) {
X.grad = new Float64Array(X.size).fill(0);
}
for (let i = 0; i < Y.grad.length; i++) {
const x_idx_to_update = argmax_indices[i];
if (x_idx_to_update !== -1) {
X.grad[x_idx_to_update] += Y.grad[i];
}
}
};
return Y;
}
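// Example (illustrative sketch, not executed as part of the bundle): with
// pool_size === stride === 2 the spatial dims halve, and backward routes each
// output gradient to its recorded argmax position:
//
//   const X = new Val([1, 4, 4, 1]).randn();
//   maxPool2d(X, 2, 2);   // -> shape [1, 2, 2, 1]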
// src/layers.ts
var Module = class _Module {
// Properties to cache the last pre-activation (Z) and post-activation (A) outputs
last_Z = null;
last_A = null;
parameters() {
let params = [];
for (const key in this) {
const prop = this[key];
if (prop instanceof Val) {
params.push(prop);
} else if (prop instanceof _Module) {
params = params.concat(prop.parameters());
} else if (Array.isArray(prop)) {
prop.forEach((item) => {
if (item instanceof _Module) {
params = params.concat(item.parameters());
} else if (item instanceof Val) {
params.push(item);
}
});
}
}
return [...new Set(params)];
}
zeroGrad() {
this.parameters().forEach((p) => p.grad.fill(0));
}
forward(X) {
throw new Error("Forward method not implemented.");
}
toJSON() {
throw new Error(`toJSON() not implemented for ${this.constructor.name}`);
}
};
var Dense = class extends Module {
W;
B;
activation;
nin;
nout;
constructor(nin, nout, activation) {
super();
this.nin = nin;
this.nout = nout;
this.W = new Val([nin, nout]);
this.W.data = this.W.data.map(() => gaussianRandom() * Math.sqrt(2 / nin));
this.B = new Val([1, nout], 0.1);
this.activation = activation;
}
forward(X_input) {
let X = X_input;
if (X_input.dim === 1) {
X = X.reshape([1, X.shape[0]]);
}
assert(X.dim === 2, () => `Dense layer expects dim 1 or 2, got ${X_input.dim}`);
assert(X.shape[1] === this.nin, () => `Input features ${X.shape[1]} don't match layer input size ${this.nin}`);
const Z = add(dot(X, this.W), this.B);
const A = this.activation ? this.activation(Z) : Z;
this.last_Z = Z;
this.last_A = A;
return A;
}
toJSON() {
return {
layerType: "Dense",
nin: this.nin,
nout: this.nout,
activation: this.activation?.name || "none",
weights: Array.from(this.W.data),
biases: Array.from(this.B.data)
};
}
};
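// Example (illustrative sketch, not executed as part of the bundle): a Dense layer
// computes activation(X·W + B) with He-initialized weights:
//
//   const layer = new Dense(4, 2, relu);
//   const X = new Val([8, 4]).randn();   // batch of 8, 4 features each
//   layer.forward(X);                    // -> shape [8, 2]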
var Conv = class extends Module {
kernel;
biases;
stride;
padding;
activation;
in_channels;
out_channels;
kernel_size;
constructor(in_channels, out_channels, kernel_size, stride, padding, activation) {
super();
this.in_channels = in_channels;
this.out_channels = out_channels;
this.kernel_size = kernel_size;
this.stride = stride;
this.padding = padding;
this.activation = activation;
this.kernel = new Val([out_channels, kernel_size, kernel_size, in_channels]);
const fan_in = in_channels * kernel_size * kernel_size;
this.kernel.data = this.kernel.data.map(() => gaussianRandom() * Math.sqrt(2 / fan_in));
this.biases = new Val([out_channels], 0.1);
}
forward(X_input) {
let X = X_input;
assert(X.dim === 4, () => `Conv2DLayer: input must be 4D. Got ${X.dim} dims with shape: ${X.shape}`);
const Z_conv = conv2d(X, this.kernel, this.stride, this.padding);
const B_batch = X.shape[0];
const H_out = Z_conv.shape[1];
const W_out = Z_conv.shape[2];
const Z_with_bias = new Val([B_batch, H_out, W_out, this.out_channels]);
for (let b = 0; b < B_batch; b++) {
for (let h = 0; h < H_out; h++) {
for (let w = 0; w < W_out; w++) {
for (let c = 0; c < this.out_channels; c++) {
const z_conv_idx = b * (H_out * W_out * this.out_channels) + h * (W_out * this.out_channels) + w * this.out_channels + c;
Z_with_bias.data[z_conv_idx] = Z_conv.data[z_conv_idx] + this.biases.data[c];
}
}
}
}
Z_with_bias._prev = /* @__PURE__ */ new Set([Z_conv, this.biases]);
Z_with_bias._backward = () => {
if (!Z_conv.grad || Z_conv.grad.length !== Z_conv.size) Z_conv.grad = new Float64Array(Z_conv.size).fill(0);
for (let i = 0; i < Z_with_bias.grad.length; i++) {
Z_conv.grad[i] += Z_with_bias.grad[i];
}
if (!this.biases.grad || this.biases.grad.length !== this.biases.size) this.biases.grad = new Float64Array(this.biases.size).fill(0);
for (let b = 0; b < B_batch; b++) {
for (let h = 0; h < H_out; h++) {
for (let w = 0; w < W_out; w++) {
for (let c = 0; c < this.out_channels; c++) {
const z_with_bias_grad_idx = b * (H_out * W_out * this.out_channels) + h * (W_out * this.out_channels) + w * this.out_channels + c;
this.biases.grad[c] += Z_with_bias.grad[z_with_bias_grad_idx];
}
}
}
}
};
const A = this.activation ? this.activation(Z_with_bias) : Z_with_bias;
this.last_Z = Z_with_bias;
this.last_A = A;
return A;
}
toJSON() {
return {
layerType: "Conv2D",
in_channels: this.in_channels,
out_channels: this.out_channels,
kernel_size: this.kernel_size,
stride: this.stride,
padding: this.padding,
activation: this.activation?.name || "none",
kernels: Array.from(this.kernel.data),
biases: Array.from(this.biases.data)
};
}
};
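// Example (illustrative sketch, not executed as part of the bundle):
//
//   new Conv(1, 8, 3, 1, 0, relu);   // 1 input channel, eight 3x3 filters, stride 1, no padding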
var Flatten = class extends Module {
forward(X) {
assert(X.dim > 1, () => `FlattenLayer expects input with at least 2 dimensions.`);
if (X.dim === 2) {
this.last_Z = X;
this.last_A = X;
return X;
}
const batchSize = X.shape[0];
const features = X.size / batchSize;
const Y = X.reshape([batchSize, features]);
this.last_Z = Y;
this.last_A = Y;
return Y;
}
toJSON() {
return {
layerType: "Flatten"
};
}
};
var MaxPool2D = class extends Module {
pool_size;
stride;
constructor(pool_size, stride) {
super();
this.pool_size = pool_size;
this.stride = stride ?? pool_size;
}
forward(X) {
const Y = maxPool2d(X, this.pool_size, this.stride);
this.last_Z = Y;
this.last_A = Y;
return Y;
}
toJSON() {
return {
layerType: "MaxPooling2D",
pool_size: this.pool_size,
stride: this.stride
};
}
};
// src/model.ts
var model_exports = {};
__export(model_exports, {
Sequential: () => Sequential
});
var Sequential = class extends Module {
layers;
constructor(...layers) {
super();
this.layers = layers;
}
forward(X) {
let currentOutput = X;
for (const layer of this.layers) {
currentOutput = layer.forward(currentOutput);
}
this.last_A = currentOutput;
return currentOutput;
}
// Performs a forward pass and returns the intermediate pre- and post-activation
// outputs of each layer in the sequence.
getLayerOutputs(X) {
this.forward(X);
const outputs = this.layers.map((layer) => ({
Z: layer.last_Z,
A: layer.last_A
}));
return outputs;
}
toJSON() {
const modelJSON = {
modelType: "Sequential",
layers: this.layers.map((layer) => layer.toJSON())
};
return modelJSON;
}
};
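// Example (illustrative sketch, not executed as part of the bundle): composing
// layers into a model; since softmax is a pass-through, the output is raw logits
// meant to be paired with crossEntropyLoss_softmax:
//
//   const model = new Sequential(
//     new Dense(784, 128, relu),
//     new Dense(128, 10, softmax)
//   );
//   model.forward(new Val([32, 784]).randn());   // -> shape [32, 10]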
// src/loss.ts
var loss_exports = {};
__export(loss_exports, {
crossEntropyLoss_binary: () => crossEntropyLoss_binary,
crossEntropyLoss_categorical: () => crossEntropyLoss_categorical,
crossEntropyLoss_softmax: () => crossEntropyLoss_softmax,
meanSquaredErrorLoss: () => meanSquaredErrorLoss
});
function meanSquaredErrorLoss(y_pred, y_true) {
return div(sum(pow(sub(y_pred, y_true), 2)), y_true.size);
}
function crossEntropyLoss_binary(y_pred, y_true, e = 1e-9) {
if (y_pred.shape.join(",") !== y_true.shape.join(",")) {
throw new Error(`Shape mismatch for BCE Loss: pred ${y_pred.shape}, true ${y_true.shape}`);
}
const batch_size = y_true.shape[0] || 1;
const t1 = add(y_pred, e);
const t2 = add(sub(1, y_pred), e);
let total_sum = sum(add(
mul(y_true, log(t1)),
mul(sub(1, y_true), log(t2))
));
let avg_loss = mul(-1 / batch_size, total_sum);
return avg_loss;
}
function crossEntropyLoss_categorical(y_pred, y_true) {
if (y_pred.shape.join(",") !== y_true.shape.join(",")) {
throw new Error(`Shape mismatch for Cross-Entropy Loss. Pred: [${y_pred.shape}], True: [${y_true.shape}]`);
}
const epsilon = 1e-9;
const log_pred = log(add(y_pred, epsilon));
const product = mul(y_true, log_pred);
const negatedSum = mul(sum(product, 1), -1);
return mean(negatedSum);
}
function crossEntropyLoss_softmax(logits, y_true) {
if (logits.dim !== 2 || y_true.dim !== 2 || logits.shape.join(",") !== y_true.shape.join(",")) {
throw new Error(`Shape mismatch for softmax cross-entropy. Logits: [${logits.shape}], True: [${y_true.shape}]`);
}
const batchSize = logits.shape[0];
const numClasses = logits.shape[1];
const probs = new Val([batchSize, numClasses]);
for (let b = 0; b < batchSize; b++) {
const rawOffset = b * numClasses;
let max_logit = -Infinity;
for (let j = 0; j < numClasses; j++) {
if (logits.data[rawOffset + j] > max_logit) {
max_logit = logits.data[rawOffset + j];
}
}
let sum_exps = 0;
for (let j = 0; j < numClasses; j++) {
const exp_val = Math.exp(logits.data[rawOffset + j] - max_logit);
probs.data[rawOffset + j] = exp_val;
sum_exps += exp_val;
}
for (let j = 0; j < numClasses; j++) {
probs.data[rawOffset + j] /= sum_exps;
}
}
const epsilon = 1e-9;
let totalLoss = 0;
for (let i = 0; i < y_true.data.length; i++) {
if (y_true.data[i] === 1) {
totalLoss += -Math.log(probs.data[i] + epsilon);
}
}
const avgLoss = totalLoss / batchSize;
const lossVal = new Val([], avgLoss);
lossVal._prev = /* @__PURE__ */ new Set([logits, y_true]);
lossVal._backward = () => {
const dL_dLogits = new Float64Array(logits.size);
for (let i = 0; i < logits.size; i++) {
dL_dLogits[i] = (probs.data[i] - y_true.data[i]) / batchSize;
}
if (!logits.grad || logits.grad.length !== logits.size) {
logits.grad = new Float64Array(logits.size).fill(0);
}
for (let i = 0; i < logits.size; i++) {
logits.grad[i] += dL_dLogits[i];
}
};
return lossVal;
}
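// Example (illustrative sketch, not executed as part of the bundle):
// crossEntropyLoss_softmax takes raw logits and one-hot labels of equal shape, and
// its backward pass uses the closed form (softmax(logits) - y_true) / batchSize:
//
//   const logits = new Val([2, 3]).randn();
//   const y = new Val([2, 3]);
//   y.data = [[1, 0, 0], [0, 0, 1]];
//   const loss = crossEntropyLoss_softmax(logits, y);
//   loss.backward();   // accumulates into logits.grad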
// src/state_management.ts
var state_management_exports = {};
__export(state_management_exports, {
advanceEpoch: () => advanceEpoch,
endTraining: () => endTraining,
getIsPaused: () => getIsPaused,
getIsTraining: () => getIsTraining,
getStopTraining: () => getStopTraining,
getTrainingContext: () => getTrainingContext,
requestPause: () => requestPause,
requestResume: () => requestResume,
requestStopTraining: () => requestStopTraining,
setTrainingState: () => setTrainingState,
setupTrainingContext: () => setupTrainingContext,
startTraining: () => startTraining
});
// src/train.ts
function createBatchVal(ogVal, batchIndices, currentBatchSize, features) {
const batchShape = [currentBatchSize, ...ogVal.shape.slice(1)];
const batchVal = new Val(batchShape);
for (let k = 0; k < currentBatchSize; k++) {
const ogIdx = batchIndices[k];
const sourceOffset = ogIdx * features;
const destOffset = k * features;
batchVal.data.set(ogVal.data.subarray(sourceOffset, sourceOffset + features), destOffset);
}
return batchVal;
}
function* getMiniBatch(X, Y, batchSize, shuffle = true) {
const numSamples = X.shape[0];
const indices = Array.from({ length: numSamples }, (_, i) => i);
if (shuffle) {
for (let i = indices.length - 1; i > 0; i--) {
const j = Math.floor(Math.random() * (i + 1));
[indices[i], indices[j]] = [indices[j], indices[i]];
}
}
const xFeatures = X.size / numSamples;
const yFeatures = Y.size / numSamples;
for (let i = 0; i < numSamples; i += batchSize) {
const batchIndices = indices.slice(i, i + batchSize);
const currentBatchSize = batchIndices.length;
const xBatchVal = createBatchVal(X, batchIndices, currentBatchSize, xFeatures);
const yBatchVal = createBatchVal(Y, batchIndices, currentBatchSize, yFeatures);
yield { x: xBatchVal, y: yBatchVal };
}
}
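// Example (illustrative sketch, not executed as part of the bundle): getMiniBatch
// yields shuffled { x, y } Val pairs, slicing along the first (sample) axis:
//
//   for (const { x, y } of getMiniBatch(X_train, Y_train, 32)) {
//     // x: [<=32, ...xFeatures], y: [<=32, ...yFeatures]
//   }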
function getActSample(batch) {
const sampleX = new Val(batch.x.shape.slice(1)).reshape([1, ...batch.x.shape.slice(1)]);
sampleX.data = batch.x.data.slice(0, batch.x.size / batch.x.shape[0]);
let sampleY_label = -1;
const y_features = batch.y.size / batch.y.shape[0];
if (y_features === 1) {
sampleY_label = batch.y.data[0];
} else {
const y_sample_data = batch.y.data.slice(0, y_features);
sampleY_label = y_sample_data.indexOf(1);
}
return [sampleX, sampleY_label];
}
async function trainModel(model, X_train, Y_train, params, messenger) {
const { loss_fn, l_rate, epochs, batch_size, multiClass } = params;
console.log(`----starting training. ${epochs} epochs, batch size ${batch_size}----`);
let totalProcessingTime = 0;
let iteration = 0;
try {
for (let e = 0; e < epochs; e++) {
const batchGenerator = getMiniBatch(X_train, Y_train, batch_size);
let batch_idx = 0;
for (const batch of batchGenerator) {
while (getIsPaused()) {
if (getStopTraining()) {
console.log("training stopped during pause");
return;
}
await new Promise((resolve) => setTimeout(resolve, 200));
}
if (getStopTraining()) {
console.log(`Training stopped at epoch ${e}`);
return;
}
const iterStartTime = performance.now();
const { x: X_batch, y: Y_batch } = batch;
model.zeroGrad();
const Y_pred = model.forward(X_batch);
const loss = loss_fn(Y_pred, Y_batch);
loss.backward();
const params2 = model.parameters();
for (const p of params2) {
if (!p.grad || p.data.length !== p.grad.length) {
console.warn(`Skipping update for parameter due to missing/mismatched gradient at epoch ${e}. Param shape: ${p.shape}`);
continue;
}
let hasInvalidGrad = false;
for (const gradVal of p.grad) {
if (isNaN(gradVal) || !isFinite(gradVal)) {
console.warn("Invalid gradient found. halting update for this batch", p);
hasInvalidGrad = true;
break;
}
}
if (hasInvalidGrad) continue;
for (let j = 0; j < p.data.length; j++) {
if (j < p.grad.length) {
p.data[j] -= l_rate * p.grad[j];
}
}
}
const iterEndTime = performance.now();
const iterTime = iterEndTime - iterStartTime;
totalProcessingTime += iterTime;
iteration++;
if (messenger) {
assert(model instanceof Sequential, () => `Model is not an instance of Sequential.`);
let accuracy = multiClass ? calcMultiClassAccuracy(Y_pred, Y_batch) : calcBinaryAccuracy(Y_pred, Y_batch);
const [sampleX, sampleY_label] = getActSample(batch);
const rawLayerOutputs = model.getLayerOutputs(sampleX).map((val) => ({
Zdata: val["Z"]?.data.buffer,
Zshape: val["Z"]?.shape,
Adata: val["A"]?.data.buffer,
Ashape: val["A"]?.shape
}));
const transferableBuffersSet = /* @__PURE__ */ new Set();
rawLayerOutputs.forEach((layer) => {
if (layer.Zdata) transferableBuffersSet.add(layer.Zdata);
if (layer.Adata) transferableBuffersSet.add(layer.Adata);
});
transferableBuffersSet.add(sampleX.data.buffer);
const transferableBuffers = Array.from(transferableBuffersSet);
messenger.postMessage({
type: "batchEnd",
epoch: e,
batch_idx: iteration,
loss: loss.data[0],
accuracy,
iterTime,
visData: {
sampleX: {
data: sampleX.data.buffer,
shape: sampleX.shape
},
sampleY_label,
layerOutputs: rawLayerOutputs
}
}, transferableBuffers);
}
batch_idx++;
}
}
console.log(`Training finished.`);
console.log(`Total processing time: ${(totalProcessingTime / 1e3).toFixed(3)}s over ${iteration} iterations.`);
console.log(`Average time per batch: ${(totalProcessingTime / iteration / 1e3).toFixed(4)}s`);
console.log(model);
} catch (error) {
console.error("Error during training loop execution:", error);
throw error;
} finally {
endTraining();
}
}
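// Example (illustrative sketch, not executed as part of the bundle): a typical
// trainModel call; the optional messenger (e.g. a Web Worker port) receives
// per-batch stats and visualization buffers via postMessage:
//
//   await trainModel(model, X_train, Y_train, {
//     loss_fn: crossEntropyLoss_softmax,
//     l_rate: 0.01,
//     epochs: 5,
//     batch_size: 32,
//     multiClass: true
//   });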
async function trainSingleBatch(messenger) {
const C = getTrainingContext();
if (getStopTraining() || getIsPaused() || !C.model || !C.batchGenerator || !C.params) {
if (getStopTraining()) {
endTraining();
messenger.postMessage({ type: "complete", reason: "stopped by user" });
return;
}
return;
}
C.iteration++;
const iterStartTime = performance.now();
const batchResult = C.batchGenerator.next();
if (batchResult.done) {
if (advanceEpoch()) {
messenger.postMessage({ type: "epochEnd", epoch: C.currentEpoch });
} else {
messenger.postMessage({ type: "complete", reason: "All epochs finished" });
}
return;
}
const batch = batchResult.value;
const { x: X_batch, y: Y_batch } = batch;
const { loss_fn, l_rate, epochs, batch_size, multiClass } = C.params;
C.model.zeroGrad();
const Y_pred = C.model.forward(X_batch);
const loss = loss_fn(Y_pred, Y_batch);
loss.backward();
const modelParams = C.model.parameters();
for (const p of modelParams) {
if (!p.grad || p.data.length !== p.grad.length) {
console.warn(`Skipping update for parameter due to missing/mismatched gradient. Param shape: ${p.shape}`);
continue;
}
let hasInvalidGrad = false;
for (const gradVal of p.grad) {
if (isNaN(gradVal) || !isFinite(gradVal)) {
console.warn("Invalid gradient found. halting update for this batch", p);
hasInvalidGrad = true;
break;
}
}
if (hasInvalidGrad) continue;
for (let j = 0; j < p.data.length; j++) {
if (j < p.grad.length) {
p.data[j] -= l_rate * p.grad[j];
}
}
}
const iterEndTime = performance.now();
const iterTime = iterEndTime - iterStartTime;
assert(C.model instanceof Sequential, () => `Model is not an instance of Sequential.`);
let accuracy = multiClass ? calcMultiClassAccuracy(Y_pred, Y_batch) : calcBinaryAccuracy(Y_pred, Y_batch);
const [sampleX, sampleY_label] = getActSample(batch);
const rawLayerOutputs = C.model.getLayerOutputs(sampleX).map((val) => ({
Zdata: val["Z"]?.data.buffer,
Zshape: val["Z"]?.shape,
Adata: val["A"]?.data.buffer,
Ashape: val["A"]?.shape
}));
const transferableBuffersSet = /* @__PURE__ */ new Set();
rawLayerOutputs.forEach((layer) => {
if (layer.Zdata) transferableBuffersSet.add(layer.Zdata);
if (layer.Adata) transferableBuffersSet.add(layer.Adata);
});
transferableBuffersSet.add(sampleX.data.buffer);
const transferableBuffers = Array.from(transferableBuffersSet);
messenger.postMessage({
type: "batchEnd",
epoch: C.currentEpoch,
batch_idx: C.iteration,
loss: loss.data[0],
accuracy,
iterTime,
visData: {
sampleX: {
data: sampleX.data.buffer,
shape: sampleX.shape
},
sampleY_label,
layerOutputs: rawLayerOutputs
}
}, transferableBuffers);
}
// src/state_management.ts
var isTraining = false