scalar-autograd
Scalar-based reverse-mode automatic differentiation in TypeScript.
import { Value } from "./Value";
/**
* Abstract base class for all optimizers.
* Ensures only requiresGrad parameters are optimized.
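*
* @example
* // Illustrative sketch of the training step shared by all subclasses.
* // Here opt is any Optimizer subclass and loss is a scalar Value produced by the
* // forward pass; loss.backward() is assumed from the Value API in ./Value,
* // which is not declared in this file.
* loss.backward();        // accumulate gradients into the trainables
* opt.clipGradients(1.0); // optional: cap the global gradient norm
* opt.step();             // apply the subclass's update rule
* opt.zeroGrad();         // clear gradients before the next pass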
*/
export declare abstract class Optimizer {
protected trainables: Value[];
learningRate: number;
/**
* Constructs an Optimizer.
* @param trainables Array of Value parameters to optimize.
* @param learningRate Learning rate for updates.
*/
constructor(trainables: Value[], learningRate: number);
/**
* Performs a parameter update step.
*/
abstract step(): void;
/**
* Resets any internal optimizer state kept for the given trainable
* (e.g. moment estimates in Adam/AdamW).
* @param trainable The Value whose optimizer state should be reset.
*/
abstract resetStateFor(trainable: Value): void;
/**
* Sets grads of all trainables to zero.
*/
zeroGrad(): void;
/**
* Clips the gradients of all trainables so that their global norm does not exceed maxNorm.
* @param maxNorm Maximum allowed norm for gradients.
*/
clipGradients(maxNorm: number): void;
}
/**
* Optional arguments for basic optimizers.
* @property learningRate: Overrides the step size for parameter updates (default varies by optimizer).
* @property weightDecay: L2 regularization multiplier (default 0). Ignored for plain SGD.
* @property gradientClip: Maximum absolute value for gradient updates (default 0: no clipping).
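*
* @example
* // All fields are optional; anything omitted falls back to the optimizer's default.
* const opts: OptimizerOptions = { learningRate: 0.01, gradientClip: 1.0 };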
*/
export interface OptimizerOptions {
learningRate?: number;
weightDecay?: number;
gradientClip?: number;
}
/**
* Stochastic Gradient Descent (SGD) optimizer. Accepts weightDecay and gradientClip for API consistency; both options are ignored.
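*
* @example
* // Minimal sketch: params are Values flagged with requiresGrad, computeLoss is a
* // user-defined forward pass, and loss.backward() is assumed from the Value API
* // in ./Value (not declared in this file).
* const sgd = new SGD(params, { learningRate: 0.1 });
* for (let i = 0; i < 100; i++) {
*   const loss = computeLoss(params); // forward pass returning a scalar Value
*   loss.backward();                  // assumed reverse-mode API
*   sgd.step();
*   sgd.zeroGrad();
* }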
*/
export declare class SGD extends Optimizer {
private weightDecay;
private gradientClip;
/**
* Constructs an SGD optimizer.
* @param trainables Array of Value parameters to optimize.
* @param opts Optional parameters (learningRate, weightDecay, gradientClip).
*/
constructor(trainables: Value[], opts?: OptimizerOptions);
/**
* Performs a parameter update using standard SGD.
*/
step(): void;
resetStateFor(trainable: Value): void;
}
/**
* Adam and AdamW optimizer parameters.
* Extends OptimizerOptions.
* @property beta1: Exponential decay rate for 1st moment (default 0.9).
* @property beta2: Exponential decay rate for 2nd moment (default 0.999).
* @property epsilon: Small constant added for numerical stability (default 1e-8).
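*
* @example
* // Inherited OptimizerOptions fields and Adam-specific fields can be mixed freely.
* const opts: AdamOptions = { learningRate: 1e-3, weightDecay: 0.01, beta2: 0.999 };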
*/
export interface AdamOptions extends OptimizerOptions {
beta1?: number;
beta2?: number;
epsilon?: number;
}
/**
* Adam optimizer. Supports decoupled weight decay and gradient clipping.
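*
* @example
* // Sketch only: params are Values with requiresGrad, and loss.backward() is
* // assumed from the Value API (not declared in this file).
* const adam = new Adam(params, { learningRate: 1e-3, weightDecay: 0.01 });
* loss.backward();
* adam.step();     // updates parameters using per-parameter moment estimates (m, v)
* adam.zeroGrad();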
*/
export declare class Adam extends Optimizer {
private beta1;
private beta2;
private epsilon;
private weightDecay;
private gradientClip;
private m;
private v;
private stepCount;
/**
* Constructs an Adam optimizer.
* @param trainables Array of Value parameters to optimize.
* @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
*/
constructor(trainables: Value[], opts?: AdamOptions);
/**
* Performs a parameter update using Adam optimization.
*/
step(): void;
resetStateFor(trainable: Value): void;
}
/**
* AdamW optimizer. Supports decoupled weight decay and gradient clipping (same options as Adam).
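*
* @example
* // Same usage pattern as Adam; weight decay is applied in decoupled (AdamW) form.
* // params and loss.backward() come from the Value API (assumed, see ./Value).
* const adamw = new AdamW(params, { learningRate: 1e-3, weightDecay: 0.01 });
* loss.backward();
* adamw.step();
* adamw.zeroGrad();
* // If a parameter is re-initialized, its moment estimates can be cleared:
* adamw.resetStateFor(params[0]);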
*/
export declare class AdamW extends Optimizer {
private beta1;
private beta2;
private epsilon;
private weightDecay;
private gradientClip;
private m;
private v;
private stepCount;
/**
* Constructs an AdamW optimizer.
* @param trainables Array of Value parameters to optimize.
* @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
*/
constructor(trainables: Value[], opts?: AdamOptions);
/**
* Performs a parameter update using AdamW optimization (decoupled weight decay).
*/
step(): void;
resetStateFor(trainable: Value): void;
}