@tensorflow/tfjs-core
Version:
Hardware-accelerated JavaScript library for machine intelligence
169 lines (151 loc) • 5.78 kB
text/typescript
/**
* @license
* Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
import {dispose} from '../globals';
import {variableGrads} from '../gradients';
import {scalar} from '../ops/ops';
import {Serializable} from '../serialization';
import {Scalar, Variable} from '../tensor';
import {NamedTensor, NamedTensorMap} from '../tensor_types';
/**
* A variable that belongs to an optimizer.
*
* The `originalName` field is required for keeping track of the canonical
* name of the variable, which is usually the name of the model weight that
* the variable is related to plus a suffix, e.g., 'dense1/kernel/momentum'.
* The name of the `Variable` object itself cannot be used directly due to
* possible deduplication: Every `Variable` must have a unique name but more
* than one optimizer objects of the same type may be created for the same model
* or the same `Variable`.
*/
export interface OptimizerVariable {
originalName: string;
variable: Variable;
}
/** @doc {heading: 'Training', subheading: 'Classes', namespace: 'train'} */
export abstract class Optimizer extends Serializable {
protected iterations_: number;
/**
* Executes `f()` and minimizes the scalar output of `f()` by computing
* gradients of y with respect to the list of trainable variables provided by
* `varList`. If no list is provided, it defaults to all trainable variables.
*
* @param f The function to execute and whose output to minimize.
* @param returnCost Whether to return the scalar cost value produced by
* executing `f()`.
* @param varList An optional list of variables to update. If specified, only
* the trainable variables in varList will be updated by minimize. Defaults to
* all trainable variables.
*/
/** @doc {heading: 'Training', subheading: 'Optimizers'} */
minimize(f: () => Scalar, returnCost = false, varList?: Variable[]): Scalar
|null {
const {value, grads} = this.computeGradients(f, varList);
if (varList != null) {
const gradArray: NamedTensor[] =
varList.map(v => ({name: v.name, tensor: grads[v.name]}));
this.applyGradients(gradArray);
} else {
this.applyGradients(grads);
}
// Dispose gradients.
dispose(grads);
if (returnCost) {
return value as Scalar;
} else {
value.dispose();
return null;
}
}
/**
* The number of iterations that this optimizer instance has been invoked for.
*/
get iterations(): number {
if (this.iterations_ == null) {
this.iterations_ = 0;
}
return this.iterations_;
}
protected incrementIterations() {
this.iterations_ = this.iterations + 1;
}
/**
* Executes f() and computes the gradient of the scalar output of f() with
* respect to the list of trainable variables provided by `varList`. If no
* list is provided, it defaults to all trainable variables.
*
* @param f The function to execute and whose output to use for computing
* gradients with respect to variables.
* @param varList An optional list of variables to compute gradients with
* respect to. If specified, only the trainable variables in varList will have
* gradients computed with respect to. Defaults to all trainable variables.
*/
computeGradients(f: () => Scalar, varList?: Variable[]):
{value: Scalar, grads: NamedTensorMap} {
return variableGrads(f, varList);
}
/**
* Updates variables by using the computed gradients.
*
* @param variableGradients A mapping of variable name to its gradient value.
*/
abstract applyGradients(variableGradients: NamedTensorMap|
NamedTensor[]): void;
/**
* Dispose the variables (if any) owned by this optimizer instance.
*/
dispose(): void {
if (this.iterations_ != null) {
dispose(this.iterations_);
}
}
async saveIterations(): Promise<NamedTensor> {
if (this.iterations_ == null) {
this.iterations_ = 0;
}
return {
name: 'iter', // Named for Python compatibility.
// TODO(cais): Use 'int64' type when available.
tensor: scalar(this.iterations_, 'int32')
};
}
async getWeights(): Promise<NamedTensor[]> {
throw new Error('getWeights() is not implemented for this optimizer yet.');
}
async setWeights(weightValues: NamedTensor[]): Promise<void> {
throw new Error(
`setWeights() is not implemented for this optimizer class ` +
`${this.getClassName()}`);
}
/**
* Extract the first element of the weight values and set it
* as the iterations counter variable of this instance of optimizer.
*
* @param weightValues
* @returns Weight values with the first element consumed and excluded.
*/
protected async extractIterations(weightValues: NamedTensor[]):
Promise<NamedTensor[]> {
this.iterations_ = (await weightValues[0].tensor.data())[0];
return weightValues.slice(1);
}
}
Object.defineProperty(Optimizer, Symbol.hasInstance, {
value: (instance: Optimizer) => {
return instance.minimize != null && instance.computeGradients != null &&
instance.applyGradients != null;
}
});