@stdlib/ml

Machine learning algorithms.
/**
* @license Apache-2.0
*
* Copyright (c) 2018 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* eslint-disable no-restricted-syntax, no-invalid-this */

'use strict';

// MODULES //

var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' );
var setReadOnlyAccessor = require( '@stdlib/utils/define-nonenumerable-read-only-accessor' );
var format = require( '@stdlib/string/format' );
var gdot = require( '@stdlib/blas/base/gdot' ).ndarray;
var gaxpy = require( '@stdlib/blas/base/gaxpy' ).ndarray;
var dcopy = require( '@stdlib/blas/base/dcopy' );
var dscal = require( '@stdlib/blas/base/dscal' );
var max = require( '@stdlib/math/base/special/max' );
var exp = require( '@stdlib/math/base/special/exp' );
var pow = require( '@stdlib/math/base/special/pow' );
var sigmoid = require( '@stdlib/math/base/special/expit' );
var Float64Array = require( '@stdlib/array/float64' );
var ndarray = require( '@stdlib/ndarray/ctor' );
var shape2strides = require( '@stdlib/ndarray/base/shape2strides' );
var numel = require( '@stdlib/ndarray/base/numel' );
var vind2bind = require( '@stdlib/ndarray/base/vind2bind' );


// VARIABLES //

var MIN_SCALING_FACTOR = 1.0e-7;
var MIN_SCALE = 1.0e-11;
var LEARNING_RATE_METHODS = {
	'basic': '_basicLearningRate',
	'constant': '_constantLearningRate',
	'invscaling': '_inverseScalingLearningRate',
	'pegasos': '_pegasosLearningRate'
};
var LOSS_METHODS = {
	'hinge': '_hingeLoss',
	'log': '_logLoss',
	'modifiedHuber': '_modifiedHuberLoss',
	'perceptron': '_perceptronLoss',
	'squaredHinge': '_squaredHingeLoss'
};


// MAIN //

/**
* Model constructor.
*
* ## Notes
*
* -   The model (weight vector) implementation is inspired by the [sofia-ml][sofia-ml] library.
*
* [sofia-ml]: https://code.google.com/archive/p/sofia-ml/
*
* @private
* @constructor
* @param {PositiveInteger} N - number of feature weights (excluding bias/intercept term)
* @param {Options} opts - model options
* @param {PositiveNumber} opts.lambda - regularization parameter
* @param {ArrayLikeObject} opts.learningRate - learning rate function and associated parameters
* @param {string} opts.loss - loss function
* @param {boolean} opts.intercept - boolean indicating whether to include an intercept
* @returns {Model} model
*/
function Model( N, opts ) {
	var len;

	// Set internal properties:
	this._N = N;
	this._opts = opts;
	this._scaleFactor = 1.0;
	this._t = 0; // iteration counter (i.e., number of updates)

	// Determine the learning rate function:
	this._learningRateMethod = LEARNING_RATE_METHODS[ opts.learningRate[ 0 ] ];

	// Determine the loss function:
	this._lossMethod = LOSS_METHODS[ opts.loss ];

	// Determine the number of model coefficients:
	len = N;
	if ( opts.intercept ) {
		len += 1;
	}
	// Initialize a model weight vector with all weights set to zero:
	this._weights = new Float64Array( len );

	// Initialize model coefficients to zero:
	this._coefficients = new ndarray( 'float64', new Float64Array( len ), [ len ], [ 1 ], 0, 'row-major' );

	return this;
}
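/*
* Usage sketch (illustrative only; the constructor is private, and the option
* values below are hypothetical, not documented defaults):
*
*     var opts = {
*         'lambda': 1.0e-4,
*         'learningRate': [ 'basic' ],
*         'loss': 'log',
*         'intercept': true
*     };
*     var model = new Model( 2, opts );
*     // => model with 3 coefficients (2 feature weights + intercept)
*
*     var x = new ndarray( 'float64', new Float64Array( [ 1.0, -0.5 ] ), [ 2 ], [ 1 ], 0, 'row-major' );
*     model.update( x, 1 ); // labels assumed to be -1 or +1
*     var yhat = model.predict( x, 'probability' );
*/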
/**
* Adds a provided input vector to the model weight vector.
*
* @private
* @name _add
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - input vector
* @param {number} scale - scale factor
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_add', function add( x, scale ) {
	var s = scale / this._scaleFactor;
	var w = this._weights;

	// Scale `x` and add to the model weight vector:
	gaxpy( x.shape[ 0 ], s, x.data, x.strides[ 0 ], x.offset, w, 1, 0 );

	// If an intercept is assumed, treat `x` as containing one additional element equal to one...
	if ( this._opts.intercept ) {
		w[ this._N ] += s;
	}
	return this;
});

/**
* Computes a learning rate.
*
* ## Notes
*
* -   This learning rate function is based on the learning rate function of the same name in the [sofia-ml][sofia-ml] library.
*
* [sofia-ml]: https://code.google.com/archive/p/sofia-ml/
*
* @private
* @name _basicLearningRate
* @memberof Model.prototype
* @type {Function}
* @returns {number} learning rate
*/
setReadOnly( Model.prototype, '_basicLearningRate', function basic() {
	return 10.0 / ( 10.0+this._t );
});

/**
* Returns a constant learning rate.
*
* @private
* @name _constantLearningRate
* @memberof Model.prototype
* @type {Function}
* @returns {number} learning rate
*/
setReadOnly( Model.prototype, '_constantLearningRate', function constant() {
	return this._opts.learningRate[ 1 ];
});

/**
* Calculates the dot product of the model weight vector and a provided vector `x`.
*
* @private
* @name _dot
* @memberof Model.prototype
* @type {Function}
* @param {NumericArray} buf - ndarray data buffer
* @param {integer} stride - stride
* @param {NonNegativeInteger} offset - index offset
* @returns {number} dot product
*/
setReadOnly( Model.prototype, '_dot', function dot( buf, stride, offset ) {
	var v = gdot( this._N, this._weights, 1, 0, buf, stride, offset );
	if ( this._opts.intercept ) {
		v += this._weights[ this._N ];
	}
	v *= this._scaleFactor;
	return v;
});
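/*
* Note on the scale factor (explanatory sketch, not from the original source):
* the effective weight vector is `scaleFactor * _weights`. L2 regularization
* shrinks the model by multiplying `scaleFactor` alone (see `_regularize` and
* `_scale` below), so `_dot` multiplies its result by `scaleFactor`, while
* `_add` divides an increment by `scaleFactor` so the increment is unscaled
* when read back. For example, with `scaleFactor = 0.5` and stored weights
* `[ 2.0 ]`, the effective weights are `[ 1.0 ]`; adding `0.2*x` stores
* `0.4*x`, which again reads back as `0.2*x`.
*/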
/**
* Updates the model weight vector using the hinge loss function.
*
* ## Notes
*
* -   The hinge loss function is defined as
*
*     ```tex
*     L(y, f(x)) = \max\{ 0, 1 - y\,f(x) \}
*     ```
*
*     where
*
*     ```tex
*     f(x) = w^T x + b
*     ```
*
*     with \\(w\\) being the model weight vector and \\(b\\) being the intercept.
*
* @private
* @name _hingeLoss
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - feature vector
* @param {integer} y - response value
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_hingeLoss', function hingeLoss( x, y ) {
	var eta;
	var d;

	eta = this[ this._learningRateMethod ]();
	this._regularize( eta );

	d = this._dot( x.data, x.strides[ 0 ], x.offset );
	if ( ( y*d ) < 1.0 ) {
		this._add( x, y*eta );
	}
	return this;
});

/**
* Computes a learning rate according to an inverse scaling formula.
*
* ## Notes
*
* -   The inverse scaling formula is defined as
*
*     ```tex
*     \eta = \frac{\eta_0}{t^{k}}
*     ```
*
*     where \\(\eta_0\\) is an initial learning rate, \\(t\\) is the current iteration, and \\(k\\) is an exponent controlling how quickly the learning rate decreases.
*
* @private
* @name _inverseScalingLearningRate
* @memberof Model.prototype
* @type {Function}
* @returns {number} learning rate
*/
setReadOnly( Model.prototype, '_inverseScalingLearningRate', function invscaling() {
	var params = this._opts.learningRate;
	return params[ 1 ] / pow( this._t, params[ 2 ] );
});

/**
* Updates the model weight vector using the log loss function.
*
* ## Notes
*
* -   The log loss function is defined as
*
*     ```tex
*     L(y, f(x)) = \ln( 1 + \exp( -y\,f(x) ) )
*     ```
*
*     where
*
*     ```tex
*     f(x) = w^T x + b
*     ```
*
*     with \\(w\\) being the model weight vector and \\(b\\) being the intercept.
*
* @private
* @name _logLoss
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - feature vector
* @param {integer} y - response value
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_logLoss', function logLoss( x, y ) {
	var loss;
	var eta;
	var d;

	eta = this[ this._learningRateMethod ]();
	this._regularize( eta );

	d = this._dot( x.data, x.strides[ 0 ], x.offset );
	loss = y / ( 1.0 + exp( y*d ) );
	this._add( x, eta*loss );
	return this;
});

/**
* Updates the model weight vector using the modified Huber loss function.
*
* ## Notes
*
* -   The modified Huber loss function is defined as
*
*     ```tex
*     L(y, f(x)) = \begin{cases}
*     \max(0, 1 - y\,f(x))^2 & \textrm{for}\,\,y\,f(x) \geq -1\\
*     -4y\,f(x) & \textrm{otherwise}
*     \end{cases}
*     ```
*
*     where
*
*     ```tex
*     f(x) = w^T x + b
*     ```
*
*     with \\(w\\) being the model weight vector and \\(b\\) being the intercept.
*
* ## References
*
* -   Zhang, Tong. 2004. "Solving Large Scale Linear Prediction Problems Using Stochastic Gradient Descent Algorithms." In _Proceedings of the Twenty-First International Conference on Machine Learning_, 116. New York, NY, USA: Association for Computing Machinery. doi:[10.1145/1015330.1015332][@zhang:2004a].
*
* [@zhang:2004a]: https://doi.org/10.1145/1015330.1015332
*
* @private
* @name _modifiedHuberLoss
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - feature vector
* @param {integer} y - response value
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_modifiedHuberLoss', function modifiedHuber( x, y ) {
	var eta;
	var d;

	eta = this[ this._learningRateMethod ]();
	this._regularize( eta );

	d = y * this._dot( x.data, x.strides[ 0 ], x.offset );
	if ( d < -1.0 ) {
		this._add( x, 4.0*eta*y );
	} else {
		this._add( x, eta*( y-(d*y) ) );
	}
	return this;
});
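/*
* Worked example for the margin-based updates above (hypothetical numbers):
* with effective weights `w = [ 1.0, -1.0 ]`, no intercept, an observation
* `x = [ 0.5, 0.25 ]`, and response `y = 1`, the margin is
* `y * (w^T x) = 0.25`. Since `0.25 < 1`, the hinge loss update adds
* `eta * y * x` to the weights; for a margin of `1` or greater, hinge loss
* leaves the weights untouched (aside from the regularization shrink).
*/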
/**
* Computes a learning rate using Pegasos.
*
* ## References
*
* -   Shalev-Shwartz, Shai, Yoram Singer, Nathan Srebro, and Andrew Cotter. 2011. "Pegasos: primal estimated sub-gradient solver for SVM." _Mathematical Programming_ 127 (1): 3–30. doi:[10.1007/s10107-010-0420-4][@shalevshwartz:2011a].
*
* [@shalevshwartz:2011a]: https://doi.org/10.1007/s10107-010-0420-4
*
* @private
* @name _pegasosLearningRate
* @memberof Model.prototype
* @type {Function}
* @returns {number} learning rate
*/
setReadOnly( Model.prototype, '_pegasosLearningRate', function pegasos() {
	return 1.0 / ( this._opts.lambda*this._t );
});

/**
* Updates the model weight vector using the perceptron loss function.
*
* ## Notes
*
* -   The perceptron loss function is defined as
*
*     ```tex
*     L(y, f(x)) = \max(0, -y\,f(x))
*     ```
*
*     where
*
*     ```tex
*     f(x) = w^T x + b
*     ```
*
*     with \\(w\\) being the model weight vector and \\(b\\) being the intercept.
*
* -   The perceptron loss function is equivalent to the hinge loss function without a margin.
*
* -   The perceptron loss function does not update the model weight vector when the response is correctly classified.
*
* ## References
*
* -   Rosenblatt, Frank. 1957. "The Perceptron–a perceiving and recognizing automaton." 85-460-1. Buffalo, NY, USA: Cornell Aeronautical Laboratory.
*
* @private
* @name _perceptronLoss
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - feature vector
* @param {integer} y - response value
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_perceptronLoss', function perceptron( x, y ) {
	var eta;
	var d;

	eta = this[ this._learningRateMethod ]();
	this._regularize( eta );

	d = this._dot( x.data, x.strides[ 0 ], x.offset );
	if ( ( y*d ) <= 0.0 ) {
		this._add( x, y*eta );
	}
	return this;
});

/**
* Performs L2 regularization of the model weights.
*
* @private
* @name _regularize
* @memberof Model.prototype
* @type {Function}
* @param {PositiveNumber} eta - learning rate
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_regularize', function regularize( eta ) {
	var lambda = this._opts.lambda;
	if ( lambda <= 0.0 ) {
		return this;
	}
	this._scale( max( 1.0-( eta*lambda ), MIN_SCALING_FACTOR ) );
	return this;
});

/**
* Scales the model weight vector by a provided scaling factor.
*
* @private
* @name _scale
* @memberof Model.prototype
* @type {Function}
* @param {number} factor - scaling factor
* @throws {RangeError} scaling factor must be a positive number
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_scale', function scale( factor ) {
	var s;
	if ( factor <= 0.0 ) {
		throw new RangeError( format( 'invalid argument. Attempting to scale a weight vector by a nonpositive value. This is likely due to too large a value of eta * lambda. Value: `%f`.', factor ) );
	}
	// Check whether we need to scale the weight vector to unity in order to avoid numerical issues...
	s = this._scaleFactor;
	if ( s < MIN_SCALE ) {
		// Note: we only scale/shrink the feature weights, not the intercept...
		dscal( this._N, s, this._weights, 1 );
		this._scaleFactor = 1.0;
	}
	this._scaleFactor *= factor;
	return this;
});
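/*
* Numerical note (explanatory sketch): repeated regularization multiplies the
* scale factor by values less than one, so it decays toward zero over many
* updates. Once it drops below `MIN_SCALE` (1.0e-11), the factor is folded
* into the stored feature weights via `dscal` and reset to unity, keeping
* `_dot` and `_add` numerically stable. For example, if the scale factor has
* decayed to `5.0e-12`, the next `_scale` call first rescales `_weights` in
* place and then restarts the factor from `1.0 * factor`.
*/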
/**
* Updates the model weight vector using the squared hinge loss function.
*
* ## Notes
*
* -   The squared hinge loss function is defined as
*
*     ```tex
*     L(y, f(x)) = \max\{ 0, 1 - y\,f(x) \}^2
*     ```
*
*     where
*
*     ```tex
*     f(x) = w^T x + b
*     ```
*
*     with \\(w\\) being the model weight vector and \\(b\\) being the intercept.
*
* @private
* @name _squaredHingeLoss
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - feature vector
* @param {integer} y - response value
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, '_squaredHingeLoss', function squaredHingeLoss( x, y ) {
	var eta;
	var d;

	eta = this[ this._learningRateMethod ]();
	this._regularize( eta );

	d = y * this._dot( x.data, x.strides[ 0 ], x.offset );
	if ( d < 1.0 ) {
		this._add( x, eta*( y-(d*y) ) );
	}
	return this;
});

/**
* Returns the model coefficients.
*
* @private
* @name coefficients
* @memberof Model.prototype
* @type {Function}
* @returns {ndarray} model coefficients
*/
setReadOnlyAccessor( Model.prototype, 'coefficients', function coefficients() {
	var c = this._coefficients.data;
	var w = this._weights;

	dcopy( w.length, w, 1, c, 1 );
	dscal( this._N, this._scaleFactor, c, 1 );
	return this._coefficients;
});

/**
* Returns the number of model features.
*
* @private
* @name nfeatures
* @memberof Model.prototype
* @type {PositiveInteger}
*/
setReadOnlyAccessor( Model.prototype, 'nfeatures', function nfeatures() {
	return this._N;
});

/**
* Predicts the response value for one or more observation vectors `X`.
*
* @private
* @name predict
* @memberof Model.prototype
* @type {Function}
* @param {ndarray} X - observation vector(s)
* @param {string} type - prediction type
* @returns {ndarray} ndarray containing response values
*/
setReadOnly( Model.prototype, 'predict', function predict( X, type ) {
	var ndims;
	var xbuf;
	var ybuf;
	var xsh;
	var ysh;
	var ord;
	var ptr;
	var sxn;
	var sx;
	var sy;
	var ox;
	var M;
	var N;
	var Y;
	var v;
	var i;

	// Cache input array properties in case of lazy evaluation:
	xbuf = X.data;
	xsh = X.shape;
	sx = X.strides;
	ox = X.offset;
	ord = X.order;

	ndims = xsh.length - 1;

	// The output array shape is the same as the input array shape without the last dimension (i.e., the number of dimensions is reduced by one)...
	ysh = [];
	for ( i = 0; i < ndims; i++ ) {
		ysh.push( xsh[ i ] );
	}
	// Create an output array...
	if ( ndims === 0 ) {
		M = 1;
		ybuf = new Float64Array( 1 );
		sy = [ 0 ];
	} else {
		M = numel( ysh );
		ybuf = new Float64Array( M );
		sy = shape2strides( ysh, ord );
	}
	// Note: the output data type matches the `Float64Array` buffer, as predicted values may be probabilities or raw linear predictors:
	Y = new ndarray( 'float64', ybuf, ysh, sy, 0, ord );

	// Loop over all observation vectors...
	N = this._N; // number of features (i.e., size of last `X` dimension)
	sxn = sx[ ndims ]; // stride of the last `X` dimension
	for ( i = 0; i < M; i++ ) {
		// Compute the index offset into the underlying data buffer pointing to the start of the current observation vector:
		ptr = vind2bind( xsh, sx, ox, ord, i*N, 'throw' );

		// Compute the dot product of the current observation vector with the model weight vector:
		v = this._dot( xbuf, sxn, ptr );

		// Determine the output value:
		if ( type === 'label' ) {
			v = ( v > 0 ) ? 1 : -1;
		} else if ( type === 'probability' ) {
			v = sigmoid( v );
		}
		// else type === 'linear' (i.e., linear predictor)

		// Set the element in the output array:
		if ( ndims === 0 ) {
			Y.iset( v );
		} else {
			Y.iset( i, v );
		}
	}
	return Y;
});

/**
* Updates a model given a provided observation vector and response value.
*
* @private
* @name update
* @memberof Model.prototype
* @type {Function}
* @param {VectorLike} x - feature vector
* @param {integer} y - response value
* @returns {Model} model instance
*/
setReadOnly( Model.prototype, 'update', function update( x, y ) {
	this._t += 1;
	return this[ this._lossMethod ]( x, y );
});


// EXPORTS //

module.exports = Model;
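/*
* Prediction semantics (illustrative summary, not from the original source):
* for each observation vector, `predict` computes the linear predictor
* `f(x) = w^T x + b` (rescaled by the scale factor). With `type === 'label'`,
* a positive `f(x)` maps to `+1` and all other values to `-1`; with
* `type === 'probability'`, `f(x)` is passed through the logistic sigmoid;
* otherwise, the raw linear predictor is returned. E.g., `f(x) = 0.0` yields
* label `-1` and probability `0.5`.
*/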