UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

ai-on-browser.github.io/docs

ai-on-browser/ai-on-browser.github.io

457 lines (404 loc) • 10.8 kB

JavaScript

import Layer, { NeuralnetworkLayerException } from './base.js'
import Tensor from '../../../util/tensor.js'
import Matrix from '../../../util/matrix.js'

/**
 * GRU layer
 */
export default class GRULayer extends Layer {
	/**
	 * @param {object} config object
	 * @param {number} config.size Size of output
	 * @param {boolean} [config.return_sequences] Return sequences or not
	 * @param {number[][] | Matrix | string} [config.w_z] Weight of z from input to sequence
	 * @param {number[][] | Matrix | string} [config.w_r] Weight of r from input to sequence
	 * @param {number[][] | Matrix | string} [config.w_h] Weight of h from input to sequence
	 * @param {number[][] | Matrix | string} [config.u_z] Weight of z from sequence to sequence
	 * @param {number[][] | Matrix | string} [config.u_r] Weight of r from sequence to sequence
	 * @param {number[][] | Matrix | string} [config.u_h] Weight of h from sequence to sequence
	 * @param {number[][] | Matrix | string} [config.b_z] Bias of z of output
	 * @param {number[][] | Matrix | string} [config.b_r] Bias of r of output
	 * @param {number[][] | Matrix | string} [config.b_h] Bias of h of output
	 * @param {number} [config.sequence_dim] Dimension of the timesteps
	 */
	constructor({
		size,
		return_sequences = false,
		w_z = null,
		w_r = null,
		w_h = null,
		u_z = null,
		u_r = null,
		u_h = null,
		b_z = null,
		b_r = null,
		b_h = null,
		sequence_dim = 1,
		...rest
	}) {
		super(rest)
		this._size = size
		this._unit = new GRUUnitLayer({ layer: this, size, w_z, w_r, w_h, u_z, u_r, u_h, b_z, b_r, b_h })
		this._return_sequences = return_sequences
		this._sequence_dim = sequence_dim
		// Only the first two axes are accepted as the timestep axis.
		if (this._sequence_dim !== 0 && this._sequence_dim !== 1) {
			throw new NeuralnetworkLayerException('Invalid sequence dimension.')
		}
	}

	/**
	 * Names of the layers whose outputs are used as parameters (delegated to the unit).
	 * @type {string[]}
	 */
	get dependentLayers() {
		return this._unit.dependentLayers
	}

	/**
	 * Forward pass: feeds every timestep of `x` to the unit in order.
	 * @param {Tensor} x Input. When `sequence_dim` is 1 it is transposed with `(1, 0, 2)` so that timesteps are indexed first.
	 * @returns {Tensor | Matrix} All states stacked into a tensor when `return_sequences` is set, otherwise the state of the last timestep
	 */
	calc(x) {
		if (this._sequence_dim === 1) {
			x = x.transpose(1, 0, 2)
		}
		this._x = []
		for (let k = 0; k < x.sizes[0]; k++) {
			this._x[k] = x.index(k).toMatrix()
		}

		const s = []
		for (let k = 0; k < this._x.length; k++) {
			s[k] = this._unit.calc(this._x[k], k)
		}
		if (this._return_sequences) {
			const t = Tensor.fromArray(s.map(v => v.toArray()))
			// Restore the caller's axis order.
			return this._sequence_dim === 1 ? t.transpose(1, 0, 2) : t
		}
		return s[s.length - 1]
	}

	/**
	 * Backward pass.
	 * @param {Tensor | Matrix} bo Gradient with respect to the output of `calc`
	 * @returns {Tensor | Array} Gradient with respect to the input, or `[gradient, namedGradients]` when the unit reports gradients for named parameters
	 */
	grad(bo) {
		return this._grad_bptt(bo)
	}

	/**
	 * Back-propagation through time over the timesteps stored by the last `calc`.
	 * @param {Tensor | Matrix} bo Gradient with respect to the output of `calc`
	 * @returns {Tensor | Array} Same as `grad`
	 */
	_grad_bptt(bo) {
		const s = this._x.length
		this._bo = Array(s)
		if (this._return_sequences) {
			if (this._sequence_dim === 1) {
				bo = bo.transpose(1, 0, 2)
			}
			for (let i = 0; i < s; i++) {
				this._bo[i] = bo.index(i).toMatrix()
			}
		} else {
			// Only the last timestep was returned, so only it receives an upstream gradient.
			this._bo[s - 1] = bo
		}

		const bi = []
		let gp = null
		// Walk backwards: the unit needs the gradients of step t + 1 to compute step t.
		for (let t = s - 1; t >= 0; t--) {
			const bit = this._unit.grad(this._bo[t], t)
			if (t === 0 && Array.isArray(bit)) {
				// At the first timestep the unit may also hand back gradients of named parameters.
				bi[t] = bit[0]
				gp = bit[1]
			} else {
				bi[t] = bit
			}
		}
		let t = Tensor.fromArray(bi.map(b => b.toArray()))
		if (this._sequence_dim === 1) {
			t = t.transpose(1, 0, 2)
		}
		return gp ? [t, gp] : t
	}

	/**
	 * Applies the gradients accumulated by the last backward pass.
	 * @param {object} optimizer Optimizer object providing `delta(key, gradient)`
	 */
	update(optimizer) {
		this._unit.update(optimizer)
	}

	/**
	 * @returns {object} Plain-object description of this layer including the unit's parameters
	 */
	toObject() {
		return {
			type: 'gru',
			size: this._size,
			return_sequences: this._return_sequences,
			sequence_dim: this._sequence_dim,
			...this._unit.toObject(),
		}
	}
}

/**
 * Single GRU cell that keeps the per-timestep values needed for back-propagation through time.
 */
class GRUUnitLayer extends Layer {
	/**
	 * @param {object} config object
	 * @param {Layer} config.layer Owner layer, passed to every parameter holder
	 * @param {number} config.size Size of the state
	 * @param {number[][] | Matrix | string} [config.w_z] Weight of z from input to sequence
	 * @param {number[][] | Matrix | string} [config.w_r] Weight of r from input to sequence
	 * @param {number[][] | Matrix | string} [config.w_h] Weight of h from input to sequence
	 * @param {number[][] | Matrix | string} [config.u_z] Weight of z from sequence to sequence
	 * @param {number[][] | Matrix | string} [config.u_r] Weight of r from sequence to sequence
	 * @param {number[][] | Matrix | string} [config.u_h] Weight of h from sequence to sequence
	 * @param {number[][] | Matrix | string} [config.b_z] Bias of z
	 * @param {number[][] | Matrix | string} [config.b_r] Bias of r
	 * @param {number[][] | Matrix | string} [config.b_h] Bias of h
	 */
	constructor({
		layer,
		size,
		w_z = null,
		w_r = null,
		w_h = null,
		u_z = null,
		u_r = null,
		u_h = null,
		b_z = null,
		b_r = null,
		b_h = null,
		...rest
	}) {
		super(rest)
		this._size = size

		// Input weights get no default shape here; it is supplied at the first `calc` from the input's column count.
		this._w_z = new Variable(layer, w_z)
		this._w_r = new Variable(layer, w_r)
		this._w_h = new Variable(layer, w_h)

		this._u_z = new Variable(layer, u_z, [this._size, this._size])
		this._u_r = new Variable(layer, u_r, [this._size, this._size])
		this._u_h = new Variable(layer, u_h, [this._size, this._size])

		this._b_z = new Variable(layer, b_z, [1, this._size])
		this._b_r = new Variable(layer, b_r, [1, this._size])
		this._b_h = new Variable(layer, b_h, [1, this._size])

		// State used in place of the previous state at the first timestep.
		this._s0 = Matrix.zeros(1, this._size)

		// Per-timestep caches filled by `calc`.
		this._x = []
		this._h = []
		this._s = []
		this._z = []
		this._r = []

		// Per-timestep caches filled by `grad`.
		this._bo = []
		this._dy = []
		this._dr = []
		this._dz = []
		this._dh = []
	}

	/**
	 * Names of all parameters that were configured as a string (layer name).
	 * @type {string[]}
	 */
	get dependentLayers() {
		const layers = []
		if (this._w_z.name) {
			layers.push(this._w_z.name)
		}
		if (this._w_r.name) {
			layers.push(this._w_r.name)
		}
		if (this._w_h.name) {
			layers.push(this._w_h.name)
		}
		if (this._u_z.name) {
			layers.push(this._u_z.name)
		}
		if (this._u_r.name) {
			layers.push(this._u_r.name)
		}
		if (this._u_h.name) {
			layers.push(this._u_h.name)
		}
		if (this._b_z.name) {
			layers.push(this._b_z.name)
		}
		if (this._b_r.name) {
			layers.push(this._b_r.name)
		}
		if (this._b_h.name) {
			layers.push(this._b_h.name)
		}
		return layers
	}

	/**
	 * @param {Matrix} x Pre-activation values
	 * @returns {Matrix} Element-wise `1 / (1 + exp(-v))`
	 */
	_sigmoid(x) {
		return Matrix.map(x, v => 1 / (1 + Math.exp(-v)))
	}

	/**
	 * @param {Matrix} y Sigmoid outputs
	 * @returns {Matrix} Element-wise `v * (1 - v)`
	 */
	_grad_sigmoid(y) {
		return Matrix.map(y, v => v * (1 - v))
	}

	/**
	 * @param {Matrix} x Pre-activation values
	 * @returns {Matrix} Element-wise hyperbolic tangent
	 */
	_tanh(x) {
		return Matrix.map(x, Math.tanh)
	}

	/**
	 * @param {Matrix} y Tanh outputs
	 * @returns {Matrix} Element-wise `1 - v ** 2`
	 */
	_grad_tanh(y) {
		return Matrix.map(y, v => 1 - v ** 2)
	}

	/**
	 * Forward pass of one timestep.
	 *
	 * z = sigmoid(x W_z + s_prev U_z + b_z)
	 * r = sigmoid(x W_r + s_prev U_r + b_r)
	 * h = tanh(x W_h + (r * s_prev) U_h + b_h)
	 * s = (1 - z) * h + z * s_prev
	 * @param {Matrix} x Input of timestep `k`
	 * @param {number} k Index of the timestep; 0 clears the stored states
	 * @returns {Matrix} State of timestep `k`
	 */
	calc(x, k) {
		if (k === 0) {
			this._s = []
		}
		this._x[k] = x
		const pre_s = k === 0 ? this._s0 : this._s[k - 1]

		const zb = this._x[k].dot(this._w_z.get(x.cols, this._size))
		zb.add(pre_s.dot(this._u_z.get()))
		zb.add(this._b_z.get())
		const z = this._sigmoid(zb)
		this._z[k] = z

		const rb = this._x[k].dot(this._w_r.get(x.cols, this._size))
		rb.add(pre_s.dot(this._u_r.get()))
		rb.add(this._b_r.get())
		const r = this._sigmoid(rb)
		this._r[k] = r

		const hb = this._x[k].dot(this._w_h.get(x.cols, this._size))
		hb.add(Matrix.mult(r, pre_s).dot(this._u_h.get()))
		hb.add(this._b_h.get())
		const h = this._tanh(hb)
		this._h[k] = h

		this._s[k] = Matrix.sub(1, z)
		this._s[k].mult(h)
		this._s[k].add(Matrix.mult(z, pre_s))

		return this._s[k]
	}

	/**
	 * Backward pass of one timestep. Must be called from the last timestep down to 0.
	 * @param {Matrix} [bo] Upstream gradient of the state at timestep `t`, if any
	 * @param {number} t Index of the timestep
	 * @returns {Matrix | Array} Gradient with respect to the input of timestep `t`; at `t === 0` it is `[gradient, namedGradients]` when at least one parameter is named
	 */
	grad(bo, t) {
		return this._grad_bptt(bo, t)
	}

	/**
	 * Computes the gate gradients of timestep `t` and, at `t === 0`, the parameter gradients.
	 * @param {Matrix} [bo] Upstream gradient of the state at timestep `t`, if any
	 * @param {number} t Index of the timestep
	 * @returns {Matrix | Array} Same as `grad`
	 */
	_grad_bptt(bo, t) {
		const s = this._s.length
		this._bo[t] = bo
		const pre_s = t === 0 ? this._s0 : this._s[t - 1]

		// Total gradient reaching the state of timestep t.
		// NOTE(review): without an upstream gradient this starts as a 1x1 zero matrix and relies on
		// the following `add` calls to expand it - confirm the broadcasting behaviour of Matrix#add.
		this._dy[t] = this._bo[t] || Matrix.zeros(1, 1)
		if (t < s - 1) {
			// Contributions flowing back from timestep t + 1 through each use of this state.
			this._dy[t].add(this._dz[t + 1].dot(this._u_z.value.t))
			this._dy[t].add(this._dr[t + 1].dot(this._u_r.value.t))
			this._dy[t].add(Matrix.mult(this._dh[t + 1].dot(this._u_h.value.t), this._r[t + 1]))
			this._dy[t].add(Matrix.mult(this._z[t + 1], this._dy[t + 1]))
		}

		// dz = (s_prev - h) * dy * sigmoid'(z)
		this._dz[t] = Matrix.mult(pre_s, this._dy[t])
		this._dz[t].sub(Matrix.mult(this._h[t], this._dy[t]))
		this._dz[t].mult(this._grad_sigmoid(this._z[t]))

		// dh = (1 - z) * dy * tanh'(h)
		this._dh[t] = Matrix.sub(1, this._z[t])
		this._dh[t].mult(this._dy[t])
		this._dh[t].mult(this._grad_tanh(this._h[t]))

		// dr = (dh U_h^T) * s_prev * sigmoid'(r)
		this._dr[t] = this._dh[t].dot(this._u_h.value.t)
		this._dr[t].mult(pre_s)
		this._dr[t].mult(this._grad_sigmoid(this._r[t]))

		const bi = this._dh[t].dot(this._w_h.value.t)
		bi.add(this._dz[t].dot(this._w_z.value.t))
		bi.add(this._dr[t].dot(this._w_r.value.t))

		if (t === 0) {
			// All timesteps are done: accumulate parameter gradients and report those owned by other layers.
			this._diff_bptt()
			const gp = {}
			if (this._w_r.name) {
				gp[this._w_r.name] = this._dw_r
			}
			if (this._w_z.name) {
				gp[this._w_z.name] = this._dw_z
			}
			if (this._w_h.name) {
				gp[this._w_h.name] = this._dw_h
			}
			if (this._b_r.name) {
				gp[this._b_r.name] = this._db_r
			}
			if (this._b_z.name) {
				gp[this._b_z.name] = this._db_z
			}
			if (this._b_h.name) {
				gp[this._b_h.name] = this._db_h
			}
			if (this._u_r.name) {
				gp[this._u_r.name] = this._du_r
			}
			if (this._u_z.name) {
				gp[this._u_z.name] = this._du_z
			}
			if (this._u_h.name) {
				gp[this._u_h.name] = this._du_h
			}
			if (Object.keys(gp).length > 0) {
				return [bi, gp]
			}
		}
		return bi
	}

	/**
	 * Accumulates the gradients of every parameter over all stored timesteps.
	 * Weight gradients are divided by the row count of the first input; bias gradients use `mean(0)`.
	 */
	_diff_bptt() {
		const s = this._s.length
		const n = this._x[0].rows

		this._dw_r = Matrix.zeros(...this._w_r.sizes)
		this._dw_z = Matrix.zeros(...this._w_z.sizes)
		this._dw_h = Matrix.zeros(...this._w_h.sizes)
		this._db_r = Matrix.zeros(1, this._size)
		this._db_z = Matrix.zeros(1, this._size)
		this._db_h = Matrix.zeros(1, this._size)
		for (let t = 0; t < s; t++) {
			const dwr = this._x[t].tDot(this._dr[t])
			dwr.div(n)
			this._dw_r.add(dwr)
			const dwz = this._x[t].tDot(this._dz[t])
			dwz.div(n)
			this._dw_z.add(dwz)
			const dwh = this._x[t].tDot(this._dh[t])
			dwh.div(n)
			this._dw_h.add(dwh)

			this._db_r.add(this._dr[t].mean(0))
			this._db_z.add(this._dz[t].mean(0))
			this._db_h.add(this._dh[t].mean(0))
		}

		this._du_r = Matrix.zeros(this._size, this._size)
		this._du_z = Matrix.zeros(this._size, this._size)
		this._du_h = Matrix.zeros(this._size, this._size)
		// Timestep 0 uses the all-zero initial state, so the recurrent terms start at the pair (s[0], step 1).
		for (let t = 0; t < s - 1; t++) {
			const dur = this._s[t].tDot(this._dr[t + 1])
			dur.div(n)
			this._du_r.add(dur)
			const duz = this._s[t].tDot(this._dz[t + 1])
			duz.div(n)
			this._du_z.add(duz)
			// The forward pass multiplies U_h by (r * s_prev), not by s_prev alone,
			// so the reset gate of step t + 1 has to be part of the left operand.
			const duh = Matrix.mult(this._r[t + 1], this._s[t]).tDot(this._dh[t + 1])
			duh.div(n)
			this._du_h.add(duh)
		}
	}

	/**
	 * Applies the gradients accumulated by the last backward pass.
	 * @param {object} optimizer Optimizer object providing `delta(key, gradient)`
	 */
	update(optimizer) {
		this._update_bptt(optimizer)
	}

	/**
	 * Subtracts the optimizer's delta from every parameter that is not named.
	 * Named parameters are skipped; their gradients are returned from `grad` instead.
	 * @param {object} optimizer Optimizer object providing `delta(key, gradient)`
	 */
	_update_bptt(optimizer) {
		if (!this._w_r.name) {
			this._w_r.value.sub(optimizer.delta('w_r', this._dw_r))
		}
		if (!this._w_z.name) {
			this._w_z.value.sub(optimizer.delta('w_z', this._dw_z))
		}
		if (!this._w_h.name) {
			this._w_h.value.sub(optimizer.delta('w_h', this._dw_h))
		}
		if (!this._b_r.name) {
			this._b_r.value.sub(optimizer.delta('b_r', this._db_r))
		}
		if (!this._b_z.name) {
			this._b_z.value.sub(optimizer.delta('b_z', this._db_z))
		}
		if (!this._b_h.name) {
			this._b_h.value.sub(optimizer.delta('b_h', this._db_h))
		}
		if (!this._u_r.name) {
			this._u_r.value.sub(optimizer.delta('u_r', this._du_r))
		}
		if (!this._u_z.name) {
			this._u_z.value.sub(optimizer.delta('u_z', this._du_z))
		}
		if (!this._u_h.name) {
			this._u_h.value.sub(optimizer.delta('u_h', this._du_h))
		}
	}

	/**
	 * @returns {object} Every parameter serialized with `Variable#toObject`
	 */
	toObject() {
		return {
			w_z: this._w_z?.toObject(),
			w_r: this._w_r?.toObject(),
			w_h: this._w_h?.toObject(),
			u_z: this._u_z?.toObject(),
			u_r: this._u_r?.toObject(),
			u_h: this._u_h?.toObject(),
			b_z: this._b_z?.toObject(),
			b_r: this._b_r?.toObject(),
			b_h: this._b_h?.toObject(),
		}
	}
}

/**
 * Holder of one parameter: either the name of a graph node, an explicit value, or a lazily created random matrix.
 */
class Variable {
	/**
	 * @param {Layer} layer Owner layer, used to reach the graph when the parameter is named
	 * @param {number[][] | Matrix | string | null} value Name of the node providing the value, the initial value, or a falsy value for lazy initialization
	 * @param {number[]} [sizes] Default sizes used by `get` when it is called without arguments
	 */
	constructor(layer, value, sizes) {
		this._layer = layer
		this._sizes = sizes
		if (typeof value === 'string') {
			this._name = value
		} else if (value) {
			this._value = Matrix.fromArray(value)
		}
	}

	/**
	 * Name of the node providing the value, or `undefined` when the value is held locally.
	 * @type {string | undefined}
	 */
	get name() {
		return this._name
	}

	/**
	 * Most recently resolved value; `undefined` until a value was given or `get` was called.
	 * @type {Matrix | undefined}
	 */
	get value() {
		return this._value
	}

	/**
	 * Sizes of the current value. Requires the value to be resolved already.
	 * @type {number[]}
	 */
	get sizes() {
		return this._value.sizes
	}

	/**
	 * Resolves the value: a named parameter is re-read from the graph on every call,
	 * an existing value is returned as is, otherwise a `Matrix.randn` matrix is created and kept.
	 * @param {...number} sizes Sizes for lazy creation; defaults to the sizes given to the constructor
	 * @returns {Matrix} Current value
	 */
	get(...sizes) {
		if (sizes.length === 0) {
			sizes = this._sizes
		}
		if (this._name) {
			return (this._value = this._layer.graph.getNode(this._name).outputValue)
		} else if (this._value) {
			return this._value
		}
		return (this._value = Matrix.randn(...sizes))
	}

	/**
	 * @returns {string | Array | undefined} The name for a named parameter, otherwise the value as an array (`undefined` when unresolved)
	 */
	toObject() {
		if (this._name) {
			return this._name
		}
		return this._value?.toArray()
	}
}

GRULayer.registLayer('gru')