UNPKG

weblas

Version:

GPU accelerated BLAS for node and the browser

162 lines (119 loc) 4.16 kB
var globals = require('./globals'),
	SGEMMCalculator = require("./sgemmcalculator"),
	SAXPYCalculator = require("./saxpycalculator"),
	SSCALCalculator = require("./sscalcalculator"),
	SDWNSCalculator = require("./sdwnscalculator"),
	SCLMPCalculator = require("./sclmpcalculator"),
	SLOKNCalculator = require("./slokncalculator"),
	Tensor = require('./tensor');

// do we have a WebGL context?
if(globals.gl){
	// yes, load the library
	// (createModule is a function declaration, so it is hoisted and callable here)
	module.exports = createModule(globals.gl);
} else {
	// no, abort and export null
	module.exports = null;
}

/**
 * Build the public module object around a WebGL context.
 *
 * One calculator of each kind is constructed up front and shared by the
 * Tensor-level functions below; the calculators are also exposed on the
 * returned object.
 *
 * @param {*} gl - the WebGL context taken from `globals.gl`
 * @returns {Object} an object exposing `Tensor`, the functions `sscal`,
 *     `sgemm`, `sdwns`, `sclmp`, `slokn`, and the six calculator instances
 */
function createModule(gl){

	// NOTE(review): the meaning of the second constructor argument (false) is
	// defined by the calculator modules and is not visible from this file.
	var sgemmcalculator = new SGEMMCalculator(gl, false),
		saxpycalculator = new SAXPYCalculator(gl, false),
		sscalcalculator = new SSCALCalculator(gl, false),
		sdwnscalculator = new SDWNSCalculator(gl, false),
		sclmpcalculator = new SCLMPCalculator(gl, false),
		slokncalculator = new SLOKNCalculator(gl, false);

	/**
	 * Scale (and optionally offset) a Tensor, elementwise.
	 *
	 * @param {number} a - passed to the calculator (presumably the scale factor)
	 * @param {number} b - passed to the calculator (presumably the offset)
	 * @param {Tensor} t0 - input Tensor; its shape is read as [M, N]
	 * @returns {Tensor} a new Tensor of shape [M, N]
	 */
	function sscal(a, b, t0){

		var M = t0.shape[0],
			N = t0.shape[1];

		// create an empty output Tensor
		var tOut = new Tensor([M, N], null);

		sscalcalculator.calculate(M, N, a, b, t0.texture, tOut.texture);

		return tOut;
	}

	/**
	 * Matrix multiply on t0 and t1 with additive t2. t1 must be transposed.
	 *
	 * @param {number} alpha - passed to the calculator alongside t0 and t1
	 * @param {Tensor} t0 - first input, shape [M, K]
	 * @param {Tensor} t1 - second input (transposed), shape [N, K]
	 * @param {number} beta - passed to the calculator alongside t2
	 * @param {Tensor} [t2] - optional additive input; when falsy, null is
	 *     handed to the calculator in place of its texture
	 * @returns {Tensor} a new Tensor of shape [M, N]
	 * @throws {Error} when the second dimensions of t0 and t1 differ
	 */
	function sgemm(alpha, t0, t1, beta, t2){

		if(t1.shape[1] !== t0.shape[1])
			throw new Error("Second dimension must be of same size for input Tensors (second Tensor is transposed).");

		var M = t0.shape[0],
			N = t1.shape[0],
			K = t0.shape[1];

		var texture2 = t2 ? t2.texture : null;

		// create an empty output Tensor
		var tOut = new Tensor([M, N], null);

		sgemmcalculator.calculate(M, N, K, alpha, t0.texture, t1.texture, beta, texture2, tOut.texture);

		return tOut;
	}

	/**
	 * Run the SDWNS calculator on a multi-channel Tensor.
	 * NOTE(review): presumably a downsample over factor x factor windows;
	 * confirm against sdwnscalculator.
	 *
	 * @param {number} channels - number of channels interleaved in the second
	 *     dimension of t0
	 * @param {number} factor - window size used in the output-size computation
	 * @param {number} stride - step used in the output-size computation
	 * @param {Tensor} t0 - input Tensor; shape is read as [M, N * channels]
	 * @returns {Tensor} a new Tensor of shape [M_out, N_out * channels], where
	 *     M_out = floor((M - factor) / stride) + 1 and likewise for N_out
	 * @throws {Error} when t0.shape[1] is not a multiple of channels
	 */
	function sdwns(channels, factor, stride, t0){

		if(t0.shape[1] % channels !== 0)
			throw new Error("Second dimension of tensor must be a multiple of channels");

		var M = t0.shape[0],
			N = t0.shape[1] / channels;

		var M_out = Math.floor((M - factor) / stride) + 1;
		var N_out = Math.floor((N - factor) / stride) + 1;

		// create an empty output Tensor
		var tOut = new Tensor([M_out, N_out * channels], null);

		sdwnscalculator.calculate(M, N, channels, factor, stride, t0.texture, tOut.texture);

		return tOut;
	}

	/**
	 * Run the SCLMP calculator on a Tensor with bounds a and b.
	 * NOTE(review): presumably an elementwise clamp to [a, b]; confirm against
	 * sclmpcalculator.
	 *
	 * @param {?number} a - lower bound; null/undefined means "no lower bound"
	 * @param {?number} b - upper bound; null/undefined means "no upper bound"
	 * @param {Tensor} t0 - input Tensor; its shape is read as [M, N]
	 * @returns {Tensor} a new Tensor of shape [M, N]
	 */
	function sclmp(a, b, t0){

		// Number.MIN_VALUE is the smallest *positive* double (~5e-324), not the
		// most negative one, so it must not be used as the "unbounded below"
		// default. Use the negated maximum, mirroring the upper bound.
		var lower = (a != null) ? a : -Number.MAX_VALUE;
		var upper = (b != null) ? b : Number.MAX_VALUE;

		var M = t0.shape[0],
			N = t0.shape[1];

		// create an empty output Tensor
		var tOut = new Tensor([M, N], null);

		sclmpcalculator.calculate(M, N, lower, upper, t0.texture, tOut.texture);

		return tOut;
	}

	/**
	 * Linearize onto Kernels. Transform input into one row per patch, for use
	 * in convolution.
	 *
	 * @param {number} channels - number of channels in the input
	 * @param {number} factor - width (and height) of kernels (and patches)
	 * @param {number} stride - number of elements between patches
	 * @param {number} [margin] - added on both sides of each dimension when
	 *     counting patches; any falsy value is treated as 0
	 * @param {Tensor} t0 - the input Tensor; shape is read as [M, N * channels]
	 * @returns {Tensor} a new Tensor of shape
	 *     [M_p * N_p, factor * factor * channels], where M_p and N_p are the
	 *     number of patch rows and columns
	 * @throws {Error} when t0.shape[1] is not a multiple of channels
	 */
	function slokn(channels, factor, stride, margin, t0){

		if(t0.shape[1] % channels !== 0)
			throw new Error("Second dimension of tensor must be a multiple of channels");

		var M = t0.shape[0],
			N = t0.shape[1] / channels;

		// a missing (falsy) margin counts as zero; with zero the formula below
		// reduces to the margin-free patch count
		var pad = margin || 0;

		// number of patches (columns and rows)
		var N_p = Math.ceil((N + (2 * pad) - factor) / stride) + 1;
		var M_p = Math.ceil((M + (2 * pad) - factor) / stride) + 1;

		var P_p = factor * factor * channels; // elements per kernel

		var M_out = (M_p * N_p),
			N_out = P_p;

		// create an empty output Tensor
		var tOut = new Tensor([M_out, N_out], null);

		slokncalculator.calculate(M, N, channels, M_out, N_out, N_p, factor, stride, pad, t0.texture, tOut.texture);

		return tOut;
	}

	return {
		"Tensor" : Tensor,
		"sscal" : sscal,
		"sgemm" : sgemm,
		"sdwns" : sdwns,
		"sclmp" : sclmp,
		"slokn" : slokn,

		"sgemmcalculator" : sgemmcalculator,
		"saxpycalculator" : saxpycalculator,
		"sscalcalculator" : sscalcalculator,
		"sdwnscalculator" : sdwnscalculator,
		"sclmpcalculator" : sclmpcalculator,
		"slokncalculator" : slokncalculator
	};
}