weblas
Version:
GPU accelerated BLAS for node and the browser
30 lines • 75.9 kB
JavaScript
/**
* Modules in this bundle
* @license
*
* weblas:
* license: MIT (http://opensource.org/licenses/MIT)
* author: Waylon Flinn <waylonflinn@gmail.com>
* homepage: https://github.com/waylonflinn/weblas
* version: 0.9.0
*
* This header is generated by licensify (https://github.com/twada/licensify)
*/
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.weblas = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
function createModule(e){function t(t,r,a,l,u,c,n,o){if(null!=o&&o.length!=r)throw new Error("Only vector C with length matching rows in A is currently supported.");var s,p=u,d=o;s=i(a,r,c);var T=e.createDataTexture(t,a,p),f=e.createDataTexture(r,a,s),w=null;null!=d&&(w=e.createDataTexture(1,r,d));var g=e.createOutputTexture(t,r);return x.calculate(t,r,a,l,T,f,n,w,g),rawBuffer=e.readData(t,r),e.context.deleteTexture(T),e.context.deleteTexture(f),null!=w&&e.context.deleteTexture(w),e.context.deleteTexture(g),new Float32Array(rawBuffer)}function r(t,r,l,u){var c,n,o=l;a(u)?n=u:(n=new Float32Array(t),n.fill(u));var i=e.createDataTexture(1,t,o),x=e.createDataTexture(1,t,n),p=e.createOutputTexture(1,t);return s.calculate(t,r,i,x,p),c=e.readData(1,t),e.context.deleteTexture(i),e.context.deleteTexture(x),e.context.deleteTexture(p),new Float32Array(c)}function a(e){return"[object Float32Array]"===Object.prototype.toString.call(e)}function l(t,r,a,l,u){var c,n=u,o=e.createDataTexture(t,r,n),i=e.createOutputTexture(t,r);return p.calculate(t,r,a,l,o,i),c=e.readData(t,r),e.context.deleteTexture(o),e.context.deleteTexture(i),new Float32Array(c)}function u(t,r,a,l,u){var c,n=u,o=e.createDataTexture(t,r,n),i=e.createOutputTexture(t,r);return p.calculate(t,r,1/l,-1*a/l,o,i),c=e.readData(t,r),e.context.deleteTexture(o),e.context.deleteTexture(i),new Float32Array(c)}function c(t,r,a,l,u,c){var n=e.createDataTexture(t,r*a,c),o=Math.floor((r-l)/u)+1,i=Math.floor((t-l)/u)+1,x=e.createOutputTexture(i,o*a);return d.calculate(t,r,a,l,u,n,x),rawBuffer=e.readData(i,o*a),e.context.deleteTexture(n),e.context.deleteTexture(x),new Float32Array(rawBuffer)}function n(t,r,a,l,u){a=null!=a?a:Number.MIN_VALUE,l=null!=l?l:Number.MAX_VALUE;var c,n=u,o=e.createDataTexture(t,r,n),i=e.createOutputTexture(t,r);return T.calculate(t,r,a,l,o,i),c=e.readData(t,r),e.context.deleteTexture(o),e.context.deleteTexture(i),new Float32Array(c)}function o(e,t,r){var a,l,u=[];r?(u[1]=e.length,u[0]=e[0].length):(u[0]=e.length,u[1]=e[0].length),l=u[1],t=t||Float32Array,a=new t(u[0]*u[1]);for(var c=0;c<u[0];++c)for(var n=0;n<u[1];++n)r?a[c*l+n]=e[n][c]:a[c*l+n]=e[c][n];return a}function i(e,t,r){for(var a=new r.constructor(e*t),l=0;e>l;l++)for(var u=0;t>u;u++)a[u*e+l]=r[l*t+u];return a}var x=new SGEMMCalculator(e),s=new SAXPYCalculator(e),p=new SSCALCalculator(e),d=new SDWNSCalculator(e),T=new SCLMPCalculator(e);return{saxpy:r,sscal:l,sgemm:t,sstd:u,sdwns:c,sclmp:n,pipeline:pipeline,gpu:{gl:e,sgemm:pipeline.sgemmcalculator.calculate.bind(pipeline.sgemmcalculator),sscal:pipeline.sscalcalculator.calculate.bind(pipeline.sscalcalculator),sclmp:pipeline.sclmpcalculator.calculate.bind(pipeline.sclmpcalculator),sdwns:pipeline.sdwnscalculator.calculate.bind(pipeline.sdwnscalculator),encode:e.encode.bind(e)},util:{fromArray:o,transpose:i}}}var globals=require("./lib/globals"),pipeline=require("./lib/pipeline"),SGEMMCalculator=require("./lib/sgemmcalculator"),SAXPYCalculator=require("./lib/saxpycalculator"),SSCALCalculator=require("./lib/sscalcalculator"),SDWNSCalculator=require("./lib/sdwnscalculator"),SCLMPCalculator=require("./lib/sclmpcalculator");globals.gl?module.exports=createModule(globals.gl):module.exports=null;
},{"./lib/globals":2,"./lib/pipeline":3,"./lib/saxpycalculator":4,"./lib/sclmpcalculator":5,"./lib/sdwnscalculator":6,"./lib/sgemmcalculator":7,"./lib/sscalcalculator":9}],2:[function(require,module,exports){
var WebGL=require("./webgl"),gl;try{gl=new WebGL}catch(e){gl=null,console.log("No support for WebGL!")}module.exports={gl:gl};
},{"./webgl":11}],3:[function(require,module,exports){
function createModule(e){function l(e,l,r){var a=r.shape[0],t=r.shape[1],u=new Tensor([a,t],null);return s.calculate(a,t,e,l,r.texture,u.texture),u}function r(e,l,r,a,t){if(r.shape[1]!==l.shape[1])throw new Error("Second dimension must be of same size for input Tensors (second Tensor is transposed).");var u,n=l.shape[0],s=r.shape[0],c=l.shape[1];u=t?t.texture:null;var i=new Tensor([n,s],null);return o.calculate(n,s,c,e,l.texture,r.texture,a,u,i.texture),i}function a(e,l,r,a){if(a.shape[1]%e!==0)throw new Error("Second dimension of tensor must be a multiple of channels");var t=a.shape[0],u=a.shape[1]/e,o=Math.floor((t-l)/r)+1,n=Math.floor((u-l)/r)+1,s=new Tensor([o,n*e],null);return c.calculate(t,u,e,l,r,a.texture,s.texture),s}function t(e,l,r){e=null!=e?e:Number.MIN_VALUE,l=null!=l?l:Number.MAX_VALUE;var a=r.shape[0],t=r.shape[1],u=new Tensor([a,t],null);return i.calculate(a,t,e,l,r.texture,u.texture),u}function u(e,l,r,a,t){if(t.shape[1]%e!==0)throw new Error("Second dimension of tensor must be a multiple of channels");var u,o,n=t.shape[0],s=t.shape[1]/e;a?(u=Math.ceil((s+2*a-l)/r)+1,o=Math.ceil((n+2*a-l)/r)+1):(a=0,u=Math.ceil((s-l)/r)+1,o=Math.ceil((n-l)/r)+1);var c=l*l*e,i=o*u,p=c,m=new Tensor([i,p],null);return h.calculate(n,s,e,i,p,u,l,r,a,t.texture,m.texture),m}var o=new SGEMMCalculator(e,!1),n=new SAXPYCalculator(e,!1),s=new SSCALCalculator(e,!1),c=new SDWNSCalculator(e,!1),i=new SCLMPCalculator(e,!1),h=new SLOKNCalculator(e,!1);return{Tensor:Tensor,sscal:l,sgemm:r,sdwns:a,sclmp:t,slokn:u,sgemmcalculator:o,saxpycalculator:n,sscalcalculator:s,sdwnscalculator:c,sclmpcalculator:i,slokncalculator:h}}var globals=require("./globals"),SGEMMCalculator=require("./sgemmcalculator"),SAXPYCalculator=require("./saxpycalculator"),SSCALCalculator=require("./sscalcalculator"),SDWNSCalculator=require("./sdwnscalculator"),SCLMPCalculator=require("./sclmpcalculator"),SLOKNCalculator=require("./slokncalculator"),Tensor=require("./tensor");globals.gl?module.exports=createModule(globals.gl):module.exports=null;
},{"./globals":2,"./saxpycalculator":4,"./sclmpcalculator":5,"./sdwnscalculator":6,"./sgemmcalculator":7,"./slokncalculator":8,"./sscalcalculator":9,"./tensor":10}],4:[function(require,module,exports){
function SAXPYCalculator(t,n){this.webgl=t,this.standalone=n||!0;var e="precision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform sampler2D Y; // texture with data from padded transpose of B\nuniform int N;\nuniform float a; // coefficient to multiplication\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row = outTex.y;\n float col = outTex.x;\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col, row));\n vec4 y = texture2D( Y, vec2(col, row));\n vec4 sum_v = (a * x) + y;\n int channel = int(mod(col * float(N), 4.0 ));\n float sum = select_index_1604150559(sum_v, channel);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1540259130(sum);\n}\n";this.standalone?this.program=this.webgl.createProgram(e):this.program=this.webgl.createProgram(p)}var WebGL=require("./webgl");module.exports=SAXPYCalculator,SAXPYCalculator.TEXTURE_UNIFORM_NAME_0="X",SAXPYCalculator.TEXTURE_UNIFORM_NAME_1="Y",SAXPYCalculator.LENGTH_UNIFORM_NAME="N",SAXPYCalculator.COEFFICIENT_UNIFORM_NAME="a",SAXPYCalculator.prototype.calculate=function(t,n,e,o,a){var l=this.webgl.context;this.webgl.selectProgram(this.program),this.bindInputTexture(e,l.TEXTURE0,SAXPYCalculator.TEXTURE_UNIFORM_NAME_0),this.bindInputTexture(o,l.TEXTURE1,SAXPYCalculator.TEXTURE_UNIFORM_NAME_1);var i=this.webgl.getPad(t);this.bindUniforms(t+i,n),this.webgl.bindOutputTexture(1,t+i,a),l.drawElements(l.TRIANGLES,6,l.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(l.TEXTURE0),this.webgl.unbindInputTexture(l.TEXTURE1)},SAXPYCalculator.prototype.bindInputTexture=function(t,n,e){var o=this.webgl.context,a=this.program;o.activeTexture(n),o.bindTexture(o.TEXTURE_2D,t);var l=o.getUniformLocation(a,e);o.uniform1i(l,n-o.TEXTURE0)},SAXPYCalculator.prototype.bindUniforms=function(t,n){var e=this.webgl.context,o=e.getUniformLocation(this.program,SAXPYCalculator.LENGTH_UNIFORM_NAME),a=e.getUniformLocation(this.program,SAXPYCalculator.COEFFICIENT_UNIFORM_NAME);e.uniform1i(o,t),e.uniform1f(a,n)};
},{"./webgl":11}],5:[function(require,module,exports){
function SCLMPCalculator(n,t){this.webgl=n,this.standalone=null!=t?t:!0;var e="precision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float a; // lower bound\nuniform float b; // upper bound\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1604150559(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1540259130(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row = outTex.y;\n float col = outTex.x;\n\n // return 0.0 if in padded region of output texture\n if(col * float(N + pad) > float(N) ) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col, row));\n vec4 val = clamp(x, a, b);\n\n // select and output channel (standalone version only)\n int channel = int(mod(col * float(N + pad), 4.0));\n float sum = select_index_1540259130(val, channel);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1604150559(sum);\n}\n",o="precision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float a; // lower bound\nuniform float b; // upper bound\n\n// set pad values to 0.0, if in padded region of output texture\nvoid fix_pad_1540259130(inout vec4 v, int pad){\n v.a = 0.0;\n if(pad == 2){\n v.b = 0.0;\n } else if(pad == 3){\n v.b = 0.0;\n v.g = 0.0;\n }\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col_t, row_t));\n vec4 val_v = clamp(x, a, b);\n\n // is last element in pixel past row length?\n if(pad > 0 && (col + 4.0) > float(N) ) {\n // fix elements in padded region\n fix_pad_1540259130(val_v, pad);\n }\n\n gl_FragColor = val_v;\n}\n";this.standalone?this.program=this.webgl.createProgram(e):this.program=this.webgl.createProgram(o)}var WebGL=require("./webgl");module.exports=SCLMPCalculator,SCLMPCalculator.TEXTURE_UNIFORM_NAME_0="X",SCLMPCalculator.LENGTH_UNIFORM_NAME="N",SCLMPCalculator.LOWER_UNIFORM_NAME="a",SCLMPCalculator.UPPER_UNIFORM_NAME="b",SCLMPCalculator.prototype.calculate=function(n,t,e,o,a,l){e=null!=e?e:Number.MIN_VALUE,o=null!=o?o:Number.MAX_VALUE;var i=this.webgl.context;this.webgl.selectProgram(this.program),this.bindInputTexture(a,i.TEXTURE0,SCLMPCalculator.TEXTURE_UNIFORM_NAME_0);var r=this.webgl.getPad(t);this.bindUniforms(t,r,e,o),this.standalone?this.webgl.bindOutputTexture(n,t+r,l):this.webgl.bindOutputTexture(n,(t+r)/4,l),i.drawElements(i.TRIANGLES,6,i.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(i.TEXTURE0)},SCLMPCalculator.prototype.bindInputTexture=function(n,t,e){var o=this.webgl.context,a=this.program;o.activeTexture(t),o.bindTexture(o.TEXTURE_2D,n);var l=o.getUniformLocation(a,e);o.uniform1i(l,t-o.TEXTURE0)},SCLMPCalculator.prototype.bindUniforms=function(n,t,e,o){var a=this.webgl.context,l=a.getUniformLocation(this.program,SCLMPCalculator.LENGTH_UNIFORM_NAME),i=a.getUniformLocation(this.program,SCLMPCalculator.UPPER_UNIFORM_NAME),r=a.getUniformLocation(this.program,SCLMPCalculator.LOWER_UNIFORM_NAME),u=a.getUniformLocation(this.program,"pad");a.uniform1i(l,n),a.uniform1i(u,t),a.uniform1f(r,e),a.uniform1f(i,o)};
},{"./webgl":11}],6:[function(require,module,exports){
function DownsampleCalculator(n,o){this.webgl=n,this.standalone=null!=o?o:!0;var t="// TODO: unroll loop for stride == factor and small values (2, 3)\nprecision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int factor; // width of image patch\nuniform float stride; // width between image patches\nuniform float C; // number of channels\nuniform float M;\nuniform float N;\nuniform float N_out;\nuniform float M_out;\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate and translate to output pixel space.\n float row = floor(outTex.y * M_out); // row on output texture (matrix space)\n float col = floor(outTex.x * N_out); // column on output texture (matrix space)\n float vcol = floor(col / C); // virtual column on output texture (matrix space)\n float vchannel = floor(mod(col, C)); // virtual channel on output texture\n\n const float min = -1.0e+08;\n vec4 currentMax = vec4(min, min, min, min);\n\n float deltaY = 1.0/M;\n float deltaX = 1.0/N;\n float y = ((row * stride) + 0.5)*deltaY; // texture position of input row\n float x;\n float z = vchannel * deltaX;\n for (int i = 0; i < 100; i += 1) {\n if (i >= factor) {\n break;\n }\n x = ((vcol * stride * C) + 0.5) * deltaX; // texture position of input column\n\n for (int j = 0; j < 100; j += 1) {\n if (j >= factor) {\n break;\n }\n\n vec2 coords = vec2(x + z, y);\n vec4 x_v = texture2D(X, coords);\n currentMax = max(currentMax, x_v);\n\n x += (deltaX * C);\n }\n y += deltaY;\n }\n int chan = int(mod(outTex.x * N_out, 4.0 ));\n float val = select_index_1604150559(currentMax, int(chan));\n if (val == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n gl_FragColor = encode_float_1540259130(val);\n}\n";p="// TODO: unroll loop for stride == factor and small values (2, 3)\nprecision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int factor; // width of image patch\nuniform float stride; // width between image patches\nuniform float C; // number of channels\nuniform float M;\nuniform float N;\nuniform float N_out;\nuniform float M_out;\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate and translate to output pixel space.\n float row = floor(outTex.y * M_out); // row on output texture (pixel space)\n float col = floor(outTex.x * N_out); // column on output texture (matrix space)\n float vcol = floor(col / C); // virtual column on output texture (matrix space)\n float vchannel = floor(mod(col, C)); // virtual channel on output texture\n\n const float min = -1.0e+08;\n vec4 currentMax = vec4(min, min, min, min);\n\n float deltaY = 1.0/M;\n float deltaX = 1.0/N;\n float y = ((row * stride) + 0.5)*deltaY; // texture position of input row\n float x;\n float z = vchannel * deltaX;\n for (int i = 0; i < 100; i += 1) {\n if (i >= factor) {\n break;\n }\n x = ((vcol * stride * C) + 0.5) * deltaX; // texture position of input column\n\n for (int j = 0; j < 100; j += 1) {\n if (j >= factor) {\n break;\n }\n\n vec2 coords = vec2(x + z, y);\n vec4 x_v = texture2D(X, coords);\n currentMax = max(currentMax, x_v);\n\n x += (deltaX * C);\n }\n y += deltaY;\n }\n\n gl_FragColor = currentMax;\n}\n",this.standalone?this.program=this.webgl.createProgram(t):this.program=this.webgl.createProgram(p)}var WebGL=require("./webgl");module.exports=DownsampleCalculator,DownsampleCalculator.TEXTURE_UNIFORM_NAME_0="X",DownsampleCalculator.INPUT_ROW_COUNT_UNIFORM_NAME="M",DownsampleCalculator.INPUT_COLUMN_COUNT_UNIFORM_NAME="N",DownsampleCalculator.OUTPUT_ROW_COUNT_UNIFORM_NAME="M_out",DownsampleCalculator.OUTPUT_COLUMN_COUNT_UNIFORM_NAME="N_out",DownsampleCalculator.FACTOR_UNIFORM_NAME="factor",DownsampleCalculator.STRIDE_UNIFORM_NAME="stride",DownsampleCalculator.CHANNEL_COUNT_UNIFORM_NAME="C",DownsampleCalculator.prototype.calculate=function(n,o,t,a,e,l,r){if(t%WebGL.COMPONENTS_PER_TEXEL!=0)throw new Error("Channel count must be a multiple of "+WebGL.COMPONENTS_PER_TEXEL);var i=this.webgl.context,u=(Math.floor((o-a)/e)+1)*t,f=Math.floor((n-a)/e)+1;this.webgl.selectProgram(this.program),this.bindInputTexture(l,i.TEXTURE0,DownsampleCalculator.TEXTURE_UNIFORM_NAME_0),this.bindUniforms(n,o*t,f,u,a,e,t),this.standalone?this.webgl.bindOutputTexture(f,u,r):this.webgl.bindOutputTexture(f,u/WebGL.COMPONENTS_PER_TEXEL,r),i.drawElements(i.TRIANGLES,6,i.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(i.TEXTURE0)},DownsampleCalculator.prototype.bindInputTexture=function(n,o,t){var a=this.webgl.context,e=this.program;a.activeTexture(o),a.bindTexture(a.TEXTURE_2D,n);var l=a.getUniformLocation(e,t);a.uniform1i(l,o-a.TEXTURE0)},DownsampleCalculator.prototype.bindUniforms=function(n,o,t,a,e,l,r){var i=this.webgl.context,u=i.getUniformLocation(this.program,DownsampleCalculator.INPUT_ROW_COUNT_UNIFORM_NAME),f=i.getUniformLocation(this.program,DownsampleCalculator.INPUT_COLUMN_COUNT_UNIFORM_NAME),m=i.getUniformLocation(this.program,DownsampleCalculator.OUTPUT_ROW_COUNT_UNIFORM_NAME),c=i.getUniformLocation(this.program,DownsampleCalculator.OUTPUT_COLUMN_COUNT_UNIFORM_NAME),s=i.getUniformLocation(this.program,DownsampleCalculator.FACTOR_UNIFORM_NAME),p=i.getUniformLocation(this.program,DownsampleCalculator.STRIDE_UNIFORM_NAME),d=i.getUniformLocation(this.program,DownsampleCalculator.CHANNEL_COUNT_UNIFORM_NAME);i.uniform1f(u,n),i.uniform1f(f,o),i.uniform1f(m,t),i.uniform1f(c,a),i.uniform1i(s,e),i.uniform1f(p,l),i.uniform1f(d,r)};
},{"./webgl":11}],7:[function(require,module,exports){
function SGEMMCalculator(t,e){this.webgl=t,this.standalone=null!=e?e:!0;var n="// fragment shader that calculates the matrix product and renders each\n// element to the bytes representing a 32-bit IEEE754 floating point in\n// the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1604150559(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n\n // sum row x col for the passed pixel\n float sum = alpha * dot_rowcol_1540259130(row_t, col_t * float(N + pad)/float(N), A, B_t, K);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1604150559(sum);\n}\n",o="// fragment shader that calculates the matrix product (with additive 'C' term)\n// and renders each element to the bytes representing a 32-bit IEEE754 floating\n// point in the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform sampler2D C; // texture with data from C\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\nuniform float beta; // coefficient to additive term\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1117569599(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n vec4 c_vec = texture2D(C, vec2(col_t, 0.5));\n\n // should be -0.5, but that subtly breaks at zero\n float col = col_t * float(N + pad); // index of first element in pixel (matrix space)\n int channel = int(mod(col, 4.0 ));\n float c = select_index_1604150559(c_vec, channel);\n\n // sum row x col for the passed pixel\n float sum = alpha * dot_rowcol_1540259130(row_t, col_t * float(N + pad)/float(N), A, B_t, K);\n sum += beta * c;\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1117569599(sum);\n}\n",a="// fragment shader that calculates the matrix product and writes each\n// element to a pixel component in a floating point texture.\n// the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n\n vec4 sum_v = vec4(0.0, 0.0, 0.0, 0.0);\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n sum_v.r = alpha * dot_rowcol_1540259130(row_t, (col + 0.5)/float(N), A, B_t, K);\n // is last element in pixel past row length?\n if(pad > 0 && (col + 4.0) > float(N) ) {\n // compute elements in padded region\n if(pad < 3){\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n }\n if(pad < 2){\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n }\n } else {\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n sum_v.a = alpha * dot_rowcol_1540259130(row_t, (col + 3.5)/float(N), A, B_t, K);\n }\n\n gl_FragColor = sum_v;\n}\n",r="// fragment shader that calculates the matrix product and writes each\n// element to a pixel component in a floating point texture.\n// the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform sampler2D C; // texture with data from C\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\nuniform float beta; // coefficient to addition\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n vec4 c_v = texture2D(C, vec2(col_t, 0.5));\n\n vec4 sum_v = vec4(0.0, 0.0, 0.0, 0.0);\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n sum_v.r = alpha * dot_rowcol_1540259130(row_t, (col + 0.5)/float(N), A, B_t, K);\n // in the padding region?\n if(pad > 0 && (col + 4.0) > float(N) ) {\n // pad\n if(pad < 3){\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n }\n if(pad < 2){\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n }\n } else {\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n sum_v.a = alpha * dot_rowcol_1540259130(row_t, (col + 3.5)/float(N), A, B_t, K);\n }\n\n gl_FragColor = sum_v + beta*c_v;\n}\n";this.standalone?(this.program_=this.webgl.createProgram(n),this.program_c=this.webgl.createProgram(o)):(this.program_=this.webgl.createProgram(a),this.program_c=this.webgl.createProgram(r))}var WebGL=require("./webgl");module.exports=SGEMMCalculator,SGEMMCalculator.TEXTURE_UNIFORM_NAME_0="A",SGEMMCalculator.TEXTURE_UNIFORM_NAME_1="B_t",SGEMMCalculator.TEXTURE_UNIFORM_NAME_2="C",SGEMMCalculator.SHARED_LENGTH_UNIFORM_NAME="K",SGEMMCalculator.COLUMN_COUNT_UNIFORM_NAME="N",SGEMMCalculator.PAD_UNIFORM_NAME="pad",SGEMMCalculator.ALPHA_UNIFORM_NAME="alpha",SGEMMCalculator.BETA_UNIFORM_NAME="beta",SGEMMCalculator.prototype.calculate=function(t,e,n,o,a,r,i,l,s){var u=this.webgl.context;null!=l?this.program=this.program_c:(i=null,this.program=this.program_),this.webgl.selectProgram(this.program),this.bindInputTexture(a,u.TEXTURE0,SGEMMCalculator.TEXTURE_UNIFORM_NAME_0),this.bindInputTexture(r,u.TEXTURE1,SGEMMCalculator.TEXTURE_UNIFORM_NAME_1),null!=l&&this.bindInputTexture(l,u.TEXTURE2,SGEMMCalculator.TEXTURE_UNIFORM_NAME_2);var m=this.webgl.getPad(n),d=this.webgl.getPad(e);this.bindUniforms(e,n+m,d,o,i),this.standalone?this.webgl.bindOutputTexture(t,e+d,s):this.webgl.bindOutputTexture(t,(e+d)/4,s),u.drawElements(u.TRIANGLES,6,u.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(u.TEXTURE0),this.webgl.unbindInputTexture(u.TEXTURE1),this.webgl.unbindInputTexture(u.TEXTURE2)},SGEMMCalculator.prototype.bindInputTexture=function(t,e,n){var o=this.webgl.context,a=this.program;o.activeTexture(e),o.bindTexture(o.TEXTURE_2D,t);var r=o.getUniformLocation(a,n);o.uniform1i(r,e-o.TEXTURE0)},SGEMMCalculator.prototype.bindUniforms=function(t,e,n,o,a){var r=this.webgl.context,i=r.getUniformLocation(this.program,SGEMMCalculator.SHARED_LENGTH_UNIFORM_NAME),l=r.getUniformLocation(this.program,SGEMMCalculator.ALPHA_UNIFORM_NAME),s=r.getUniformLocation(this.program,SGEMMCalculator.BETA_UNIFORM_NAME),u=r.getUniformLocation(this.program,SGEMMCalculator.COLUMN_COUNT_UNIFORM_NAME),m=m=r.getUniformLocation(this.program,SGEMMCalculator.PAD_UNIFORM_NAME);r.uniform1f(s,a),r.uniform1i(u,t),r.uniform1i(m,n),r.uniform1i(i,e),r.uniform1f(l,o)};
},{"./webgl":11}],8:[function(require,module,exports){
function SLOKNCalculator(n,o){this.webgl=n,this.standalone=null!=o?o:!0;var e="precision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform float factor; // width of image patch\nuniform float stride; // width between image patches\nuniform float margin;\nuniform float N_p; // patches across\nuniform float M;\nuniform float N;\nuniform float pad;\nuniform float M_in;\nuniform float N_in;\nuniform float C; // number of channels in input\nuniform float pad_in;\n\n// select an element from a vector based on index\nfloat select_index_1540259130(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\n// translate a linear index into x, y coordinates for a matrix\nvec2 linear_index_coords_1604150559(float linear_index, float row_length){\n vec2 coords;\n\n coords.x = floor(mod(linear_index + 0.5, row_length)); // column\n coords.y = floor((linear_index + 0.5) / row_length); // row\n\n return coords;\n}\n\n// set pad values to 0.0, if in padded region of output texture\nvoid fix_pad_1117569599(inout vec4 v, int pad){\n v.a = 0.0;\n if(pad == 2){\n v.b = 0.0;\n } else if(pad == 3){\n v.b = 0.0;\n v.g = 0.0;\n }\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate\n float row_t = outTex.y;\n float col_t = outTex.x;\n\n // row corresponds to patch\n float row = floor(row_t * M) + 0.5;\n // column corresponds to placement in patch\n float col_0 = floor(col_t * (N + pad) - 1.5); // index of first element in output pixel (matrix space)\n\n // N_p = patches across\n float col_patch = floor(mod(row, N_p)); // column index in grid of patches\n float row_patch = floor(row / N_p); // row index in grid of patches\n float col_in_0 = (col_patch * stride - margin) * C; // input column index of left element in patch\n float row_in_0 = row_patch * stride - margin; // input row index of top element in patch\n\n vec4 pixel_in;\n vec4 result = vec4(0.0, 0.0, 0.0, 0.0);\n vec2 coords = linear_index_coords_1604150559(col_0, factor * C); // coords inside patch\n vec2 ncoords;\n int channel_in = int(mod(col_in_0 + coords.x, 4.0));\n vec2 scale_in = vec2(1.0/(N_in + pad_in), 1.0/M_in); // scale from matrix to input texture coords\n vec2 offset_in = vec2(col_in_0 + 2.0 - float(channel_in), row_in_0 + 0.5); // offset into patch (and pixel)\n\n const vec2 pixel_scale = vec2(1.0/4.0, 1.0); // scale from matrix to pixel coords\n\n pixel_in = texture2D(X, (coords + offset_in) * scale_in);\n\n // go through channels for current output pixel\n for(int channel = 0; channel < 4; channel++){\n\n // are we on a new input pixel?\n ncoords = linear_index_coords_1604150559(col_0 + float(channel), factor * C);\n\n // are we in the margin or outside the input texture?\n if((col_in_0 + ncoords.x + 0.5 < 0.0) || (row_in_0 + ncoords.y + 0.5 < 0.0) ||\n (col_in_0 + ncoords.x + 0.5) > (N_in) || row_in_0 + ncoords.y + 0.5 > M_in){\n // yes, create a virtual pixel\n pixel_in = vec4(0.0, 0.0, 0.0, 0.0);\n } else if(floor(ncoords * pixel_scale) != floor(coords * pixel_scale)){\n // no, get the get the next real pixel\n coords = ncoords;\n offset_in.x += float(channel_in);\n channel_in = 0;\n pixel_in = texture2D(X, (coords + offset_in) * scale_in);\n }\n\n if(channel == 0){\n result.r = select_index_1540259130(pixel_in, channel_in);\n } else if(channel == 1){\n result.g = select_index_1540259130(pixel_in, channel_in);\n } else if(channel == 2){\n result.b = select_index_1540259130(pixel_in, channel_in);\n } else {\n result.a = select_index_1540259130(pixel_in, channel_in);\n }\n\n channel_in++;\n offset_in.x -= 1.0;\n }\n\n // fix padded region\n if(pad > 0.0 && col_0 + 4.0 > N ) {\n fix_pad_1117569599(result, int(pad));\n }\n\n //gl_FragColor = vec4(row_in_0, col_in_0, channel_in, N_p);\n gl_FragColor = result;\n}\n";this.standalone?this.program=this.webgl.createProgram(s):this.program=this.webgl.createProgram(e)}var WebGL=require("./webgl");module.exports=SLOKNCalculator,SLOKNCalculator.TEXTURE_UNIFORM_NAME_0="X",SLOKNCalculator.STRIDE_UNIFORM_NAME="stride",SLOKNCalculator.KERNEL_WIDTH_UNIFORM_NAME="factor",SLOKNCalculator.prototype.calculate=function(n,o,e,i,t,r,a,l,c,f,s){var _=this.webgl.context,d=this.webgl.getPad(o*e),u=this.webgl.getPad(t);this.webgl.selectProgram(this.program),this.bindInputTexture(f,_.TEXTURE0,SLOKNCalculator.TEXTURE_UNIFORM_NAME_0),this.bindUniforms(i,t,u,n,o*e,e,d,r,a,l,c),this.standalone?this.webgl.bindOutputTexture(i,t+u,s):this.webgl.bindOutputTexture(i,(t+u)/4,s),_.drawElements(_.TRIANGLES,6,_.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(_.TEXTURE0)},SLOKNCalculator.prototype.bindInputTexture=function(n,o,e){var i=this.webgl.context,t=this.program;i.activeTexture(o),i.bindTexture(i.TEXTURE_2D,n);var r=i.getUniformLocation(t,e);i.uniform1i(r,o-i.TEXTURE0)},SLOKNCalculator.prototype.bindUniforms=function(n,o,e,i,t,r,a,l,c,f,s){var _=this.webgl.context,d=_.getUniformLocation(this.program,"M"),u=_.getUniformLocation(this.program,"N"),p=_.getUniformLocation(this.program,"C"),m=_.getUniformLocation(this.program,"M_in"),h=_.getUniformLocation(this.program,"N_in"),x=_.getUniformLocation(this.program,SLOKNCalculator.STRIDE_UNIFORM_NAME),g=_.getUniformLocation(this.program,SLOKNCalculator.KERNEL_WIDTH_UNIFORM_NAME),v=_.getUniformLocation(this.program,"pad"),w=_.getUniformLocation(this.program,"pad_in"),N=_.getUniformLocation(this.program,"N_p");margin_gl=_.getUniformLocation(this.program,"margin"),_.uniform1f(d,n),_.uniform1f(u,o),_.uniform1f(v,e),_.uniform1f(m,i),_.uniform1f(h,t),_.uniform1f(p,r),_.uniform1f(w,a),_.uniform1f(N,l),_.uniform1f(g,c),_.uniform1f(x,f),_.uniform1f(margin_gl,s)};
},{"./webgl":11}],9:[function(require,module,exports){
function SSCALCalculator(n,t){this.webgl=n,this.standalone=null!=t?t:!0;var e="precision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded X\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float b; // additive term\nuniform float a; // multiplicative term\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row = outTex.y;\n float col = outTex.x;\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col, row));\n vec4 sum_v = (a * x) + b;\n int channel = int(mod(col * float(N + pad), 4.0 ));\n float sum = select_index_1604150559(sum_v, channel);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1540259130(sum);\n}\n",o="precision highp float;\n#define GLSLIFY 1\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded X\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float b; // additive term\nuniform float a; // multiplicative term\n\n// set pad values to 0.0, if in padded region of output texture\nvoid fix_pad_1540259130(inout vec4 v, int pad){\n v.a = 0.0;\n if(pad == 2){\n v.b = 0.0;\n } else if(pad == 3){\n v.b = 0.0;\n v.g = 0.0;\n }\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col_t, row_t));\n vec4 sum_v = (a * x) + b;\n\n // fix padded region\n if(pad > 0 && col + 4.0 > float(N) ) {\n fix_pad_1540259130(sum_v, pad);\n }\n\n gl_FragColor = sum_v;\n}\n