UNPKG

@tensorflow/tfjs-core

Version:

Hardware-accelerated JavaScript library for machine intelligence

81 lines 5.2 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); var DepthwiseConvPacked2DProgram = (function () { function DepthwiseConvPacked2DProgram(convInfo) { this.variableNames = ['x', 'W']; this.usesPackedTextures = true; this.outputShape = convInfo.outShape; var xNumRows = convInfo.inHeight; var xNumCols = convInfo.inWidth; var padTop = convInfo.padInfo.top; var padLeft = convInfo.padInfo.left; var strideHeight = convInfo.strideHeight; var strideWidth = convInfo.strideWidth; var filterHeight = convInfo.filterHeight; var filterWidth = convInfo.filterWidth; var texelsAcross = Math.ceil((filterWidth + 1) / 2); var mainLoop = "int xR; int xC;"; for (var r = 0; r < filterHeight; r++) { for (var c = -padLeft; c < texelsAcross * 2; c++) { mainLoop += "vec4 " + xTexelName(r, c) + " = vec4(0.);"; } for (var c = 0; c < filterWidth; c++) { mainLoop += "\n vec4 wR" + r + "C" + c + " = vec4(0.);\n vec4 xR" + r + "C" + c + " = vec4(0.);"; } } for (var r = 0; r < filterHeight; r++) { for (var c = 0; c < texelsAcross; c++) { var col = c * 2; var left = c * 2 + padLeft; mainLoop += "\n xR = xRCorner + " + r + ";\n xC = xCCorner + " + left + ";\n\n if(xR >= 0 && xR < " + xNumRows + " && xC >= 0 && xC < " + xNumCols + ") {\n " + xTexelName(r, left) + " = getX(batch, xR, xC, d1);\n }"; if (padLeft === 0) { if (col < filterWidth && c === texelsAcross - 1) { if (strideWidth > 1) { mainLoop += "\n vec4 " + xTexelName(r, left + 2) + " = vec4(0.);\n\n if(xR >= 0 && xR < " + xNumRows + " && xC + 2 < " + xNumCols + ") {\n " + xTexelName(r, left + 2) + " = getX(batch, xR, xC + 2, d1);\n }"; } mainLoop += "\n xR" + r + "C" + left + " = " + constructTexel(r, left, strideWidth, padLeft) + ";\n "; } } else if (c === 0) { mainLoop += "\n if(xR >= 0 && xR < " + xNumRows + " && xC - 2 >= 0) {\n " + xTexelName(r, left - 2) + " = getX(batch, xR, xC - 2, d1);\n }"; } if (col > 0) { mainLoop += "xR" + r + "C" + (left - 2) + " =\n " + constructTexel(r, left - 2, strideWidth, padLeft) + ";"; } if (left - 1 >= 0 && left - 1 < filterWidth) { mainLoop += "xR" + r + "C" + (left - 1) + " =\n " + constructTexel(r, left - 1, strideWidth, padLeft) + ";"; } if (col < filterWidth) { mainLoop += "\n vec4 wTexel" + r + "C" + col + " = getW(" + r + ", " + col + ", d1, q);\n wR" + r + "C" + col + " = vec4(wTexel" + r + "C" + col + ".xz, wTexel" + r + "C" + col + ".xz);\n "; if (col + 1 < filterWidth) { mainLoop += "\n vec4 wTexelR" + r + "C" + (col + 1) + " = getW(" + r + ", " + (col + 1) + ", d1, q);\n wR" + r + "C" + (col + 1) + " =\n vec4(wTexelR" + r + "C" + (col + 1) + ".xz, wTexelR" + r + "C" + (col + 1) + ".xz);"; } } } } for (var r = 0; r < filterHeight; r++) { for (var c = 0; c < filterWidth; c++) { mainLoop += "result += xR" + r + "C" + c + " * wR" + r + "C" + c + ";"; } } this.userCode = "\n const ivec2 strides = ivec2(" + strideHeight + ", " + strideWidth + ");\n const ivec2 pads = ivec2(" + padTop + ", " + padLeft + ");\n\n void main() {\n ivec4 coords = getOutputCoords();\n int batch = coords.x;\n ivec2 xRCCorner = coords.yz * strides - pads;\n int d2 = coords.w;\n int d1 = d2;\n int q = 0;\n int xRCorner = xRCCorner.x;\n int xCCorner = xRCCorner.y;\n\n vec4 result = vec4(0.);\n\n " + mainLoop + "\n\n setOutput(result);\n }\n "; } return DepthwiseConvPacked2DProgram; }()); exports.DepthwiseConvPacked2DProgram = DepthwiseConvPacked2DProgram; function xTexelName(r, c) { return "xTexelR" + r + "C" + (c < 0 ? 'minus' + Math.abs(c).toString() : c); } function constructTexel(r, c, stride, padLeft) { if (stride === 1) { if (padLeft % 2 === c % 2) { return xTexelName(r, c); } return "vec4(" + xTexelName(r, c - 1) + ".zw, " + xTexelName(r, c + 1) + ".xy)"; } if (padLeft % 2 === c % 2) { return "vec4(" + xTexelName(r, c) + ".xy, " + xTexelName(r, c + 2) + ".xy)"; } return "vec4(" + xTexelName(r, c - 1) + ".zw, " + xTexelName(r, c + 1) + ".zw)"; } //# sourceMappingURL=conv_packed_gpu_depthwise.js.map