@tensorflow/tfjs-core
Version:
Hardware-accelerated JavaScript library for machine intelligence
81 lines • 5.2 kB
JavaScript
"use strict";
// Tag the CommonJS exports object as originating from an ES module so that
// interop helpers in consumers can tell the two module shapes apart.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Shader-program descriptor for a depthwise 2D convolution.
 *
 * The constructor does not run any GPU work itself: it only assembles a GLSL
 * source string (`userCode`) plus the metadata fields `variableNames`,
 * `usesPackedTextures` and `outputShape`. The generated GLSL calls `getX`,
 * `getW`, `getOutputCoords` and `setOutput`, none of which are defined in this
 * file. NOTE(review): presumably they are injected by the shader compiler that
 * consumes this descriptor; confirm against the caller.
 */
var DepthwiseConvPacked2DProgram = (function () {
/**
 * @param convInfo Convolution description. The fields read here are
 *     `outShape`, `inHeight`, `inWidth`, `padInfo.top`, `padInfo.left`,
 *     `strideHeight`, `strideWidth`, `filterHeight` and `filterWidth`.
 */
function DepthwiseConvPacked2DProgram(convInfo) {
// Names of the two shader inputs: the input tensor and the filter weights.
this.variableNames = ['x', 'W'];
this.usesPackedTextures = true;
this.outputShape = convInfo.outShape;
var xNumRows = convInfo.inHeight;
var xNumCols = convInfo.inWidth;
var padTop = convInfo.padInfo.top;
var padLeft = convInfo.padInfo.left;
var strideHeight = convInfo.strideHeight;
var strideWidth = convInfo.strideWidth;
var filterHeight = convInfo.filterHeight;
var filterWidth = convInfo.filterWidth;
// Number of x texels fetched per filter row. The fetch loop below advances
// two columns per texel (col = c * 2).
// NOTE(review): presumably because a packed texel holds two adjacent columns; confirm.
var texelsAcross = Math.ceil((filterWidth + 1) / 2);
// GLSL statements are accumulated here and spliced into main() at the end.
var mainLoop = "int xR; int xC;";
// Pass 1: declare every GLSL variable up front, zero-initialised, so that
// fetches skipped by the bounds checks in pass 2 leave zeros behind.
for (var r = 0; r < filterHeight; r++) {
// Raw input texels, starting at column -padLeft (negative columns are
// spelled "minus<N>" by xTexelName).
for (var c = -padLeft; c < texelsAcross * 2; c++) {
mainLoop += "vec4 " + xTexelName(r, c) + " = vec4(0.);";
}
// One weight vector (wR<r>C<c>) and one assembled input vector (xR<r>C<c>)
// per filter tap.
for (var c = 0; c < filterWidth; c++) {
mainLoop += "\n vec4 wR" + r + "C" + c + " = vec4(0.);\n vec4 xR" + r + "C" + c + " = vec4(0.);";
}
}
// Pass 2: for each filter row, fetch the input texels, assemble the per-tap
// input vectors from them, and fetch the weights.
for (var r = 0; r < filterHeight; r++) {
for (var c = 0; c < texelsAcross; c++) {
// `col` is the filter column this texel starts at; `left` is the same
// position shifted by the left padding and is used for texel naming and
// for the xC offset.
var col = c * 2;
var left = c * 2 + padLeft;
// Bounds-checked fetch of the texel at (xRCorner + r, xCCorner + left).
mainLoop += "\n xR = xRCorner + " + r + ";\n xC = xCCorner + " + left + ";\n\n if(xR >= 0 && xR < " + xNumRows + " && xC >= 0 && xC < " + xNumCols + ") {\n " + xTexelName(r, left) + " = getX(batch, xR, xC, d1);\n }";
if (padLeft === 0) {
// No left padding: only the last texel of the row is handled here.
if (col < filterWidth && c === texelsAcross - 1) {
if (strideWidth > 1) {
// With a stride above 1, constructTexel reads the texel two columns
// further right. That name lies outside the range declared in pass 1,
// so it is declared here as well as fetched.
mainLoop += "\n vec4 " + xTexelName(r, left + 2) + " = vec4(0.);\n\n if(xR >= 0 && xR < " + xNumRows + " && xC + 2 < " + xNumCols + ") {\n " + xTexelName(r, left + 2) + " = getX(batch, xR, xC + 2, d1);\n }";
}
mainLoop += "\n xR" + r + "C" + left + " = " + constructTexel(r, left, strideWidth, padLeft) + ";\n ";
}
}
else if (c === 0) {
// Non-zero left padding, first texel of the row: also fetch the texel two
// columns to the left (already declared in pass 1).
mainLoop += "\n if(xR >= 0 && xR < " + xNumRows + " && xC - 2 >= 0) {\n " + xTexelName(r, left - 2) + " = getX(batch, xR, xC - 2, d1);\n }";
}
// Assemble the input vector two columns back, now that the texel at `left`
// has been fetched.
if (col > 0) {
mainLoop += "xR" + r + "C" + (left - 2) + " =\n " + constructTexel(r, left - 2, strideWidth, padLeft) + ";";
}
// Assemble the input vector one column back when that column is a valid
// filter tap.
if (left - 1 >= 0 && left - 1 < filterWidth) {
mainLoop += "xR" + r + "C" + (left - 1) + " =\n " + constructTexel(r, left - 1, strideWidth, padLeft) + ";";
}
// Fetch the weights for taps `col` and `col + 1`; the .xz components are
// duplicated to fill the vec4.
if (col < filterWidth) {
mainLoop += "\n vec4 wTexel" + r + "C" + col + " = getW(" + r + ", " + col + ", d1, q);\n wR" + r + "C" + col + " = vec4(wTexel" + r + "C" + col + ".xz, wTexel" + r + "C" + col + ".xz);\n ";
if (col + 1 < filterWidth) {
mainLoop += "\n vec4 wTexelR" + r + "C" + (col + 1) + " = getW(" + r + ", " + (col + 1) + ", d1, q);\n wR" + r + "C" + (col + 1) + " =\n vec4(wTexelR" + r + "C" + (col + 1) + ".xz, wTexelR" + r + "C" + (col + 1) + ".xz);";
}
}
}
}
// Pass 3: accumulate input * weight over every filter tap.
for (var r = 0; r < filterHeight; r++) {
for (var c = 0; c < filterWidth; c++) {
mainLoop += "result += xR" + r + "C" + c + " * wR" + r + "C" + c + ";";
}
}
// Final shader: derive the input-window corner from the output coordinates,
// strides and pads, run the generated statements, and write out the sum.
this.userCode = "\n const ivec2 strides = ivec2(" + strideHeight + ", " + strideWidth + ");\n const ivec2 pads = ivec2(" + padTop + ", " + padLeft + ");\n\n void main() {\n ivec4 coords = getOutputCoords();\n int batch = coords.x;\n ivec2 xRCCorner = coords.yz * strides - pads;\n int d2 = coords.w;\n int d1 = d2;\n int q = 0;\n int xRCorner = xRCCorner.x;\n int xCCorner = xRCCorner.y;\n\n vec4 result = vec4(0.);\n\n " + mainLoop + "\n\n setOutput(result);\n }\n ";
}
return DepthwiseConvPacked2DProgram;
}());
// Expose the program class on the CommonJS exports object.
exports.DepthwiseConvPacked2DProgram = DepthwiseConvPacked2DProgram;
/**
 * Builds the GLSL identifier of the input texel at filter row `r` and
 * column `c`.
 *
 * GLSL identifiers cannot contain a minus sign, so a negative column is
 * written as the word "minus" followed by its absolute value.
 *
 * @param {number} r Filter row index.
 * @param {number} c Column index; may be negative.
 * @returns {string} For example "xTexelR0C2" or "xTexelR1Cminus1".
 */
function xTexelName(r, c) {
    var columnPart;
    if (c < 0) {
        columnPart = 'minus' + String(Math.abs(c));
    }
    else {
        columnPart = c;
    }
    return 'xTexelR' + r + 'C' + columnPart;
}
/**
 * Returns the GLSL expression that yields the input vector for filter tap
 * (`r`, `c`), built from the raw texel variables named by `xTexelName`.
 *
 * When `c` has the same parity as `padLeft`, the expression starts at the
 * texel named for `c`; otherwise it is stitched from the texels named for
 * `c - 1` and `c + 1`. A stride of exactly 1 and any other stride select
 * different component swizzles.
 *
 * The parity test uses JavaScript's `%`, which keeps the sign of the
 * dividend, so `c` and `padLeft` are expected to be non-negative.
 *
 * @param {number} r Filter row index.
 * @param {number} c Filter column index (already offset by the padding).
 * @param {number} stride Horizontal stride.
 * @param {number} padLeft Left padding.
 * @returns {string} A GLSL vec4 expression.
 */
function constructTexel(r, c, stride, padLeft) {
    var sameParity = (c % 2) === (padLeft % 2);
    var unitStride = stride === 1;
    if (sameParity) {
        var here = xTexelName(r, c);
        if (unitStride) {
            return here;
        }
        return 'vec4(' + here + '.xy, ' + xTexelName(r, c + 2) + '.xy)';
    }
    var before = xTexelName(r, c - 1);
    var after = xTexelName(r, c + 1);
    var afterSwizzle = unitStride ? '.xy' : '.zw';
    return 'vec4(' + before + '.zw, ' + after + afterSwizzle + ')';
}
//# sourceMappingURL=conv_packed_gpu_depthwise.js.map