onnxruntime-web
A JavaScript library for running ONNX models on browsers
'use strict';
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
Object.defineProperty(exports, '__esModule', { value: true });
exports.parseResizeAttributesV11 = exports.parseResizeAttributesV10 = exports.resize = void 0;
const glsl_source_1 = require('../glsl-source');
const types_1 = require('../types');
const utils_1 = require('../utils');
const packing_utils_1 = require('./packing-utils');
const upsample_1 = require('./upsample');
const resizeProgramMetadata = {
name: 'Resize',
inputNames: ['A'],
inputTypes: [types_1.TextureType.packed],
};
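// Static program metadata shared by both shader variants generated below: a single
// packed-texture input named 'A'. The concrete shader is built per attribute set and
// cached via the cacheHint key.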
const resize = (inferenceHandler, inputs, attributes) => {
(0, upsample_1.validateInputs)(inputs, attributes);
const output = inferenceHandler.run(
{
...resizeProgramMetadata,
cacheHint: attributes.cacheKey,
get: () => createPackedResizeProgramInfo(inferenceHandler, inputs, attributes),
},
inputs,
);
return [output];
};
exports.resize = resize;
const parseResizeAttributesV10 = (node) => (0, upsample_1.parseUpsampleAttributes)(node, 10);
exports.parseResizeAttributesV10 = parseResizeAttributesV10;
const parseResizeAttributesV11 = (node) => (0, upsample_1.parseUpsampleAttributes)(node, 11);
exports.parseResizeAttributesV11 = parseResizeAttributesV11;
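// Resize first appeared in ONNX opset 10; opset 11 added the coordinate_transformation_mode
// attribute and the optional 'roi' and 'sizes' inputs. Both versions reuse the Upsample
// attribute parser, keyed by the opset version.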
const createPackedResizeProgramInfo = (inferenceHandler, inputs, attributes) => {
const glsl = (0, glsl_source_1.getGlsl)(inferenceHandler.session.backend.glContext.version);
const [scales, outputShape] = prepareInputs(inputs, attributes);
const isSame = scales.every((s) => s === 1) && attributes.coordinateTransformMode !== 'tf_crop_and_resize';
if (isSame) {
return {
...resizeProgramMetadata,
output: { dims: outputShape, type: inputs[0].type, textureType: types_1.TextureType.packed },
hasMain: true,
shaderSource: `void main() {
vec4 v = ${glsl.texture2D}(A, TexCoords);
${glsl.output} = v;
}`,
};
}
const dim = outputShape.length;
if (dim < 2) {
throw new Error(`output dimension should be at least 2, but got ${dim}`);
}
const outputHeight = outputShape[dim - 2];
const outputWidth = outputShape[dim - 1];
const inputShape = inputs[0].dims;
if (dim !== inputShape.length) {
throw new Error(`output dimension should match input ${inputShape.length}, but got ${dim}`);
}
const inputHeight = inputShape[dim - 2];
const inputWidth = inputShape[dim - 1];
const scalesHeight = scales[dim - 2];
const scalesWidth = scales[dim - 1];
let getSourceFracIndex = '';
if (attributes.mode !== 'linear') {
// TODO: support other modes
throw new Error(`resize (packed) does not support mode: '${attributes.mode}'`);
}
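// Fractional source coordinates follow the ONNX Resize coordinate_transformation_mode definitions:
//   asymmetric:         x_orig = x_resized / scale
//   half_pixel:         x_orig = (x_resized + 0.5) / scale - 0.5
//   pytorch_half_pixel: as half_pixel, but 0 when the output length is 1
//   align_corners:      x_orig = x_resized * (len_original - 1) / (len_resized - 1)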
switch (attributes.coordinateTransformMode) {
case 'asymmetric':
getSourceFracIndex = `
vec4 getSourceFracIndex(ivec4 coords) {
return vec4(coords) / scaleWHWH;
}
`;
break;
case 'half_pixel':
getSourceFracIndex = `
vec4 getSourceFracIndex(ivec4 coords) {
return (vec4(coords) + 0.5) / scaleWHWH - 0.5;
}
`;
break;
case 'pytorch_half_pixel':
getSourceFracIndex = `
vec4 getSourceFracIndex(ivec4 coords) {
vec4 fcoords = vec4(coords);
return vec4(
${outputWidth}.0 > 1.0 ? (fcoords.x + 0.5) / scaleWHWH.x - 0.5 : 0.0,
${outputHeight}.0 > 1.0 ? (fcoords.y + 0.5) / scaleWHWH.y - 0.5 : 0.0,
${outputWidth}.0 > 1.0 ? (fcoords.z + 0.5) / scaleWHWH.z - 0.5 : 0.0,
${outputHeight}.0 > 1.0 ? (fcoords.w + 0.5) / scaleWHWH.w - 0.5 : 0.0
);
}
`;
break;
case 'align_corners':
getSourceFracIndex = `
vec4 getSourceFracIndex(ivec4 coords) {
vec4 resized = vec4(${outputWidth}.0 - 1.0, ${outputHeight}.0 - 1.0, ${outputWidth}.0 - 1.0,
${outputHeight}.0 - 1.0);
vec4 original = vec4(${inputWidth}.0 - 1.0, ${inputHeight}.0 - 1.0, ${inputWidth}.0 - 1.0,
${inputHeight}.0 - 1.0);
vec4 new_scale = original / resized;
return vec4(coords) * new_scale;
}
`;
break;
default:
// TODO: support other coordinateTransformModes
throw new Error(`resize (packed) does not support coordinateTransformMode: \
'${attributes.coordinateTransformMode}'`);
}
const coordsDataType = (0, utils_1.getCoordsDataType)(dim);
const unpackChannel = (0, packing_utils_1.unpackFromChannel)();
const shaderSource = `
const vec2 inputWH = vec2(${inputHeight}.0, ${inputWidth}.0);
const vec4 scaleWHWH = vec4(float(${scalesHeight}), float(${scalesWidth}), float(${scalesHeight}), float(${scalesWidth}));
${unpackChannel}
${getSourceFracIndex}
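// getAValue reads one scalar from the packed input A: getA fetches the vec4 texel
// holding the requested element, and getChannel (generated by unpackFromChannel above)
// selects the lane from the parity of the last two coordinates.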
float getAValue(int x10, int r, int c, int d) {
return getChannel(getA(x10, r, c, d), vec2(c, d));
}
void main() {
${coordsDataType} rc = getOutputCoords();
int batch = rc[0];
int depth = rc[1];
// retrieve the 4 coordinates that are used in the 4 packed output values.
ivec4 coords = ivec4(rc.wz, rc.w + 1, rc.z + 1);
// calculate the fractional source indices
vec4 sourceFrac = getSourceFracIndex(coords);
// get the lower and upper bound of the 4 values that will be packed into one texel.
ivec4 x00 = ivec4(max(sourceFrac.xy, vec2(0.0)), min(inputWH - 1.0, ceil(sourceFrac.xy)));
ivec4 x01 = ivec4(max(sourceFrac.xw, vec2(0.0)), min(inputWH - 1.0, ceil(sourceFrac.xw)));
ivec4 x10 = ivec4(max(sourceFrac.zy, vec2(0.0)), min(inputWH - 1.0, ceil(sourceFrac.zy)));
ivec4 x11 = ivec4(max(sourceFrac.zw, vec2(0.0)), min(inputWH - 1.0, ceil(sourceFrac.zw)));
bool hasNextRow = rc.w < ${outputHeight - 1};
bool hasNextCol = rc.z < ${outputWidth - 1};
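// When the output height or width is odd, the second row/column of the last packed
// texel falls outside the logical output; those lanes are zero-filled below.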
// pack x00, x01, x10, x11's top-left corner into one vec4 structure
vec4 topLeft = vec4(
getAValue(batch, depth, x00.x, x00.y),
hasNextCol ? getAValue(batch, depth, x01.x, x01.y) : 0.0,
hasNextRow ? getAValue(batch, depth, x10.x, x10.y) : 0.0,
(hasNextRow && hasNextCol) ? getAValue(batch, depth, x11.x, x11.y) : 0.0);
// pack x00, x01, x10, x11's top-right corner into one vec4 structure
vec4 topRight = vec4(
getAValue(batch, depth, x00.x, x00.w),
hasNextCol ? getAValue(batch, depth, x01.x, x01.w) : 0.0,
hasNextRow ? getAValue(batch, depth, x10.x, x10.w) : 0.0,
(hasNextRow && hasNextCol) ? getAValue(batch, depth, x11.x, x11.w) : 0.0);
// pack x00, x01, x10, x11's bottom-left corner into one vec4 structure
vec4 bottomLeft = vec4(
getAValue(batch, depth, x00.z, x00.y),
hasNextCol ? getAValue(batch, depth, x01.z, x01.y) : 0.0,
hasNextRow ? getAValue(batch, depth, x10.z, x10.y) : 0.0,
(hasNextRow && hasNextCol) ? getAValue(batch, depth, x11.z, x11.y) : 0.0);
// pack x00, x01, x10, x11's bottom-right corner into one vec4 structure
vec4 bottomRight = vec4(
getAValue(batch, depth, x00.z, x00.w),
hasNextCol ? getAValue(batch, depth, x01.z, x01.w) : 0.0,
hasNextRow ? getAValue(batch, depth, x10.z, x10.w) : 0.0,
(hasNextRow && hasNextCol) ? getAValue(batch, depth, x11.z, x11.w) : 0.0);
// calculate the interpolation fractions in the u and v directions
vec4 frac = vec4(sourceFrac) - floor(sourceFrac);
vec4 clampFrac = clamp(frac, vec4(0.0), vec4(1.0));
vec4 top = mix(topLeft, topRight, clampFrac.ywyw);
vec4 bottom = mix(bottomLeft, bottomRight, clampFrac.ywyw);
vec4 newValue = mix(top, bottom, clampFrac.xxzz);
${glsl.output} = vec4(newValue);
}
`;
return {
...resizeProgramMetadata,
output: { dims: outputShape, type: inputs[0].type, textureType: types_1.TextureType.packed },
hasMain: true,
shaderSource,
};
};
const prepareInputs = (inputs, attributes) => {
const x = inputs[0];
const xDims = x.dims;
let scales = attributes.scales;
let outputSizes;
if (scales.length === 0) {
const scalesTensor = inputs[attributes.scalesInputIdx];
if (scalesTensor && scalesTensor.size !== 0) {
if (inputs[attributes.sizesInputIdx]) {
throw new Error('Only one of scales or sizes must be provided as input.');
}
scales = parseScalesData(scalesTensor, attributes.mode, attributes.isResize);
} else {
const sizesTensor = inputs[attributes.sizesInputIdx];
if (!sizesTensor || sizesTensor.size === 0) {
throw new Error('Either scales or sizes MUST be provided as input.');
}
outputSizes = Array.from(sizesTensor.integerData);
scales = parseScalesDataFromOutputSize(outputSizes, xDims, attributes.mode, attributes.isResize);
}
} else {
if (inputs[attributes.sizesInputIdx]) {
throw new Error('Only one of scales or sizes must be provided as input.');
}
}
const yDims = outputSizes || xDims.map((dim, i) => Math.floor(dim * scales[i]));
return [scales, yDims];
};
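// Example (illustrative numbers): for an input of dims [1, 3, 4, 4] with an empty
// 'scales' input and a 'sizes' input of [1, 3, 8, 8], the derived scales are
// [1, 1, 2, 2] and the output dims come from 'sizes' directly; with explicit
// scales [1, 1, 0.5, 0.5] instead, the output dims are floor(dim * scale) = [1, 3, 2, 2].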
const parseScalesData = (scale, mode, isResize) => {
const scales = Array.from(scale.floatData);
(0, upsample_1.scalesValidation)(scales, mode, isResize);
return scales;
};
const parseScalesDataFromOutputSize = (yDims, xDims, mode, isResize) => {
const length = xDims.length;
const scales = new Array(length);
for (let i = 0, end = length; i < end; i++) {
if (xDims[i] === 0) {
if (yDims[i] !== 0) {
throw new Error('Input dim is zero but required output dim is non-zero.');
}
scales[i] = 1;
} else {
scales[i] = yDims[i] / xDims[i];
}
}
(0, upsample_1.scalesValidation)(scales, mode, isResize);
return scales;
};
// ROI data is not used yet, but this is kept here for future use.
// const getRoi = (inputs: Tensor[], attributes: UpsampleAttributes) : number[] => {
// let roi: number[] = [];
// if (attributes.needRoiInput) {
// if (attributes.roiInputIdx <= 0) {
// throw new Error('Invalid roi input index.');
// }
// const roiTensor = inputs[attributes.roiInputIdx];
// roi = roiTensor.size > 0 ? Array.from(roiTensor.floatData) : [];
// } else {
// roi = new Array(inputs[0].dims.length * 2).fill(0);
// }
// return roi;
// };
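// A minimal usage sketch (not part of this module): how a model containing a Resize
// node reaches this WebGL kernel through the public onnxruntime-web API. The model
// path, feed name, and shape below are placeholders.
//
//   const ort = require('onnxruntime-web');
//   async function main() {
//     // 'webgl' selects the backend that registers this packed Resize kernel.
//     const session = await ort.InferenceSession.create('model.onnx', { executionProviders: ['webgl'] });
//     // NCHW float32 input; the feed name 'input' and the dims are hypothetical.
//     const data = new Float32Array(1 * 3 * 4 * 4);
//     const input = new ort.Tensor('float32', data, [1, 3, 4, 4]);
//     const results = await session.run({ input });
//     console.log(results);
//   }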
//# sourceMappingURL=resize-packed.js.map