onnxruntime-web

// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. import { createAttributeWithCacheKey } from '../../../attribute-with-cache-key'; import { InferenceHandler } from '../../../backend'; import { Graph } from '../../../graph'; import { OperatorImplementation, OperatorInitialization } from '../../../operators'; import { Tensor } from '../../../tensor'; import { getGlsl } from '../glsl-source'; import { WebGLInferenceHandler } from '../inference-handler'; import { ProgramInfo, ProgramInfoLoader, ProgramMetadata, TextureType } from '../types'; import { ConvAttributes } from './conv'; import { getActivationSnippet, parseInternalActivationAttributes } from './fuse-utils'; const computeTotalPad = ( inDim: number, stride: number, adj: number, kernel: number, dilation: number, outSize: number, ) => (inDim - 1) * stride + adj + (kernel - 1) * dilation + 1 - outSize; const distributePadding = (totalPad: number, autoPad: string, pads: number[], head: number, tail: number) => { const smallPad = Math.floor(totalPad / 2); if (autoPad === 'SAME_UPPER') { pads[head] = smallPad; pads[tail] = totalPad - smallPad; } else if (autoPad === 'SAME_LOWER') { pads[head] = totalPad - smallPad; pads[tail] = smallPad; } }; const calculateOutputShapeAndPads = ( inputShape: readonly number[], kernelShape: readonly number[], dilations: readonly number[], autoPad: string, pads: number[], strides: readonly number[], outputPadding: readonly number[], outputShape: number[], ) => { const spatialRank = inputShape.length - 2; const updateShape = outputShape.length === 0; for (let i = 0; i < spatialRank; ++i) { const outSize = updateShape ? inputShape[i + 2] * strides[i] : outputShape[i]; const totalPad = computeTotalPad(inputShape[i + 2], strides[i], pads[i], kernelShape[i], dilations[i], outSize); distributePadding(totalPad, autoPad, pads, i, i + spatialRank); if (updateShape) { outputShape.push( strides[i] * (inputShape[i + 2] - 1) + outputPadding[i] + (kernelShape[i] - 1) * dilations[i] + 1 - pads[i] - pads[i + spatialRank], ); } } }; export interface ConvTransposeAttributes extends ConvAttributes { readonly outputPadding: readonly number[]; readonly outputShape: readonly number[]; } export const convTranspose: OperatorImplementation<ConvTransposeAttributes> = ( inferenceHandler: InferenceHandler, inputs: Tensor[], attributes: ConvTransposeAttributes, ): Tensor[] => { validateInputs(inputs, attributes); // currently will fail if not convTranspose2D return convTranspose2d(inferenceHandler, inputs, attributes); }; const convTranspose2d: OperatorImplementation<ConvTransposeAttributes> = ( inferenceHandler: WebGLInferenceHandler, inputs: Tensor[], attributes: ConvTransposeAttributes, ): Tensor[] => { const adjustedAttributes = getAdjustedConvTransposeAttributes(attributes, inputs); return [convTranspose2DUnpacked(inferenceHandler, inputs, adjustedAttributes)]; }; const createConvTransposeProgramMetadata = (hasBias: boolean, cacheHint: string) => ({ name: 'ConvTranspose', inputNames: hasBias ? ['X', 'W', 'B'] : ['X', 'W'], inputTypes: hasBias ? [TextureType.unpacked, TextureType.unpacked, TextureType.unpacked] : [TextureType.unpacked, TextureType.unpacked], cacheHint, }); const createUnpackedConvTransposeProgramInfo = ( inferenceHandler: WebGLInferenceHandler, inputs: readonly Tensor[], metadata: ProgramMetadata, attributes: ConvTransposeAttributes, ): ProgramInfo => { const hasBias = inputs.length > 2; const valueInit = hasBias ? 'getB(output_channel)' : '0.0'; const xShape = inputs[0].dims; const wShape = inputs[1].dims; const outputChannelsPerGroup = wShape[1]; const inputChannelsPerGroup = wShape[0] / attributes.group; const outputShape = [inputs[0].dims[0], inputs[1].dims[1] * attributes.group, ...attributes.outputShape]; const glsl = getGlsl(inferenceHandler.session.backend.glContext.version); const { activationFunction, applyActivation } = getActivationSnippet(attributes); const shaderSource = ` const ivec2 strides = ivec2(${attributes.strides[0]}, ${attributes.strides[1]}); const ivec2 pads = ivec2(${attributes.pads[0]}, ${attributes.pads[1]}); ${activationFunction} void main() { ivec4 coords = getOutputCoords(); int batch = coords.x; int output_channel = coords.y; ivec2 loc = coords.zw + pads; int group_id = output_channel / ${outputChannelsPerGroup}; int wOutChannel = output_channel - group_id * ${outputChannelsPerGroup}; float value = ${valueInit}; for (int inChannelOffset = 0; inChannelOffset < ${inputChannelsPerGroup}; inChannelOffset++) { int input_channel = group_id * ${inputChannelsPerGroup} + inChannelOffset; for (int wWOff = 0; wWOff < ${wShape[2]}; wWOff++) { for (int wHOff = 0; wHOff < ${wShape[3]}; wHOff++) { ivec2 wOff = ivec2(wWOff * ${attributes.dilations[0]}, wHOff * ${attributes.dilations[1]}); ivec2 wLoc = loc - wOff; ivec2 wLocIn = wLoc / strides; if ( wLocIn * strides == wLoc && wLocIn.x >= 0 && wLocIn.x < ${xShape[2]} && wLocIn.y >= 0 && wLocIn.y < ${xShape[3]} ) { float xVal = getX(batch, input_channel, wLocIn.y, wLocIn.x); float wVal = getW(input_channel, wOutChannel, wHOff, wWOff); value += xVal * wVal; } } } } ${applyActivation} ${glsl.output} = vec4(value, .0, .0, .0); } `; return { ...metadata, output: { dims: outputShape, type: inputs[0].type, textureType: TextureType.unpacked }, shaderSource, hasMain: true, }; }; const createUnpackedConvTransposeProgramInfoLoader = ( inferenceHandler: WebGLInferenceHandler, inputs: readonly Tensor[], attributes: ConvTransposeAttributes, ): ProgramInfoLoader => { const metadata = createConvTransposeProgramMetadata(inputs.length > 2, attributes.cacheKey); return { ...metadata, get: () => createUnpackedConvTransposeProgramInfo(inferenceHandler, inputs, metadata, attributes), }; }; const convTranspose2DUnpacked = ( inferenceHandler: WebGLInferenceHandler, inputs: readonly Tensor[], attributes: ConvTransposeAttributes, ): Tensor => { const result = inferenceHandler.run( createUnpackedConvTransposeProgramInfoLoader(inferenceHandler, inputs, attributes), inputs, ); return result; }; const getAdjustedConvTransposeAttributes = <T extends ConvTransposeAttributes>(attributes: T, inputs: Tensor[]): T => { const kernelShape = attributes.kernelShape.slice(); // if kernelShape is not specified in the attributes of this op, infer it from the weight tensor dims if (attributes.kernelShape.length === 0) { for (let i = 2; i < inputs[1].dims.length; ++i) { kernelShape.push(inputs[1].dims[i]); } } const pads = attributes.pads.slice(); const outputShape = attributes.outputShape.slice(); const inputShape = inputs[0].dims; // If outputShape is not specified in the attributes of this op, infer it from the parameters // Similarly, automatically infer pads if not specified calculateOutputShapeAndPads( inputShape, kernelShape, attributes.dilations, attributes.autoPad, pads, attributes.strides, attributes.outputPadding, outputShape, ); // always return a new object so does not modify the original attributes const newAttributes: T = Object.assign({}, attributes); Object.assign(newAttributes, { kernelShape, pads, outputShape, cacheKey: attributes.cacheKey }); return newAttributes; }; export const parseConvTransposeAttributes: OperatorInitialization<ConvTransposeAttributes> = ( node: Graph.Node, ): ConvTransposeAttributes => { const attributes = node.attributes; const activationAttributes = parseInternalActivationAttributes(attributes); // TODO : Make this generic enough to compute default attributes for multi-dimensional conv const autoPad = attributes.getString('auto_pad', 'NOTSET'); const dilations = attributes.getInts('dilations', [1, 1]); const group = attributes.getInt('group', 1); const kernelShape = attributes.getInts('kernel_shape', []); const outputPadding = attributes.getInts('output_padding', [0, 0]); const outputShape = attributes.getInts('output_shape', []); const pads = attributes.getInts('pads', [0, 0, 0, 0]); const strides = attributes.getInts('strides', [1, 1]); return createAttributeWithCacheKey({ autoPad, dilations, group, kernelShape, outputPadding, outputShape, pads, strides, ...activationAttributes, }); }; const validateInputs = (inputs: Tensor[], attributes: ConvTransposeAttributes): void => { // Refer to the below link for all input checks // https://github.com/onnx/onnx/blob/main/docs/Operators.md#Conv if (!inputs || (inputs.length !== 2 && inputs.length !== 3)) { throw new Error('Conv requires 2 or 3 inputs'); } // TODO : Need to add support for multi-dimensional conv if (inputs[0].dims.length !== 4 || inputs[1].dims.length !== 4) { throw new Error('currently only support 2-dimensional conv'); } // FILTER_IN_CHANNEL should be equal to DATA_CHANNEL const dataChannel = inputs[0].dims[1]; const filterInChannel = inputs[1].dims[0]; if (dataChannel !== filterInChannel) { throw new Error('FILTER_IN_CHANNEL should be equal to DATA_CHANNEL'); } const featureMaps = inputs[1].dims[1] * attributes.group; // if bias is provided it should be 1D and the number of elements should be equal to the number of feature maps if (inputs.length === 3 && (inputs[2].dims.length !== 1 || inputs[2].dims[0] !== featureMaps)) { throw new Error('invalid bias'); } const spatialRank = inputs[0].dims.length - 2; // wrong dilations dimension if (attributes.dilations.length !== spatialRank) { throw new Error(`dilations should be ${spatialRank}D`); } // Wrong strides dimension if (attributes.strides.length !== spatialRank) { throw new Error(`strides should be ${spatialRank}D`); } // Wrong pads dimension if (attributes.pads.length !== spatialRank * 2) { throw new Error(`pads should be ${spatialRank * 2}D`); } // Wrong output padding dimension if (attributes.outputPadding.length !== spatialRank) { throw new Error(`output_padding should be ${spatialRank}D`); } // if kernelShape is specified, it's data length must be 2 less than dims length of the weights tensor // (the first 2 dims are batch_size and channels) if (attributes.kernelShape.length !== 0 && attributes.kernelShape.length !== inputs[1].dims.length - 2) { throw new Error('invalid kernel shape'); } // as with kernelShape, must have same number of spatial dims as input if (attributes.outputShape.length !== 0 && attributes.outputShape.length !== inputs[0].dims.length - 2) { throw new Error('invalid output shape'); } // TODO : Need to add support for float64 if (inputs[0].type !== 'float32' || inputs[1].type !== 'float32') { throw new Error('ConvTranspose input(X,W) should be float tensor'); } if (inputs.length === 3 && inputs[2].type !== 'float32') { throw new Error('ConvTranspose input(bias) should be float tensor'); } };