@tensorflow/tfjs-core
Version:
Hardware-accelerated JavaScript library for machine intelligence
1,543 lines (1,315 loc) • 138 kB
text/typescript
/**
* @license
* Copyright 2017 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
import * as seedrandom from 'seedrandom';
import {ENGINE} from '../../engine';
import {env} from '../../environment';
import {warn} from '../../log';
import * as array_ops_util from '../../ops/array_ops_util';
import * as axis_util from '../../ops/axis_util';
import * as broadcast_util from '../../ops/broadcast_util';
import {complex, imag, real} from '../../ops/complex_ops';
import * as concat_util from '../../ops/concat_util';
import {Conv2DInfo, Conv3DInfo} from '../../ops/conv_util';
import * as erf_util from '../../ops/erf_util';
import {Activation, FusedBatchMatMulConfig, FusedConv2DConfig} from '../../ops/fused_util';
import * as gather_nd_util from '../../ops/gather_nd_util';
import * as ops from '../../ops/ops';
import {buffer, scalar, tensor, tensor4d} from '../../ops/ops';
import * as scatter_nd_util from '../../ops/scatter_nd_util';
import * as selu_util from '../../ops/selu_util';
import {computeFlatOffset, computeOutShape, isSliceContinous} from '../../ops/slice_util';
import {DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer} from '../../tensor';
import {BackendValues, DataType, DataValues, NumericDataType, Rank, ShapeMap, TypedArray, upcastType} from '../../types';
import * as util from '../../util';
import {getArrayFromDType, inferDtype, now, sizeFromShape} from '../../util';
import {BackendTimingInfo, DataStorage, EPSILON_FLOAT32, KernelBackend} from '../backend';
import * as backend_util from '../backend_util';
import * as complex_util from '../complex_util';
import {nonMaxSuppressionV3} from '../non_max_suppression_impl';
import {split} from '../split_shared';
import {tile} from '../tile_impl';
import {topkImpl} from '../topk_impl';
import {whereImpl} from '../where_impl';
import {assertNotComplex} from './cpu_util';
/**
 * Applies a fused activation to `x` using the CPU backend's own kernels.
 * Throws for activations the CPU backend does not implement.
 */
function mapActivation(
    backend: MathBackendCPU, x: Tensor, activation: Activation,
    preluActivationWeights?: Tensor): Tensor {
  switch (activation) {
    case 'linear':
      return backend.linear(x);
    case 'relu':
      return backend.relu(x);
    case 'elu':
      return backend.elu(x);
    case 'relu6':
      return backend.relu6(x);
    case 'prelu':
      return backend.prelu(x, preluActivationWeights);
    default:
      throw new Error(
          `Activation ${activation} has not been implemented for the CPU backend.`);
  }
}
/**
 * Backend-side storage record for a single tensor on the CPU backend.
 * `values` may be absent (e.g. a complex parent holds no flat values of its
 * own; its parts live in `complexTensors`).
 */
export interface TensorData<D extends DataType> {
  values?: BackendValues;
  dtype: D;
  // For complex numbers, the real and imaginary parts are stored as their own
  // individual tensors, with a parent joining the two with the
  // complexTensors field.
  // TODO(smilkov): Replace Tensor with TensorInfo when you modularize ops
  // that work with complex tensors.
  complexTensors?: {real: Tensor, imag: Tensor};
}
export class MathBackendCPU extends KernelBackend {
// Tile size used by batchMatMul for cache-friendly blocked multiplication.
public blockSize = 48;
// Maps DataId -> stored values/dtype (and complex parts) for this backend.
data: DataStorage<TensorData<DataType>>;
// Tracks whether write() has run yet, to print a one-time Node.js hint.
private firstUse = true;
constructor() {
  super();
  this.data = new DataStorage(this, ENGINE);
}
/**
 * Registers `values` under a fresh DataId and returns the id. On the very
 * first write, logs a one-time performance hint when running under Node.js.
 * `shape` is part of the KernelBackend interface but is not stored here.
 */
write(values: BackendValues, shape: number[], dtype: DataType): DataId {
  if (this.firstUse) {
    this.firstUse = false;
    if (env().get('IS_NODE')) {
      warn(
          '\n============================\n' +
          'Hi there 👋. Looks like you are running TensorFlow.js in ' +
          'Node.js. To speed things up dramatically, install our node ' +
          'backend, which binds to TensorFlow C++, by running ' +
          'npm i @tensorflow/tfjs-node, ' +
          'or npm i @tensorflow/tfjs-node-gpu if you have CUDA. ' +
          'Then call require(\'@tensorflow/tfjs-node\'); (-gpu ' +
          'suffix for CUDA) at the start of your program. ' +
          'Visit https://github.com/tensorflow/tfjs-node for more details.' +
          '\n============================');
    }
  }
  // DataIds are identity objects; the DataStorage map keys off the reference.
  const dataId = {};
  this.data.set(dataId, {values, dtype});
  return dataId;
}
/**
 * Associates an existing DataId with new values/dtype (overwrites any prior
 * entry). `shape` is accepted for the interface but not stored.
 */
move(dataId: DataId, values: BackendValues, shape: number[], dtype: DataType):
    void {
  this.data.set(dataId, {values, dtype});
}
/** Number of data buckets currently tracked by this backend. */
numDataIds(): number {
  return this.data.numDataIds();
}
/** Async read; on CPU this just wraps the synchronous read in a Promise. */
async read(dataId: DataId): Promise<BackendValues> {
  return this.readSync(dataId);
}
/**
 * Synchronously returns the flat values for `dataId`. complex64 data is
 * stored as separate real/imag tensors, so it is re-interleaved on read.
 */
readSync(dataId: DataId): BackendValues {
  const {dtype, complexTensors} = this.data.get(dataId);
  if (dtype === 'complex64') {
    const realValues =
        this.readSync(complexTensors.real.dataId) as Float32Array;
    const imagValues =
        this.readSync(complexTensors.imag.dataId) as Float32Array;
    // Interleave into [re0, im0, re1, im1, ...].
    return complex_util.mergeRealAndImagArrays(realValues, imagValues);
  }
  return this.data.get(dataId).values;
}
/**
 * Wraps a tensor's backing data in a TensorBuffer for indexed access.
 * String tensors are stored as encoded bytes and must be decoded first.
 */
private bufferSync<R extends Rank>(t: Tensor<R>): TensorBuffer<R> {
  const data = this.readSync(t.dataId);
  let decodedData = data as DataValues;
  if (t.dtype === 'string') {
    try {
      // Decode the bytes into string.
      decodedData = (data as Uint8Array[]).map(d => util.decodeString(d));
    } catch {
      throw new Error('Failed to decode encoded string bytes into utf-8');
    }
  }
  return buffer(t.shape, t.dtype, decodedData) as TensorBuffer<R>;
}
/** Writes `values` into backend storage and wraps them in an engine tensor. */
private makeOutput<T extends Tensor>(
    values: BackendValues, shape: number[], dtype: DataType): T {
  const dataId = this.write(values, shape, dtype);
  return ENGINE.makeTensorFromDataId(dataId, shape, dtype, this) as T;
}
disposeData(dataId: DataId): void {
if (this.data.has(dataId)) {
const {complexTensors} = this.data.get(dataId);
if (complexTensors != null) {
complexTensors.real.dispose();
complexTensors.imag.dispose();
}
this.data.delete(dataId);
}
}
/**
 * Times `f` with a wall clock. CPU kernels run synchronously, so the elapsed
 * time of the call is the kernel time.
 */
async time(f: () => void): Promise<BackendTimingInfo> {
  const begin = now();
  f();
  return {kernelMs: now() - begin};
}
/**
 * Memory report. On CPU the JS engine's garbage collector owns allocation,
 * so only an unreliable upper bound can be reported.
 */
memory() {
  return {
    // Unreliable due to automatic gc. The numbers above are cumulative.
    unreliable: true,
    reasons:
        ['The reported memory is an upper bound. Due to automatic garbage ' +
         'collection, the true allocated memory may be less.']
  };
}
/**
 * Builds a complex64 tensor from real and imaginary parts. The parent tensor
 * stores no flat values itself; it owns kept clones of the two parts.
 */
complex<T extends Tensor>(real: T, imag: T): T {
  const result = this.makeOutput(null, real.shape, 'complex64');
  const resultData = this.data.get(result.dataId);
  // The backend owns the reference to the underlying real and imaginary
  // clones. These will explicitly get disposed when the complex tensor is
  // disposed.
  resultData.complexTensors = {
    real: ENGINE.keep(real.clone()),
    imag: ENGINE.keep(imag.clone())
  };
  return result as T;
}
/** Returns a clone of the real component of a complex64 tensor. */
real<T extends Tensor>(input: T): T {
  const resultData = this.data.get(input.dataId);
  return resultData.complexTensors.real.clone() as T;
}
/** Returns a clone of the imaginary component of a complex64 tensor. */
imag<T extends Tensor>(input: T): T {
  const resultData = this.data.get(input.dataId);
  return resultData.complexTensors.imag.clone() as T;
}
/**
 * Extracts a slice of `x` starting at `begin` with the given `size`.
 * Fast path: when the slice is contiguous in memory, a flat subarray copy is
 * used; otherwise elements are gathered one by one via buffer indexing.
 */
slice<T extends Tensor>(x: T, begin: number[], size: number[]): T {
  assertNotComplex(x, 'slice');
  const isContinous = isSliceContinous(x.shape, begin, size);
  if (isContinous) {
    const flatOffset = computeFlatOffset(begin, x.strides);
    const length = util.sizeFromShape(size);
    const vals = this.readSync(x.dataId) as TypedArray;
    return tensor(
        vals.subarray(flatOffset, flatOffset + length), size,
        x.dtype) as T;
  }
  const buffer = ops.buffer(size, x.dtype);
  const xBuf = this.bufferSync(x);
  for (let i = 0; i < buffer.size; ++i) {
    const loc = buffer.indexToLoc(i);
    // Shift each output coordinate by `begin` to find the source element.
    const xLoc = loc.map((idx, j) => idx + begin[j]);
    buffer.values[i] = xBuf.get(...xLoc);
  }
  return buffer.toTensor() as T;
}
/**
 * Strided slice: gathers x[begin[j] + loc[j] * strides[j]] for each output
 * coordinate. Returns an empty tensor when any output dimension is 0.
 */
stridedSlice<T extends Tensor>(
    x: T, begin: number[], end: number[], strides: number[]): T {
  assertNotComplex(x, 'stridedSlice');
  const outShape = computeOutShape(begin, end, strides);
  if (outShape.some(axis => axis === 0)) {
    return ops.tensor([], outShape) as T;
  }
  const buffer = ops.buffer(outShape, x.dtype);
  const xBuf = this.bufferSync(x);
  for (let i = 0; i < buffer.size; i++) {
    const loc = buffer.indexToLoc(i);
    const newLoc: number[] = new Array(loc.length);
    for (let j = 0; j < newLoc.length; j++) {
      newLoc[j] = loc[j] * strides[j] + begin[j];
    }
    buffer.set(xBuf.get(...newLoc), ...loc);
  }
  return buffer.toTensor() as T;
}
/**
 * Returns a [x.size, x.size] tensor whose main diagonal holds x's values and
 * whose off-diagonal entries are zero.
 */
diag(x: Tensor): Tensor {
  const diagVals = this.readSync(x.dataId) as TypedArray;
  const out = ops.buffer([x.size, x.size], x.dtype);
  const outVals = out.values;
  for (let i = 0; i < diagVals.length; ++i) {
    outVals[i * x.size + i] = diagVals[i];
  }
  return out.toTensor();
}
/**
 * Splits `x` into x.shape[axis] tensors along `axis`, each with that axis
 * removed. Implemented as repeated slice + reshape.
 */
unstack(x: Tensor, axis: number): Tensor[] {
  const num = x.shape[axis];
  // Output shape is x.shape with `axis` deleted.
  const outShape: number[] = new Array(x.rank - 1);
  let outIndex = 0;
  for (let i = 0; i < x.rank; i++) {
    if (i !== axis) {
      outShape[outIndex++] = x.shape[i];
    }
  }
  const begin = new Array(x.rank).fill(0);
  const size = x.shape.slice();
  size[axis] = 1;
  const res = new Array(num);
  for (let i = 0; i < res.length; i++) {
    begin[axis] = i;
    res[i] = this.slice(x, begin, size).reshape(outShape);
  }
  return res;
}
/** Reverses `x` along every axis listed in `axis`. */
reverse<T extends Tensor>(x: T, axis: number[]): T {
  assertNotComplex(x, 'reverse');
  const out = ops.buffer(x.shape, x.dtype);
  const xBuf = this.bufferSync(x);
  for (let i = 0; i < out.size; i++) {
    const outLoc = out.indexToLoc(i);
    // Mirror the coordinate along each reversed axis.
    const inLoc = outLoc.slice();
    for (const ax of axis) {
      inLoc[ax] = x.shape[ax] - 1 - inLoc[ax];
    }
    out.set(xBuf.get(...inLoc), ...outLoc);
  }
  return out.toTensor() as T;
}
/**
 * Concatenates tensors along `axis`. Complex inputs are split into real and
 * imaginary parts, concatenated independently, and recombined. Real inputs
 * are flattened to 2D so the concat always happens along the column axis.
 */
concat(tensors: Tensor[], axis: number): Tensor {
  if (tensors[0].dtype === 'complex64') {
    const reals = tensors.map((t) => real(t));
    const imags = tensors.map((t) => imag(t));
    return complex(this.concat(reals, axis), this.concat(imags, axis));
  }
  const tensors2D = tensors.map(t => {
    // Collapse the dims from `axis` onward into columns; all earlier dims
    // become rows, so a 2D column-wise concat is equivalent.
    const innerSize = util.sizeFromShape(t.shape.slice(axis));
    return t.as2D(-1, innerSize);
  });
  const outShape =
      concat_util.computeOutShape(tensors2D.map(t => t.shape), 1 /* axis */);
  const values =
      ops.buffer(outShape as [number, number], tensors[0].dtype as 'float32')
          .values;
  if (tensors2D[0].shape[0] === 1) {
    // Use built-in TypedArray.set() method for speed.
    let offset = 0;
    tensors2D.forEach(t => {
      values.set(this.readSync(t.dataId) as TypedArray, offset);
      offset += t.size;
    });
  } else {
    // General case: copy each input row-by-row into its column window.
    let colOffset = 0;
    tensors2D.forEach(t => {
      const tVals = this.readSync(t.dataId) as TypedArray;
      let tIdx = 0;
      for (let row = 0; row < t.shape[0]; ++row) {
        const resIdx = row * outShape[1] + colOffset;
        for (let col = 0; col < t.shape[1]; ++col) {
          values[resIdx + col] = tVals[tIdx++];
        }
      }
      colOffset += t.shape[1];
    });
  }
  const finalOutShape =
      concat_util.computeOutShape(tensors.map(t => t.shape), axis);
  return tensor(values, finalOutShape, tensors[0].dtype);
}
/** Element-wise negation, implemented as multiplication by scalar -1. */
neg<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'neg');
  return this.multiply(ops.scalar(-1), x) as T;
}
/**
 * Element-wise a + b with broadcasting. Complex inputs add component-wise;
 * real inputs upcast to the wider of the two dtypes.
 */
add(a: Tensor, b: Tensor): Tensor {
  if (a.dtype === 'complex64' || b.dtype === 'complex64') {
    return this.broadcastedBinaryComplexOp(
        a.cast('complex64'), b.cast('complex64'),
        (aReal, aImag, bReal, bImag) => {
          return {real: aReal + bReal, imag: aImag + bImag};
        });
  }
  return this.broadcastedBinaryOp(
      a, b, upcastType(a.dtype, b.dtype),
      (aValue, bValue) => aValue + bValue);
}
/** Element-wise sum of a list of same-shaped tensors. */
addN<T extends Tensor>(tensors: T[]): T {
  assertNotComplex(tensors, 'addN');
  const inputs = tensors.map(t => this.readSync(t.dataId) as TypedArray);
  const out = ops.buffer(tensors[0].shape, tensors[0].dtype as 'float32');
  const outVals = out.values;
  // Accumulate each input into the (zero-initialized) output buffer.
  for (const currVals of inputs) {
    for (let j = 0; j < outVals.length; j++) {
      outVals[j] += currVals[j];
    }
  }
  return out.toTensor() as T;
}
/**
 * Softmax along `dim`, computed in the numerically stable form
 * exp(x - max(x)) / sum(exp(x - max(x))).
 */
softmax<T extends Tensor>(logits: T, dim: number): T {
  const axes = util.parseAxisParam([dim], logits.shape);
  // Subtract the per-slice max before exponentiation to avoid overflow.
  const maxLogit = this.max(logits, axes);
  const expandedShape = axis_util.expandShapeToKeepDim(maxLogit.shape, axes);
  const a = this.subtract(logits, maxLogit.reshape(expandedShape));
  const b = this.exp(a);
  const sumExp = this.sum(b, axes).reshape(expandedShape);
  return this.realDivide(b, sumExp) as T;
}
/**
 * Element-wise a - b with broadcasting. Complex inputs subtract
 * component-wise; real inputs upcast to the wider of the two dtypes.
 */
subtract(a: Tensor, b: Tensor): Tensor {
  if (a.dtype === 'complex64' || b.dtype === 'complex64') {
    return this.broadcastedBinaryComplexOp(
        a.cast('complex64'), b.cast('complex64'),
        (aReal, aImag, bReal, bImag) => {
          return {real: aReal - bReal, imag: aImag - bImag};
        });
  }
  return this.broadcastedBinaryOp(
      a, b, upcastType(a.dtype, b.dtype),
      (aValue, bValue) => aValue - bValue);
}
/** Element-wise a ^ b with broadcasting. The result keeps a's dtype. */
pow<T extends Tensor>(a: T, b: Tensor): T {
  assertNotComplex([a, b], 'pow');
  return this.broadcastedBinaryOp(
      a, b, a.dtype, (aValue, bValue) => Math.pow(aValue, bValue)) as
      T;
}
/**
 * Batched matrix multiply of two rank-3 tensors, with optional transposes.
 * Uses blocked (tiled) loops of size `this.blockSize` for cache locality;
 * partial products from each k-block accumulate into the result buffer.
 */
batchMatMul(
    a: Tensor3D, b: Tensor3D, transposeA: boolean,
    transposeB: boolean): Tensor3D {
  assertNotComplex([a, b], 'matMul');
  const sharedDim = transposeA ? a.shape[1] : a.shape[2];
  const leftDim = transposeA ? a.shape[2] : a.shape[1];
  const rightDim = transposeB ? b.shape[1] : b.shape[2];
  const batchDim = a.shape[0];
  const aValues = this.readSync(a.dataId) as TypedArray;
  const bValues = this.readSync(b.dataId) as TypedArray;
  // Strides are swapped instead of materializing transposed copies.
  const [aBatch, aOuterStep, aInnerStep] = transposeA ?
      [a.strides[0], 1, a.strides[1]] :
      [a.strides[0], a.strides[1], 1];
  const [bInnerStep, bOuterStep, bBatch] = transposeB ?
      [1, b.strides[1], b.strides[0]] :
      [b.strides[1], 1, b.strides[0]];
  const size = leftDim * rightDim;
  const result = buffer([batchDim, leftDim, rightDim], a.dtype);
  const resVals = result.values as TypedArray;
  const blockSize = this.blockSize;
  for (let b = 0; b < batchDim; b++) {
    for (let i0 = 0; i0 < leftDim; i0 += blockSize) {
      for (let j0 = 0; j0 < rightDim; j0 += blockSize) {
        for (let k0 = 0; k0 < sharedDim; k0 += blockSize) {
          // for when blockSize doesn't evenly divide the input
          const iBlock = Math.min(i0 + blockSize, leftDim);
          const jBlock = Math.min(j0 + blockSize, rightDim);
          const kBlock = Math.min(k0 + blockSize, sharedDim);
          for (let i = i0; i < iBlock; i++) {
            for (let j = j0; j < jBlock; j++) {
              let sum = 0.0;
              for (let k = k0; k < kBlock; k++) {
                sum += aValues[b * aBatch + i * aOuterStep + k * aInnerStep] *
                    bValues[k * bInnerStep + j * bOuterStep + b * bBatch];
              }
              // += (not =): successive k-blocks contribute to the same cell.
              resVals[b * size + (i * rightDim + j)] += sum;
            }
          }
        }
      }
    }
  }
  return result.toTensor() as Tensor3D;
}
/**
 * batchMatMul followed by optional bias add and activation, matching the
 * fused-op contract (matmul -> bias -> activation).
 */
fusedBatchMatMul(
    {a, b, transposeA, transposeB, bias, activation, preluActivationWeights}:
        FusedBatchMatMulConfig): Tensor3D {
  let result = this.batchMatMul(a, b, transposeA, transposeB);
  if (bias) {
    result = this.add(result, bias) as Tensor3D;
  }
  if (activation) {
    result =
        mapActivation(this, result, activation, preluActivationWeights) as
        Tensor3D;
  }
  return result;
}
/**
 * Element-wise a * b with broadcasting. Complex inputs use the standard
 * complex product; real inputs upcast to the wider of the two dtypes.
 */
multiply(a: Tensor, b: Tensor): Tensor {
  if (a.dtype === 'complex64' || b.dtype === 'complex64') {
    return this.broadcastedBinaryComplexOp(
        a.cast('complex64'), b.cast('complex64'),
        (aReal, aImag, bReal, bImag) => {
          return {
            real: aReal * bReal - aImag * bImag,
            imag: aReal * bImag + aImag * bReal
          };
        });
  }
  return this.broadcastedBinaryOp(
      a, b, upcastType(a.dtype, b.dtype),
      (aValue, bValue) => aValue * bValue);
}
/** Element-wise true division with broadcasting; always yields float32. */
realDivide(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'realDivide');
  return this.broadcastedBinaryOp(
      a, b, 'float32', (aVal: number, bVal: number) => aVal / bVal);
}
/** Element-wise floor(a / b) with broadcasting; always yields int32. */
floorDiv(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'floorDiv');
  return this.broadcastedBinaryOp(
      a, b, 'int32', (aVal: number, bVal: number) => Math.floor(aVal / bVal));
}
/**
 * Sums over the innermost `axes`. Result dtype is x's dtype upcast with
 * int32 (so bool inputs sum as int32).
 */
sum(x: Tensor, axes: number[]): Tensor {
  assertNotComplex(x, 'sum');
  axis_util.assertAxesAreInnerMostDims('sum', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const resultDtype = upcastType(x.dtype, 'int32');
  const result = ops.zeros(outShape, resultDtype);
  const reduceSize = util.sizeFromShape(reduceShape);
  const vals = this.readSync(result.dataId) as TypedArray;
  const aVals = this.readSync(x.dataId) as TypedArray;
  // Inner-most axes means each output element reduces a contiguous run.
  for (let i = 0; i < vals.length; ++i) {
    const offset = i * reduceSize;
    let sum = 0;
    for (let j = 0; j < reduceSize; ++j) {
      sum += aVals[offset + j];
    }
    vals[i] = sum;
  }
  return result;
}
/**
 * Computes the product over the given `axes`. Result dtype is x's dtype
 * upcast with int32 (so bool/int inputs produce int32).
 */
prod(x: Tensor, axes: number[]): Tensor {
  // Fix: the op name passed to assertNotComplex was 'sum', producing a
  // misleading error message for complex inputs. This kernel is 'prod'.
  assertNotComplex(x, 'prod');
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const resultDtype = upcastType(x.dtype, 'int32');
  const result = ops.zeros(outShape, resultDtype);
  const reduceSize = util.sizeFromShape(reduceShape);
  const vals = this.readSync(result.dataId) as TypedArray;
  const aVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < vals.length; ++i) {
    const offset = i * reduceSize;
    let prod = 1;
    for (let j = 0; j < reduceSize; ++j) {
      prod *= aVals[offset + j];
    }
    vals[i] = prod;
  }
  return result;
}
/**
 * Sums x's rows into `numSegments` buckets chosen by `segmentIds`, by
 * building a float mask per segment and reducing mask * x over axis 0.
 */
unsortedSegmentSum<T extends Tensor>(
    x: T, segmentIds: Tensor1D, numSegments: number): Tensor {
  assertNotComplex(x, 'unsortedSegmentSum');
  const res = [];
  // Reshape the segment id's so that they can be broadcast with
  // x. The new shape should be [segmentIds.shape, 1, ..., 1]
  const numIters = x.rank - segmentIds.rank;
  for (let i = 0; i < numIters; ++i) {
    segmentIds = segmentIds.expandDims(i + 1);
  }
  for (let i = 0; i < numSegments; ++i) {
    const segmentId = ops.scalar(i, 'int32');
    const mask = ops.equal(segmentId, segmentIds).asType('float32');
    const sum = mask.mul(x).sum(0);
    res.push(sum);
  }
  return ops.stack(res);
}
/**
 * Index of the minimum along `axis` (must be an inner-most dim). Ties
 * resolve to the first (lowest-index) occurrence.
 */
argMin(x: Tensor, axis: number): Tensor {
  assertNotComplex(x, 'argMin');
  const axes = [axis];
  axis_util.assertAxesAreInnerMostDims('argMin', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const result = ops.zeros(outShape, 'int32');
  const reduceSize = util.sizeFromShape(reduceShape);
  const vals = this.readSync(result.dataId) as TypedArray;
  const aVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < vals.length; ++i) {
    const offset = i * reduceSize;
    let min = aVals[offset];
    let minIndex = 0;
    for (let j = 0; j < reduceSize; ++j) {
      const value = aVals[offset + j];
      if (value < min) {
        min = value;
        minIndex = j;
      }
    }
    vals[i] = minIndex;
  }
  return result;
}
/**
 * Index of the maximum along `axis` (must be an inner-most dim). Ties
 * resolve to the first (lowest-index) occurrence.
 */
argMax(x: Tensor, axis: number): Tensor {
  assertNotComplex(x, 'argMax');
  const axes = [axis];
  axis_util.assertAxesAreInnerMostDims('argMax', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const result = ops.zeros(outShape, 'int32');
  const reduceSize = util.sizeFromShape(reduceShape);
  const outVals = this.readSync(result.dataId) as TypedArray;
  const xVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < outVals.length; ++i) {
    const base = i * reduceSize;
    let bestVal = xVals[base];
    let bestIdx = 0;
    for (let j = 1; j < reduceSize; ++j) {
      const value = xVals[base + j];
      if (value > bestVal) {
        bestVal = value;
        bestIdx = j;
      }
    }
    outVals[i] = bestIdx;
  }
  return result;
}
/**
 * Cumulative sum along the inner-most axis only. `exclusive` shifts the sum
 * so each element excludes itself; `reverse` accumulates from the end.
 */
cumsum(x: Tensor, axis: number, exclusive: boolean, reverse: boolean):
    Tensor {
  assertNotComplex(x, 'cumsum');
  if (axis !== x.rank - 1) {
    throw new Error(
        `backend.cumsum in CPU expects an inner-most axis=${x.rank - 1} ` +
        `but got axis=${axis}`);
  }
  const resultDtype = upcastType(x.dtype, 'int32');
  const result = ops.zeros(x.shape, resultDtype);
  const vals = this.readSync(result.dataId) as TypedArray;
  const aVals = this.readSync(x.dataId) as TypedArray;
  const finalDim = x.shape[x.rank - 1];
  // Maps the logical position j within a row starting at i to a flat index;
  // when `reverse` is set, positions are walked from the end of the row.
  const indexAdjuster = reverse ?
      (i: number, j: number) => i + finalDim - j - 1 :
      (i: number, j: number) => i + j;
  for (let i = 0; i < aVals.length; i += finalDim) {
    for (let j = 0; j < finalDim; j++) {
      const idx = indexAdjuster(i, j);
      if (j === 0) {
        // First element of a row: 0 for exclusive scans, x itself otherwise.
        vals[idx] = exclusive ? 0 : aVals[idx];
      } else {
        const prevIdx = indexAdjuster(i, j - 1);
        vals[idx] = exclusive ? aVals[prevIdx] + vals[prevIdx] :
                                aVals[idx] + vals[prevIdx];
      }
    }
  }
  return result;
}
/** Element-wise a == b with broadcasting; returns a bool tensor. */
equal(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'equal');
  const op = (aVal: number, bVal: number) => aVal === bVal ? 1 : 0;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise a != b with broadcasting; returns a bool tensor. */
notEqual(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'notEqual');
  const op = (aVal: number, bVal: number) => aVal !== bVal ? 1 : 0;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise a < b with broadcasting; returns a bool tensor. */
less(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'less');
  const op = (aVal: number, bVal: number) => aVal < bVal ? 1 : 0;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise a <= b with broadcasting; returns a bool tensor. */
lessEqual(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'lessEqual');
  const op = (aVal: number, bVal: number) => aVal <= bVal ? 1 : 0;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise a > b with broadcasting; returns a bool tensor. */
greater(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'greater');
  const op = (aVal: number, bVal: number) => aVal > bVal ? 1 : 0;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise a >= b with broadcasting; returns a bool tensor. */
greaterEqual(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'greaterEqual');
  const op = (aVal: number, bVal: number) => aVal >= bVal ? 1 : 0;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise logical NOT (truthy -> 0, falsy -> 1); returns bool. */
logicalNot<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'logicalNot');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Uint8Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = inVals[i] ? 0 : 1;
  }
  return this.makeOutput(outVals, x.shape, 'bool');
}
/** Element-wise logical AND with broadcasting; returns a bool tensor. */
logicalAnd(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'logicalAnd');
  const op = (aVal: number, bVal: number) => aVal && bVal;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/** Element-wise logical OR with broadcasting; returns a bool tensor. */
logicalOr(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'logicalOr');
  const op = (aVal: number, bVal: number) => aVal || bVal;
  return this.broadcastedBinaryOp(a, b, 'bool', op);
}
/**
 * Element selection: picks from `a` where `condition` is 1, else from `b`.
 * When the condition is rank-1 and a/b are higher rank, each condition value
 * selects a whole inner row (`offset` elements).
 */
select(condition: Tensor, a: Tensor, b: Tensor): Tensor {
  assertNotComplex([condition, a, b], 'select');
  const values = this.readSync(condition.dataId) as TypedArray;
  const aValues = this.readSync(a.dataId) as TypedArray;
  const bValues = this.readSync(b.dataId) as TypedArray;
  const result = ops.zeros(a.shape, upcastType(a.dtype, b.dtype));
  const newValues = this.readSync(result.dataId) as TypedArray;
  let index = 0;
  // offset > 1 only in the rank-1-condition broadcast case.
  const offset = condition.rank === 0 || condition.rank > 1 || a.rank === 1 ?
      1 :
      util.sizeFromShape(a.shape.slice(1));
  // NOTE(review): when offset > 1, every element of the selected row is
  // filled with aValues[i] / bValues[i] (the row index), not with the
  // per-element value at i * offset + j — confirm this matches the intended
  // broadcast semantics for higher-rank a/b.
  for (let i = 0; i < values.length; i++) {
    for (let j = 0; j < offset; j++) {
      if (values[i] === 1) {
        newValues[index++] = aValues[i];
      } else {
        newValues[index++] = bValues[i];
      }
    }
  }
  return result;
}
/** Returns the coordinates of non-zero entries of `condition` as a 2D tensor. */
where(condition: Tensor): Tensor2D {
  assertNotComplex([condition], 'where');
  const condVals = this.readSync(condition.dataId) as TypedArray;
  return whereImpl(condition.shape, condVals);
}
/** Top-k values and indices along the last axis; delegates to topkImpl. */
topk<T extends Tensor>(x: T, k: number, sorted: boolean): [T, T] {
  assertNotComplex(x, 'topk');
  const xVals = this.readSync(x.dataId) as TypedArray;
  return topkImpl(xVals, x.shape, x.dtype as NumericDataType, k, sorted);
}
/** Minimum over the innermost `axes`; result keeps x's dtype. */
min(x: Tensor, axes: number[]): Tensor {
  assertNotComplex(x, 'min');
  axis_util.assertAxesAreInnerMostDims('min', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const result = ops.zeros(outShape, x.dtype);
  const reduceSize = util.sizeFromShape(reduceShape);
  const vals = this.readSync(result.dataId) as TypedArray;
  const aVals = this.readSync(x.dataId) as TypedArray;
  // Inner-most axes means each output element reduces a contiguous run.
  for (let i = 0; i < vals.length; ++i) {
    const offset = i * reduceSize;
    let min = aVals[offset];
    for (let j = 0; j < reduceSize; ++j) {
      const value = aVals[offset + j];
      if (value < min) {
        min = value;
      }
    }
    vals[i] = min;
  }
  return result;
}
/** Element-wise min(a, b) with broadcasting; result keeps a's dtype. */
minimum(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'minimum');
  const op = (aVal: number, bVal: number) => Math.min(aVal, bVal);
  return this.broadcastedBinaryOp(a, b, a.dtype, op);
}
/**
 * Element-wise mod with broadcasting, using TensorFlow semantics: the
 * result takes the sign of the divisor (unlike JS's `%`).
 */
mod(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'mod');
  const op = (aVal: number, bVal: number) => {
    const rem = aVal % bVal;
    // Same sign (treating 0 as positive): the JS remainder already matches.
    // Mixed signs: shift into the divisor's sign range.
    return (aVal < 0) === (bVal < 0) ? rem : (rem + bVal) % bVal;
  };
  return this.broadcastedBinaryOp(a, b, a.dtype, op);
}
/** Maximum over the innermost `axes`; result keeps x's dtype. */
max(x: Tensor, axes: number[]): Tensor {
  assertNotComplex(x, 'max');
  axis_util.assertAxesAreInnerMostDims('max', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const result = ops.zeros(outShape, x.dtype);
  const reduceSize = util.sizeFromShape(reduceShape);
  const outVals = this.readSync(result.dataId) as TypedArray;
  const xVals = this.readSync(x.dataId) as TypedArray;
  // Each output element reduces a contiguous run of `reduceSize` inputs.
  for (let i = 0; i < outVals.length; ++i) {
    const base = i * reduceSize;
    let best = xVals[base];
    for (let j = 1; j < reduceSize; ++j) {
      const value = xVals[base + j];
      if (value > best) {
        best = value;
      }
    }
    outVals[i] = best;
  }
  return result;
}
/** Element-wise max(a, b) with broadcasting; result keeps a's dtype. */
maximum(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'maximum');
  const op = (aVal: number, bVal: number) => Math.max(aVal, bVal);
  return this.broadcastedBinaryOp(a, b, a.dtype, op);
}
/** Logical-AND reduction over the innermost `axes` (expects bool input). */
all(x: Tensor, axes: number[]): Tensor {
  assertNotComplex(x, 'all');
  axis_util.assertAxesAreInnerMostDims('all', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const result = ops.zeros(outShape, x.dtype);
  const reduceSize = util.sizeFromShape(reduceShape);
  const vals = this.readSync(result.dataId) as TypedArray;
  const aVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < vals.length; ++i) {
    const offset = i * reduceSize;
    let all = aVals[offset];
    for (let j = 0; j < reduceSize; ++j) {
      const value = aVals[offset + j];
      // && on 0/1 values leaves the accumulator 0 once any value is 0.
      all = all && value;
    }
    vals[i] = all;
  }
  return result;
}
/** Logical-OR reduction over the innermost `axes` (expects bool input). */
any(x: Tensor, axes: number[]): Tensor {
  assertNotComplex(x, 'any');
  axis_util.assertAxesAreInnerMostDims('any', axes, x.rank);
  const [outShape, reduceShape] =
      axis_util.computeOutAndReduceShapes(x.shape, axes);
  const result = ops.zeros(outShape, x.dtype);
  const reduceSize = util.sizeFromShape(reduceShape);
  const outVals = this.readSync(result.dataId) as TypedArray;
  const xVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < outVals.length; ++i) {
    const base = i * reduceSize;
    // || on 0/1 values latches the accumulator at 1 once any value is 1.
    let acc = xVals[base];
    for (let j = 1; j < reduceSize; ++j) {
      acc = acc || xVals[base + j];
    }
    outVals[i] = acc;
  }
  return result;
}
/** Element-wise (a - b)^2 with broadcasting; result keeps a's dtype. */
squaredDifference(a: Tensor, b: Tensor): Tensor {
  assertNotComplex([a, b], 'squaredDifference');
  const op = (aVal: number, bVal: number) => (aVal - bVal) * (aVal - bVal);
  return this.broadcastedBinaryOp(a, b, a.dtype, op);
}
/** Element-wise ceiling; always produces a float32 tensor. */
ceil<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'ceil');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.ceil(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise floor; always produces a float32 tensor. */
floor<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'floor');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.floor(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/**
 * Element-wise sign: -1, 0 or 1. Always produces a float32 tensor.
 */
sign<T extends Tensor>(x: T): T {
  // Fix: the op name passed to assertNotComplex was the placeholder 'x';
  // use the real kernel name so complex-input errors identify the op.
  assertNotComplex(x, 'sign');
  const values = this.readSync(x.dataId) as TypedArray;
  const newValues = new Float32Array(values.length);
  for (let i = 0; i < values.length; ++i) {
    if (values[i] < 0) {
      newValues[i] = -1;
    } else if (values[i] > 0) {
      newValues[i] = 1;
    } else {
      newValues[i] = 0;
    }
  }
  return this.makeOutput(newValues, x.shape, 'float32');
}
/** Element-wise NaN test; returns a bool tensor (1 where the value is NaN). */
isNaN<T extends Tensor>(x: T): T {
  // Fix: the op name passed to assertNotComplex was the placeholder 'x'.
  assertNotComplex(x, 'isNaN');
  const values = this.readSync(x.dataId) as TypedArray;
  const newValues = new Uint8Array(values.length);
  for (let i = 0; i < values.length; ++i) {
    if (Number.isNaN(values[i])) {
      newValues[i] = 1;
    }
  }
  return this.makeOutput(newValues, x.shape, 'bool');
}
/** Element-wise infinity test; 1 where the value is +Infinity or -Infinity. */
isInf<T extends Tensor>(x: T): T {
  // Fix: the op name passed to assertNotComplex was the placeholder 'x'.
  assertNotComplex(x, 'isInf');
  const values = this.readSync(x.dataId) as TypedArray;
  const newValues = new Uint8Array(values.length);
  for (let i = 0; i < values.length; ++i) {
    if (Math.abs(values[i]) === Infinity) {
      newValues[i] = 1;
    }
  }
  return this.makeOutput(newValues, x.shape, 'bool');
}
/** Element-wise finiteness test; 1 where the value is neither NaN nor ±Inf. */
isFinite<T extends Tensor>(x: T): T {
  // Fix: the op name passed to assertNotComplex was the placeholder 'x'.
  assertNotComplex(x, 'isFinite');
  const values = this.readSync(x.dataId) as TypedArray;
  const newValues = new Uint8Array(values.length);
  for (let i = 0; i < values.length; ++i) {
    if (Number.isFinite(values[i])) {
      newValues[i] = 1;
    }
  }
  return this.makeOutput(newValues, x.shape, 'bool');
}
/**
 * Element-wise rounding using banker's rounding (round-half-to-even), which
 * differs from JS Math.round for exact .5 values. Produces float32.
 */
round<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'round');
  const values = this.readSync(x.dataId) as TypedArray;
  const newValues = new Float32Array(values.length);
  for (let i = 0; i < values.length; ++i) {
    // The algorithm is based on banker's rounding.
    const base = Math.floor(values[i]);
    if (values[i] - base < 0.5) {
      newValues[i] = Math.floor(values[i]);
    } else if (values[i] - base > 0.5) {
      newValues[i] = Math.ceil(values[i]);
    } else {
      // Exactly halfway: round to the nearest even integer.
      if (base % 2.0 === 0.0) {
        newValues[i] = base;
      } else {
        newValues[i] = base + 1.0;
      }
    }
  }
  return this.makeOutput(newValues, x.shape, 'float32');
}
/** Element-wise natural exponential; always produces a float32 tensor. */
exp<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'exp');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.exp(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise exp(x) - 1 (accurate near zero); produces float32. */
expm1<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'expm1');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.expm1(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise natural logarithm; always produces a float32 tensor. */
log<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'log');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.log(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise log(1 + x) (accurate near zero); produces float32. */
log1p<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'log1p');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.log1p(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise square root; always produces a float32 tensor. */
sqrt<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'sqrt');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.sqrt(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise reciprocal square root 1/sqrt(x); produces float32. */
rsqrt<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'rsqrt');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = 1 / Math.sqrt(inVals[i]);
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Element-wise reciprocal 1/x; always produces a float32 tensor. */
reciprocal<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'reciprocal');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(inVals.length);
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = 1 / inVals[i];
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
/** Identity activation: returns the input tensor unchanged (no copy). */
linear<T extends Tensor>(x: T): T {
  return x;
}
/** Element-wise max(x, 0); preserves x's dtype. */
relu<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'relu');
  const res = ops.zeros(x.shape, x.dtype);
  const outVals = this.readSync(res.dataId) as TypedArray;
  const inVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < inVals.length; ++i) {
    outVals[i] = Math.max(0, inVals[i]);
  }
  return res as T;
}
/** Element-wise min(max(x, 0), 6); preserves x's dtype. */
relu6<T extends Tensor>(x: T): T {
  // Fix: assertNotComplex previously reported 'relu'; this kernel is relu6.
  assertNotComplex(x, 'relu6');
  const res = ops.zeros(x.shape, x.dtype);
  const resVals = this.readSync(res.dataId) as TypedArray;
  const inVals = this.readSync(x.dataId) as TypedArray;
  for (let i = 0; i < inVals.length; ++i) {
    resVals[i] = Math.min(Math.max(0, inVals[i]), 6);
  }
  return res as T;
}
/** Parametric ReLU: x when x >= 0, a * x otherwise (a broadcasts with x). */
prelu<T extends Tensor>(x: T, a: T): T {
  assertNotComplex([x, a], 'prelu');
  return this.broadcastedBinaryOp(
      x, a, x.dtype,
      (xValue, aValue) => xValue < 0 ? aValue * xValue : xValue) as T;
}
/** ELU activation: x when x >= 0, exp(x) - 1 otherwise. Produces float32. */
elu<T extends Tensor>(x: T): T {
  assertNotComplex(x, 'elu');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const outVals = new Float32Array(x.size);
  for (let i = 0; i < inVals.length; ++i) {
    const v = inVals[i];
    outVals[i] = v >= 0 ? v : Math.exp(v) - 1;
  }
  return this.makeOutput(outVals, x.shape, 'float32');
}
eluDer<T extends Tensor>(dy: T, y: T): T {
  // Gradient of ELU expressed in terms of the forward output y:
  //   x >= 0  =>  y = x >= 0      and d(elu)/dx = 1        => grad = dy
  //   x <  0  =>  y = e^x - 1 < 0 and d(elu)/dx = e^x = y+1 => grad = dy*(y+1)
  // Fix: the condition was `v >= 1`, which mis-handled outputs in
  // [0, 1) (produced by inputs x in [0, 1)), scaling the gradient by
  // (v + 1) where it should pass dy through. TF's EluGrad branches on
  // activation >= 0.
  assertNotComplex([dy, y], 'eluDer');
  const resultValues = new Float32Array(y.size);
  const values = this.readSync(y.dataId) as TypedArray;
  const dyValues = this.readSync(dy.dataId) as TypedArray;
  for (let i = 0; i < values.length; ++i) {
    const v = values[i];
    if (v >= 0) {
      resultValues[i] = dyValues[i];
    } else {
      resultValues[i] = dyValues[i] * (v + 1);
    }
  }
  return this.makeOutput(resultValues, y.shape, 'float32');
}
selu<T extends Tensor>(x: T): T {
  // Scaled ELU: scale * v for v >= 0, scaleAlpha * (exp(v) - 1)
  // otherwise. The constants give the stable and attracting fixed
  // point (0, 1) for normalized weights; see
  // https://arxiv.org/abs/1706.02515
  assertNotComplex(x, 'selu');
  const scaleAlpha = selu_util.SELU_SCALEALPHA;
  const scale = selu_util.SELU_SCALE;
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    const v = inVals[idx];
    out[idx] = v >= 0 ? scale * v : scaleAlpha * (Math.exp(v) - 1);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
clip<T extends Tensor>(x: T, min: number, max: number): T {
  // Clamps every element into [min, max]; float32 output. NaN fails
  // both comparisons and so passes through unchanged.
  assertNotComplex(x, 'clip');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = v > max ? max : (v < min ? min : v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
abs<T extends Tensor>(x: T): T {
  // Element-wise absolute value for real tensors. Note there is no
  // complex guard here — complex magnitudes go through complexAbs.
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = Math.abs(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
complexAbs<T extends Tensor>(x: T): T {
  // Magnitude of a complex tensor. The backing store interleaves
  // [real, imag] pairs, so element i lives at offsets 2i and 2i + 1.
  // Locals renamed re/im to avoid shadowing the file-level
  // `real`/`imag` imports.
  const vals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let i = 0; i < x.size; ++i) {
    const re = vals[i * 2];
    const im = vals[i * 2 + 1];
    out[i] = Math.hypot(re, im);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
int<T extends Tensor>(x: T): T {
  // Casts to int32. Assigning into an Int32Array applies the JS
  // ToInt32 conversion, i.e. truncation toward zero (with wrap-around
  // on overflow and NaN -> 0).
  assertNotComplex(x, 'int');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Int32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = inVals[idx];
  }
  return this.makeOutput(out, x.shape, 'int32');
}
sigmoid<T extends Tensor>(x: T): T {
  // Logistic sigmoid 1 / (1 + e^-v), element-wise; float32 output.
  assertNotComplex(x, 'sigmoid');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = 1 / (1 + Math.exp(-v));
  }
  return this.makeOutput(out, x.shape, 'float32');
}
softplus<T extends Tensor>(x: T): T {
  // Numerically-stable softplus, mirroring tf.nn.softplus:
  // https://goo.gl/vkcvwX
  assertNotComplex(x, 'softplus');
  // epsilon is the difference between 1.0 and the next representable
  // float32, i.e. 2^-23; see
  // https://math.byu.edu/~schow/work/IEEEFloatingPoint.htm
  const epsilon = 1.1920928955078125e-7;
  const threshold = Math.log(epsilon) + 2.0;
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    const v = inVals[idx];
    // Below `threshold`, exp(v) may underflow, but softplus(v) == exp(v)
    // to within machine epsilon.
    const tooSmall = v < threshold;
    // Above `-threshold`, exp(v) may overflow, but softplus(v) == v to
    // within machine epsilon.
    const tooLarge = v > -threshold;
    const expV = Math.exp(v);
    out[idx] = tooSmall ? expV : (tooLarge ? v : Math.log(1.0 + expV));
  }
  return this.makeOutput(out, x.shape, 'float32');
}
sin<T extends Tensor>(x: T): T {
  // Element-wise sine; float32 output.
  assertNotComplex(x, 'sin');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = Math.sin(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
cos<T extends Tensor>(x: T): T {
  // Element-wise cosine; float32 output.
  assertNotComplex(x, 'cos');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = Math.cos(v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
tan<T extends Tensor>(x: T): T {
  // Element-wise tangent; float32 output.
  assertNotComplex(x, 'tan');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = Math.tan(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
asin<T extends Tensor>(x: T): T {
  // Element-wise arcsine; NaN outside [-1, 1]. float32 output.
  assertNotComplex(x, 'asin');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = Math.asin(v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
acos<T extends Tensor>(x: T): T {
  // Element-wise arccosine; NaN outside [-1, 1]. float32 output.
  assertNotComplex(x, 'acos');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = Math.acos(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
atan<T extends Tensor>(x: T): T {
  // Element-wise arctangent; float32 output.
  assertNotComplex(x, 'atan');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = Math.atan(v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
atan2<T extends Tensor>(a: T, b: T): T {
  // Element-wise four-quadrant arctangent of a / b, with broadcasting.
  assertNotComplex([a, b], 'atan2');
  const angle = (aVal: number, bVal: number) => Math.atan2(aVal, bVal);
  return this.broadcastedBinaryOp(a, b, a.dtype, angle) as T;
}
sinh<T extends Tensor>(x: T): T {
  // Element-wise hyperbolic sine; float32 output.
  assertNotComplex(x, 'sinh');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = Math.sinh(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
cosh<T extends Tensor>(x: T): T {
  // Element-wise hyperbolic cosine; float32 output.
  assertNotComplex(x, 'cosh');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = Math.cosh(v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
tanh<T extends Tensor>(x: T): T {
  // Element-wise hyperbolic tangent, delegated to util.tanh rather
  // than Math.tanh; float32 output.
  assertNotComplex(x, 'tanh');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = util.tanh(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
asinh<T extends Tensor>(x: T): T {
  // Element-wise inverse hyperbolic sine; float32 output.
  assertNotComplex(x, 'asinh');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = Math.asinh(v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
acosh<T extends Tensor>(x: T): T {
  // Element-wise inverse hyperbolic cosine; NaN below 1. float32
  // output.
  assertNotComplex(x, 'acosh');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    out[idx] = Math.acosh(inVals[idx]);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
atanh<T extends Tensor>(x: T): T {
  // Element-wise inverse hyperbolic tangent; NaN outside [-1, 1].
  // float32 output.
  assertNotComplex(x, 'atanh');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  let i = 0;
  for (const v of inVals) {
    out[i++] = Math.atanh(v);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
erf<T extends Tensor>(x: T): T {
  // Gauss error function via a 5-term rational polynomial
  // approximation: erf(|v|) ~= 1 - poly(t) * exp(-v^2) with
  // t = 1 / (1 + p|v|); erf(-v) = -erf(v) restores the sign.
  // Coefficients come from erf_util.
  assertNotComplex(x, 'erf');
  const p = erf_util.ERF_P;
  const a1 = erf_util.ERF_A1;
  const a2 = erf_util.ERF_A2;
  const a3 = erf_util.ERF_A3;
  const a4 = erf_util.ERF_A4;
  const a5 = erf_util.ERF_A5;
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    const sign = Math.sign(inVals[idx]);
    const v = Math.abs(inVals[idx]);
    const t = 1.0 / (1.0 + p * v);
    // Horner-style evaluation of the degree-5 polynomial in t.
    out[idx] = sign *
        (1.0 -
         (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t *
             Math.exp(-v * v));
  }
  return this.makeOutput(out, x.shape, 'float32');
}
step<T extends Tensor>(x: T, alpha = 0): T {
  // Step function: 1 for v > 0, `alpha` for v <= 0, and NaN passes
  // through unchanged.
  assertNotComplex(x, 'step');
  const inVals = this.readSync(x.dataId) as TypedArray;
  const out = new Float32Array(x.size);
  for (let idx = 0; idx < inVals.length; idx++) {
    const v = inVals[idx];
    out[idx] = isNaN(v) ? NaN : (v > 0 ? 1 : alpha);
  }
  return this.makeOutput(out, x.shape, 'float32');
}
fusedConv2d(
    {input, filter, convInfo, bias, activation, preluActivationWeights}:
        FusedConv2DConfig): Tensor4D {
  // Fused pipeline: conv2d, then an optional bias-add, then an
  // optional activation (applied via mapActivation, which also handles
  // prelu weights).
  let out = this.conv2d(input, filter, convInfo);
  if (bias) {
    out = this.add(out, bias) as Tensor4D;
  }
  if (activation) {
    out = mapActivation(this, out, activation, preluActivationWeights) as
        Tensor4D;
  }
  return out;
}
conv2d(x: Tensor4D, filter: Tensor4D, convInfo: Conv2DInfo): Tensor4D {
  // Direct (non-im2col) 2D convolution on the CPU. For every output
  // position the filter window is walked and products are accumulated
  // into the output buffer; window taps that fall into the padding
  // region (outside the input) are skipped via `continue`.
  assertNotComplex([x, filter], 'conv2d');
  const filterHeight = convInfo.filterHeight;
  const filterWidth = convInfo.filterWidth;
  const dilationHeight = convInfo.dilationHeight;
  const dilationWidth = convInfo.dilationWidth;
  const padLeft = convInfo.padInfo.left;
  const padTop = convInfo.padInfo.top;
  const isChannelsLast = convInfo.dataFormat === 'channelsLast';
  // NOTE(review): the `+=` accumulation below assumes ops.buffer
  // zero-initializes its values — confirm against the buffer impl.
  const y = ops.buffer(convInfo.outShape, x.dtype as 'float32');
  // Precompute row/col/channel strides for the active layout so the
  // loop body below is layout-agnostic (channelsLast vs channelsFirst).
  const xBatchStride = x.strides[0];
  const xRowStride = isChannelsLast ? x.strides[1] : x.strides[2];
  const xColStride = isChannelsLast ? x.strides[2] : 1;
  const xChannelStride = isChannelsLast ? 1 : x.strides[1];
  const yBatchStride = y.strides[0];
  const yRowStride = isChannelsLast ? y.strides[1] : y.strides[2];
  const yColStride = isChannelsLast ? y.strides[2] : 1;
  const yChannelStride = isChannelsLast ? 1 : y.strides[1];
  const xVals = this.readSync(x.dataId) as TypedArray;
  const wVals = this.readSync(filter.dataId) as TypedArray;
  const yVals = y.values;
  for (let b = 0; b < convInfo.batchSize; ++b) {
    const xOffset1 = b * xBatchStride;
    const yOffset1 = b * yBatchStride;
    for (let yR = 0; yR < convInfo.outHeight; ++yR) {
      const yOffset2 = yOffset1 + yR * yRowStride;
      // Topmost input row covered by this output row; may be negative
      // when the window starts inside the top padding.
      const xRCorner = yR * convInfo.strideHeight - padTop;
      for (let wR = 0; wR < filterHeight; wR++) {
        const xR = xRCorner + wR * dilationHeight;
        if (xR < 0 || xR >= convInfo.inHeight) {
          // This filter row lies entirely in the vertical padding.
          continue;
        }
        const wOffset1 = wR * filter.strides[0];
        const xOffset2 = xOffset1 + xR * xRowStride;
        for (let yC = 0; yC < convInfo.outWidth; ++yC) {
          const yOffset3 = yOffset2 + yC * yColStride;
          // Leftmost input column covered by this output column.
          const xCCorner = yC * convInfo.strideWidth - padLeft;
          for (let wC = 0; wC < filterWidth; wC++) {
            const xC = xCCorner + wC * dilationWidth;
            if (xC < 0 || xC >= convInfo.inWidth) {
              // This filter column lies in the horizontal padding.
              continue;
            }
            const wOffset2 = wOffset1 + wC * filter.strides[1];
            const xOffset3 = xOffset2 + xC * xColStride;
            let wOffset3 = wOffset2;
            // Inner reduction over input channels. The filter's output
            // channels are contiguous at wOffset3 (the advance by
            // outChannels per d1 shows the filter layout is
            // [wR, wC, inChannel, outChannel]).
            for (let d1 = 0; d1 < convInfo.inChannels; ++d1) {
              const xVal = xVals[xOffset3 + d1 * xChannelStride];
              for (let d2 = 0; d2 < convInfo.outChannels; ++d2) {
                yVals[yOffset3 + d2 * yChannelStride] +=
                    xVal * wVals[wOffset3 + d2];
              }
              wOffset3 += convInfo.outChannels;
            }
          }
        }
      }
    }
  }
  return y.toTensor() as Tensor4D;
}
conv3d(x: Tensor5D, filter: Tensor5D, convIn