UNPKG

@tensorflow/tfjs-core

Version:

Hardware-accelerated JavaScript library for machine intelligence

1,543 lines (1,315 loc) 138 kB
/**
 * @license
 * Copyright 2017 Google Inc. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
import * as seedrandom from 'seedrandom';

import {ENGINE} from '../../engine';
import {env} from '../../environment';
import {warn} from '../../log';
import * as array_ops_util from '../../ops/array_ops_util';
import * as axis_util from '../../ops/axis_util';
import * as broadcast_util from '../../ops/broadcast_util';
import {complex, imag, real} from '../../ops/complex_ops';
import * as concat_util from '../../ops/concat_util';
import {Conv2DInfo, Conv3DInfo} from '../../ops/conv_util';
import * as erf_util from '../../ops/erf_util';
import {Activation, FusedBatchMatMulConfig, FusedConv2DConfig} from '../../ops/fused_util';
import * as gather_nd_util from '../../ops/gather_nd_util';
import * as ops from '../../ops/ops';
import {buffer, scalar, tensor, tensor4d} from '../../ops/ops';
import * as scatter_nd_util from '../../ops/scatter_nd_util';
import * as selu_util from '../../ops/selu_util';
import {computeFlatOffset, computeOutShape, isSliceContinous} from '../../ops/slice_util';
import {DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer} from '../../tensor';
import {BackendValues, DataType, DataValues, NumericDataType, Rank, ShapeMap, TypedArray, upcastType} from '../../types';
import * as util from '../../util';
import {getArrayFromDType, inferDtype, now, sizeFromShape} from '../../util';
import {BackendTimingInfo, DataStorage, EPSILON_FLOAT32, KernelBackend} from '../backend';
import * as backend_util from '../backend_util';
import * as complex_util from '../complex_util';
import {nonMaxSuppressionV3} from '../non_max_suppression_impl';
import {split} from '../split_shared';
import {tile} from '../tile_impl';
import {topkImpl} from '../topk_impl';
import {whereImpl} from '../where_impl';

import {assertNotComplex} from './cpu_util';

/**
 * Applies a fused activation function to `x` on the CPU backend.
 *
 * `preluActivationWeights` is only consulted for the 'prelu' activation.
 * Throws for any activation name the CPU backend does not implement.
 */
function mapActivation(
    backend: MathBackendCPU, x: Tensor, activation: Activation,
    preluActivationWeights?: Tensor): Tensor {
  switch (activation) {
    case 'linear':
      return backend.linear(x);
    case 'relu':
      return backend.relu(x);
    case 'elu':
      return backend.elu(x);
    case 'relu6':
      return backend.relu6(x);
    case 'prelu':
      return backend.prelu(x, preluActivationWeights);
    default:
      throw new Error(
          `Activation ${activation} has not been implemented for the CPU backend.`);
  }
}

/** Backing storage record for one tensor held by the CPU backend. */
export interface TensorData<D extends DataType> {
  values?: BackendValues;
  dtype: D;
  // For complex numbers, the real and imaginary parts are stored as their own
  // individual tensors, with a parent joining the two with the
  // complexTensors field.
  // TODO(smilkov): Replace Tensor with TensorInfo when you modularize ops
  // that work with complex tensors.
  complexTensors?: {real: Tensor, imag: Tensor};
}

export class MathBackendCPU extends KernelBackend {
  // Tile size used by the blocked batchMatMul implementation.
  public blockSize = 48;

  // Maps DataId -> backing values/dtype (and complex parts, if any).
  data: DataStorage<TensorData<DataType>>;
  // Tracks whether the one-time Node.js performance warning has fired.
  private firstUse = true;

  constructor() {
    super();
    this.data = new DataStorage(this, ENGINE);
  }

  /**
   * Registers `values` under a fresh DataId and returns that id.
   * NOTE(review): `shape` is accepted but not stored here — presumably the
   * engine tracks shapes separately; confirm against callers.
   */
  write(values: BackendValues, shape: number[], dtype: DataType): DataId {
    // On the very first write under Node, nudge the user toward the native
    // bindings, which are dramatically faster than this pure-JS backend.
    if (this.firstUse) {
      this.firstUse = false;
      if (env().get('IS_NODE')) {
        warn(
            '\n============================\n' +
            'Hi there 👋. Looks like you are running TensorFlow.js in ' +
            'Node.js. To speed things up dramatically, install our node ' +
            'backend, which binds to TensorFlow C++, by running ' +
            'npm i @tensorflow/tfjs-node, ' +
            'or npm i @tensorflow/tfjs-node-gpu if you have CUDA. ' +
            'Then call require(\'@tensorflow/tfjs-node\'); (-gpu ' +
            'suffix for CUDA) at the start of your program. ' +
            'Visit https://github.com/tensorflow/tfjs-node for more details.' +
            '\n============================');
      }
    }
    const dataId = {};
    this.data.set(dataId, {values, dtype});
    return dataId;
  }

  /** Re-homes existing `values` under the given (already allocated) DataId. */
  move(dataId: DataId, values: BackendValues, shape: number[], dtype: DataType):
      void {
    this.data.set(dataId, {values, dtype});
  }

  /** Number of data buffers currently tracked by this backend. */
  numDataIds(): number {
    return this.data.numDataIds();
  }

  // CPU reads are synchronous; the async variant simply wraps readSync.
  async read(dataId: DataId): Promise<BackendValues> {
    return this.readSync(dataId);
  }

  /**
   * Returns the raw values for `dataId`. Complex tensors are materialized by
   * interleaving the separately-stored real and imaginary parts.
   */
  readSync(dataId: DataId): BackendValues {
    const info = this.data.get(dataId);
    if (info.dtype === 'complex64') {
      const realValues =
          this.readSync(info.complexTensors.real.dataId) as Float32Array;
      const imagValues =
          this.readSync(info.complexTensors.imag.dataId) as Float32Array;
      return complex_util.mergeRealAndImagArrays(realValues, imagValues);
    }
    return this.data.get(dataId).values;
  }

  /** Wraps a tensor's data in a TensorBuffer, decoding string tensors. */
  private bufferSync<R extends Rank>(t: Tensor<R>): TensorBuffer<R> {
    const data = this.readSync(t.dataId);
    let decodedData = data as DataValues;
    if (t.dtype === 'string') {
      try {
        // Decode the bytes into string.
decodedData = (data as Uint8Array[]).map(d => util.decodeString(d)); } catch { throw new Error('Failed to decode encoded string bytes into utf-8'); } } return buffer(t.shape, t.dtype, decodedData) as TensorBuffer<R>; } private makeOutput<T extends Tensor>( values: BackendValues, shape: number[], dtype: DataType): T { const dataId = this.write(values, shape, dtype); return ENGINE.makeTensorFromDataId(dataId, shape, dtype, this) as T; } disposeData(dataId: DataId): void { if (this.data.has(dataId)) { const {complexTensors} = this.data.get(dataId); if (complexTensors != null) { complexTensors.real.dispose(); complexTensors.imag.dispose(); } this.data.delete(dataId); } } async time(f: () => void): Promise<BackendTimingInfo> { const start = now(); f(); const kernelMs = now() - start; return {kernelMs}; } memory() { return { // Unreliable due to automatic gc. The numbers above are cumulative. unreliable: true, reasons: ['The reported memory is an upper bound. Due to automatic garbage ' + 'collection, the true allocated memory may be less.'] }; } complex<T extends Tensor>(real: T, imag: T): T { const result = this.makeOutput(null, real.shape, 'complex64'); const resultData = this.data.get(result.dataId); // The backend owns the reference to the underlying real and imaginary // clones. These will explicitly get disposed when the complex tensor is // disposed. 
resultData.complexTensors = { real: ENGINE.keep(real.clone()), imag: ENGINE.keep(imag.clone()) }; return result as T; } real<T extends Tensor>(input: T): T { const resultData = this.data.get(input.dataId); return resultData.complexTensors.real.clone() as T; } imag<T extends Tensor>(input: T): T { const resultData = this.data.get(input.dataId); return resultData.complexTensors.imag.clone() as T; } slice<T extends Tensor>(x: T, begin: number[], size: number[]): T { assertNotComplex(x, 'slice'); const isContinous = isSliceContinous(x.shape, begin, size); if (isContinous) { const flatOffset = computeFlatOffset(begin, x.strides); const length = util.sizeFromShape(size); const vals = this.readSync(x.dataId) as TypedArray; return tensor( vals.subarray(flatOffset, flatOffset + length), size, x.dtype) as T; } const buffer = ops.buffer(size, x.dtype); const xBuf = this.bufferSync(x); for (let i = 0; i < buffer.size; ++i) { const loc = buffer.indexToLoc(i); const xLoc = loc.map((idx, j) => idx + begin[j]); buffer.values[i] = xBuf.get(...xLoc); } return buffer.toTensor() as T; } stridedSlice<T extends Tensor>( x: T, begin: number[], end: number[], strides: number[]): T { assertNotComplex(x, 'stridedSlice'); const outShape = computeOutShape(begin, end, strides); if (outShape.some(axis => axis === 0)) { return ops.tensor([], outShape) as T; } const buffer = ops.buffer(outShape, x.dtype); const xBuf = this.bufferSync(x); for (let i = 0; i < buffer.size; i++) { const loc = buffer.indexToLoc(i); const newLoc: number[] = new Array(loc.length); for (let j = 0; j < newLoc.length; j++) { newLoc[j] = loc[j] * strides[j] + begin[j]; } buffer.set(xBuf.get(...newLoc), ...loc); } return buffer.toTensor() as T; } diag(x: Tensor): Tensor { const xVals = this.readSync(x.dataId) as TypedArray; const buffer = ops.buffer([x.size, x.size], x.dtype); const vals = buffer.values; for (let i = 0; i < xVals.length; i++) { vals[i * x.size + i] = xVals[i]; } return buffer.toTensor(); } unstack(x: Tensor, 
axis: number): Tensor[] { const num = x.shape[axis]; const outShape: number[] = new Array(x.rank - 1); let outIndex = 0; for (let i = 0; i < x.rank; i++) { if (i !== axis) { outShape[outIndex++] = x.shape[i]; } } const begin = new Array(x.rank).fill(0); const size = x.shape.slice(); size[axis] = 1; const res = new Array(num); for (let i = 0; i < res.length; i++) { begin[axis] = i; res[i] = this.slice(x, begin, size).reshape(outShape); } return res; } reverse<T extends Tensor>(x: T, axis: number[]): T { assertNotComplex(x, 'reverse'); const buffer = ops.buffer(x.shape, x.dtype); const xBuf = this.bufferSync(x); for (let i = 0; i < buffer.size; i++) { const outLoc = buffer.indexToLoc(i); const inLoc = outLoc.slice(); axis.forEach(ax => inLoc[ax] = x.shape[ax] - 1 - inLoc[ax]); buffer.set(xBuf.get(...inLoc), ...outLoc); } return buffer.toTensor() as T; } concat(tensors: Tensor[], axis: number): Tensor { if (tensors[0].dtype === 'complex64') { const reals = tensors.map((t) => real(t)); const imags = tensors.map((t) => imag(t)); return complex(this.concat(reals, axis), this.concat(imags, axis)); } const tensors2D = tensors.map(t => { const innerSize = util.sizeFromShape(t.shape.slice(axis)); return t.as2D(-1, innerSize); }); const outShape = concat_util.computeOutShape(tensors2D.map(t => t.shape), 1 /* axis */); const values = ops.buffer(outShape as [number, number], tensors[0].dtype as 'float32') .values; if (tensors2D[0].shape[0] === 1) { // Use built-in TypedArray.set() method for speed. 
let offset = 0; tensors2D.forEach(t => { values.set(this.readSync(t.dataId) as TypedArray, offset); offset += t.size; }); } else { let colOffset = 0; tensors2D.forEach(t => { const tVals = this.readSync(t.dataId) as TypedArray; let tIdx = 0; for (let row = 0; row < t.shape[0]; ++row) { const resIdx = row * outShape[1] + colOffset; for (let col = 0; col < t.shape[1]; ++col) { values[resIdx + col] = tVals[tIdx++]; } } colOffset += t.shape[1]; }); } const finalOutShape = concat_util.computeOutShape(tensors.map(t => t.shape), axis); return tensor(values, finalOutShape, tensors[0].dtype); } neg<T extends Tensor>(x: T): T { assertNotComplex(x, 'neg'); return this.multiply(ops.scalar(-1), x) as T; } add(a: Tensor, b: Tensor): Tensor { if (a.dtype === 'complex64' || b.dtype === 'complex64') { return this.broadcastedBinaryComplexOp( a.cast('complex64'), b.cast('complex64'), (aReal, aImag, bReal, bImag) => { return {real: aReal + bReal, imag: aImag + bImag}; }); } return this.broadcastedBinaryOp( a, b, upcastType(a.dtype, b.dtype), (aValue, bValue) => aValue + bValue); } addN<T extends Tensor>(tensors: T[]): T { assertNotComplex(tensors, 'addN'); const vals = tensors.map(t => this.readSync(t.dataId) as TypedArray); const result = ops.buffer(tensors[0].shape, tensors[0].dtype as 'float32'); const resultVals = result.values; for (let i = 0; i < tensors.length; i++) { const currVals = vals[i]; for (let j = 0; j < resultVals.length; j++) { resultVals[j] += currVals[j]; } } return result.toTensor() as T; } softmax<T extends Tensor>(logits: T, dim: number): T { const axes = util.parseAxisParam([dim], logits.shape); const maxLogit = this.max(logits, axes); const expandedShape = axis_util.expandShapeToKeepDim(maxLogit.shape, axes); const a = this.subtract(logits, maxLogit.reshape(expandedShape)); const b = this.exp(a); const sumExp = this.sum(b, axes).reshape(expandedShape); return this.realDivide(b, sumExp) as T; } subtract(a: Tensor, b: Tensor): Tensor { if (a.dtype === 
'complex64' || b.dtype === 'complex64') { return this.broadcastedBinaryComplexOp( a.cast('complex64'), b.cast('complex64'), (aReal, aImag, bReal, bImag) => { return {real: aReal - bReal, imag: aImag - bImag}; }); } return this.broadcastedBinaryOp( a, b, upcastType(a.dtype, b.dtype), (aValue, bValue) => aValue - bValue); } pow<T extends Tensor>(a: T, b: Tensor): T { assertNotComplex([a, b], 'pow'); return this.broadcastedBinaryOp( a, b, a.dtype, (aValue, bValue) => Math.pow(aValue, bValue)) as T; } batchMatMul( a: Tensor3D, b: Tensor3D, transposeA: boolean, transposeB: boolean): Tensor3D { assertNotComplex([a, b], 'matMul'); const sharedDim = transposeA ? a.shape[1] : a.shape[2]; const leftDim = transposeA ? a.shape[2] : a.shape[1]; const rightDim = transposeB ? b.shape[1] : b.shape[2]; const batchDim = a.shape[0]; const aValues = this.readSync(a.dataId) as TypedArray; const bValues = this.readSync(b.dataId) as TypedArray; const [aBatch, aOuterStep, aInnerStep] = transposeA ? [a.strides[0], 1, a.strides[1]] : [a.strides[0], a.strides[1], 1]; const [bInnerStep, bOuterStep, bBatch] = transposeB ? 
[1, b.strides[1], b.strides[0]] : [b.strides[1], 1, b.strides[0]]; const size = leftDim * rightDim; const result = buffer([batchDim, leftDim, rightDim], a.dtype); const resVals = result.values as TypedArray; const blockSize = this.blockSize; for (let b = 0; b < batchDim; b++) { for (let i0 = 0; i0 < leftDim; i0 += blockSize) { for (let j0 = 0; j0 < rightDim; j0 += blockSize) { for (let k0 = 0; k0 < sharedDim; k0 += blockSize) { // for when blockSize doesn't evenly divide the input const iBlock = Math.min(i0 + blockSize, leftDim); const jBlock = Math.min(j0 + blockSize, rightDim); const kBlock = Math.min(k0 + blockSize, sharedDim); for (let i = i0; i < iBlock; i++) { for (let j = j0; j < jBlock; j++) { let sum = 0.0; for (let k = k0; k < kBlock; k++) { sum += aValues[b * aBatch + i * aOuterStep + k * aInnerStep] * bValues[k * bInnerStep + j * bOuterStep + b * bBatch]; } resVals[b * size + (i * rightDim + j)] += sum; } } } } } } return result.toTensor() as Tensor3D; } fusedBatchMatMul( {a, b, transposeA, transposeB, bias, activation, preluActivationWeights}: FusedBatchMatMulConfig): Tensor3D { let result = this.batchMatMul(a, b, transposeA, transposeB); if (bias) { result = this.add(result, bias) as Tensor3D; } if (activation) { result = mapActivation(this, result, activation, preluActivationWeights) as Tensor3D; } return result; } multiply(a: Tensor, b: Tensor): Tensor { if (a.dtype === 'complex64' || b.dtype === 'complex64') { return this.broadcastedBinaryComplexOp( a.cast('complex64'), b.cast('complex64'), (aReal, aImag, bReal, bImag) => { return { real: aReal * bReal - aImag * bImag, imag: aReal * bImag + aImag * bReal }; }); } return this.broadcastedBinaryOp( a, b, upcastType(a.dtype, b.dtype), (aValue, bValue) => aValue * bValue); } realDivide(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'realDivide'); const op = (a: number, b: number) => a / b; const outputDtype = 'float32'; return this.broadcastedBinaryOp(a, b, outputDtype, op); } floorDiv(a: 
Tensor, b: Tensor): Tensor {
    assertNotComplex([a, b], 'floorDiv');
    const op = (a: number, b: number) => Math.floor(a / b);
    const outputDtype = 'int32';
    return this.broadcastedBinaryOp(a, b, outputDtype, op);
  }

  /**
   * Sums `x` over the given inner-most `axes`.
   * int32/bool inputs accumulate into an int32 result; floats stay float.
   */
  sum(x: Tensor, axes: number[]): Tensor {
    assertNotComplex(x, 'sum');
    axis_util.assertAxesAreInnerMostDims('sum', axes, x.rank);
    const [outShape, reduceShape] =
        axis_util.computeOutAndReduceShapes(x.shape, axes);
    const resultDtype = upcastType(x.dtype, 'int32');
    const result = ops.zeros(outShape, resultDtype);
    const reduceSize = util.sizeFromShape(reduceShape);
    const vals = this.readSync(result.dataId) as TypedArray;
    const aVals = this.readSync(x.dataId) as TypedArray;
    for (let i = 0; i < vals.length; ++i) {
      const offset = i * reduceSize;
      let sum = 0;
      for (let j = 0; j < reduceSize; ++j) {
        sum += aVals[offset + j];
      }
      vals[i] = sum;
    }
    return result;
  }

  /**
   * Multiplies `x` over the given inner-most `axes`.
   * Mirrors `sum` but with a multiplicative accumulator starting at 1.
   */
  prod(x: Tensor, axes: number[]): Tensor {
    // BUG FIX: the op name passed to assertNotComplex was 'sum' (copy-paste
    // from the method above), so complex-input errors reported the wrong op.
    assertNotComplex(x, 'prod');
    const [outShape, reduceShape] =
        axis_util.computeOutAndReduceShapes(x.shape, axes);
    const resultDtype = upcastType(x.dtype, 'int32');
    const result = ops.zeros(outShape, resultDtype);
    const reduceSize = util.sizeFromShape(reduceShape);
    const vals = this.readSync(result.dataId) as TypedArray;
    const aVals = this.readSync(x.dataId) as TypedArray;
    for (let i = 0; i < vals.length; ++i) {
      const offset = i * reduceSize;
      let prod = 1;
      for (let j = 0; j < reduceSize; ++j) {
        prod *= aVals[offset + j];
      }
      vals[i] = prod;
    }
    return result;
  }

  /**
   * Sums segments of `x` selected by `segmentIds` via broadcasted masking.
   */
  unsortedSegmentSum<T extends Tensor>(
      x: T, segmentIds: Tensor1D, numSegments: number): Tensor {
    assertNotComplex(x, 'unsortedSegmentSum');
    const res = [];
    // Reshape the segment id's so that they can be broadcast with
    // x.
The new shape should be [segmentIds.shape, 1, ..., 1] const numIters = x.rank - segmentIds.rank; for (let i = 0; i < numIters; ++i) { segmentIds = segmentIds.expandDims(i + 1); } for (let i = 0; i < numSegments; ++i) { const segmentId = ops.scalar(i, 'int32'); const mask = ops.equal(segmentId, segmentIds).asType('float32'); const sum = mask.mul(x).sum(0); res.push(sum); } return ops.stack(res); } argMin(x: Tensor, axis: number): Tensor { assertNotComplex(x, 'argMin'); const axes = [axis]; axis_util.assertAxesAreInnerMostDims('argMin', axes, x.rank); const [outShape, reduceShape] = axis_util.computeOutAndReduceShapes(x.shape, axes); const result = ops.zeros(outShape, 'int32'); const reduceSize = util.sizeFromShape(reduceShape); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < vals.length; ++i) { const offset = i * reduceSize; let min = aVals[offset]; let minIndex = 0; for (let j = 0; j < reduceSize; ++j) { const value = aVals[offset + j]; if (value < min) { min = value; minIndex = j; } } vals[i] = minIndex; } return result; } argMax(x: Tensor, axis: number): Tensor { assertNotComplex(x, 'argMax'); const axes = [axis]; axis_util.assertAxesAreInnerMostDims('argMax', axes, x.rank); const [outShape, reduceShape] = axis_util.computeOutAndReduceShapes(x.shape, axes); const result = ops.zeros(outShape, 'int32'); const reduceSize = util.sizeFromShape(reduceShape); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < vals.length; ++i) { const offset = i * reduceSize; let max = aVals[offset]; let maxIndex = 0; for (let j = 0; j < reduceSize; ++j) { const value = aVals[offset + j]; if (value > max) { max = value; maxIndex = j; } } vals[i] = maxIndex; } return result; } cumsum(x: Tensor, axis: number, exclusive: boolean, reverse: boolean): Tensor { assertNotComplex(x, 'cumsum'); if (axis !== x.rank - 1) { throw new Error( 
`backend.cumsum in CPU expects an inner-most axis=${x.rank - 1} ` + `but got axis=${axis}`); } const resultDtype = upcastType(x.dtype, 'int32'); const result = ops.zeros(x.shape, resultDtype); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; const finalDim = x.shape[x.rank - 1]; const indexAdjuster = reverse ? (i: number, j: number) => i + finalDim - j - 1 : (i: number, j: number) => i + j; for (let i = 0; i < aVals.length; i += finalDim) { for (let j = 0; j < finalDim; j++) { const idx = indexAdjuster(i, j); if (j === 0) { vals[idx] = exclusive ? 0 : aVals[idx]; } else { const prevIdx = indexAdjuster(i, j - 1); vals[idx] = exclusive ? aVals[prevIdx] + vals[prevIdx] : aVals[idx] + vals[prevIdx]; } } } return result; } equal(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'equal'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return (aVal === bVal) ? 1 : 0; }); } notEqual(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'notEqual'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return (aVal !== bVal) ? 1 : 0; }); } less(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'less'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return (aVal < bVal) ? 1 : 0; }); } lessEqual(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'lessEqual'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return (aVal <= bVal) ? 1 : 0; }); } greater(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'greater'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return (aVal > bVal) ? 1 : 0; }); } greaterEqual(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'greaterEqual'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return (aVal >= bVal) ? 
1 : 0; }); } logicalNot<T extends Tensor>(x: T): T { assertNotComplex(x, 'logicalNot'); const values = this.readSync(x.dataId) as TypedArray; const newValues = new Uint8Array(values.length); for (let i = 0; i < values.length; ++i) { newValues[i] = values[i] ? 0 : 1; } return this.makeOutput(newValues, x.shape, 'bool'); } logicalAnd(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'logicalAnd'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return aVal && bVal; }); } logicalOr(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'logicalOr'); return this.broadcastedBinaryOp(a, b, 'bool', (aVal, bVal) => { return aVal || bVal; }); } select(condition: Tensor, a: Tensor, b: Tensor): Tensor { assertNotComplex([condition, a, b], 'select'); const values = this.readSync(condition.dataId) as TypedArray; const aValues = this.readSync(a.dataId) as TypedArray; const bValues = this.readSync(b.dataId) as TypedArray; const result = ops.zeros(a.shape, upcastType(a.dtype, b.dtype)); const newValues = this.readSync(result.dataId) as TypedArray; let index = 0; const offset = condition.rank === 0 || condition.rank > 1 || a.rank === 1 ? 
1 : util.sizeFromShape(a.shape.slice(1)); for (let i = 0; i < values.length; i++) { for (let j = 0; j < offset; j++) { if (values[i] === 1) { newValues[index++] = aValues[i]; } else { newValues[index++] = bValues[i]; } } } return result; } where(condition: Tensor): Tensor2D { assertNotComplex([condition], 'where'); const condVals = this.readSync(condition.dataId) as TypedArray; return whereImpl(condition.shape, condVals); } topk<T extends Tensor>(x: T, k: number, sorted: boolean): [T, T] { assertNotComplex(x, 'topk'); const xVals = this.readSync(x.dataId) as TypedArray; return topkImpl(xVals, x.shape, x.dtype as NumericDataType, k, sorted); } min(x: Tensor, axes: number[]): Tensor { assertNotComplex(x, 'min'); axis_util.assertAxesAreInnerMostDims('min', axes, x.rank); const [outShape, reduceShape] = axis_util.computeOutAndReduceShapes(x.shape, axes); const result = ops.zeros(outShape, x.dtype); const reduceSize = util.sizeFromShape(reduceShape); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < vals.length; ++i) { const offset = i * reduceSize; let min = aVals[offset]; for (let j = 0; j < reduceSize; ++j) { const value = aVals[offset + j]; if (value < min) { min = value; } } vals[i] = min; } return result; } minimum(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'minimum'); return this.broadcastedBinaryOp( a, b, a.dtype, (aVal, bVal) => Math.min(aVal, bVal)); } mod(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'mod'); return this.broadcastedBinaryOp(a, b, a.dtype, (aVal, bVal) => { const rem = aVal % bVal; if ((aVal < 0 && bVal < 0) || (aVal >= 0 && bVal >= 0)) { return rem; } else { return (rem + bVal) % bVal; } }); } max(x: Tensor, axes: number[]): Tensor { assertNotComplex(x, 'max'); axis_util.assertAxesAreInnerMostDims('max', axes, x.rank); const [outShape, reduceShape] = axis_util.computeOutAndReduceShapes(x.shape, axes); const result = ops.zeros(outShape, 
x.dtype); const reduceSize = util.sizeFromShape(reduceShape); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < vals.length; ++i) { const offset = i * reduceSize; let max = aVals[offset]; for (let j = 0; j < reduceSize; ++j) { const value = aVals[offset + j]; if (value > max) { max = value; } } vals[i] = max; } return result; } maximum(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'maximum'); return this.broadcastedBinaryOp( a, b, a.dtype, (aVal, bVal) => Math.max(aVal, bVal)); } all(x: Tensor, axes: number[]): Tensor { assertNotComplex(x, 'all'); axis_util.assertAxesAreInnerMostDims('all', axes, x.rank); const [outShape, reduceShape] = axis_util.computeOutAndReduceShapes(x.shape, axes); const result = ops.zeros(outShape, x.dtype); const reduceSize = util.sizeFromShape(reduceShape); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < vals.length; ++i) { const offset = i * reduceSize; let all = aVals[offset]; for (let j = 0; j < reduceSize; ++j) { const value = aVals[offset + j]; all = all && value; } vals[i] = all; } return result; } any(x: Tensor, axes: number[]): Tensor { assertNotComplex(x, 'any'); axis_util.assertAxesAreInnerMostDims('any', axes, x.rank); const [outShape, reduceShape] = axis_util.computeOutAndReduceShapes(x.shape, axes); const result = ops.zeros(outShape, x.dtype); const reduceSize = util.sizeFromShape(reduceShape); const vals = this.readSync(result.dataId) as TypedArray; const aVals = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < vals.length; ++i) { const offset = i * reduceSize; let anyVal = aVals[offset]; for (let j = 0; j < reduceSize; ++j) { const value = aVals[offset + j]; anyVal = anyVal || value; } vals[i] = anyVal; } return result; } squaredDifference(a: Tensor, b: Tensor): Tensor { assertNotComplex([a, b], 'squaredDifference'); return 
this.broadcastedBinaryOp(a, b, a.dtype, (aVal, bVal) => {
      const diff = aVal - bVal;
      return diff * diff;
    });
  }

  /** Elementwise ceiling; always produces a float32 output. */
  ceil<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'ceil');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Float32Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      newValues[i] = Math.ceil(values[i]);
    }
    return this.makeOutput(newValues, x.shape, 'float32');
  }

  /** Elementwise floor; always produces a float32 output. */
  floor<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'floor');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Float32Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      newValues[i] = Math.floor(values[i]);
    }
    return this.makeOutput(newValues, x.shape, 'float32');
  }

  /** Elementwise sign: -1, 0, or 1 (NaN maps to 0 via the else branch). */
  sign<T extends Tensor>(x: T): T {
    // BUG FIX: the op name passed to assertNotComplex was the literal 'x',
    // so complex-input errors reported a meaningless op name.
    assertNotComplex(x, 'sign');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Float32Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      if (values[i] < 0) {
        newValues[i] = -1;
      } else if (values[i] > 0) {
        newValues[i] = 1;
      } else {
        newValues[i] = 0;
      }
    }
    return this.makeOutput(newValues, x.shape, 'float32');
  }

  /** Elementwise NaN test; returns a bool tensor (1 where NaN). */
  isNaN<T extends Tensor>(x: T): T {
    // BUG FIX: op name was 'x'; report the real op name.
    assertNotComplex(x, 'isNaN');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Uint8Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      if (Number.isNaN(values[i])) {
        newValues[i] = 1;
      }
    }
    return this.makeOutput(newValues, x.shape, 'bool');
  }

  /** Elementwise infinity test; returns a bool tensor (1 where ±Infinity). */
  isInf<T extends Tensor>(x: T): T {
    // BUG FIX: op name was 'x'; report the real op name.
    assertNotComplex(x, 'isInf');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Uint8Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      if (Math.abs(values[i]) === Infinity) {
        newValues[i] = 1;
      }
    }
    return this.makeOutput(newValues, x.shape, 'bool');
  }

  /** Elementwise finiteness test; returns a bool tensor (1 where finite). */
  isFinite<T extends Tensor>(x: T): T {
    // BUG FIX: op name was 'x'; report the real op name.
    assertNotComplex(x, 'isFinite');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Uint8Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      if (Number.isFinite(values[i])) {
        newValues[i] =
1; } } return this.makeOutput(newValues, x.shape, 'bool'); } round<T extends Tensor>(x: T): T { assertNotComplex(x, 'round'); const values = this.readSync(x.dataId) as TypedArray; const newValues = new Float32Array(values.length); for (let i = 0; i < values.length; ++i) { // The algorithm is based on banker's rounding. const base = Math.floor(values[i]); if (values[i] - base < 0.5) { newValues[i] = Math.floor(values[i]); } else if (values[i] - base > 0.5) { newValues[i] = Math.ceil(values[i]); } else { if (base % 2.0 === 0.0) { newValues[i] = base; } else { newValues[i] = base + 1.0; } } } return this.makeOutput(newValues, x.shape, 'float32'); } exp<T extends Tensor>(x: T): T { assertNotComplex(x, 'exp'); const values = this.readSync(x.dataId) as TypedArray; const newValues = new Float32Array(values.length); for (let i = 0; i < values.length; ++i) { newValues[i] = Math.exp(values[i]); } return this.makeOutput(newValues, x.shape, 'float32'); } expm1<T extends Tensor>(x: T): T { assertNotComplex(x, 'expm1'); const values = this.readSync(x.dataId) as TypedArray; const newValues = new Float32Array(values.length); for (let i = 0; i < values.length; ++i) { newValues[i] = Math.expm1(values[i]); } return this.makeOutput(newValues, x.shape, 'float32'); } log<T extends Tensor>(x: T): T { assertNotComplex(x, 'log'); const values = this.readSync(x.dataId) as TypedArray; const newValues = new Float32Array(values.length); for (let i = 0; i < values.length; ++i) { const value = values[i]; newValues[i] = Math.log(value); } return this.makeOutput(newValues, x.shape, 'float32'); } log1p<T extends Tensor>(x: T): T { assertNotComplex(x, 'log1p'); const values = this.readSync(x.dataId) as TypedArray; const newValues = new Float32Array(values.length); for (let i = 0; i < values.length; ++i) { const value = values[i]; newValues[i] = Math.log1p(value); } return this.makeOutput(newValues, x.shape, 'float32'); } sqrt<T extends Tensor>(x: T): T { assertNotComplex(x, 'sqrt'); const values = 
this.readSync(x.dataId) as TypedArray;
    const newValues = new Float32Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      const value = values[i];
      newValues[i] = Math.sqrt(value);
    }
    return this.makeOutput(newValues, x.shape, 'float32');
  }

  /** Elementwise reciprocal square root (1 / sqrt(x)); float32 output. */
  rsqrt<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'rsqrt');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Float32Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      const value = values[i];
      newValues[i] = 1 / Math.sqrt(value);
    }
    return this.makeOutput(newValues, x.shape, 'float32');
  }

  /** Elementwise reciprocal (1 / x); float32 output. */
  reciprocal<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'reciprocal');
    const values = this.readSync(x.dataId) as TypedArray;
    const newValues = new Float32Array(values.length);
    for (let i = 0; i < values.length; ++i) {
      newValues[i] = 1 / values[i];
    }
    return this.makeOutput(newValues, x.shape, 'float32');
  }

  /** Identity activation. */
  linear<T extends Tensor>(x: T): T {
    return x;
  }

  /** Elementwise max(0, x); preserves the input dtype. */
  relu<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'relu');
    const res = ops.zeros(x.shape, x.dtype);
    const resVals = this.readSync(res.dataId) as TypedArray;
    const inVals = this.readSync(x.dataId) as TypedArray;
    for (let i = 0; i < inVals.length; ++i) {
      resVals[i] = Math.max(0, inVals[i]);
    }
    return res as T;
  }

  /** Elementwise min(max(0, x), 6); preserves the input dtype. */
  relu6<T extends Tensor>(x: T): T {
    // BUG FIX: the op name passed to assertNotComplex was 'relu' (copy-paste
    // from the method above), so complex-input errors reported the wrong op.
    assertNotComplex(x, 'relu6');
    const res = ops.zeros(x.shape, x.dtype);
    const resVals = this.readSync(res.dataId) as TypedArray;
    const inVals = this.readSync(x.dataId) as TypedArray;
    for (let i = 0; i < inVals.length; ++i) {
      resVals[i] = Math.min(Math.max(0, inVals[i]), 6);
    }
    return res as T;
  }

  /** Parameterized ReLU: x when x >= 0, a * x otherwise (a broadcasts). */
  prelu<T extends Tensor>(x: T, a: T): T {
    assertNotComplex([x, a], 'prelu');
    return this.broadcastedBinaryOp(
        x, a, x.dtype, (xValue, aValue) => xValue < 0 ?
aValue * xValue : xValue) as T; } elu<T extends Tensor>(x: T): T { assertNotComplex(x, 'elu'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { const v = values[i]; if (v >= 0) { resultValues[i] = v; } else { resultValues[i] = (Math.exp(v) - 1); } } return this.makeOutput(resultValues, x.shape, 'float32'); } eluDer<T extends Tensor>(dy: T, y: T): T { assertNotComplex([dy, y], 'eluDer'); const resultValues = new Float32Array(y.size); const values = this.readSync(y.dataId) as TypedArray; const dyValues = this.readSync(dy.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { const v = values[i]; if (v >= 1) { resultValues[i] = dyValues[i]; } else { resultValues[i] = dyValues[i] * (v + 1); } } return this.makeOutput(resultValues, y.shape, 'float32'); } selu<T extends Tensor>(x: T): T { assertNotComplex(x, 'selu'); // Stable and Attracting Fixed Point (0, 1) for Normalized Weights. // see: https://arxiv.org/abs/1706.02515 const scaleAlpha = selu_util.SELU_SCALEALPHA; const scale = selu_util.SELU_SCALE; const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { const v = values[i]; if (v >= 0) { resultValues[i] = scale * v; } else { resultValues[i] = scaleAlpha * (Math.exp(v) - 1); } } return this.makeOutput(resultValues, x.shape, 'float32'); } clip<T extends Tensor>(x: T, min: number, max: number): T { assertNotComplex(x, 'clip'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { const v = values[i]; resultValues[i] = v > max ? max : (v < min ? 
min : v); } return this.makeOutput(resultValues, x.shape, 'float32'); } abs<T extends Tensor>(x: T): T { const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.abs(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } complexAbs<T extends Tensor>(x: T): T { const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < x.size; ++i) { const real = values[i * 2]; const imag = values[i * 2 + 1]; resultValues[i] = Math.hypot(real, imag); } return this.makeOutput(resultValues, x.shape, 'float32'); } int<T extends Tensor>(x: T): T { assertNotComplex(x, 'int'); const resultValues = new Int32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = values[i]; } return this.makeOutput(resultValues, x.shape, 'int32'); } sigmoid<T extends Tensor>(x: T): T { assertNotComplex(x, 'sigmoid'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = 1 / (1 + Math.exp(-values[i])); } return this.makeOutput(resultValues, x.shape, 'float32'); } softplus<T extends Tensor>(x: T): T { assertNotComplex(x, 'softplus'); // mirrors the implementation of tf.nn.softplus: https://goo.gl/vkcvwX // epsilon is the difference between 1.0 and the next representable float. // For a single precision 32 bit float this should be 2^-23, see: // https://math.byu.edu/~schow/work/IEEEFloatingPoint.htm const epsilon = 1.1920928955078125e-7; const threshold = Math.log(epsilon) + 2.0; const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { // Value above which exp(x) may overflow, but softplus(x) == x // is within machine epsilon. 
const tooLarge = values[i] > -threshold; // Value below which exp(x) may underflow, but softplus(x) == exp(x) // is within machine epsilon. const tooSmall = values[i] < threshold; const expX = Math.exp(values[i]); let result; if (tooSmall) { result = expX; } else if (tooLarge) { result = values[i]; } else { result = Math.log(1.0 + expX); } resultValues[i] = result; } return this.makeOutput(resultValues, x.shape, 'float32'); } sin<T extends Tensor>(x: T): T { assertNotComplex(x, 'sin'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.sin(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } cos<T extends Tensor>(x: T): T { assertNotComplex(x, 'cos'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.cos(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } tan<T extends Tensor>(x: T): T { assertNotComplex(x, 'tan'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.tan(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } asin<T extends Tensor>(x: T): T { assertNotComplex(x, 'asin'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.asin(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } acos<T extends Tensor>(x: T): T { assertNotComplex(x, 'acos'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.acos(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } atan<T extends Tensor>(x: T): T { 
assertNotComplex(x, 'atan'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.atan(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } atan2<T extends Tensor>(a: T, b: T): T { assertNotComplex([a, b], 'atan2'); return this.broadcastedBinaryOp( a, b, a.dtype, (aValue, bValue) => Math.atan2(aValue, bValue)) as T; } sinh<T extends Tensor>(x: T): T { assertNotComplex(x, 'sinh'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.sinh(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } cosh<T extends Tensor>(x: T): T { assertNotComplex(x, 'cosh'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.cosh(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } tanh<T extends Tensor>(x: T): T { assertNotComplex(x, 'tanh'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = util.tanh(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } asinh<T extends Tensor>(x: T): T { assertNotComplex(x, 'asinh'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.asinh(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } acosh<T extends Tensor>(x: T): T { assertNotComplex(x, 'acosh'); const resultValues = new Float32Array(x.size); const values = this.readSync(x.dataId) as TypedArray; for (let i = 0; i < values.length; ++i) { resultValues[i] = Math.acosh(values[i]); } return this.makeOutput(resultValues, x.shape, 'float32'); } 
  /** Element-wise inverse hyperbolic tangent; output is float32. */
  atanh<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'atanh');
    const resultValues = new Float32Array(x.size);
    const values = this.readSync(x.dataId) as TypedArray;
    for (let i = 0; i < values.length; ++i) {
      resultValues[i] = Math.atanh(values[i]);
    }
    return this.makeOutput(resultValues, x.shape, 'float32');
  }

  /**
   * Element-wise Gauss error function, approximated with the
   * Abramowitz & Stegun rational polynomial (coefficients from erf_util).
   * Uses erf(-x) = -erf(x) so the polynomial is only evaluated for |x|.
   */
  erf<T extends Tensor>(x: T): T {
    assertNotComplex(x, 'erf');
    const resultValues = new Float32Array(x.size);
    const values = this.readSync(x.dataId) as TypedArray;
    const p = erf_util.ERF_P;
    const a1 = erf_util.ERF_A1;
    const a2 = erf_util.ERF_A2;
    const a3 = erf_util.ERF_A3;
    const a4 = erf_util.ERF_A4;
    const a5 = erf_util.ERF_A5;
    for (let i = 0; i < values.length; ++i) {
      const sign = Math.sign(values[i]);
      const v = Math.abs(values[i]);
      const t = 1.0 / (1.0 + p * v);
      // Horner-style evaluation of the degree-5 polynomial in t.
      resultValues[i] = sign *
          (1.0 -
           (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t *
               Math.exp(-v * v));
    }
    return this.makeOutput(resultValues, x.shape, 'float32');
  }

  /**
   * Element-wise step function: 1 where x > 0, `alpha` where x <= 0.
   * NaN inputs propagate as NaN (the explicit isNaN branch is needed
   * because `NaN > 0` is false and would otherwise yield `alpha`).
   */
  step<T extends Tensor>(x: T, alpha = 0): T {
    assertNotComplex(x, 'step');
    const resultValues = new Float32Array(x.size);
    const values = this.readSync(x.dataId) as TypedArray;
    for (let i = 0; i < values.length; ++i) {
      const value = values[i];
      if (isNaN(value)) {
        resultValues[i] = NaN;
      } else {
        resultValues[i] = value > 0 ? 1 : alpha;
      }
    }
    return this.makeOutput(resultValues, x.shape, 'float32');
  }

  /**
   * Fused conv2d: plain conv2d followed by an optional bias add and an
   * optional activation, applied in that order. Not a single fused kernel on
   * this backend — it simply chains the three existing ops.
   */
  fusedConv2d(
      {input, filter, convInfo, bias, activation, preluActivationWeights}:
          FusedConv2DConfig): Tensor4D {
    let result = this.conv2d(input, filter, convInfo);

    if (bias) {
      result = this.add(result, bias) as Tensor4D;
    }
    if (activation) {
      result =
          mapActivation(this, result, activation, preluActivationWeights) as
          Tensor4D;
    }
    return result;
  }

  /**
   * Direct (non-im2col) 2D convolution.
   *
   * Supports both 'channelsLast' (NHWC) and channels-first (NCHW) layouts by
   * selecting row/col/channel strides from x and y up front; the loop body is
   * layout-agnostic after that. Out-of-bounds input rows/cols (from padding)
   * are skipped via `continue`, which is equivalent to zero-padding since the
   * output buffer starts zeroed.
   *
   * Filter layout is [filterHeight, filterWidth, inChannels, outChannels]
   * (its innermost stride over outChannels is 1, hence `wVals[wOffset3 + d2]`).
   */
  conv2d(x: Tensor4D, filter: Tensor4D, convInfo: Conv2DInfo): Tensor4D {
    assertNotComplex([x, filter], 'conv2d');

    const filterHeight = convInfo.filterHeight;
    const filterWidth = convInfo.filterWidth;
    const dilationHeight = convInfo.dilationHeight;
    const dilationWidth = convInfo.dilationWidth;
    const padLeft = convInfo.padInfo.left;
    const padTop = convInfo.padInfo.top;
    const isChannelsLast = convInfo.dataFormat === 'channelsLast';

    // Output buffer; ops.buffer zero-fills, so skipped (padded) taps
    // contribute nothing.
    const y = ops.buffer(convInfo.outShape, x.dtype as 'float32');

    // Layout-dependent strides: in NHWC channels are contiguous (stride 1);
    // in NCHW columns are contiguous instead.
    const xBatchStride = x.strides[0];
    const xRowStride = isChannelsLast ? x.strides[1] : x.strides[2];
    const xColStride = isChannelsLast ? x.strides[2] : 1;
    const xChannelStride = isChannelsLast ? 1 : x.strides[1];
    const yBatchStride = y.strides[0];
    const yRowStride = isChannelsLast ? y.strides[1] : y.strides[2];
    const yColStride = isChannelsLast ? y.strides[2] : 1;
    const yChannelStride = isChannelsLast ? 1 : y.strides[1];

    const xVals = this.readSync(x.dataId) as TypedArray;
    const wVals = this.readSync(filter.dataId) as TypedArray;
    const yVals = y.values;

    for (let b = 0; b < convInfo.batchSize; ++b) {
      const xOffset1 = b * xBatchStride;
      const yOffset1 = b * yBatchStride;
      for (let yR = 0; yR < convInfo.outHeight; ++yR) {
        const yOffset2 = yOffset1 + yR * yRowStride;
        // Top-left input row covered by this output row (may be negative
        // inside the padded region).
        const xRCorner = yR * convInfo.strideHeight - padTop;
        for (let wR = 0; wR < filterHeight; wR++) {
          const xR = xRCorner + wR * dilationHeight;
          if (xR < 0 || xR >= convInfo.inHeight) {
            // Tap falls in the vertical padding; zero contribution.
            continue;
          }
          const wOffset1 = wR * filter.strides[0];
          const xOffset2 = xOffset1 + xR * xRowStride;
          for (let yC = 0; yC < convInfo.outWidth; ++yC) {
            const yOffset3 = yOffset2 + yC * yColStride;
            const xCCorner = yC * convInfo.strideWidth - padLeft;
            for (let wC = 0; wC < filterWidth; wC++) {
              const xC = xCCorner + wC * dilationWidth;
              if (xC < 0 || xC >= convInfo.inWidth) {
                // Tap falls in the horizontal padding; zero contribution.
                continue;
              }
              const wOffset2 = wOffset1 + wC * filter.strides[1];
              const xOffset3 = xOffset2 + xC * xColStride;
              let wOffset3 = wOffset2;
              // Accumulate over input channels; each input channel
              // contributes to every output channel.
              for (let d1 = 0; d1 < convInfo.inChannels; ++d1) {
                const xVal = xVals[xOffset3 + d1 * xChannelStride];
                for (let d2 = 0; d2 < convInfo.outChannels; ++d2) {
                  yVals[yOffset3 + d2 * yChannelStride] +=
                      xVal * wVals[wOffset3 + d2];
                }
                // Advance to the weights of the next input channel.
                wOffset3 += convInfo.outChannels;
              }
            }
          }
        }
      }
    }
    return y.toTensor() as Tensor4D;
  }

  conv3d(x: Tensor5D, filter: Tensor5D, convIn