@tensorflow-models/body-pix
Version:
Pretrained BodyPix model in TensorFlow.js
932 lines (909 loc) • 102 kB
JavaScript
/**
* @license
* Copyright 2019 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('@tensorflow/tfjs-core'), require('@tensorflow/tfjs-converter')) :
typeof define === 'function' && define.amd ? define(['exports', '@tensorflow/tfjs-core', '@tensorflow/tfjs-converter'], factory) :
(factory((global.bodyPix = {}),global.tf,global.tf));
}(this, (function (exports,tf,tfconv) { 'use strict';
/*! *****************************************************************************
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of the
License at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions
and limitations under the License.
***************************************************************************** */
/* global Reflect, Promise */
/**
 * Links the static side of a derived constructor `d` to its base `b`.
 *
 * On first use the best available strategy is selected and stored back into
 * `extendStatics`, so later calls go straight to it:
 *   1. `Object.setPrototypeOf` when the runtime provides it,
 *   2. direct `__proto__` assignment when object literals honour `__proto__`,
 *   3. otherwise a copy of the base's own enumerable properties.
 */
var extendStatics = function (d, b) {
    var viaProtoAssignment = function (target, base) { target.__proto__ = base; };
    var viaPropertyCopy = function (target, base) {
        for (var key in base) {
            if (base.hasOwnProperty(key)) {
                target[key] = base[key];
            }
        }
    };
    if (Object.setPrototypeOf) {
        extendStatics = Object.setPrototypeOf;
    }
    else if ({ __proto__: [] } instanceof Array) {
        extendStatics = viaProtoAssignment;
    }
    else {
        extendStatics = viaPropertyCopy;
    }
    return extendStatics(d, b);
};
/**
 * ES5 class-inheritance helper: makes constructor `d` extend base `b`.
 *
 * Statics are linked through `extendStatics`. The prototype chain is built
 * with a throwaway surrogate constructor so that `b` itself is never invoked.
 * A `null` base yields a prototype with no parent.
 */
function __extends(d, b) {
    function Surrogate() { this.constructor = d; }
    extendStatics(d, b);
    if (b === null) {
        d.prototype = Object.create(b);
        return;
    }
    Surrogate.prototype = b.prototype;
    d.prototype = new Surrogate();
}
/**
 * Object-spread helper: copies own enumerable properties of every source
 * argument onto the first argument and returns it.
 *
 * The first call picks `Object.assign` when available (otherwise the manual
 * copy loop below) and stores it back into `__assign` for later calls.
 */
var __assign = function () {
    var copyOwnProps = function (target) {
        for (var idx = 1, count = arguments.length; idx < count; idx++) {
            var source = arguments[idx];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    __assign = Object.assign || copyOwnProps;
    return __assign.apply(this, arguments);
};
/**
 * Drives a generator-style state machine to completion and exposes the
 * outcome as a promise (the ES5 lowering of an `async` function).
 *
 * @param thisArg     `this` value used when invoking `generator`.
 * @param _arguments  Arguments forwarded to `generator` (defaults to `[]`).
 * @param P           Promise constructor to use; falls back to global `Promise`.
 * @param generator   Function returning an iterator with `next` and `throw`.
 * @returns A `P` instance resolved with the iterator's final value, or
 *          rejected with any error thrown while stepping it.
 */
function __awaiter(thisArg, _arguments, P, generator) {
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the iterator with a fulfilled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Resume the iterator by throwing the rejection reason into it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished -> resolve; otherwise wrap the yielded value in a P and continue when it settles.
function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
// `generator` is rebound from the factory function to the iterator it returns.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
}
/**
 * Builds an iterator-like object (`next` / `throw` / `return`) around `body`,
 * a function that is called repeatedly with the state object `_` and returns
 * an instruction tuple `[opcode, operand]`.
 *
 * State object `_`:
 *   - `label`: the resume point that `body` switches on.
 *   - `sent()`: returns the last value passed in, or throws it when the last
 *     instruction had bit 1 set (i.e. came from `throw`).
 *   - `trys`: stack of active try regions; `ops`: pending instructions.
 *
 * Opcodes as handled in `step` below: 0/1 store the incoming op for `sent()`,
 * 2 is return, 3 is jump to label `op[1]`, 4 is yield `op[1]`, 5 delegates to
 * the iterator in `op[1]`, 6 is an error caught from `body`, and 7 pops a
 * pending op and a try region.
 *
 * @param thisArg  `this` value used for every `body` call.
 * @param body     State-machine function `(_ ) => [opcode, operand]`.
 * @returns Object with `next`, `throw`, `return` and, when `Symbol` exists,
 *          `Symbol.iterator`.
 */
function __generator(thisArg, body) {
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// next -> opcode 0, throw -> opcode 1, return -> opcode 2.
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// `f` is the re-entrancy flag; it is cleared in the `finally` below.
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
// While delegating (`y` set), forward the call to the inner iterator first.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
case 4: _.label++; return { value: op[1], done: false };
case 5: _.label++; y = op[1]; op = [0]; continue;
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// No active try region and the op is an error (6) or return (2): stop the machine.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
op = body.call(thisArg, _);
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Bits 1|4 set (opcodes 1, 5, 6, 7...) means the final op carries an error to rethrow.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
}
/**
 * Takes the arg-max over axis 2 of `partHeatmapScores`, flattens it to one
 * dimension and one-hot encodes it with depth `partHeatmapScores.shape[2]`.
 *
 * @param partHeatmapScores Tensor whose third dimension indexes body parts.
 * @returns The result of `tf.oneHot` on the flattened arg-max indices.
 */
function toFlattenedOneHotPartMap(partHeatmapScores) {
    var depth = partHeatmapScores.shape[2];
    var flatArgMax = partHeatmapScores.argMax(2).reshape([-1]);
    return tf.oneHot(flatArgMax, depth);
}
/**
 * Multiplies `image` element-wise by `mask` via `image.mul(mask)`.
 *
 * @returns The product tensor.
 */
function clipByMask2d(image, mask) {
    var clipped = image.mul(mask);
    return clipped;
}
/**
 * Thresholds `segmentScores`: entries strictly greater than `threshold`
 * become 1, the rest 0 (the comparison result cast with `toInt`).
 * Runs inside `tf.tidy`.
 *
 * @param segmentScores Tensor of scores.
 * @param threshold     Scalar cut-off.
 * @returns Integer mask tensor.
 */
function toMaskTensor(segmentScores, threshold) {
    return tf.tidy(function () {
        var cutoff = tf.scalar(threshold);
        var aboveCutoff = segmentScores.greater(cutoff);
        return aboveCutoff.toInt();
    });
}
/**
 * Builds a 2-D part-id map from `partHeatmapScores` and clips it with
 * `segmentationMask`.
 *
 * The per-pixel part id is recovered by multiplying the one-hot part map with
 * a column of part numbers. Ids are shifted up by one before the mask
 * multiplication and shifted back afterwards, so positions where the mask is 0
 * end up as -1.
 *
 * @param segmentationMask  Mask multiplied into the part map.
 * @param partHeatmapScores Tensor with shape [height, width, numParts].
 * @returns int32 tensor of shape [height, width].
 */
function decodePartSegmentation(segmentationMask, partHeatmapScores) {
    var shape = partHeatmapScores.shape;
    var rows = shape[0];
    var cols = shape[1];
    var partCount = shape[2];
    return tf.tidy(function () {
        var oneHotParts = toFlattenedOneHotPartMap(partHeatmapScores);
        var partIdColumn = tf.range(0, partCount, 1, 'int32').expandDims(1);
        var partIds = oneHotParts.matMul(partIdColumn).toInt().reshape([rows, cols]);
        var shiftedUp = partIds.add(tf.scalar(1, 'int32'));
        var clipped = clipByMask2d(shiftedUp, segmentationMask);
        return clipped.sub(tf.scalar(1, 'int32'));
    });
}
/**
 * Builds a 2-D part-id map from `partHeatmapScores` without applying any
 * segmentation mask: the one-hot part map is multiplied with a column of part
 * numbers and reshaped to [height, width].
 *
 * @param partHeatmapScores Tensor with shape [height, width, numParts].
 * @returns int32 tensor of shape [height, width].
 */
function decodeOnlyPartSegmentation(partHeatmapScores) {
    var shape = partHeatmapScores.shape;
    var rows = shape[0];
    var cols = shape[1];
    var partCount = shape[2];
    return tf.tidy(function () {
        var oneHotParts = toFlattenedOneHotPartMap(partHeatmapScores);
        var partIdColumn = tf.range(0, partCount, 1, 'int32').expandDims(1);
        var flatPartIds = oneHotParts.matMul(partIdColumn).toInt();
        return flatPartIds.reshape([rows, cols]);
    });
}
/**
 * Common wrapper around a loaded graph model. Subclasses supply
 * `preprocessInput(input)` and `nameOutputResults(results)`.
 */
var BaseModel = (function () {
    /**
     * @param model        Model object exposing `inputs`, `predict` and `dispose`.
     * @param outputStride Stored on the instance as `outputStride`.
     * Asserts (via `tf.util.assert`) that dimensions 1 and 2 of the model's
     * first input shape are both -1.
     */
    function BaseModel(model, outputStride) {
        this.model = model;
        this.outputStride = outputStride;
        var inputShape = this.model.inputs[0].shape;
        tf.util.assert((inputShape[1] === -1) && (inputShape[2] === -1), function () {
            // The original text read "must both be equal to or -1".
            return "Input shape [" + inputShape[1] + ", " + inputShape[2] + "] " +
                "must both be equal to -1";
        });
    }
    /**
     * Runs the model on a single input tensor inside `tf.tidy`.
     *
     * The input is cast to float, passed through the subclass's
     * `preprocessInput`, given a leading batch dimension, and fed to
     * `model.predict`. Each output has its batch dimension squeezed away and
     * is then labelled by the subclass's `nameOutputResults`.
     *
     * @param input Tensor exposing `toFloat()`.
     * @returns Object with `heatmapScores` (sigmoid of the heatmap output),
     *          `offsets`, `displacementFwd`, `displacementBwd`, `segmentation`,
     *          `partHeatmaps`, `longOffsets` and `partOffsets`.
     */
    BaseModel.prototype.predict = function (input) {
        var _this = this;
        return tf.tidy(function () {
            var asFloat = _this.preprocessInput(input.toFloat());
            var asBatch = asFloat.expandDims(0);
            var results = _this.model.predict(asBatch);
            var results3d = results.map(function (y) { return y.squeeze([0]); });
            var namedResults = _this.nameOutputResults(results3d);
            return {
                heatmapScores: namedResults.heatmap.sigmoid(),
                offsets: namedResults.offsets,
                displacementFwd: namedResults.displacementFwd,
                displacementBwd: namedResults.displacementBwd,
                segmentation: namedResults.segmentation,
                partHeatmaps: namedResults.partHeatmaps,
                longOffsets: namedResults.longOffsets,
                partOffsets: namedResults.partOffsets
            };
        });
    };
    /** Disposes the wrapped model. */
    BaseModel.prototype.dispose = function () {
        this.model.dispose();
    };
    return BaseModel;
}());
/**
 * `BaseModel` specialisation that supplies the input scaling and the
 * output-tensor ordering used by the MobileNet variant.
 */
var MobileNet = (function (_super) {
    __extends(MobileNet, _super);
    function MobileNet() {
        return _super !== null && _super.apply(this, arguments) || this;
    }
    /**
     * Scales the input as `input / 127.5 - 1` inside `tf.tidy`.
     */
    MobileNet.prototype.preprocessInput = function (input) {
        return tf.tidy(function () {
            var scaled = tf.div(input, 127.5);
            return scaled.sub(1.0);
        });
    };
    /**
     * Labels the positional model outputs. Index order:
     * 0 offsets, 1 segmentation, 2 partHeatmaps, 3 longOffsets, 4 heatmap,
     * 5 displacementFwd, 6 displacementBwd, 7 partOffsets.
     */
    MobileNet.prototype.nameOutputResults = function (results) {
        return {
            offsets: results[0],
            segmentation: results[1],
            partHeatmaps: results[2],
            longOffsets: results[3],
            heatmap: results[4],
            displacementFwd: results[5],
            displacementBwd: results[6],
            partOffsets: results[7]
        };
    };
    return MobileNet;
}(BaseModel));
/** Keypoint names; a keypoint's id is its index in this list. */
var PART_NAMES = [
    'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
    'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
    'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle'
];
/** Number of keypoints per pose. */
var NUM_KEYPOINTS = PART_NAMES.length;
/** Lookup from keypoint name to keypoint id. */
var PART_IDS = {};
PART_NAMES.forEach(function (name, index) {
    PART_IDS[name] = index;
});
/** Name pairs of keypoints that are treated as connected. */
var CONNECTED_PART_NAMES = [
    ['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
    ['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
    ['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
    ['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
    ['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
    ['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip']
];
/** [parent, child] name pairs forming a tree rooted at 'nose'. */
var POSE_CHAIN = [
    ['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
    ['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
    ['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
    ['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
    ['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
    ['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
    ['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
    ['rightKnee', 'rightAnkle']
];
/** `CONNECTED_PART_NAMES` with each name replaced by its keypoint id. */
var CONNECTED_PART_INDICES = CONNECTED_PART_NAMES.map(function (pair) {
    var firstId = PART_IDS[pair[0]];
    var secondId = PART_IDS[pair[1]];
    return [firstId, secondId];
});
/**
 * Computes the ratio between the input resolution and the padded image size.
 *
 * @param _a      [height, width] of the image.
 * @param _b      [inputResolutionY, inputResolutionX].
 * @param padding Object with `top`, `bottom`, `left`, `right`.
 * @returns [scaleX, scaleY] — note the x-first order.
 */
function getScale(_a, _b, padding) {
    var paddedHeight = padding.top + padding.bottom + _a[0];
    var paddedWidth = padding.left + padding.right + _a[1];
    var verticalScale = _b[0] / paddedHeight;
    var horizontalScale = _b[1] / paddedWidth;
    return [horizontalScale, verticalScale];
}
/**
 * Reads the offset vector for `keypoint` at heatmap cell (y, x).
 * The y component is stored at channel `keypoint`, the x component at
 * channel `keypoint + NUM_KEYPOINTS`.
 *
 * @param offsets Buffer-like object exposing `get(y, x, channel)`.
 * @returns `{ y, x }`.
 */
function getOffsetPoint(y, x, keypoint, offsets) {
    var offsetY = offsets.get(y, x, keypoint);
    var offsetX = offsets.get(y, x, keypoint + NUM_KEYPOINTS);
    return { y: offsetY, x: offsetX };
}
/**
 * Converts a heatmap part location into image coordinates: the heatmap cell
 * is multiplied by `outputStride` and the offset vector for that cell and
 * keypoint is added.
 *
 * @param part Object with `heatmapY`, `heatmapX` and `id`.
 * @returns `{ x, y }`.
 */
function getImageCoords(part, outputStride, offsets) {
    var offset = getOffsetPoint(part.heatmapY, part.heatmapX, part.id, offsets);
    var imageX = part.heatmapX * outputStride + offset.x;
    var imageY = part.heatmapY * outputStride + offset.y;
    return { x: imageX, y: imageY };
}
/**
 * Restricts `a` to the range [min, max]. The lower bound is checked first.
 */
function clamp(a, min, max) {
    return a < min ? min : (a > max ? max : a);
}
/**
 * Squared Euclidean distance between (y1, x1) and (y2, x2).
 */
function squaredDistance(y1, x1, y2, x2) {
    var deltaY = y2 - y1;
    var deltaX = x2 - x1;
    var verticalTerm = deltaY * deltaY;
    var horizontalTerm = deltaX * deltaX;
    return verticalTerm + horizontalTerm;
}
/**
 * Component-wise sum of two `{ x, y }` vectors; returns a new object.
 */
function addVectors(a, b) {
    var sumX = a.x + b.x;
    var sumY = a.y + b.y;
    return { x: sumX, y: sumY };
}
/**
 * Mean squared distance between embedded points and a pose's keypoints.
 *
 * Only keypoints whose `score` is strictly greater than `minPartScore`
 * contribute. When none qualify the result is `Infinity`.
 *
 * @param embedding    Array of `{ x, y }`, indexed like `pose.keypoints`.
 * @param pose         Object with `keypoints[i].score` and `.position`.
 * @param minPartScore Score threshold; defaults to 0.3.
 */
function computeDistance(embedding, pose, minPartScore) {
    if (minPartScore === void 0) { minPartScore = 0.3; }
    var total = 0.0;
    var counted = 0;
    for (var idx = 0; idx < embedding.length; idx++) {
        var keypoint = pose.keypoints[idx];
        if (!(keypoint.score > minPartScore)) {
            continue;
        }
        var diffX = embedding[idx].x - keypoint.position.x;
        var diffY = embedding[idx].y - keypoint.position.y;
        total += Math.pow(diffX, 2) + Math.pow(diffY, 2);
        counted += 1;
    }
    return counted === 0 ? Infinity : total / counted;
}
/**
 * Maps an image-space position to a rounded cell index in the strided output.
 * Each axis is computed as `round(((pad + coord + 1) * scale - 1) / stride)`.
 *
 * @param position `{ x, y }` in image space.
 * @param _a       [padT, padL].
 * @param _b       [scaleX, scaleY].
 * @param stride   Output stride.
 * @returns `{ x, y }` cell indices.
 */
function convertToPositionInOuput(position, _a, _b, stride) {
    function project(coord, pad, scale) {
        return Math.round(((pad + coord + 1.0) * scale - 1.0) / stride);
    }
    var row = project(position.y, _a[0], _b[1]);
    var col = project(position.x, _a[1], _b[0]);
    return { x: col, y: row };
}
/**
 * Follows the long-offset field from `location` to where the keypoint
 * `keypointIndex` is estimated to be, refining the estimate `refineSteps`
 * times.
 *
 * `longOffsets` is read as a flat array. For output cell `nn`
 * (`y * outputResolutionX + x`), the y offset of keypoint k is at
 * `NUM_KEYPOINTS * (2 * nn) + k` and the x offset at
 * `NUM_KEYPOINTS * (2 * nn + 1) + k`.
 *
 * Before each refinement lookup the current estimate is clamped to
 * `[0, height - 1]` x `[0, width - 1]`. The lower bound matters: with only an
 * upper clamp, an estimate that drifts above or left of the image produces a
 * negative array index, the lookup yields `undefined`, and the result turns
 * into NaN.
 *
 * @param location          `{ x, y }` start position.
 * @param keypointIndex     Keypoint whose offsets are followed.
 * @param convertToPosition Maps `{ x, y }` to output cell indices `{ x, y }`.
 * @param outputResolutionX Row length used to flatten cell indices.
 * @param longOffsets       Flat, array-like offset data.
 * @param refineSteps       Number of refinement iterations.
 * @param _a                [height, width] used for clamping.
 * @returns `{ x, y }` estimated keypoint position (not clamped itself).
 */
function getEmbedding(location, keypointIndex, convertToPosition, outputResolutionX, longOffsets, refineSteps, _a) {
    var height = _a[0], width = _a[1];
    // Looks up the (dy, dx) offset stored for the output cell nearest `point`.
    function lookupOffset(point) {
        var cell = convertToPosition(point);
        var flatIndex = cell.y * outputResolutionX + cell.x;
        return {
            y: longOffsets[NUM_KEYPOINTS * (2 * flatIndex) + keypointIndex],
            x: longOffsets[NUM_KEYPOINTS * (2 * flatIndex + 1) + keypointIndex]
        };
    }
    var offset = lookupOffset(location);
    var y = location.y + offset.y;
    var x = location.x + offset.x;
    for (var t = 0; t < refineSteps; t++) {
        // Keep the lookup position inside the image on both sides.
        y = Math.max(0, Math.min(y, height - 1));
        x = Math.max(0, Math.min(x, width - 1));
        offset = lookupOffset({ x: x, y: y });
        y = y + offset.y;
        x = x + offset.x;
    }
    return { x: x, y: y };
}
/**
 * Finds the pose that best matches a pixel.
 *
 * The pixel at `location` is embedded once per keypoint (for the first
 * `numKptForMatching` keypoints) with `getEmbedding`, and every pose is scored
 * with `computeDistance` (using that function's default score threshold).
 *
 * @param _a [padT, padL]; @param _b [scaleX, scaleY]; @param _c [height, width].
 * @returns Index of the pose with the strictly smallest distance, or -1 when
 *          no pose has a distance below `Infinity`.
 */
function matchEmbeddingToInstance(location, longOffsets, poses, numKptForMatching, _a, _b, outputResolutionX, _c, stride, refineSteps) {
    var padTop = _a[0], padLeft = _a[1];
    var xScale = _b[0], yScale = _b[1];
    var imgHeight = _c[0], imgWidth = _c[1];
    function toOutputPosition(point) {
        return convertToPositionInOuput(point, [padTop, padLeft], [xScale, yScale], stride);
    }
    var embeddings = [];
    var kpt = 0;
    while (kpt < numKptForMatching) {
        embeddings.push(getEmbedding(location, kpt, toOutputPosition, outputResolutionX, longOffsets, refineSteps, [imgHeight, imgWidth]));
        kpt++;
    }
    var bestIndex = -1;
    var bestDistance = Infinity;
    for (var poseIndex = 0; poseIndex < poses.length; poseIndex++) {
        var candidate = computeDistance(embeddings, poses[poseIndex]);
        if (candidate < bestDistance) {
            bestDistance = candidate;
            bestIndex = poseIndex;
        }
    }
    return bestIndex;
}
/**
 * Size of the strided output for a given input resolution; each axis is
 * `round((size - 1) / stride + 1)`.
 *
 * @param _a     [inputResolutionY, inputResolutionX].
 * @param stride Output stride.
 * @returns [outputResolutionX, outputResolutionY] — note the x-first order.
 */
function getOutputResolution(_a, stride) {
    function stridedSize(inputSize) {
        return Math.round((inputSize - 1.0) / stride + 1.0);
    }
    var outX = stridedSize(_a[1]);
    var outY = stridedSize(_a[0]);
    return [outX, outY];
}
function decodeMultipleMasksCPU(segmentation, longOffsets, posesAboveScore, height, width, stride, _a, padding, refineSteps, numKptForMatching) {
var inHeight = _a[0], inWidth = _a[1];
if (numKptForMatching === void 0) { numKptForMatching = 5; }
var dataArrays = posesAboveScore.map(function (x) { return new Uint8Array(height * width).fill(0); });
var padT = padding.top, padL = padding.left;
var _b = getScale([height, width], [inHeight, inWidth], padding), scaleX = _b[0], scaleY = _b[1];
var outputResolutionX = getOutputResolution([inHeight, inWidth], stride)[0];
for (var i = 0; i < height; i += 1) {
for (var j = 0; j < width; j += 1) {
var n = i * width + j;
var prob = segmentation[n];
if (prob === 1) {
var kMin = matchEmbeddingToInstance({ x: j, y: i }, longOffsets, posesAboveScore, numKptForMatching, [padT, padL], [scaleX, scaleY], outputResolutionX, [height, width], stride, refineSteps);
if (kMin >= 0) {
dataArrays[kMin][n] = 1;
}
}
}
}
return dataArrays;
}
/**
 * CPU path: splits a part segmentation into one part map per pose.
 *
 * One `Int32Array(height * width)` filled with -1 is produced per pose. A
 * pixel whose `segmentation` value is exactly 1 is matched with
 * `matchEmbeddingToInstance`; when a pose index >= 0 comes back, that pose's
 * map receives `partSegmentaion[pixel]`.
 *
 * @param segmentation      Flat, row-major array-like of 0/1 values.
 * @param longOffsets       Flat long-offset data passed through to matching.
 * @param partSegmentaion   Flat array-like of part ids, indexed like `segmentation`.
 * @param posesAboveScore   Candidate poses.
 * @param _a                [inHeight, inWidth] input resolution.
 * @param padding           Object with `top`, `left`, `bottom`, `right`.
 * @param numKptForMatching Keypoints used for matching; defaults to 5.
 * @returns Array of part maps, one per entry of `posesAboveScore`.
 */
function decodeMultiplePartMasksCPU(segmentation, longOffsets, partSegmentaion, posesAboveScore, height, width, stride, _a, padding, refineSteps, numKptForMatching) {
    var inHeight = _a[0], inWidth = _a[1];
    if (numKptForMatching === void 0) { numKptForMatching = 5; }
    var partMaps = posesAboveScore.map(function () {
        return new Int32Array(height * width).fill(-1);
    });
    var pads = [padding.top, padding.left];
    var scales = getScale([height, width], [inHeight, inWidth], padding);
    var outputResolutionX = getOutputResolution([inHeight, inWidth], stride)[0];
    for (var row = 0; row < height; row += 1) {
        for (var col = 0; col < width; col += 1) {
            var pixel = row * width + col;
            if (segmentation[pixel] !== 1) {
                continue;
            }
            var owner = matchEmbeddingToInstance({ x: col, y: row }, longOffsets, posesAboveScore, numKptForMatching, [pads[0], pads[1]], [scales[0], scales[1]], outputResolutionX, [height, width], stride, refineSteps);
            if (owner >= 0) {
                partMaps[owner][pixel] = partSegmentaion[pixel];
            }
        }
    }
    return partMaps;
}
/**
 * WebGL path: runs a custom shader that, for every segmentation pixel, finds
 * the index of the nearest pose (or -1).
 *
 * Pose data is packed into a `[maxNumPeople, NUM_KEYPOINTS, 3]` tensor holding
 * `(score, y, x)` per keypoint; unused slots stay 0. `longOffsets` is reshaped
 * to `[outHeight, outWidth, 2, NUM_KEYPOINTS]`.
 *
 * Two fixes relative to the previous text of this function:
 *   - the shader source is assembled from one string per line, so no string
 *     literal spans a physical line break;
 *   - `scaleX`, `scaleY` and `minKptScore` are emitted as GLSL float literals.
 *     They are used where the shader expects `float`, and GLSL ES does not
 *     convert int literals implicitly, so a whole-number value such as `1`
 *     must be written `1.0`.
 *
 * Temporary tensors are not disposed here; NOTE(review): callers in this file
 * invoke this inside `tf.tidy` — confirm for any new caller.
 *
 * @param segmentation    Tensor with shape [origHeight, origWidth].
 * @param longOffsets     Tensor whose first two dims are the output size.
 * @param posesAboveScore Poses with `keypoints[i].score` / `.position`.
 * @param _a              [inHeight, inWidth] input resolution.
 * @param padding         Object with `top`, `left`, `bottom`, `right`.
 * @param minKptScore     Keypoints at or below this score are ignored.
 * @param maxNumPeople    Number of pose slots the shader iterates over.
 * @returns Result of `compileAndRun` on the current backend.
 */
function decodeMultipleMasksWebGl(segmentation, longOffsets, posesAboveScore, height, width, stride, _a, padding, refineSteps, minKptScore, maxNumPeople) {
    var inHeight = _a[0], inWidth = _a[1];
    var _b = segmentation.shape, origHeight = _b[0], origWidth = _b[1];
    var _c = longOffsets.shape.slice(0, 2), outHeight = _c[0], outWidth = _c[1];
    var shapedLongOffsets = longOffsets.reshape([outHeight, outWidth, 2, NUM_KEYPOINTS]);
    var poseVals = new Float32Array(maxNumPeople * NUM_KEYPOINTS * 3).fill(0.0);
    for (var i = 0; i < posesAboveScore.length; i++) {
        var poseOffset = i * NUM_KEYPOINTS * 3;
        var pose = posesAboveScore[i];
        for (var kp = 0; kp < NUM_KEYPOINTS; kp++) {
            var keypoint = pose.keypoints[kp];
            var offset = poseOffset + kp * 3;
            poseVals[offset] = keypoint.score;
            poseVals[offset + 1] = keypoint.position.y;
            poseVals[offset + 2] = keypoint.position.x;
        }
    }
    var _d = getScale([height, width], [inHeight, inWidth], padding), scaleX = _d[0], scaleY = _d[1];
    var posesTensor = tf.tensor(poseVals, [maxNumPeople, NUM_KEYPOINTS, 3]);
    var padT = padding.top, padL = padding.left;
    // Formats a JS number as a GLSL float literal: whole numbers get a
    // trailing ".0"; everything else keeps its default string form.
    var toGlslFloat = function (value) {
        return (isFinite(value) && Math.floor(value) === value) ? value.toFixed(1) : String(value);
    };
    var scaleXLiteral = toGlslFloat(scaleX);
    var scaleYLiteral = toGlslFloat(scaleY);
    var minKptScoreLiteral = toGlslFloat(minKptScore);
    var userCode = [
        "",
        " int convertToPositionInOutput(int pos, int pad, float scale, int stride) {",
        " return round(((float(pos + pad) + 1.0) * scale - 1.0) / float(stride));",
        " }",
        "",
        " float convertToPositionInOutputFloat(",
        " int pos, int pad, float scale, int stride) {",
        " return ((float(pos + pad) + 1.0) * scale - 1.0) / float(stride);",
        " }",
        "",
        " float dist(float x1, float y1, float x2, float y2) {",
        " return pow(x1 - x2, 2.0) + pow(y1 - y2, 2.0);",
        " }",
        "",
        " float sampleLongOffsets(float h, float w, int d, int k) {",
        " float fh = fract(h);",
        " float fw = fract(w);",
        " int clH = int(ceil(h));",
        " int clW = int(ceil(w));",
        " int flH = int(floor(h));",
        " int flW = int(floor(w));",
        " float o11 = getLongOffsets(flH, flW, d, k);",
        " float o12 = getLongOffsets(flH, clW, d, k);",
        " float o21 = getLongOffsets(clH, flW, d, k);",
        " float o22 = getLongOffsets(clH, clW, d, k);",
        " float o1 = mix(o11, o12, fw);",
        " float o2 = mix(o21, o22, fw);",
        " return mix(o1, o2, fh);",
        " }",
        "",
        " int findNearestPose(int h, int w) {",
        " float prob = getSegmentation(h, w);",
        " if (prob < 1.0) {",
        " return -1;",
        " }",
        "",
        " // Done(Tyler): convert from output space h/w to strided space.",
        " float stridedH = convertToPositionInOutputFloat(",
        " h, " + padT + ", " + scaleYLiteral + ", " + stride + ");",
        " float stridedW = convertToPositionInOutputFloat(",
        " w, " + padL + ", " + scaleXLiteral + ", " + stride + ");",
        "",
        " float minDist = 1000000.0;",
        " int iMin = -1;",
        " for (int i = 0; i < " + maxNumPeople + "; i++) {",
        " float curDistSum = 0.0;",
        " int numKpt = 0;",
        " for (int k = 0; k < " + NUM_KEYPOINTS + "; k++) {",
        " float dy = sampleLongOffsets(stridedH, stridedW, 0, k);",
        " float dx = sampleLongOffsets(stridedH, stridedW, 1, k);",
        "",
        " float y = float(h) + dy;",
        " float x = float(w) + dx;",
        "",
        " for (int s = 0; s < " + refineSteps + "; s++) {",
        " int yRounded = round(min(y, float(" + (height - 1.0) + ")));",
        " int xRounded = round(min(x, float(" + (width - 1.0) + ")));",
        "",
        " float yStrided = convertToPositionInOutputFloat(",
        " yRounded, " + padT + ", " + scaleYLiteral + ", " + stride + ");",
        " float xStrided = convertToPositionInOutputFloat(",
        " xRounded, " + padL + ", " + scaleXLiteral + ", " + stride + ");",
        "",
        " float dy = sampleLongOffsets(yStrided, xStrided, 0, k);",
        " float dx = sampleLongOffsets(yStrided, xStrided, 1, k);",
        "",
        " y = y + dy;",
        " x = x + dx;",
        " }",
        "",
        " float poseScore = getPoses(i, k, 0);",
        " float poseY = getPoses(i, k, 1);",
        " float poseX = getPoses(i, k, 2);",
        " if (poseScore > " + minKptScoreLiteral + ") {",
        " numKpt = numKpt + 1;",
        " curDistSum = curDistSum + dist(x, y, poseX, poseY);",
        " }",
        " }",
        " if (numKpt > 0 && curDistSum / float(numKpt) < minDist) {",
        " minDist = curDistSum / float(numKpt);",
        " iMin = i;",
        " }",
        " }",
        " return iMin;",
        " }",
        "",
        " void main() {",
        " ivec2 coords = getOutputCoords();",
        " int nearestPose = findNearestPose(coords[0], coords[1]);",
        " setOutput(float(nearestPose));",
        " }",
        " "
    ].join("\n");
    var program = {
        variableNames: ['segmentation', 'longOffsets', 'poses'],
        outputShape: [origHeight, origWidth],
        userCode: userCode
    };
    var webglBackend = tf.backend();
    return webglBackend.compileAndRun(program, [segmentation, shapedLongOffsets, posesTensor]);
}
/**
 * Extracts the mask of person `k`: positions of `segmentation` equal to `k`
 * become 1, all others 0. Runs inside `tf.tidy`.
 */
function toPersonKSegmentation(segmentation, k) {
    return tf.tidy(function () {
        var isPersonK = segmentation.equal(tf.scalar(k));
        return isPersonK.toInt();
    });
}
/**
 * Extracts the part map of person `k`.
 *
 * The 0/1 mask of positions equal to `k` is multiplied by `bodyParts + 1`
 * and 1 is subtracted, so positions outside person `k` become -1 and the
 * others keep their `bodyParts` value. Runs inside `tf.tidy`.
 */
function toPersonKPartSegmentation(segmentation, bodyParts, k) {
    return tf.tidy(function () {
        var personMask = segmentation.equal(tf.scalar(k)).toInt();
        var maskedShiftedParts = personMask.mul(bodyParts.add(1));
        return maskedShiftedParts.sub(1);
    });
}
/**
 * @returns `true` when `tf.getBackend()` reports `'webgl'`.
 */
function isWebGlBackend() {
    var backendName = tf.getBackend();
    return backendName === 'webgl';
}
/**
 * Splits a segmentation into one binary mask per detected person.
 *
 * Poses scoring below `minPoseScore` are dropped first. On the WebGL backend
 * the masks are computed by `decodeMultipleMasksWebGl` and downloaded with
 * `data()`; otherwise `segmentation` and `longOffsets` are downloaded and
 * `decodeMultipleMasksCPU` is used (that call does not receive
 * `minKeypointScore` or `maxNumPeople`).
 *
 * Defaults: minPoseScore 0.2, refineSteps 8, minKeypointScore 0.3,
 * maxNumPeople 10.
 *
 * @param _a [inHeight, inWidth] input resolution.
 * @returns Promise of an array of `{ data, pose, width, height }`, one entry
 *          per pose that passed the score filter.
 */
function decodePersonInstanceMasks(segmentation, longOffsets, poses, height, width, stride, _a, padding, minPoseScore, refineSteps, minKeypointScore, maxNumPeople) {
var inHeight = _a[0], inWidth = _a[1];
if (minPoseScore === void 0) { minPoseScore = 0.2; }
if (refineSteps === void 0) { refineSteps = 8; }
if (minKeypointScore === void 0) { minKeypointScore = 0.3; }
if (maxNumPeople === void 0) { maxNumPeople = 10; }
// Compiled async function: tuples returned below are __generator instructions
// ([3, n] jump to label n, [4, p] await p, [2, v] return v).
return __awaiter(this, void 0, void 0, function () {
var posesAboveScore, personSegmentationsData, personSegmentations, segmentationsData, longOffsetsData;
return __generator(this, function (_b) {
switch (_b.label) {
case 0:
posesAboveScore = poses.filter(function (pose) { return pose.score >= minPoseScore; });
// Non-WebGL backends skip to the CPU path at label 2.
if (!isWebGlBackend()) return [3, 2];
personSegmentations = tf.tidy(function () {
var masksTensor = decodeMultipleMasksWebGl(segmentation, longOffsets, posesAboveScore, height, width, stride, [inHeight, inWidth], padding, refineSteps, minKeypointScore, maxNumPeople);
return posesAboveScore.map(function (_, k) { return toPersonKSegmentation(masksTensor, k); });
});
return [4, Promise.all(personSegmentations.map(function (mask) { return mask.data(); }))];
case 1:
personSegmentationsData =
(_b.sent());
// The per-person tensors were returned from tidy, so they are disposed by hand.
personSegmentations.forEach(function (x) { return x.dispose(); });
return [3, 5];
case 2: return [4, segmentation.data()];
case 3:
segmentationsData = _b.sent();
return [4, longOffsets.data()];
case 4:
longOffsetsData = _b.sent();
personSegmentationsData = decodeMultipleMasksCPU(segmentationsData, longOffsetsData, posesAboveScore, height, width, stride, [inHeight, inWidth], padding, refineSteps);
_b.label = 5;
case 5: return [2, personSegmentationsData.map(function (data, i) { return ({ data: data, pose: posesAboveScore[i], width: width, height: height }); })];
}
});
});
}
/**
 * Splits a part segmentation into one part map per detected person.
 *
 * Poses scoring below `minPoseScore` are dropped first. On the WebGL backend
 * the person index map from `decodeMultipleMasksWebGl` is combined with
 * `partSegmentation` through `toPersonKPartSegmentation` and downloaded with
 * `data()`; otherwise `segmentation`, `longOffsets` and `partSegmentation`
 * are downloaded and `decodeMultiplePartMasksCPU` is used (that call does not
 * receive `minKeypointScore` or `maxNumPeople`).
 *
 * Defaults: minPoseScore 0.2, refineSteps 8, minKeypointScore 0.3,
 * maxNumPeople 10.
 *
 * @param _a [inHeight, inWidth] input resolution.
 * @returns Promise of an array of `{ pose, data, height, width }`, one entry
 *          per pose that passed the score filter.
 */
function decodePersonInstancePartMasks(segmentation, longOffsets, partSegmentation, poses, height, width, stride, _a, padding, minPoseScore, refineSteps, minKeypointScore, maxNumPeople) {
var inHeight = _a[0], inWidth = _a[1];
if (minPoseScore === void 0) { minPoseScore = 0.2; }
if (refineSteps === void 0) { refineSteps = 8; }
if (minKeypointScore === void 0) { minKeypointScore = 0.3; }
if (maxNumPeople === void 0) { maxNumPeople = 10; }
// Compiled async function: tuples returned below are __generator instructions
// ([3, n] jump to label n, [4, p] await p, [2, v] return v).
return __awaiter(this, void 0, void 0, function () {
var posesAboveScore, partSegmentationsByPersonData, partSegmentations, segmentationsData, longOffsetsData, partSegmentaionData;
return __generator(this, function (_b) {
switch (_b.label) {
case 0:
posesAboveScore = poses.filter(function (pose) { return pose.score >= minPoseScore; });
// Non-WebGL backends skip to the CPU path at label 2.
if (!isWebGlBackend()) return [3, 2];
partSegmentations = tf.tidy(function () {
var masksTensor = decodeMultipleMasksWebGl(segmentation, longOffsets, posesAboveScore, height, width, stride, [inHeight, inWidth], padding, refineSteps, minKeypointScore, maxNumPeople);
return posesAboveScore.map(function (_, k) {
return toPersonKPartSegmentation(masksTensor, partSegmentation, k);
});
});
return [4, Promise.all(partSegmentations.map(function (x) { return x.data(); }))];
case 1:
partSegmentationsByPersonData =
(_b.sent());
// The per-person tensors were returned from tidy, so they are disposed by hand.
partSegmentations.forEach(function (x) { return x.dispose(); });
return [3, 6];
case 2: return [4, segmentation.data()];
case 3:
segmentationsData = _b.sent();
return [4, longOffsets.data()];
case 4:
longOffsetsData = _b.sent();
return [4, partSegmentation.data()];
case 5:
partSegmentaionData = _b.sent();
partSegmentationsByPersonData = decodeMultiplePartMasksCPU(segmentationsData, longOffsetsData, partSegmentaionData, posesAboveScore, height, width, stride, [inHeight, inWidth], padding, refineSteps);
_b.label = 6;
case 6: return [2, partSegmentationsByPersonData.map(function (data, k) { return ({ pose: posesAboveScore[k], data: data, height: height, width: width }); })];
}
});
});
}
/**
 * Integer half of `k`, rounded down.
 */
function half(k) {
    var halved = k / 2;
    return Math.floor(halved);
}
/**
 * Array-backed max-priority queue.
 *
 * Elements are ordered by `getElementValue(element)`. `numberOfElements`
 * holds the index of the last used slot, so -1 means empty. The parent of
 * slot k is `half(k)` and its children are slots 2k and 2k + 1.
 */
var MaxHeap = (function () {
    /**
     * @param maxSize         Initial length of the backing array.
     * @param getElementValue Maps an element to the number it is ordered by.
     */
    function MaxHeap(maxSize, getElementValue) {
        this.priorityQueue = new Array(maxSize);
        this.numberOfElements = -1;
        this.getElementValue = getElementValue;
    }
    /** Adds `x` and restores heap order. */
    MaxHeap.prototype.enqueue = function (x) {
        this.priorityQueue[++this.numberOfElements] = x;
        this.swim(this.numberOfElements);
    };
    /**
     * Removes and returns the largest element.
     *
     * Returns `undefined` and leaves the heap untouched when it is empty.
     * Without this guard the element counter would drop below -1, after
     * which `empty()` would report false and `size()` would go negative.
     */
    MaxHeap.prototype.dequeue = function () {
        if (this.empty()) {
            return undefined;
        }
        var max = this.priorityQueue[0];
        this.exchange(0, this.numberOfElements--);
        this.sink(0);
        // Drop the reference left in the vacated slot.
        this.priorityQueue[this.numberOfElements + 1] = null;
        return max;
    };
    /** @returns `true` when the heap holds no elements. */
    MaxHeap.prototype.empty = function () {
        return this.numberOfElements === -1;
    };
    /** @returns Number of stored elements. */
    MaxHeap.prototype.size = function () {
        return this.numberOfElements + 1;
    };
    /** @returns Copy of the stored elements in internal (heap) order. */
    MaxHeap.prototype.all = function () {
        return this.priorityQueue.slice(0, this.numberOfElements + 1);
    };
    /** @returns The largest element without removing it. */
    MaxHeap.prototype.max = function () {
        return this.priorityQueue[0];
    };
    /** Moves slot `k` up while it is larger than its parent. */
    MaxHeap.prototype.swim = function (k) {
        while (k > 0 && this.less(half(k), k)) {
            this.exchange(k, half(k));
            k = half(k);
        }
    };
    /** Moves slot `k` down while it is smaller than its larger child. */
    MaxHeap.prototype.sink = function (k) {
        while (2 * k <= this.numberOfElements) {
            var j = 2 * k;
            if (j < this.numberOfElements && this.less(j, j + 1)) {
                j++;
            }
            if (!this.less(k, j)) {
                break;
            }
            this.exchange(k, j);
            k = j;
        }
    };
    /** @returns Ordering value of the element in slot `i`. */
    MaxHeap.prototype.getValueAt = function (i) {
        return this.getElementValue(this.priorityQueue[i]);
    };
    /** @returns `true` when slot `i` orders strictly below slot `j`. */
    MaxHeap.prototype.less = function (i, j) {
        return this.getValueAt(i) < this.getValueAt(j);
    };
    /** Swaps the elements in slots `i` and `j`. */
    MaxHeap.prototype.exchange = function (i, j) {
        var t = this.priorityQueue[i];
        this.priorityQueue[i] = this.priorityQueue[j];
        this.priorityQueue[j] = t;
    };
    return MaxHeap;
}());
/**
 * Checks whether `score` is a local maximum for `keypointId`.
 *
 * Scans the square window of radius `localMaximumRadius` around
 * (heatmapY, heatmapX), clipped to the bounds in `scores.shape`. The answer
 * is `false` as soon as any cell holds a strictly greater score; ties count
 * as a maximum.
 *
 * @param scores Buffer-like object with `shape` and `get(y, x, keypointId)`.
 */
function scoreIsMaximumInLocalWindow(keypointId, score, heatmapY, heatmapX, localMaximumRadius, scores) {
    var rows = scores.shape[0];
    var cols = scores.shape[1];
    var top = Math.max(heatmapY - localMaximumRadius, 0);
    var bottom = Math.min(heatmapY + localMaximumRadius + 1, rows);
    var left = Math.max(heatmapX - localMaximumRadius, 0);
    var right = Math.min(heatmapX + localMaximumRadius + 1, cols);
    for (var row = top; row < bottom; ++row) {
        for (var col = left; col < right; ++col) {
            if (scores.get(row, col, keypointId) > score) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Collects candidate root parts into a `MaxHeap` ordered by score.
 *
 * Every (y, x, keypoint) cell of `scores` is visited. A cell is enqueued as
 * `{ score, part: { heatmapY, heatmapX, id } }` when its score is not below
 * `scoreThreshold` and `scoreIsMaximumInLocalWindow` accepts it.
 *
 * @param scores Buffer-like object with `shape` [height, width, numKeypoints]
 *               and `get(y, x, keypointId)`.
 * @returns The populated heap.
 */
function buildPartWithScoreQueue(scoreThreshold, localMaximumRadius, scores) {
    var rows = scores.shape[0];
    var cols = scores.shape[1];
    var keypointCount = scores.shape[2];
    var candidates = new MaxHeap(rows * cols * keypointCount, function (entry) {
        return entry.score;
    });
    for (var row = 0; row < rows; ++row) {
        for (var col = 0; col < cols; ++col) {
            for (var id = 0; id < keypointCount; ++id) {
                var cellScore = scores.get(row, col, id);
                var passesThreshold = !(cellScore < scoreThreshold);
                if (passesThreshold && scoreIsMaximumInLocalWindow(id, cellScore, row, col, localMaximumRadius, scores)) {
                    candidates.enqueue({ score: cellScore, part: { heatmapY: row, heatmapX: col, id: id } });
                }
            }
        }
    }
    return candidates;
}
/** `POSE_CHAIN` with each [parent, child] name pair mapped to keypoint ids. */
var parentChildrenTuples = POSE_CHAIN.map(function (link) {
    var parentId = PART_IDS[link[0]];
    var childId = PART_IDS[link[1]];
    return [parentId, childId];
});
/** For edge i, the id of the child keypoint. */
var parentToChildEdges = parentChildrenTuples.map(function (tuple) {
    return tuple[1];
});
/** For edge i, the id of the parent keypoint. */
var childToParentEdges = parentChildrenTuples.map(function (tuple) {
    return tuple[0];
});
/**
 * Reads the displacement vector of edge `edgeId` at cell `point`.
 * The y component is stored at channel `edgeId`, the x component at channel
 * `edgeId + numEdges`, where `numEdges` is half of `displacements.shape[2]`.
 *
 * @param point         `{ x, y }` cell indices.
 * @param displacements Buffer-like object with `shape` and `get(y, x, channel)`.
 * @returns `{ y, x }`.
 */
function getDisplacement(edgeId, point, displacements) {
    var edgeCount = displacements.shape[2] / 2;
    var displacementY = displacements.get(point.y, point.x, edgeId);
    var displacementX = displacements.get(point.y, point.x, edgeCount + edgeId);
    return { y: displacementY, x: displacementX };
}
/**
 * Converts an image-space point to the nearest output cell: each coordinate
 * is divided by `outputStride`, rounded, and clamped to the valid index range
 * ([0, height - 1] for y, [0, width - 1] for x).
 *
 * @returns `{ y, x }` cell indices.
 */
function getStridedIndexNearPoint(point, outputStride, height, width) {
    var row = clamp(Math.round(point.y / outputStride), 0, height - 1);
    var col = clamp(Math.round(point.x / outputStride), 0, width - 1);
    return { y: row, x: col };
}
/**
 * Estimates the keypoint at the other end of a pose-chain edge.
 *
 * The source keypoint's position is moved by the displacement stored for
 * `edgeId` at its nearest output cell. The estimate is then refined
 * `offsetRefineStep` times (default 2): snap to the nearest output cell and
 * add that cell's offset vector for `targetKeypointId`. The returned score is
 * read from `scoresBuffer` at the final nearest cell.
 *
 * @param sourceKeypoint Object with `position` `{ x, y }`.
 * @param scoresBuffer   Buffer-like object with `shape` and `get(y, x, id)`.
 * @returns `{ position, part, score }` for the target keypoint.
 */
function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scoresBuffer, offsets, outputStride, displacements, offsetRefineStep) {
    if (offsetRefineStep === void 0) { offsetRefineStep = 2; }
    var rows = scoresBuffer.shape[0];
    var cols = scoresBuffer.shape[1];
    function nearestCell(point) {
        return getStridedIndexNearPoint(point, outputStride, rows, cols);
    }
    var sourceCell = nearestCell(sourceKeypoint.position);
    var edgeDisplacement = getDisplacement(edgeId, sourceCell, displacements);
    var estimate = addVectors(sourceKeypoint.position, edgeDisplacement);
    var remaining = offsetRefineStep;
    for (var step = 0; step < remaining; step++) {
        var cell = nearestCell(estimate);
        var cellOffset = getOffsetPoint(cell.y, cell.x, targetKeypointId, offsets);
        var cellOrigin = { x: cell.x * outputStride, y: cell.y * outputStride };
        estimate = addVectors(cellOrigin, { x: cellOffset.x, y: cellOffset.y });
    }
    var finalCell = nearestCell(estimate);
    var targetScore = scoresBuffer.get(finalCell.y, finalCell.x, targetKeypointId);
    return { position: estimate, part: PART_NAMES[targetKeypointId], score: targetScore };
}
/**
 * Decodes a full pose starting from a root part.
 *
 * The root keypoint is placed with `getImageCoords`. The pose chain is then
 * walked twice: first over the edges in reverse order, child to parent, using
 * `displacementsBwd`; then over the edges in order, parent to child, using
 * `displacementsFwd`. An edge is followed only when its source keypoint is
 * already known and its target is not.
 *
 * @param root   Object with `part` (`heatmapY`, `heatmapX`, `id`) and `score`.
 * @param scores Buffer-like object; `shape[2]` gives the keypoint count.
 * @returns Array of `{ score, part, position }` indexed by keypoint id.
 */
function decodePose(root, scores, offsets, outputStride, displacementsFwd, displacementsBwd) {
    var keypointCount = scores.shape[2];
    var edgeCount = parentToChildEdges.length;
    var keypoints = new Array(keypointCount);
    var rootPart = root.part;
    var rootPosition = getImageCoords(rootPart, outputStride, offsets);
    keypoints[rootPart.id] = {
        score: root.score,
        part: PART_NAMES[rootPart.id],
        position: rootPosition
    };
    // Follows one edge when its source is decoded and its target is missing.
    function propagate(edgeId, fromId, toId, displacements) {
        if (keypoints[fromId] && !keypoints[toId]) {
            keypoints[toId] = traverseToTargetKeypoint(edgeId, keypoints[fromId], toId, scores, offsets, outputStride, displacements);
        }
    }
    var edge;
    for (edge = edgeCount - 1; edge >= 0; --edge) {
        propagate(edge, parentToChildEdges[edge], childToParentEdges[edge], displacementsBwd);
    }
    for (edge = 0; edge < edgeCount; ++edge) {
        propagate(edge, childToParentEdges[edge], parentToChildEdges[edge], displacementsFwd);
    }
    return keypoints;
}
/**
 * Tells whether any existing pose already has keypoint `keypointId` within
 * the NMS radius of the given point (squared distance <= `squaredNmsRadius`).
 *
 * @param poses Array of poses with `keypoints[id].position`.
 * @param _a    `{ x, y }` point to test.
 */
function withinNmsRadiusOfCorrespondingPoint(poses, squaredNmsRadius, _a, keypointId) {
    var pointX = _a.x;
    var pointY = _a.y;
    for (var idx = 0; idx < poses.length; idx++) {
        var other = poses[idx].keypoints[keypointId].position;
        if (squaredDistance(pointY, pointX, other.y, other.x) <= squaredNmsRadius) {
            return true;
        }
    }
    return false;
}
/**
 * Scores a pose instance: the sum of the scores of its keypoints that are
 * not within the NMS radius of the same keypoint in an existing pose,
 * divided by the total number of keypoints.
 *
 * @param instanceKeypoints Array of `{ position, score }`.
 */
function getInstanceScore(existingPoses, squaredNmsRadius, instanceKeypoints) {
    var unsuppressedTotal = 0.0;
    instanceKeypoints.forEach(function (keypoint, keypointId) {
        var suppressed = withinNmsRadiusOfCorrespondingPoint(existingPoses, squaredNmsRadius, keypoint.position, keypointId);
        if (!suppressed) {
            unsuppressedTotal += keypoint.score;
        }
    });
    return unsuppressedTotal / instanceKeypoints.length;
}
/** Window radius used when picking local score maxima as pose roots. */
var kLocalMaximumRadius = 1;
/**
 * Decodes up to `maxPoseDetections` poses from the model output buffers.
 *
 * Candidate roots come from `buildPartWithScoreQueue` in descending score
 * order. A candidate is skipped when an already-detected pose has the same
 * keypoint within `nmsRadius` of it; otherwise a pose is decoded from it with
 * `decodePose` and scored with `getInstanceScore`.
 *
 * @param scoreThreshold Minimum root score; defaults to 0.5.
 * @param nmsRadius      Suppression radius; defaults to 20.
 * @returns Array of `{ keypoints, score }`.
 */
function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, outputStride, maxPoseDetections, scoreThreshold, nmsRadius) {
    if (scoreThreshold === void 0) { scoreThreshold = 0.5; }
    if (nmsRadius === void 0) { nmsRadius = 20; }
    var detected = [];
    var candidates = buildPartWithScoreQueue(scoreThreshold, kLocalMaximumRadius, scoresBuffer);
    var nmsRadiusSquared = nmsRadius * nmsRadius;
    while (detected.length < maxPoseDetections && !candidates.empty()) {
        var candidate = candidates.dequeue();
        var rootPosition = getImageCoords(candidate.part, outputStride, offsetsBuffer);
        var suppressed = withinNmsRadiusOfCorrespondingPoint(detected, nmsRadiusSquared, rootPosition, candidate.part.id);
        if (!suppressed) {
            var poseKeypoints = decodePose(candidate, scoresBuffer, offsetsBuffer, outputStride, displacementsFwdBuffer, displacementsBwdBuffer);
            var poseScore = getInstanceScore(detected, nmsRadiusSquared, poseKeypoints);
            detected.push({ keypoints: poseKeypoints, score: poseScore });
        }
    }
    return detected;
}
/** Per-channel values added to the input by `ResNet.preprocessInput`. */
var imageNetMean = [-123.15, -115.90, -103.06];
/**
 * `BaseModel` specialisation that supplies the input shift and the
 * output-tensor ordering used by the ResNet variant.
 */
var ResNet = (function (_super) {
    __extends(ResNet, _super);
    function ResNet() {
        return _super !== null && _super.apply(this, arguments) || this;
    }
    /**
     * Adds `imageNetMean` to the input.
     */
    ResNet.prototype.preprocessInput = function (input) {
        var shifted = input.add(imageNetMean);
        return shifted;
    };
    /**
     * Labels the positional model outputs. Index order:
     * 0 displacementBwd, 1 displacementFwd, 2 heatmap, 3 longOffsets,
     * 4 offsets, 5 partHeatmaps, 6 segmentation, 7 partOffsets.
     */
    ResNet.prototype.nameOutputResults = function (results) {
        return {
            offsets: results[4],
            segmentation: results[6],
            partHeatmaps: results[5],
            longOffsets: results[3],
            heatmap: results[2],
            displacementFwd: results[1],
            displacementBwd: results[0],
            partOffsets: results[7]
        };
    };
    return ResNet;
}(BaseModel));
/** URL prefix that resNet50SavedModel prepends to the ResNet50 graph JSON path. */
var RESNET50_BASE_URL = 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/';
/** URL prefix that mobileNetSavedModel prepends to the MobileNet graph JSON path. */
var MOBILENET_BASE_URL = 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/';
/**
 * Builds the URL of the ResNet50 model graph for a stride and weight
 * quantization.
 *
 * @param stride Output stride, embedded in the file name.
 * @param quantBytes Bytes per weight; 4 selects the "float/" directory, any
 *     other value selects "quant<quantBytes>/".
 * @returns The full model URL.
 */
function resNet50SavedModel(stride, quantBytes) {
    var precisionDir = quantBytes === 4 ? "float/" : "quant" + quantBytes + "/";
    var fileName = "model-stride" + stride + ".json";
    return RESNET50_BASE_URL + precisionDir + fileName;
}
/**
 * Builds the URL of the MobileNet model graph for a stride, depth multiplier
 * and weight quantization.
 *
 * @param stride Output stride, embedded in the file name.
 * @param multiplier Depth multiplier; 1.0, 0.75 and 0.50 map to the
 *     directories '100', '075' and '050'.
 * @param quantBytes Bytes per weight; 4 selects "float/", any other value
 *     selects "quant<quantBytes>/".
 * @returns The full model URL.
 */
function mobileNetSavedModel(stride, multiplier, quantBytes) {
    var multiplierDirs = { 1.0: '100', 0.75: '075', 0.50: '050' };
    var precisionDir = quantBytes === 4 ? "float" : "quant" + quantBytes;
    var directory = precisionDir + "/" + multiplierDirs[multiplier] + "/";
    var fileName = "model-stride" + stride + ".json";
    return MOBILENET_BASE_URL + directory + fileName;
}
// Module-level scratch variable; further down it holds the object literal
// being assembled for INTERNAL_RESOLUTION_PERCENTAGES.
var _a;
/**
 * Returns the `[height, width]` of an image-like element (image or canvas).
 *
 * The layout size (`offsetHeight` / `offsetWidth`) is preferred when both
 * values are present and non-zero; otherwise the `height` / `width`
 * properties are used.
 *
 * @param input Image-like object.
 * @returns `[height, width]`.
 * @throws {Error} When neither a usable layout size nor `height`/`width`
 *     is available.
 */
function getSizeFromImageLikeElement(input) {
    // A missing offset property must not count as a usable layout size:
    // `undefined !== 0` is true, which previously produced
    // [undefined, undefined] instead of reaching the fallbacks below.
    var hasLayoutSize = input.offsetHeight != null && input.offsetWidth != null &&
        input.offsetHeight !== 0 && input.offsetWidth !== 0;
    if (hasLayoutSize) {
        return [input.offsetHeight, input.offsetWidth];
    }
    if (input.height != null && input.width != null) {
        return [input.height, input.width];
    }
    throw new Error("HTMLImageElement must have height and width attributes set.");
}
/**
 * Returns the `[height, width]` of a video element.
 *
 * An explicitly specified size (the `height` and `width` attributes) takes
 * priority; otherwise the intrinsic `videoHeight` / `videoWidth` is used.
 *
 * @param input Video element (or an object exposing the same properties).
 * @returns `[height, width]`.
 */
function getSizeFromVideoElement(input) {
    // On a real video element the `height`/`width` properties are 0, never
    // null, when the attributes are unset, so a null check alone can never
    // reach the intrinsic-size fallback. Ask for the attributes instead when
    // the object supports it; keep the null check for plain objects.
    var hasExplicitSize = typeof input.hasAttribute === 'function' ?
        input.hasAttribute('height') && input.hasAttribute('width') :
        input.height != null && input.width != null;
    if (hasExplicitSize) {
        return [input.height, input.width];
    }
    return [input.videoHeight, input.videoWidth];
}
/**
 * Returns the `[height, width]` of any supported input.
 *
 * Supported inputs, checked in this order: canvas or image elements,
 * ImageData, video elements, and tf.Tensor (first two shape entries).
 * DOM constructors are only consulted when they exist in the environment.
 *
 * @param input The image source.
 * @returns `[height, width]`.
 * @throws {Error} When the input matches none of the supported types.
 */
function getInputSize(input) {
    var isCanvas = typeof (HTMLCanvasElement) !== 'undefined' &&
        input instanceof HTMLCanvasElement;
    if (isCanvas ||
        (typeof (HTMLImageElement) !== 'undefined' &&
            input instanceof HTMLImageElement)) {
        return getSizeFromImageLikeElement(input);
    }
    if (typeof (ImageData) !== 'undefined' && input instanceof ImageData) {
        return [input.height, input.width];
    }
    if (typeof (HTMLVideoElement) !== 'undefined' &&
        input instanceof HTMLVideoElement) {
        return getSizeFromVideoElement(input);
    }
    if (input instanceof tf.Tensor) {
        var shape = input.shape;
        return [shape[0], shape[1]];
    }
    throw new Error("error: Unknown input type: " + input + ".");
}
/**
 * Checks whether `resolution` has the form `k * outputStride + 1`.
 *
 * @param resolution Candidate input resolution.
 * @param outputStride Output stride of the model.
 * @returns `true` when `(resolution - 1)` is a multiple of `outputStride`.
 */
function isValidInputResolution(resolution, outputStride) {
    var remainder = (resolution - 1) % outputStride;
    return remainder === 0;
}
/**
 * Converts a resolution into one accepted by isValidInputResolution.
 *
 * A resolution that is already valid is returned as is; anything else is
 * mapped to `floor(inputResolution / outputStride) * outputStride + 1`.
 *
 * @param inputResolution Requested resolution (may be fractional).
 * @param outputStride Output stride of the model.
 * @returns A valid input resolution.
 */
function toValidInputResolution(inputResolution, outputStride) {
    var alreadyValid = isValidInputResolution(inputResolution, outputStride);
    if (!alreadyValid) {
        var wholeStrides = Math.floor(inputResolution / outputStride);
        return wholeStrides * outputStride + 1;
    }
    return inputResolution;
}
/** Names accepted when the internal resolution is given as a string. */
var INTERNAL_RESOLUTION_STRING_OPTIONS = {
low: 'low',
medium: 'medium',
high: 'high',
full: 'full'
};
/**
 * Scale factor associated with each named option; looked up by
 * toInternalResolutionPercentage. Assembled through the module-level
 * scratch variable `_a`.
 */
var INTERNAL_RESOLUTION_PERCENTAGES = (_a = {},
_a[INTERNAL_RESOLUTION_STRING_OPTIONS.low] = 0.25,
_a[INTERNAL_RESOLUTION_STRING_OPTIONS.medium] = 0.5,
_a[INTERNAL_RESOLUTION_STRING_OPTIONS.high] = 0.75,
_a[INTERNAL_RESOLUTION_STRING_OPTIONS.full] = 1.0,
_a);
/** Smallest numeric internal resolution accepted (inclusive). */
var MIN_INTERNAL_RESOLUTION = 0.1;
/** Largest numeric internal resolution accepted (inclusive). */
var MAX_INTERNAL_RESOLUTION = 2.0;
/**
 * Converts an internal-resolution setting into a numeric scale factor.
 *
 * A string is looked up in INTERNAL_RESOLUTION_PERCENTAGES; a number is
 * returned unchanged after checking that it lies within
 * [MIN_INTERNAL_RESOLUTION, MAX_INTERNAL_RESOLUTION]. Validation failures are
 * reported through `tf.util.assert`.
 *
 * @param internalResolution Named option or numeric scale factor.
 * @returns The numeric scale factor.
 */
function toInternalResolutionPercentage(internalResolution) {
    if (typeof internalResolution === 'string') {
        var result = INTERNAL_RESOLUTION_PERCENTAGES[internalResolution];
        tf.util.assert(typeof result === 'number', function () {
            return "string value of inputResolution must be one of " +
                Object.values(INTERNAL_RESOLUTION_STRING_OPTIONS).join(',') +
                " but was " + internalResolution + ".";
        });
        return result;
    }
    tf.util.assert(typeof internalResolution === 'number' &&
        internalResolution <= MAX_INTERNAL_RESOLUTION &&
        internalResolution >= MIN_INTERNAL_RESOLUTION, function () {
        // Report the bounds that are actually enforced; the message used to
        // claim "between 0 and 4", which did not match the check above.
        return "inputResolution must be a string or number between " +
            MIN_INTERNAL_RESOLUTION + " and " + MAX_INTERNAL_RESOLUTION + ", but " +
            ("was " + internalResolution);
    });
    return internalResolution;
}
/**
 * Computes the model input size for an image of the given size.
 *
 * Both dimensions are multiplied by the scale factor derived from
 * `internalResolution` and then passed through toValidInputResolution.
 *
 * @param internalResolution Named option or numeric scale factor.
 * @param outputStride Output stride of the model.
 * @param _a `[inputHeight, inputWidth]` of the source image.
 * @returns `[height, width]` to feed to the model.
 */
function toInputResolutionHeightAndWidth(internalResolution, outputStride, _a) {
    var sourceHeight = _a[0];
    var sourceWidth = _a[1];
    var scale = toInternalResolutionPercentage(internalResolution);
    var targetHeight = toValidInputResolution(sourceHeight * scale, outputStride);
    var targetWidth = toValidInputResolution(sourceWidth * scale, outputStride);
    return [targetHeight, targetWidth];
}
/**
 * Returns `input` unchanged when it is already a tf.Tensor; any other input
 * is converted with `tf.browser.fromPixels`.
 *
 * @param input A tensor or a pixel source accepted by `fromPixels`.
 * @returns A tensor holding the image.
 */
function toInputTensor(input) {
    if (input instanceof tf.Tensor) {
        return input;
    }
    return tf.browser.fromPixels(input);
}
/**
 * Resizes an image tensor to fit inside the target size while keeping its
 * aspect ratio, then pads the remaining border to reach the target size
 * exactly.
 *
 * The padding is split between the two sides of the padded axis, with the
 * leading side getting the floor of half and the trailing side the rest.
 *
 * @param imageTensor Tensor whose first two shape entries are height, width.
 * @param _a `[targetH, targetW]`.
 * @param flipHorizontal When truthy the image is reversed along axis 1
 *     before resizing (default false).
 * @returns `{resizedAndPadded, paddedBy}` where `paddedBy` is
 *     `[[padT, padB], [padL, padR]]`.
 */
function resizeAndPadTo(imageTensor, _a, flipHorizontal) {
    var targetH = _a[0], targetW = _a[1];
    if (flipHorizontal === void 0) { flipHorizontal = false; }
    var sourceHeight = imageTensor.shape[0];
    var sourceWidth = imageTensor.shape[1];
    var sourceAspect = sourceWidth / sourceHeight;
    // Wider than the target: width is the limiting side, pad top/bottom.
    // Otherwise height is the limiting side, pad left/right.
    var isWiderThanTarget = sourceAspect > targetW / targetH;
    var resizeH = isWiderThanTarget ? Math.ceil(targetW / sourceAspect) : targetH;
    var resizeW = isWiderThanTarget ? targetW : Math.ceil(targetH * sourceAspect);
    var padT = isWiderThanTarget ? Math.floor((targetH - resizeH) / 2) : 0;
    var padB = isWiderThanTarget ? targetH - (resizeH + padT) : 0;
    var padL = isWiderThanTarget ? 0 : Math.floor((targetW - resizeW) / 2);
    var padR = isWiderThanTarget ? 0 : targetW - (resizeW + padL);
    var resizedAndPadded = tf.tidy(function () {
        var oriented = flipHorizontal ? imageTensor.reverse(1) : imageTensor;
        var resized = oriented.resizeBilinear([resizeH, resizeW]);
        return tf.pad3d(resized, [[padT, padB], [padL, padR], [0, 0]]);
    });
    return {
        resizedAndPadded: resizedAndPadded,
        paddedBy: [[padT, padB], [padL, padR]]
    };
}
/**
 * Maps a model output tensor back onto the original input image.
 *
 * The tensor is bilinearly resized (second argument `true`) to the
 * resized-and-padded size, optionally passed through a sigmoid, and then
 * handed to removePaddingAndResizeBack together with the original size and
 * the padding amounts.
 *
 * @param tensor Model output tensor.
 * @param _a `[inputTensorHeight, inputTensorWidth]` of the original input.
 * @param _b `[resizedAndPaddedHeight, resizedAndPaddedWidth]`.
 * @param _c `[[padT, padB], [padL, padR]]`.
 * @param applySigmoidActivation Apply `sigmoid()` after resizing
 *     (default false).
 * @returns The result of removePaddingAndResizeBack.
 */
function scaleAndCropToInputTensorShape(tensor, _a, _b, _c, applySigmoidActivation) {
    var originalSize = [_a[0], _a[1]];
    var paddedSize = [_b[0], _b[1]];
    var verticalPad = _c[0];
    var padding = [[verticalPad[0], verticalPad[1]]];
    var horizontalPad = _c[1];
    padding.push([horizontalPad[0], horizontalPad[1]]);
    if (applySigmoidActivation === void 0) { applySigmoidActivation = false; }
    return tf.tidy(function () {
        var upscaled = tensor.resizeBilinear(paddedSize, true);
        var activated = applySigmoidActivation ? upscaled.sigmoid() : upscaled;
        return removePaddingAndResizeBack(activated, originalSize, padding);
    });
}
function removePaddingAndResizeBack(resizedAndPadded, _a, _b) {
var originalHeight = _a[0], originalWidth = _a[1];
var _c = _b[0], padT = _c[0], padB = _c[1], _d = _b[1], padL = _d[0], padR = _d[1];
return tf.tidy(function () {
return tf.image
.cropAndResize(resizedAndPadded.expandDims(), [[
padT / (originalHeight + padT + padB - 1.0),
padL / (originalWidth + padL + padR - 1.0),
(padT + originalHeight - 1.0) /
(originalHeight + padT + padB - 1.0),
(padL + originalWidth - 1.0) / (originalWidth + padL + padR - 1.0)
]], [0], [originalHeight, originalWidth])
.squeeze([0]