// @tensorflow-models/body-pix: Pretrained BodyPix model in TensorFlow.js
"use strict";
/**
* @license
* Copyright 2019 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
var __assign = (this && this.__assign) || function () {
__assign = Object.assign || function(t) {
for (var s, i = 1, n = arguments.length; i < n; i++) {
s = arguments[i];
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
t[p] = s[p];
}
return t;
};
return __assign.apply(this, arguments);
};
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __generator = (this && this.__generator) || function (thisArg, body) {
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
if (f) throw new TypeError("Generator is already executing.");
while (g && (g = 0, op[0] && (_ = 0)), _) try {
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
case 4: _.label++; return { value: op[1], done: false };
case 5: _.label++; y = op[1]; op = [0]; continue;
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
op = body.call(thisArg, _);
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.load = exports.BodyPix = exports.MULTI_PERSON_INSTANCE_INFERENCE_CONFIG = exports.PERSON_INFERENCE_CONFIG = void 0;
var tfconv = require("@tensorflow/tfjs-converter");
var tf = require("@tensorflow/tfjs-core");
var decode_part_map_1 = require("./decode_part_map");
var mobilenet_1 = require("./mobilenet");
var decode_instance_masks_1 = require("./multi_person/decode_instance_masks");
var decode_multiple_poses_1 = require("./multi_person/decode_multiple_poses");
var resnet_1 = require("./resnet");
var saved_models_1 = require("./saved_models");
var util_1 = require("./util");
var APPLY_SIGMOID_ACTIVATION = true;
var FLIP_POSES_AFTER_SCALING = false;
// The default configuration for loading MobileNetV1 based BodyPix.
//
// (For reference, the default configuration for loading ResNet
// based BodyPix is also included.)
//
// ```
// const RESNET_CONFIG = {
// architecture: 'ResNet50',
// outputStride: 32,
// quantBytes: 4,
// } as ModelConfig;
// ```
var MOBILENET_V1_CONFIG = {
architecture: 'MobileNetV1',
outputStride: 16,
quantBytes: 4,
multiplier: 0.75,
};
var VALID_ARCHITECTURE = ['MobileNetV1', 'ResNet50'];
var VALID_STRIDE = {
'MobileNetV1': [8, 16, 32],
'ResNet50': [32, 16]
};
var VALID_MULTIPLIER = {
'MobileNetV1': [0.50, 0.75, 1.0],
'ResNet50': [1.0]
};
var VALID_QUANT_BYTES = [1, 2, 4];
function validateModelConfig(config) {
config = config || MOBILENET_V1_CONFIG;
if (config.architecture == null) {
config.architecture = 'MobileNetV1';
}
if (VALID_ARCHITECTURE.indexOf(config.architecture) < 0) {
throw new Error("Invalid architecture ".concat(config.architecture, ". ") +
"Should be one of ".concat(VALID_ARCHITECTURE));
}
if (config.outputStride == null) {
config.outputStride = 16;
}
if (VALID_STRIDE[config.architecture].indexOf(config.outputStride) < 0) {
throw new Error("Invalid outputStride ".concat(config.outputStride, ". ") +
"Should be one of ".concat(VALID_STRIDE[config.architecture], " ") +
"for architecture ".concat(config.architecture, "."));
}
if (config.multiplier == null) {
config.multiplier = 1.0;
}
if (VALID_MULTIPLIER[config.architecture].indexOf(config.multiplier) < 0) {
throw new Error("Invalid multiplier ".concat(config.multiplier, ". ") +
"Should be one of ".concat(VALID_MULTIPLIER[config.architecture], " ") +
"for architecture ".concat(config.architecture, "."));
}
if (config.quantBytes == null) {
config.quantBytes = 4;
}
if (VALID_QUANT_BYTES.indexOf(config.quantBytes) < 0) {
throw new Error("Invalid quantBytes ".concat(config.quantBytes, ". ") +
"Should be one of ".concat(VALID_QUANT_BYTES, " ") +
"for architecture ".concat(config.architecture, "."));
}
return config;
}
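// Illustrative sketch (not part of the library): how validateModelConfig
// fills defaults and rejects values outside the VALID_* tables above.
//
//   const ok = validateModelConfig({ architecture: 'ResNet50', outputStride: 16, quantBytes: 2 });
//   // ok.multiplier === 1.0 (defaulted); outputStride 16 is valid for ResNet50.
//
//   validateModelConfig({ architecture: 'MobileNetV1', outputStride: 24 });
//   // throws: Invalid outputStride 24. Should be one of 8,16,32 for architecture MobileNetV1.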
exports.PERSON_INFERENCE_CONFIG = {
flipHorizontal: false,
internalResolution: 'medium',
segmentationThreshold: 0.7,
maxDetections: 10,
scoreThreshold: 0.4,
nmsRadius: 20,
};
exports.MULTI_PERSON_INSTANCE_INFERENCE_CONFIG = {
flipHorizontal: false,
internalResolution: 'medium',
segmentationThreshold: 0.7,
maxDetections: 10,
scoreThreshold: 0.4,
nmsRadius: 20,
minKeypointScore: 0.3,
refineSteps: 10
};
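// Illustrative sketch: each segment* method merges the caller's config over
// these defaults (via Object.assign) before validating, so callers only
// override the fields they care about.
//
//   // Effective config inside net.segmentPerson(img, { internalResolution: 'high',
//   // segmentationThreshold: 0.8 }), merged over PERSON_INFERENCE_CONFIG:
//   // { flipHorizontal: false, internalResolution: 'high',
//   //   segmentationThreshold: 0.8, maxDetections: 10,
//   //   scoreThreshold: 0.4, nmsRadius: 20 }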
function validatePersonInferenceConfig(config) {
var segmentationThreshold = config.segmentationThreshold, maxDetections = config.maxDetections, scoreThreshold = config.scoreThreshold, nmsRadius = config.nmsRadius;
if (segmentationThreshold < 0.0 || segmentationThreshold > 1.0) {
throw new Error("segmentationThreshold ".concat(segmentationThreshold, ". ") +
"Should be in range [0.0, 1.0]");
}
if (maxDetections <= 0) {
throw new Error("Invalid maxDetections ".concat(maxDetections, ". ") +
"Should be > 0");
}
if (scoreThreshold < 0.0 || scoreThreshold > 1.0) {
throw new Error("Invalid scoreThreshold ".concat(scoreThreshold, ". ") +
"Should be in range [0.0, 1.0]");
}
if (nmsRadius <= 0) {
throw new Error("Invalid nmsRadius ".concat(nmsRadius, "."));
}
}
function validateMultiPersonInstanceInferenceConfig(config) {
var segmentationThreshold = config.segmentationThreshold, maxDetections = config.maxDetections, scoreThreshold = config.scoreThreshold, nmsRadius = config.nmsRadius, minKeypointScore = config.minKeypointScore, refineSteps = config.refineSteps;
if (segmentationThreshold < 0.0 || segmentationThreshold > 1.0) {
throw new Error("segmentationThreshold ".concat(segmentationThreshold, ". ") +
"Should be in range [0.0, 1.0]");
}
if (maxDetections <= 0) {
throw new Error("Invalid maxDetections ".concat(maxDetections, ". ") +
"Should be > 0");
}
if (scoreThreshold < 0.0 || scoreThreshold > 1.0) {
throw new Error("Invalid scoreThreshold ".concat(scoreThreshold, ". ") +
"Should be in range [0.0, 1.0]");
}
if (nmsRadius <= 0) {
throw new Error("Invalid nmsRadius ".concat(nmsRadius, "."));
}
if (minKeypointScore < 0 || minKeypointScore > 1) {
throw new Error("Invalid minKeypointScore ".concat(minKeypointScore, ".") +
"Should be in range [0.0, 1.0]");
}
if (refineSteps <= 0 || refineSteps > 20) {
throw new Error("Invalid refineSteps ".concat(refineSteps, ".") +
"Should be in range [1, 20]");
}
}
var BodyPix = /** @class */ (function () {
function BodyPix(net) {
this.baseModel = net;
}
BodyPix.prototype.predictForPersonSegmentation = function (input) {
var _a = this.baseModel.predict(input), segmentation = _a.segmentation, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd;
return {
segmentLogits: segmentation,
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
};
};
BodyPix.prototype.predictForPersonSegmentationAndPart = function (input) {
var _a = this.baseModel.predict(input), segmentation = _a.segmentation, partHeatmaps = _a.partHeatmaps, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd;
return {
segmentLogits: segmentation,
partHeatmapLogits: partHeatmaps,
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
};
};
BodyPix.prototype.predictForMultiPersonInstanceSegmentationAndPart = function (input) {
var _a = this.baseModel.predict(input), segmentation = _a.segmentation, longOffsets = _a.longOffsets, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd, partHeatmaps = _a.partHeatmaps;
return {
segmentLogits: segmentation,
longOffsets: longOffsets,
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
partHeatmaps: partHeatmaps
};
};
/**
* Given an image with people, returns a dictionary of all intermediate
* tensors including: 1) a binary array with 1 for the pixels that are part of
* the person, and 0 otherwise, 2) heatmapScores, 3) offsets, and 4) padding.
*
* @param input ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement
* The input image to feed through the network.
*
* @param internalResolution Defaults to 'medium'. The internal resolution
* that the input is resized to before inference. The larger the
* internalResolution the more accurate the model at the cost of slower
* prediction times. Available values are 'low', 'medium', 'high', 'full', or
* a percentage value between 0 and 1. The values 'low', 'medium', 'high', and
* 'full' map to 0.25, 0.5, 0.75, and 1.0 respectively.
*
* @param segmentationThreshold The minimum score that segmentation values must have
* to be considered part of the person. Affects the generation of the
* segmentation mask.
*
* @return A dictionary containing `segmentation`, `heatmapScores`, `offsets`,
* and `padding`:
* - `segmentation`: A 2d Tensor with 1 for the pixels that are part of the
* person, and 0 otherwise. The width and height correspond to the same
* dimensions of the input image.
* - `heatmapScores`: A 3d Tensor of the keypoint heatmaps used by
* pose estimation decoding.
* - `offsets`: A 3d Tensor of the keypoint offsets used by pose
* estimation decoding.
* - `displacementFwd`: A 3d Tensor of the keypoint forward displacement used
* by pose estimation decoding.
* - `displacementBwd`: A 3d Tensor of the keypoint backward displacement used
* by pose estimation decoding.
* - `padding`: The padding (unit pixels) being applied to the input image
* before it is fed into the model.
*/
BodyPix.prototype.segmentPersonActivation = function (input, internalResolution, segmentationThreshold) {
var _this = this;
if (segmentationThreshold === void 0) { segmentationThreshold = 0.5; }
var _a = (0, util_1.getInputSize)(input), height = _a[0], width = _a[1];
var internalResolutionHeightAndWidth = (0, util_1.toInputResolutionHeightAndWidth)(internalResolution, this.baseModel.outputStride, [height, width]);
var _b = (0, util_1.padAndResizeTo)(input, internalResolutionHeightAndWidth), resized = _b.resized, padding = _b.padding;
var _c = tf.tidy(function () {
var _a = _this.predictForPersonSegmentation(resized), segmentLogits = _a.segmentLogits, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd;
var _b = resized.shape, resizedHeight = _b[0], resizedWidth = _b[1];
var scaledSegmentScores = (0, util_1.scaleAndCropToInputTensorShape)(segmentLogits, [height, width], [resizedHeight, resizedWidth], [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
return {
segmentation: (0, decode_part_map_1.toMaskTensor)(tf.squeeze(scaledSegmentScores), segmentationThreshold),
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
};
}), segmentation = _c.segmentation, heatmapScores = _c.heatmapScores, offsets = _c.offsets, displacementFwd = _c.displacementFwd, displacementBwd = _c.displacementBwd;
resized.dispose();
return {
segmentation: segmentation,
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
padding: padding,
internalResolutionHeightAndWidth: internalResolutionHeightAndWidth
};
};
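// Illustrative sketch (assumes `net` is a loaded BodyPix instance and the
// call runs inside an async function): the activation method returns live
// tensors that the caller is responsible for disposing.
//
//   const out = net.segmentPersonActivation(image, 'medium', 0.7);
//   const mask = await out.segmentation.data(); // 0/1 per pixel, height x width
//   [out.segmentation, out.heatmapScores, out.offsets,
//    out.displacementFwd, out.displacementBwd].forEach(function (t) { t.dispose(); });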
/**
* Given an image with many people, returns a PersonSegmentation dictionary
* that contains the segmentation mask for all people and a single pose.
*
* Note: The segmentation mask returned by this method covers all people but
* the pose works well for one person. If you want to estimate instance-level
* multiple person segmentation & pose for each person, use
* `segmentMultiPerson` instead.
*
* @param input ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement
* The input image to feed through the network.
*
* @param config PersonInferenceConfig object that contains
* parameters for the BodyPix inference using person decoding.
*
* @return A SemanticPersonSegmentation dictionary that contains height,
* width, the flattened binary segmentation mask and the poses for all people.
* The width and height correspond to the same dimensions of the input image.
* - `height`: The height of the segmentation data in pixel unit.
* - `width`: The width of the segmentation data in pixel unit.
* - `data`: The flattened Uint8Array of segmentation data. 1 means the pixel
* belongs to a person and 0 means the pixel doesn't belong to a person. The
* size of the array is equal to `height` x `width` in row-major order.
* - `allPoses`: The 2d poses of all people.
*/
BodyPix.prototype.segmentPerson = function (input, config) {
if (config === void 0) { config = exports.PERSON_INFERENCE_CONFIG; }
return __awaiter(this, void 0, void 0, function () {
var _a, segmentation, heatmapScores, offsets, displacementFwd, displacementBwd, padding, internalResolutionHeightAndWidth, _b, height, width, result, tensorBuffers, scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, poses;
return __generator(this, function (_c) {
switch (_c.label) {
case 0:
config = __assign(__assign({}, exports.PERSON_INFERENCE_CONFIG), config);
validatePersonInferenceConfig(config);
_a = this.segmentPersonActivation(input, config.internalResolution, config.segmentationThreshold), segmentation = _a.segmentation, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd, padding = _a.padding, internalResolutionHeightAndWidth = _a.internalResolutionHeightAndWidth;
_b = segmentation.shape, height = _b[0], width = _b[1];
return [4 /*yield*/, segmentation.data()];
case 1:
result = _c.sent();
segmentation.dispose();
return [4 /*yield*/, (0, util_1.toTensorBuffers3D)([heatmapScores, offsets, displacementFwd, displacementBwd])];
case 2:
tensorBuffers = _c.sent();
scoresBuf = tensorBuffers[0], offsetsBuf = tensorBuffers[1], displacementsFwdBuf = tensorBuffers[2], displacementsBwdBuf = tensorBuffers[3];
poses = (0, decode_multiple_poses_1.decodeMultiplePoses)(scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, this.baseModel.outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
poses = (0, util_1.scaleAndFlipPoses)(poses, [height, width], internalResolutionHeightAndWidth, padding, FLIP_POSES_AFTER_SCALING);
heatmapScores.dispose();
offsets.dispose();
displacementFwd.dispose();
displacementBwd.dispose();
return [2 /*return*/, { height: height, width: width, data: result, allPoses: poses }];
}
});
});
};
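// Usage sketch (element id and config values are illustrative; run inside an
// async function):
//
//   const net = await load(); // MobileNetV1 defaults
//   const img = document.getElementById('person');
//   const { width, height, data, allPoses } = await net.segmentPerson(img, {
//     internalResolution: 'medium',
//     segmentationThreshold: 0.7,
//   });
//   // data[y * width + x] === 1 when pixel (x, y) belongs to a person.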
/**
* Given an image with multiple people, returns an *array* of
* PersonSegmentation objects. Each element in the array corresponds to one
* of the people in the input image. In other words, it predicts
* instance-level multiple person segmentation & pose for each person.
*
* This does standard ImageNet pre-processing before inferring through
* the model. The image pixels should have values [0-255].
*
* @param input
* ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement The input
* image to feed through the network.
*
* @param config MultiPersonInferenceConfig object that contains
* parameters for the BodyPix inference using multi-person decoding.
*
* @return An array of PersonSegmentation objects, each containing a width,
* height, a binary array (1 for the pixels that are part of the
* person, and 0 otherwise) and a 2D pose. Each binary array's size equals
* the number of pixels in the image. The width and height correspond to the
* dimensions of the image the binary array is shaped to, which are the same
* dimensions of the input image.
*/
BodyPix.prototype.segmentMultiPerson = function (input, config) {
if (config === void 0) { config = exports.MULTI_PERSON_INSTANCE_INFERENCE_CONFIG; }
return __awaiter(this, void 0, void 0, function () {
var _a, height, width, internalResolutionHeightAndWidth, _b, resized, padding, _c, segmentation, longOffsets, heatmapScoresRaw, offsetsRaw, displacementFwdRaw, displacementBwdRaw, tensorBuffers, scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, poses, instanceMasks;
var _this = this;
return __generator(this, function (_d) {
switch (_d.label) {
case 0:
config = __assign(__assign({}, exports.MULTI_PERSON_INSTANCE_INFERENCE_CONFIG), config);
validateMultiPersonInstanceInferenceConfig(config);
_a = (0, util_1.getInputSize)(input), height = _a[0], width = _a[1];
internalResolutionHeightAndWidth = (0, util_1.toInputResolutionHeightAndWidth)(config.internalResolution, this.baseModel.outputStride, [height, width]);
_b = (0, util_1.padAndResizeTo)(input, internalResolutionHeightAndWidth), resized = _b.resized, padding = _b.padding;
_c = tf.tidy(function () {
var _a = _this.predictForMultiPersonInstanceSegmentationAndPart(resized), segmentLogits = _a.segmentLogits, longOffsets = _a.longOffsets, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd;
var scaledSegmentScores = (0, util_1.scaleAndCropToInputTensorShape)(segmentLogits, [height, width], internalResolutionHeightAndWidth, [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
// An alternative path that resizes the long offsets to the input size is
// currently disabled; decoding uses long offsets at the internal resolution.
var longOffsetsResized = false;
var scaledLongOffsets;
if (longOffsetsResized) {
scaledLongOffsets = (0, util_1.scaleAndCropToInputTensorShape)(longOffsets, [height, width], internalResolutionHeightAndWidth, [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
}
else {
scaledLongOffsets = longOffsets;
}
var segmentation = (0, decode_part_map_1.toMaskTensor)(tf.squeeze(scaledSegmentScores), config.segmentationThreshold);
return {
segmentation: segmentation,
longOffsets: scaledLongOffsets,
heatmapScoresRaw: heatmapScores,
offsetsRaw: offsets,
displacementFwdRaw: displacementFwd,
displacementBwdRaw: displacementBwd,
};
}), segmentation = _c.segmentation, longOffsets = _c.longOffsets, heatmapScoresRaw = _c.heatmapScoresRaw, offsetsRaw = _c.offsetsRaw, displacementFwdRaw = _c.displacementFwdRaw, displacementBwdRaw = _c.displacementBwdRaw;
return [4 /*yield*/, (0, util_1.toTensorBuffers3D)([heatmapScoresRaw, offsetsRaw, displacementFwdRaw, displacementBwdRaw])];
case 1:
tensorBuffers = _d.sent();
scoresBuf = tensorBuffers[0], offsetsBuf = tensorBuffers[1], displacementsFwdBuf = tensorBuffers[2], displacementsBwdBuf = tensorBuffers[3];
poses = (0, decode_multiple_poses_1.decodeMultiplePoses)(scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, this.baseModel.outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
poses = (0, util_1.scaleAndFlipPoses)(poses, [height, width], internalResolutionHeightAndWidth, padding, FLIP_POSES_AFTER_SCALING);
return [4 /*yield*/, (0, decode_instance_masks_1.decodePersonInstanceMasks)(segmentation, longOffsets, poses, height, width, this.baseModel.outputStride, internalResolutionHeightAndWidth, padding, config.scoreThreshold, config.refineSteps, config.minKeypointScore, config.maxDetections)];
case 2:
instanceMasks = _d.sent();
resized.dispose();
segmentation.dispose();
longOffsets.dispose();
heatmapScoresRaw.dispose();
offsetsRaw.dispose();
displacementFwdRaw.dispose();
displacementBwdRaw.dispose();
return [2 /*return*/, instanceMasks];
}
});
});
};
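// Usage sketch: one PersonSegmentation entry per detected person, each with
// its own mask and pose (assumes `net` and `img` as in the earlier sketches).
//
//   const people = await net.segmentMultiPerson(img, { maxDetections: 5 });
//   people.forEach(function (person, i) {
//     console.log('person ' + i + ' pose score: ' + person.pose.score);
//   });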
/**
* Given an image with many people, returns a dictionary containing: height,
* width, a tensor with a part id from 0-24 for the pixels that are
* part of a corresponding body part, and -1 otherwise. This does standard
* ImageNet pre-processing before inferring through the model. The image
* pixels should have values [0-255].
*
* @param input ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement
* The input image to feed through the network.
*
* @param internalResolution Defaults to 'medium'. The internal resolution
* that the input is resized to before inference. The larger the
* internalResolution the more accurate the model at the cost of slower
* prediction times. Available values are 'low', 'medium', 'high', 'full', or
* a percentage value between 0 and 1. The values 'low', 'medium', 'high', and
* 'full' map to 0.25, 0.5, 0.75, and 1.0 respectively.
*
* @param segmentationThreshold The minimum score that segmentation values must have
* to be considered part of the person. Affects the clipping of the colored
* part image.
*
* @return A dictionary containing `partSegmentation`, `heatmapScores`,
* `offsets`, and `padding`:
* - `partSegmentation`: A 2d Tensor with a part id from 0-24 for
* the pixels that are part of a corresponding body part, and -1 otherwise.
* - `heatmapScores`: A 3d Tensor of the keypoint heatmaps used by
* single-person pose estimation decoding.
* - `offsets`: A 3d Tensor of the keypoint offsets used by single-person pose
* estimation decoding.
* - `displacementFwd`: A 3d Tensor of the keypoint forward displacement
* used by pose estimation decoding.
* - `displacementBwd`: A 3d Tensor of the keypoint backward displacement used
* by pose estimation decoding.
* - `padding`: The padding (unit pixels) being applied to the input image
* before it is fed into the model.
*/
BodyPix.prototype.segmentPersonPartsActivation = function (input, internalResolution, segmentationThreshold) {
var _this = this;
if (segmentationThreshold === void 0) { segmentationThreshold = 0.5; }
var _a = (0, util_1.getInputSize)(input), height = _a[0], width = _a[1];
var internalResolutionHeightAndWidth = (0, util_1.toInputResolutionHeightAndWidth)(internalResolution, this.baseModel.outputStride, [height, width]);
var _b = (0, util_1.padAndResizeTo)(input, internalResolutionHeightAndWidth), resized = _b.resized, padding = _b.padding;
var _c = tf.tidy(function () {
var _a = _this.predictForPersonSegmentationAndPart(resized), segmentLogits = _a.segmentLogits, partHeatmapLogits = _a.partHeatmapLogits, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd;
var _b = resized.shape, resizedHeight = _b[0], resizedWidth = _b[1];
var scaledSegmentScores = (0, util_1.scaleAndCropToInputTensorShape)(segmentLogits, [height, width], [resizedHeight, resizedWidth], [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
var scaledPartHeatmapScore = (0, util_1.scaleAndCropToInputTensorShape)(partHeatmapLogits, [height, width], [resizedHeight, resizedWidth], [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
var segmentation = (0, decode_part_map_1.toMaskTensor)(tf.squeeze(scaledSegmentScores), segmentationThreshold);
return {
partSegmentation: (0, decode_part_map_1.decodePartSegmentation)(segmentation, scaledPartHeatmapScore),
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
};
}), partSegmentation = _c.partSegmentation, heatmapScores = _c.heatmapScores, offsets = _c.offsets, displacementFwd = _c.displacementFwd, displacementBwd = _c.displacementBwd;
resized.dispose();
return {
partSegmentation: partSegmentation,
heatmapScores: heatmapScores,
offsets: offsets,
displacementFwd: displacementFwd,
displacementBwd: displacementBwd,
padding: padding,
internalResolutionHeightAndWidth: internalResolutionHeightAndWidth
};
};
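// Illustrative sketch: like segmentPersonActivation, but the first output is
// a 2d tensor of part ids. All returned tensors must be disposed by the caller.
//
//   const out = net.segmentPersonPartsActivation(image, 'medium', 0.7);
//   const partIds = await out.partSegmentation.data(); // -1 or a part id 0-24 per pixel
//   [out.partSegmentation, out.heatmapScores, out.offsets,
//    out.displacementFwd, out.displacementBwd].forEach(function (t) { t.dispose(); });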
/**
* Given an image with many people, returns a PartSegmentation dictionary that
* contains the body part segmentation mask for all people and a single pose.
*
* Note: The body part segmentation mask returned by this method covers all
* people but the pose works well when there is one person. If you want to
* estimate instance-level multiple person body part segmentation & pose for
* each person, use `segmentMultiPersonParts` instead.
*
* @param input ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement
* The input image to feed through the network.
*
* @param config PersonInferenceConfig object that contains
* parameters for the BodyPix inference using single person decoding.
*
* @return A SemanticPartSegmentation dictionary that contains height, width,
* the flattened part segmentation data and the poses for all people. The
* width and height correspond to the same dimensions of the input image.
* - `height`: The height of the person part segmentation data in pixel unit.
* - `width`: The width of the person part segmentation data in pixel unit.
* - `data`: The flattened Int32Array of person part segmentation data with a
* part id from 0-24 for the pixels that are part of a corresponding body
* part, and -1 otherwise. The size of the array is equal to `height` x
* `width` in row-major order.
* - `allPoses`: The 2d poses of all people.
*/
BodyPix.prototype.segmentPersonParts = function (input, config) {
if (config === void 0) { config = exports.PERSON_INFERENCE_CONFIG; }
return __awaiter(this, void 0, void 0, function () {
var _a, partSegmentation, heatmapScores, offsets, displacementFwd, displacementBwd, padding, internalResolutionHeightAndWidth, _b, height, width, data, tensorBuffers, scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, poses;
return __generator(this, function (_c) {
switch (_c.label) {
case 0:
config = __assign(__assign({}, exports.PERSON_INFERENCE_CONFIG), config);
validatePersonInferenceConfig(config);
_a = this.segmentPersonPartsActivation(input, config.internalResolution, config.segmentationThreshold), partSegmentation = _a.partSegmentation, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd, padding = _a.padding, internalResolutionHeightAndWidth = _a.internalResolutionHeightAndWidth;
_b = partSegmentation.shape, height = _b[0], width = _b[1];
return [4 /*yield*/, partSegmentation.data()];
case 1:
data = _c.sent();
partSegmentation.dispose();
return [4 /*yield*/, (0, util_1.toTensorBuffers3D)([heatmapScores, offsets, displacementFwd, displacementBwd])];
case 2:
tensorBuffers = _c.sent();
scoresBuf = tensorBuffers[0], offsetsBuf = tensorBuffers[1], displacementsFwdBuf = tensorBuffers[2], displacementsBwdBuf = tensorBuffers[3];
poses = (0, decode_multiple_poses_1.decodeMultiplePoses)(scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, this.baseModel.outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
poses = (0, util_1.scaleAndFlipPoses)(poses, [height, width], internalResolutionHeightAndWidth, padding, FLIP_POSES_AFTER_SCALING);
heatmapScores.dispose();
offsets.dispose();
displacementFwd.dispose();
displacementBwd.dispose();
return [2 /*return*/, { height: height, width: width, data: data, allPoses: poses }];
}
});
});
};
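// Usage sketch: `data` holds one part id (0-24) per pixel, or -1 for
// background. (Assumption: which body part each id maps to follows the
// package's part-channel list, e.g. id 0 for the left face.)
//
//   const result = await net.segmentPersonParts(img);
//   let count = 0;
//   for (let i = 0; i < result.data.length; i++) {
//     if (result.data[i] === 0) count++; // pixels labeled with part id 0
//   }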
/**
* Given an image with multiple people, returns an *array* of PartSegmentation
* objects. Each element in the array corresponds to one
* of the people in the input image. In other words, it predicts
* instance-level multiple person body part segmentation & pose for each
* person.
*
* This does standard ImageNet pre-processing before inferring through
* the model. The image pixels should have values [0-255].
*
* @param input
* ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement The input
* image to feed through the network.
*
* @param config MultiPersonInferenceConfig object that contains
* parameters for the BodyPix inference using multi-person decoding.
*
* @return An array of PartSegmentation objects, each containing a width,
* height, a flattened array (with part id from 0-24 for the pixels that are
* part of a corresponding body part, and -1 otherwise) and 2D pose. The width
* and height correspond to the dimensions of the image. Each flattened part
* segmentation array size is equal to `height` x `width`.
*/
BodyPix.prototype.segmentMultiPersonParts = function (input, config) {
if (config === void 0) { config = exports.MULTI_PERSON_INSTANCE_INFERENCE_CONFIG; }
return __awaiter(this, void 0, void 0, function () {
var _a, height, width, internalResolutionHeightAndWidth, _b, resized, padding, _c, segmentation, longOffsets, heatmapScoresRaw, offsetsRaw, displacementFwdRaw, displacementBwdRaw, partSegmentation, tensorBuffers, scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, poses, instanceMasks;
var _this = this;
return __generator(this, function (_d) {
switch (_d.label) {
case 0:
config = __assign(__assign({}, exports.MULTI_PERSON_INSTANCE_INFERENCE_CONFIG), config);
validateMultiPersonInstanceInferenceConfig(config);
_a = (0, util_1.getInputSize)(input), height = _a[0], width = _a[1];
internalResolutionHeightAndWidth = (0, util_1.toInputResolutionHeightAndWidth)(config.internalResolution, this.baseModel.outputStride, [height, width]);
_b = (0, util_1.padAndResizeTo)(input, internalResolutionHeightAndWidth), resized = _b.resized, padding = _b.padding;
_c = tf.tidy(function () {
var _a = _this.predictForMultiPersonInstanceSegmentationAndPart(resized), segmentLogits = _a.segmentLogits, longOffsets = _a.longOffsets, heatmapScores = _a.heatmapScores, offsets = _a.offsets, displacementFwd = _a.displacementFwd, displacementBwd = _a.displacementBwd, partHeatmaps = _a.partHeatmaps;
// decoding with scaling.
var scaledSegmentScores = (0, util_1.scaleAndCropToInputTensorShape)(segmentLogits, [height, width], internalResolutionHeightAndWidth, [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
// decoding with scaling.
var scaledPartSegmentationScores = (0, util_1.scaleAndCropToInputTensorShape)(partHeatmaps, [height, width], internalResolutionHeightAndWidth, [[padding.top, padding.bottom], [padding.left, padding.right]], APPLY_SIGMOID_ACTIVATION);
var scaledLongOffsets = longOffsets;
var segmentation = (0, decode_part_map_1.toMaskTensor)(tf.squeeze(scaledSegmentScores), config.segmentationThreshold);
var partSegmentation = (0, decode_part_map_1.decodeOnlyPartSegmentation)(scaledPartSegmentationScores);
return {
segmentation: segmentation,
longOffsets: scaledLongOffsets,
heatmapScoresRaw: heatmapScores,
offsetsRaw: offsets,
displacementFwdRaw: displacementFwd,
displacementBwdRaw: displacementBwd,
partSegmentation: partSegmentation
};
}), segmentation = _c.segmentation, longOffsets = _c.longOffsets, heatmapScoresRaw = _c.heatmapScoresRaw, offsetsRaw = _c.offsetsRaw, displacementFwdRaw = _c.displacementFwdRaw, displacementBwdRaw = _c.displacementBwdRaw, partSegmentation = _c.partSegmentation;
return [4 /*yield*/, (0, util_1.toTensorBuffers3D)([heatmapScoresRaw, offsetsRaw, displacementFwdRaw, displacementBwdRaw])];
case 1:
tensorBuffers = _d.sent();
scoresBuf = tensorBuffers[0], offsetsBuf = tensorBuffers[1], displacementsFwdBuf = tensorBuffers[2], displacementsBwdBuf = tensorBuffers[3];
poses = (0, decode_multiple_poses_1.decodeMultiplePoses)(scoresBuf, offsetsBuf, displacementsFwdBuf, displacementsBwdBuf, this.baseModel.outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
poses = (0, util_1.scaleAndFlipPoses)(poses, [height, width], internalResolutionHeightAndWidth, padding, FLIP_POSES_AFTER_SCALING);
return [4 /*yield*/, (0, decode_instance_masks_1.decodePersonInstancePartMasks)(segmentation, longOffsets, partSegmentation, poses, height, width, this.baseModel.outputStride, internalResolutionHeightAndWidth, padding, config.scoreThreshold, config.refineSteps, config.minKeypointScore, config.maxDetections)];
case 2:
instanceMasks = _d.sent();
resized.dispose();
segmentation.dispose();
longOffsets.dispose();
heatmapScoresRaw.dispose();
offsetsRaw.dispose();
displacementFwdRaw.dispose();
displacementBwdRaw.dispose();
partSegmentation.dispose();
return [2 /*return*/, instanceMasks];
}
});
});
};
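// Usage sketch: per-person part maps, e.g. for coloring each instance
// separately.
//
//   const parts = await net.segmentMultiPersonParts(img, { refineSteps: 8 });
//   parts.forEach(function (person) {
//     // person.data: flattened array of part ids, -1 where the pixel does
//     // not belong to this person.
//   });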
BodyPix.prototype.dispose = function () {
this.baseModel.dispose();
};
return BodyPix;
}());
exports.BodyPix = BodyPix;
/**
* Loads the MobileNet BodyPix model.
*/
function loadMobileNet(config) {
return __awaiter(this, void 0, void 0, function () {
var outputStride, quantBytes, multiplier, url, graphModel, mobilenet;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
outputStride = config.outputStride;
quantBytes = config.quantBytes;
multiplier = config.multiplier;
if (tf == null) {
throw new Error("Cannot find TensorFlow.js. If you are using a <script> tag, please " +
"also include @tensorflow/tfjs on the page before using this\n model.");
}
url = (0, saved_models_1.mobileNetSavedModel)(outputStride, multiplier, quantBytes);
return [4 /*yield*/, tfconv.loadGraphModel(config.modelUrl || url)];
case 1:
graphModel = _a.sent();
mobilenet = new mobilenet_1.MobileNet(graphModel, outputStride);
return [2 /*return*/, new BodyPix(mobilenet)];
}
});
});
}
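// Sketch: a user-supplied config.modelUrl takes precedence over the hosted
// checkpoint URL above, which allows self-hosted weights (URL below is
// hypothetical):
//
//   const net = await load({
//     architecture: 'MobileNetV1',
//     modelUrl: 'https://example.com/bodypix/model.json',
//   });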
/**
* Loads the ResNet BodyPix model.
*/
function loadResNet(config) {
return __awaiter(this, void 0, void 0, function () {
var outputStride, quantBytes, url, graphModel, resnet;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
outputStride = config.outputStride;
quantBytes = config.quantBytes;
if (tf == null) {
throw new Error("Cannot find TensorFlow.js. If you are using a <script> tag, please " +
"also include @tensorflow/tfjs on the page before using this\n model.");
}
url = (0, saved_models_1.resNet50SavedModel)(outputStride, quantBytes);
return [4 /*yield*/, tfconv.loadGraphModel(config.modelUrl || url)];
case 1:
graphModel = _a.sent();
resnet = new resnet_1.ResNet(graphModel, outputStride);
return [2 /*return*/, new BodyPix(resnet)];
}
});
});
}
/**
* Loads the BodyPix model instance from a checkpoint, with the ResNet
* or MobileNet architecture. The model to be loaded is configurable using the
* config dictionary ModelConfig. Please find more details in the
* documentation of the ModelConfig.
*
* @param config ModelConfig dictionary that contains parameters for
* the BodyPix loading process. Please find more details of each parameter
* in the documentation of the ModelConfig interface. The predefined
* `MOBILENET_V1_CONFIG` and `RESNET_CONFIG` can also be used as references
* for defining your customized config.
*/
function load(config) {
if (config === void 0) { config = MOBILENET_V1_CONFIG; }
return __awaiter(this, void 0, void 0, function () {
return __generator(this, function (_a) {
config = validateModelConfig(config);
if (config.architecture === 'ResNet50') {
return [2 /*return*/, loadResNet(config)];
}
else if (config.architecture === 'MobileNetV1') {
return [2 /*return*/, loadMobileNet(config)];
}
else {
return [2 /*return*/, null];
}
return [2 /*return*/];
});
});
}
exports.load = load;
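// End-to-end sketch (npm usage; a script-tag build exposes the same API,
// provided @tensorflow/tfjs is included first, per the error messages above):
//
//   const bodyPix = require('@tensorflow-models/body-pix');
//   require('@tensorflow/tfjs'); // registers a backend
//
//   async function run(video) {
//     const net = await bodyPix.load({
//       architecture: 'ResNet50', outputStride: 32, quantBytes: 4,
//     });
//     const segmentation = await net.segmentPerson(video);
//     net.dispose(); // free the model's weights when done
//     return segmentation;
//   }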
//# sourceMappingURL=body_pix_model.js.map