@vladmandic/face-api

FaceAPI: AI-powered Face Detection & Rotation Tracking, Face Description & Recognition, Age & Gender & Emotion Prediction for Browser and NodeJS using TensorFlow/JS

import * as tf from '../../dist/tfjs.esm';

import { Rect } from '../classes/index';
import { FaceDetection } from '../classes/FaceDetection';
import { NetInput, TNetInput, toNetInput } from '../dom/index';
import { NeuralNetwork } from '../NeuralNetwork';
import { extractParams } from './extractParams';
import { extractParamsFromWeightMap } from './extractParamsFromWeightMap';
import { mobileNetV1 } from './mobileNetV1';
import { nonMaxSuppression } from './nonMaxSuppression';
import { outputLayer } from './outputLayer';
import { predictionLayer } from './predictionLayer';
import { ISsdMobilenetv1Options, SsdMobilenetv1Options } from './SsdMobilenetv1Options';
import { NetParams } from './types';

export class SsdMobilenetv1 extends NeuralNetwork<NetParams> {
  constructor() {
    super('SsdMobilenetv1');
  }

  public forwardInput(input: NetInput) {
    const { params } = this;
    if (!params) throw new Error('SsdMobilenetv1 - load model before inference');
    return tf.tidy(() => {
      const batchTensor = tf.cast(input.toBatchTensor(512, false), 'float32');
      const x = tf.sub(tf.div(batchTensor, 127.5), 1) as tf.Tensor4D; // input is normalized -1..1
      const features = mobileNetV1(x, params.mobilenetv1);
      const { boxPredictions, classPredictions } = predictionLayer(features.out, features.conv11, params.prediction_layer);
      return outputLayer(boxPredictions, classPredictions, params.output_layer);
    });
  }

  public async forward(input: TNetInput) {
    return this.forwardInput(await toNetInput(input));
  }

  public async locateFaces(input: TNetInput, options: ISsdMobilenetv1Options = {}): Promise<FaceDetection[]> {
    const { maxResults, minConfidence } = new SsdMobilenetv1Options(options);
    const netInput = await toNetInput(input);
    const { boxes: _boxes, scores: _scores } = this.forwardInput(netInput);
    const boxes = _boxes[0];
    const scores = _scores[0];
    for (let i = 1; i < _boxes.length; i++) {
      _boxes[i].dispose();
      _scores[i].dispose();
    }
    const scoresData = Array.from(scores.dataSync());
    const iouThreshold = 0.5;
    const indices = nonMaxSuppression(boxes, scoresData as number[], maxResults, iouThreshold, minConfidence);
    const reshapedDims = netInput.getReshapedInputDimensions(0);
    const inputSize = netInput.inputSize as number;
    const padX = inputSize / reshapedDims.width;
    const padY = inputSize / reshapedDims.height;
    const boxesData = boxes.arraySync();
    const results = indices.map((idx) => {
      const [top, bottom] = [
        Math.max(0, boxesData[idx][0]),
        Math.min(1.0, boxesData[idx][2]),
      ].map((val) => val * padY);
      const [left, right] = [
        Math.max(0, boxesData[idx][1]),
        Math.min(1.0, boxesData[idx][3]),
      ].map((val) => val * padX);
      return new FaceDetection(
        scoresData[idx] as number,
        new Rect(left, top, right - left, bottom - top),
        { height: netInput.getInputHeight(0), width: netInput.getInputWidth(0) },
      );
    });
    boxes.dispose();
    scores.dispose();
    return results;
  }

  protected getDefaultModelName(): string {
    return 'ssd_mobilenetv1_model';
  }

  protected extractParamsFromWeightMap(weightMap: tf.NamedTensorMap) {
    return extractParamsFromWeightMap(weightMap);
  }

  protected extractParams(weights: Float32Array) {
    return extractParams(weights);
  }
}
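
For context, a minimal usage sketch of this class in a browser setting, not part of the file above: it assumes the NeuralNetwork base class exposes loadFromUri() for fetching the ssd_mobilenetv1_model weights (as it does in face-api.js), and the '/models' path is a hypothetical location for those weight files. Only locateFaces(), its options, and the FaceDetection score/box fields come from the source shown.

// usage-sketch.ts (illustrative only)
import { SsdMobilenetv1 } from './SsdMobilenetv1';

async function detectFaces(image: HTMLImageElement) {
  const net = new SsdMobilenetv1();
  // loadFromUri comes from the NeuralNetwork base class; '/models' is a placeholder path
  await net.loadFromUri('/models');

  // minConfidence filters weak candidates before non-max suppression,
  // maxResults caps how many boxes are returned
  const detections = await net.locateFaces(image, { minConfidence: 0.5, maxResults: 10 });

  for (const det of detections) {
    console.log(det.score, det.box); // confidence and bounding box per detected face
  }
  return detections;
}

Calling forwardInput() before any weights are loaded throws the 'load model before inference' error seen in the class, so loading must complete before locateFaces() is invoked.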