// mind-ar: web augmented reality framework (controller.js)
// import only the tfjs functions we use so bundlers can tree-shake the rest
import {memory, nextFrame} from '@tensorflow/tfjs';
const tf = {memory, nextFrame};
import ControllerWorker from "./controller.worker.js?worker&inline";
import {Tracker} from './tracker/tracker.js';
import {CropDetector} from './detector/crop-detector.js';
import {Compiler} from './compiler.js';
import {InputLoader} from './input-loader.js';
import {OneEuroFilter} from '../libs/one-euro-filter.js';
// one-euro filter defaults: minCutOff is the minimum cutoff frequency in Hz (lower
// removes more jitter at the cost of lag); beta scales how fast the cutoff rises
// with speed, reducing lag during fast motion
const DEFAULT_FILTER_CUTOFF = 0.001;
const DEFAULT_FILTER_BETA = 1000;
const DEFAULT_WARMUP_TOLERANCE = 5; // consecutive tracked frames before a target is shown
const DEFAULT_MISS_TOLERANCE = 5; // consecutive missed frames before a shown target is hidden
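// Example usage (an illustrative sketch only; `video` is assumed to be a playing
// <video> element and './targets.mind' a compiled image-target file):
//
//   const controller = new Controller({
//     inputWidth: video.videoWidth,
//     inputHeight: video.videoHeight,
//     onUpdate: ({type, targetIndex, worldMatrix}) => {
//       if (type === 'updateMatrix') {
//         // worldMatrix is null when the target is lost, otherwise a
//         // column-major 4x4 matrix ready for three.js
//       }
//     },
//   });
//   await controller.addImageTargets('./targets.mind');
//   controller.dummyRun(video); // warm up GPU kernels before the first real frame
//   controller.processVideo(video);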
class Controller {
constructor({inputWidth, inputHeight, onUpdate=null, debugMode=false, maxTrack=1,
warmupTolerance=null, missTolerance=null, filterMinCF=null, filterBeta=null}) {
this.inputWidth = inputWidth;
this.inputHeight = inputHeight;
this.maxTrack = maxTrack;
this.filterMinCF = filterMinCF === null ? DEFAULT_FILTER_CUTOFF : filterMinCF;
this.filterBeta = filterBeta === null ? DEFAULT_FILTER_BETA : filterBeta;
this.warmupTolerance = warmupTolerance === null ? DEFAULT_WARMUP_TOLERANCE : warmupTolerance;
this.missTolerance = missTolerance === null ? DEFAULT_MISS_TOLERANCE : missTolerance;
this.cropDetector = new CropDetector(this.inputWidth, this.inputHeight, debugMode);
this.inputLoader = new InputLoader(this.inputWidth, this.inputHeight);
this.markerDimensions = null;
this.onUpdate = onUpdate;
this.debugMode = debugMode;
this.processingVideo = false;
this.interestedTargetIndex = -1;
this.trackingStates = [];
const near = 10;
const far = 100000;
const fovy = 45.0 * Math.PI / 180; // vertical field of view: 45 degrees, in radians
const f = (this.inputHeight/2) / Math.tan(fovy/2);
// camera intrinsic matrix, assuming square pixels (fx = fy = f), zero skew,
// and the principal point at the image center:
//     [f  0  cx]
// K = [0  f  cy]
//     [0  0   1]
this.projectionTransform = [
[f, 0, this.inputWidth / 2],
[0, f, this.inputHeight / 2],
[0, 0, 1]
];
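// For example (illustrative numbers only), a 640x480 input gives
// f = 240 / tan(22.5 deg) ~= 579.4, i.e.
//     [579.4    0    320]
// K = [  0    579.4  240]
//     [  0      0      1]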
this.projectionMatrix = this._glProjectionMatrix({
projectionTransform: this.projectionTransform,
width: this.inputWidth,
height: this.inputHeight,
near: near,
far: far,
});
// the worker is inlined via the Vite `?worker&inline` import above; the non-inline
// equivalent would be: new Worker(new URL('./controller.worker.js', import.meta.url))
this.worker = new ControllerWorker();
this.workerMatchDone = null;
this.workerTrackDone = null;
this.worker.onmessage = (e) => {
if (e.data.type === 'matchDone' && this.workerMatchDone !== null) {
this.workerMatchDone(e.data);
}
if (e.data.type === 'trackUpdateDone' && this.workerTrackDone !== null) {
this.workerTrackDone(e.data);
}
}
}
showTFStats() {
console.log(tf.memory().numTensors);
console.table(tf.memory());
}
async addImageTargets(fileURL) {
const content = await fetch(fileURL);
const buffer = await content.arrayBuffer();
return this.addImageTargetsFromBuffer(buffer);
}
addImageTargetsFromBuffer(buffer) {
const compiler = new Compiler();
const dataList = compiler.importData(buffer);
const trackingDataList = [];
const matchingDataList = [];
const imageListList = [];
const dimensions = [];
for (let i = 0; i < dataList.length; i++) {
matchingDataList.push(dataList[i].matchingData);
trackingDataList.push(dataList[i].trackingData);
dimensions.push([dataList[i].targetImage.width, dataList[i].targetImage.height]);
}
this.tracker = new Tracker(dimensions, trackingDataList, this.projectionTransform, this.inputWidth, this.inputHeight, this.debugMode);
this.worker.postMessage({
type: 'setup',
inputWidth: this.inputWidth,
inputHeight: this.inputHeight,
projectionTransform: this.projectionTransform,
debugMode: this.debugMode,
matchingDataList,
});
this.markerDimensions = dimensions;
return {dimensions: dimensions, matchingDataList, trackingDataList};
}
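// Illustrative note: when the compiled target data is already in memory (e.g. read
// from a user-selected File object, here hypothetically named `file`), the buffer
// can be passed directly instead of fetching a URL:
//
//   const buffer = await file.arrayBuffer();
//   const {dimensions} = controller.addImageTargetsFromBuffer(buffer);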
dispose() {
this.stopProcessVideo();
this.worker.postMessage({
type: "dispose"
});
}
// warm up the gpu - building the kernels the first time is slow
dummyRun(input) {
const inputT = this.inputLoader.loadInput(input);
this.cropDetector.detect(inputT);
this.tracker.dummyRun(inputT);
inputT.dispose();
}
getProjectionMatrix() {
return this.projectionMatrix;
}
getWorldMatrix(modelViewTransform, targetIndex) {
return this._glModelViewMatrix(modelViewTransform, targetIndex);
}
async _detectAndMatch(inputT, targetIndexes) {
const {featurePoints} = this.cropDetector.detectMoving(inputT);
const {targetIndex: matchedTargetIndex, modelViewTransform} = await this._workerMatch(featurePoints, targetIndexes);
return {targetIndex: matchedTargetIndex, modelViewTransform};
}
async _trackAndUpdate(inputT, lastModelViewTransform, targetIndex) {
const {worldCoords, screenCoords} = this.tracker.track(inputT, lastModelViewTransform, targetIndex);
if (worldCoords.length < 4) return null;
const modelViewTransform = await this._workerTrackUpdate(lastModelViewTransform, {worldCoords, screenCoords});
return modelViewTransform;
}
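// Main per-frame loop. Each iteration: (1) if fewer than maxTrack targets are
// currently tracked, run detection and matching for the idle targets; (2) refine
// the pose of every tracked target; (3) apply warmup/miss hysteresis so targets
// do not flicker in and out; (4) smooth the shown pose with the one-euro filter
// and report it via onUpdate.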
processVideo(input) {
if (this.processingVideo) return;
this.processingVideo = true;
this.trackingStates = [];
for (let i = 0; i < this.markerDimensions.length; i++) {
this.trackingStates.push({
showing: false,
isTracking: false,
currentModelViewTransform: null,
trackCount: 0,
trackMiss: 0,
filter: new OneEuroFilter({minCutOff: this.filterMinCF, beta: this.filterBeta})
});
//console.log("filterMinCF", this.filterMinCF, this.filterBeta);
}
const startProcessing = async() => {
while (true) {
if (!this.processingVideo) break;
const inputT = this.inputLoader.loadInput(input);
const nTracking = this.trackingStates.reduce((acc, s) => {
return acc + (s.isTracking ? 1 : 0);
}, 0);
// detect and match only when fewer than maxTrack targets are being tracked
if (nTracking < this.maxTrack) {
const matchingIndexes = [];
for (let i = 0; i < this.trackingStates.length; i++) {
const trackingState = this.trackingStates[i];
if (trackingState.isTracking === true) continue;
if (this.interestedTargetIndex !== -1 && this.interestedTargetIndex !== i) continue;
matchingIndexes.push(i);
}
const {targetIndex: matchedTargetIndex, modelViewTransform} = await this._detectAndMatch(inputT, matchingIndexes);
if (matchedTargetIndex !== -1) {
this.trackingStates[matchedTargetIndex].isTracking = true;
this.trackingStates[matchedTargetIndex].currentModelViewTransform = modelViewTransform;
}
}
// tracking update
for (let i = 0; i < this.trackingStates.length; i++) {
const trackingState = this.trackingStates[i];
if (trackingState.isTracking) {
let modelViewTransform = await this._trackAndUpdate(inputT, trackingState.currentModelViewTransform, i);
if (modelViewTransform === null) {
trackingState.isTracking = false;
} else {
trackingState.currentModelViewTransform = modelViewTransform;
}
}
// if not yet showing, show it once it has been tracked for more than warmupTolerance frames
if (!trackingState.showing) {
if (trackingState.isTracking) {
trackingState.trackMiss = 0;
trackingState.trackCount += 1;
if (trackingState.trackCount > this.warmupTolerance) {
trackingState.showing = true;
trackingState.trackingMatrix = null;
trackingState.filter.reset();
}
}
}
// if showing, count consecutive misses and hide it once missTolerance is exceeded
if (trackingState.showing) {
if (!trackingState.isTracking) {
trackingState.trackCount = 0;
trackingState.trackMiss += 1;
if (trackingState.trackMiss > this.missTolerance) {
trackingState.showing = false;
trackingState.trackingMatrix = null;
this.onUpdate && this.onUpdate({type: 'updateMatrix', targetIndex: i, worldMatrix: null});
}
} else {
trackingState.trackMiss = 0;
}
}
// if showing, smooth the pose with the one-euro filter and report the world matrix
if (trackingState.showing) {
const worldMatrix = this._glModelViewMatrix(trackingState.currentModelViewTransform, i);
trackingState.trackingMatrix = trackingState.filter.filter(Date.now(), worldMatrix);
const clone = [...trackingState.trackingMatrix];
this.onUpdate && this.onUpdate({type: 'updateMatrix', targetIndex: i, worldMatrix: clone});
}
}
inputT.dispose();
this.onUpdate && this.onUpdate({type: 'processDone'});
await tf.nextFrame();
}
}
startProcessing();
}
stopProcessVideo() {
this.processingVideo = false;
}
async detect(input) {
const inputT = this.inputLoader.loadInput(input);
const {featurePoints, debugExtra} = await this.cropDetector.detect(inputT);
inputT.dispose();
return {featurePoints, debugExtra};
}
async match(featurePoints, targetIndex) {
const {modelViewTransform, debugExtra} = await this._workerMatch(featurePoints, [targetIndex]);
return {modelViewTransform, debugExtra};
}
async track(input, modelViewTransform, targetIndex) {
const inputT = this.inputLoader.loadInput(input);
const result = this.tracker.track(inputT, modelViewTransform, targetIndex);
inputT.dispose();
return result;
}
async trackUpdate(modelViewTransform, trackFeatures) {
if (trackFeatures.worldCoords.length < 4) return null;
const modelViewTransform2 = await this._workerTrackUpdate(modelViewTransform, trackFeatures);
return modelViewTransform2;
}
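// The four methods above (detect / match / track / trackUpdate) expose the
// per-frame pipeline for manual one-shot use. An illustrative sketch (assuming
// `input` is a loaded image or video frame and target 0 exists):
//
//   const {featurePoints} = await controller.detect(input);
//   const {modelViewTransform} = await controller.match(featurePoints, 0);
//   if (modelViewTransform) {
//     const trackFeatures = await controller.track(input, modelViewTransform, 0);
//     const refined = await controller.trackUpdate(modelViewTransform, trackFeatures);
//   }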
_workerMatch(featurePoints, targetIndexes) {
return new Promise((resolve) => {
this.workerMatchDone = (data) => {
resolve({targetIndex: data.targetIndex, modelViewTransform: data.modelViewTransform, debugExtra: data.debugExtra});
}
this.worker.postMessage({type: 'match', featurePoints: featurePoints, targetIndexes});
});
}
_workerTrackUpdate(modelViewTransform, trackingFeatures) {
return new Promise((resolve) => {
this.workerTrackDone = (data) => {
resolve(data.modelViewTransform);
}
const {worldCoords, screenCoords} = trackingFeatures;
this.worker.postMessage({type: 'trackUpdate', modelViewTransform, worldCoords, screenCoords});
});
}
_glModelViewMatrix(modelViewTransform, targetIndex) {
const height = this.markerDimensions[targetIndex][1];
// Question from the original author: can someone verify that this interpretation is
// correct? It was arrived at largely by trial and error, but it works.
//
// First, OpenGL's y axis runs from bottom to top, while the marker's y axis runs from
// top to bottom. Since modelViewTransform is estimated in marker coordinates, we apply
// this transform before modelViewTransform. Flipping both y and z is a 180 degree
// rotation about the x axis, followed by translating y by the marker height h:
//
//   [1  0  0  0]
//   [0 -1  0  h]
//   [0  0 -1  0]
//   [0  0  0  1]
//
// (This was tested: if the marker coordinates are instead defined bottom-to-top before
// estimating modelViewTransform, the above matrix is unnecessary.)
//
// Second, the pose is estimated in the computer-vision convention where the camera looks
// down the positive z axis, whereas in OpenGL the camera looks down the negative z axis,
// so we apply another 180 degree rotation about the x axis after the transform to fix
// the axis mismatch:
//
//   [1  0  0  0]
//   [0 -1  0  0]
//   [0  0 -1  0]
//   [0  0  0  1]
//
// All together, the combined matrix is:
//
//   [1  0  0  0]   [m00  m01  m02  m03]   [1  0  0  0]
//   [0 -1  0  0]   [m10  m11  m12  m13]   [0 -1  0  h]
//   [0  0 -1  0]   [m20  m21  m22  m23]   [0  0 -1  0]
//   [0  0  0  1]   [  0    0    0    1]   [0  0  0  1]
//
//     [ m00  -m01  -m02   (m01 * h + m03)]
//   = [-m10   m11   m12  -(m11 * h + m13)]
//     [-m20   m21   m22  -(m21 * h + m23)]
//     [   0     0     0                 1]
//
// Finally, three.js stores matrices in column-major order, so we transpose and get:
const openGLWorldMatrix = [
modelViewTransform[0][0], -modelViewTransform[1][0], -modelViewTransform[2][0], 0,
-modelViewTransform[0][1], modelViewTransform[1][1], modelViewTransform[2][1], 0,
-modelViewTransform[0][2], modelViewTransform[1][2], modelViewTransform[2][2], 0,
modelViewTransform[0][1] * height + modelViewTransform[0][3], -(modelViewTransform[1][1] * height + modelViewTransform[1][3]), -(modelViewTransform[2][1] * height + modelViewTransform[2][3]), 1
];
return openGLWorldMatrix;
}
// build openGL projection matrix
// ref: https://strawlab.org/2011/11/05/augmented-reality-with-OpenGL/
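// As a worked example (illustrative numbers only), with f ~= 579.4, width = 640,
// height = 480, near = 10, far = 100000, the matrix before transposing is roughly:
//
//   [1.811    0       0         0   ]
//   [  0    2.414     0         0   ]
//   [  0      0    -1.0002  -20.002 ]
//   [  0      0      -1         0   ]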
_glProjectionMatrix({projectionTransform, width, height, near, far}) {
const proj = [
[2 * projectionTransform[0][0] / width, 0, -(2 * projectionTransform[0][2] / width - 1), 0],
[0, 2 * projectionTransform[1][1] / height, -(2 * projectionTransform[1][2] / height - 1), 0],
[0, 0, -(far + near) / (far - near), -2 * far * near / (far - near)],
[0, 0, -1, 0]
];
// flatten to a column-major array (i.e. transpose) as expected by WebGL / three.js
const projMatrix = [];
for (let i = 0; i < 4; i++) {
for (let j = 0; j < 4; j++) {
projMatrix.push(proj[j][i]);
}
}
return projMatrix;
}
}
export {
Controller
}