// mind-ar
// Version:
// web augmented reality framework
// 1,105 lines (949 loc) • 33.9 kB
// JavaScript
// result should be similar to previous
// improve FREAK descriptors computation
import * as tf from '@tensorflow/tfjs';
import { FREAKPOINTS } from './freak.js';
import './kernels/webgl/index.js';
// ---- Image pyramid ----
// An octave is added while both image sides are at least this large.
const PYRAMID_MIN_SIZE = 8;
// Hard cap on the number of octaves.
const PYRAMID_MAX_OCTAVE = 5;

// ---- Extrema rejection thresholds ----
// NOTE(review): in this file these four are only referenced by the retired
// inline shaders; presumably the registered kernels carry matching values.
const LAPLACIAN_THRESHOLD = 3.0;
const LAPLACIAN_SQR_THRESHOLD = LAPLACIAN_THRESHOLD ** 2;
const EDGE_THRESHOLD = 4.0;
const EDGE_HESSIAN_THRESHOLD = (EDGE_THRESHOLD + 1) ** 2 / EDGE_THRESHOLD;

// ---- Pruning grid ----
// The image is split into a square grid of buckets; each bucket keeps only its
// strongest extrema, so at most NUM_BUCKETS * MAX_FEATURES_PER_BUCKET feature
// points survive.
const NUM_BUCKETS_PER_DIMENSION = 10;
const MAX_FEATURES_PER_BUCKET = 5;
const NUM_BUCKETS = NUM_BUCKETS_PER_DIMENSION ** 2;

// ---- Orientation assignment ----
const ORIENTATION_NUM_BINS = 36;
const ORIENTATION_SMOOTHING_ITERATIONS = 5;
const ORIENTATION_GAUSSIAN_EXPANSION_FACTOR = 3.0;
const ORIENTATION_REGION_EXPANSION_FACTOR = 1.5;

// ---- FREAK descriptor ----
const FREAK_EXPANSION_FACTOR = 7.0;
// Number of unordered pairs of FREAK sample points (666 when there are 37 points).
const FREAK_CONPARISON_COUNT = FREAKPOINTS.length * (FREAKPOINTS.length - 1) / 2;
class Detector {
constructor(width, height, debugMode = false) {
this.debugMode = debugMode;
this.width = width;
this.height = height;
let numOctaves = 0;
while (width >= PYRAMID_MIN_SIZE && height >= PYRAMID_MIN_SIZE) {
width /= 2;
height /= 2;
numOctaves++;
if (numOctaves === PYRAMID_MAX_OCTAVE) break;
}
this.numOctaves = numOctaves;
this.tensorCaches = {};
this.kernelCaches = {};
}
// used in compiler
detectImageData(imageData) {
const arr = new Uint8ClampedArray(4 * imageData.length);
for (let i = 0; i < imageData.length; i++) {
arr[4 * i] = imageData[i];
arr[4 * i + 1] = imageData[i];
arr[4 * i + 2] = imageData[i];
arr[4 * i + 3] = 255;
}
const img = new ImageData(arr, this.width, this.height);
return this.detect(img);
}
/**
*
* @param {tf.Tensor<tf.Rank>} inputImageT
* @returns
*/
detect(inputImageT) {
let debugExtra = null;
// Build gaussian pyramid images, two images per octave
/** @type {Array<Array<tf.Tensor<tf.Rank>>} */
const pyramidImagesT = [];
//console.log("Detector::Building pyramid Images...");
for (let i = 0; i < this.numOctaves; i++) {
let image1T;
let image2T;
if (i === 0) {
image1T = this._applyFilter(inputImageT);
} else {
image1T = this._downsampleBilinear(pyramidImagesT[i - 1][pyramidImagesT[i - 1].length - 1]);
}
image2T = this._applyFilter(image1T);
pyramidImagesT.push([image1T, image2T]);
}
//console.log("Detector::Building dog images...");
// Build difference-of-gaussian (dog) pyramid
/** @type {tf.Tensor<tf.Rank>[]} */
const dogPyramidImagesT = [];
for (let i = 0; i < this.numOctaves; i++) {
let dogImageT = this._differenceImageBinomial(pyramidImagesT[i][0], pyramidImagesT[i][1]);
dogPyramidImagesT.push(dogImageT);
}
// find local maximum/minimum
/** @type {tf.Tensor<tf.Rank>[]} */
const extremasResultsT = [];
for (let i = 1; i < this.numOctaves - 1; i++) {
const extremasResultT = this._buildExtremas(dogPyramidImagesT[i - 1], dogPyramidImagesT[i], dogPyramidImagesT[i + 1]);
extremasResultsT.push(extremasResultT);
}
// divide the input into N by N buckets, and for each bucket,
// collect the top 5 most significant extrema across extremas in all scale level
// result would be NUM_BUCKETS x NUM_FEATURES_PER_BUCKET extremas
const prunedExtremasList = this._applyPrune(extremasResultsT);
const prunedExtremasT = this._computeLocalization(prunedExtremasList, dogPyramidImagesT);
// compute the orientation angle for each pruned extremas
const extremaHistogramsT = this._computeOrientationHistograms(prunedExtremasT, pyramidImagesT);
const smoothedHistogramsT = this._smoothHistograms(extremaHistogramsT);
const extremaAnglesT = this._computeExtremaAngles(smoothedHistogramsT);
// to compute freak descriptors, we first find the pixel value of 37 freak points for each extrema
const extremaFreaksT = this._computeExtremaFreak(pyramidImagesT, prunedExtremasT, extremaAnglesT);
// compute the binary descriptors
const freakDescriptorsT = this._computeFreakDescriptors(extremaFreaksT);
const prunedExtremasArr = prunedExtremasT.arraySync();
const extremaAnglesArr = extremaAnglesT.arraySync();
const freakDescriptorsArr = freakDescriptorsT.arraySync();
if (this.debugMode) {
debugExtra = {
pyramidImages: pyramidImagesT.map((ts) => ts.map((t) => t.arraySync())),
dogPyramidImages: dogPyramidImagesT.map((t) => t ? t.arraySync() : null),
extremasResults: extremasResultsT.map((t) => t.arraySync()),
extremaAngles: extremaAnglesT.arraySync(),
prunedExtremas: prunedExtremasList,
localizedExtremas: prunedExtremasT.arraySync(),
}
}
pyramidImagesT.forEach((ts) => ts.forEach((t) => t.dispose()));
dogPyramidImagesT.forEach((t) => t && t.dispose());
extremasResultsT.forEach((t) => t.dispose());
prunedExtremasT.dispose();
extremaHistogramsT.dispose();
smoothedHistogramsT.dispose();
extremaAnglesT.dispose();
extremaFreaksT.dispose();
freakDescriptorsT.dispose();
const featurePoints = [];
for (let i = 0; i < prunedExtremasArr.length; i++) {
if (prunedExtremasArr[i][0] == 0) continue;
const descriptors = [];
for (let m = 0; m < freakDescriptorsArr[i].length; m += 4) {
const v1 = freakDescriptorsArr[i][m];
const v2 = freakDescriptorsArr[i][m + 1];
const v3 = freakDescriptorsArr[i][m + 2];
const v4 = freakDescriptorsArr[i][m + 3];
let combined = v1 * 16777216 + v2 * 65536 + v3 * 256 + v4;
//if (m === freakDescriptorsArr[i].length-4) { // last one, legacy reason
// combined /= 32;
//}
descriptors.push(combined);
}
const octave = prunedExtremasArr[i][1];
const y = prunedExtremasArr[i][2];
const x = prunedExtremasArr[i][3];
const originalX = x * Math.pow(2, octave) + Math.pow(2, (octave - 1)) - 0.5;
const originalY = y * Math.pow(2, octave) + Math.pow(2, (octave - 1)) - 0.5;
const scale = Math.pow(2, octave);
featurePoints.push({
maxima: prunedExtremasArr[i][0] > 0,
x: originalX,
y: originalY,
scale: scale,
angle: extremaAnglesArr[i],
descriptors: descriptors
});
}
//console.log("feature points", featurePoints);
//console.table(tf.memory());
return { featurePoints, debugExtra };
}
_computeFreakDescriptors(extremaFreaks) {
if (!this.tensorCaches.computeFreakDescriptors) {
const in1Arr = [];
const in2Arr = [];
for (let k1 = 0; k1 < extremaFreaks.shape[1]; k1++) {
for (let k2 = k1 + 1; k2 < extremaFreaks.shape[1]; k2++) {
in1Arr.push(k1);
in2Arr.push(k2);
}
}
const in1 = tf.tensor(in1Arr, [in1Arr.length]).cast('int32');
const in2 = tf.tensor(in2Arr, [in2Arr.length]).cast('int32');
this.tensorCaches.computeFreakDescriptors = {
positionT: tf.keep(tf.stack([in1, in2], 1))
}
}
const { positionT } = this.tensorCaches.computeFreakDescriptors;
// encode 8 bits into one number
// trying to encode 16 bits give wrong result in iOS. may integer precision issue
const descriptorCount = Math.ceil(FREAK_CONPARISON_COUNT / 8);
/*
if (!this.kernelCaches.computeFreakDescriptors) {
const kernel = {
variableNames: ['freak', 'p'],
outputShape: [extremaFreaks.shape[0], descriptorCount],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int featureIndex = coords[0];
int descIndex = coords[1] * 8;
int sum = 0;
for (int i = 0; i < 8; i++) {
if (descIndex + i >= ${FREAK_CONPARISON_COUNT}) {
continue;
}
int p1 = int(getP(descIndex + i, 0));
int p2 = int(getP(descIndex + i, 1));
float v1 = getFreak(featureIndex, p1);
float v2 = getFreak(featureIndex, p2);
if (v1 < v2 + 0.01) {
sum += int(pow(2.0, float(7 - i)));
}
}
setOutput(float(sum));
}
`
}
this.kernelCaches.computeFreakDescriptors = [kernel];
}
*/
return tf.tidy(() => {
//const [program] = this.kernelCaches.computeFreakDescriptors;
//return this._runWebGLProgram(program, [extremaFreaks, positionT], 'int32');
return tf.engine().runKernel('ComputeFreakDescriptors', { extremaFreaks, positionT });
});
}
_computeExtremaFreak(pyramidImagesT, prunedExtremas, prunedExtremasAngles) {
if (!this.tensorCaches._computeExtremaFreak) {
tf.tidy(() => {
const freakPoints = tf.tensor(FREAKPOINTS);
this.tensorCaches._computeExtremaFreak = {
freakPointsT: tf.keep(freakPoints),
};
});
}
const { freakPointsT } = this.tensorCaches._computeExtremaFreak;
const gaussianImagesT = [];
for (let i = 1; i < pyramidImagesT.length; i++) {
//gaussianImagesT.push(pyramidImagesT[i][0]);
gaussianImagesT.push(pyramidImagesT[i][1]); // better
}
/* if (!this.kernelCaches._computeExtremaFreak) {
const imageVariableNames = [];
for (let i = 1; i < pyramidImagesT.length; i++) {
imageVariableNames.push('image' + i);
}
let pixelsSubCodes = `float getPixel(int octave, int y, int x) {`;
for (let i = 1; i < pyramidImagesT.length; i++) {
pixelsSubCodes += `
if (octave == ${i}) {
return getImage${i}(y, x);
}
`
}
pixelsSubCodes += `}`;
const kernel = {
variableNames: [...imageVariableNames, 'extrema', 'angles', 'freakPoints'],
outputShape: [prunedExtremas.shape[0], FREAKPOINTS.length],
userCode: `
${pixelsSubCodes}
void main() {
ivec2 coords = getOutputCoords();
int featureIndex = coords[0];
int freakIndex = coords[1];
float freakSigma = getFreakPoints(freakIndex, 0);
float freakX = getFreakPoints(freakIndex, 1);
float freakY = getFreakPoints(freakIndex, 2);
int octave = int(getExtrema(featureIndex, 1));
float inputY = getExtrema(featureIndex, 2);
float inputX = getExtrema(featureIndex, 3);
float inputAngle = getAngles(featureIndex);
float cos = ${FREAK_EXPANSION_FACTOR}. * cos(inputAngle);
float sin = ${FREAK_EXPANSION_FACTOR}. * sin(inputAngle);
float yp = inputY + freakX * sin + freakY * cos;
float xp = inputX + freakX * cos + freakY * -sin;
int x0 = int(floor(xp));
int x1 = x0 + 1;
int y0 = int(floor(yp));
int y1 = y0 + 1;
float f1 = getPixel(octave, y0, x0);
float f2 = getPixel(octave, y0, x1);
float f3 = getPixel(octave, y1, x0);
float f4 = getPixel(octave, y1, x1);
float x1f = float(x1);
float y1f = float(y1);
float x0f = float(x0);
float y0f = float(y0);
// ratio for interpolation between four neighbouring points
float value = (x1f - xp) * (y1f - yp) * f1
+ (xp - x0f) * (y1f - yp) * f2
+ (x1f - xp) * (yp - y0f) * f3
+ (xp - x0f) * (yp - y0f) * f4;
setOutput(value);
}
`
}
this.kernelCaches._computeExtremaFreak = [kernel];
} */
return tf.tidy(() => {
/* const [program] = this.kernelCaches._computeExtremaFreak;
const result = this._compileAndRun(program, [...gaussianImagesT, prunedExtremas, prunedExtremasAngles, freakPointsT]);
return result; */
return tf.engine().runKernel('ComputeExtremaFreak', { gaussianImagesT, prunedExtremas, prunedExtremasAngles, freakPointsT, pyramidImagesLength: pyramidImagesT.length });
});
}
/**
*
* @param {tf.Tensor<tf.Rank>} histograms
* @returns
*/
_computeExtremaAngles(histograms) {
/* if (!this.kernelCaches.computeExtremaAngles) {
const kernel = {
variableNames: ['histogram'],
outputShape: [histograms.shape[0]],
userCode: `
void main() {
int featureIndex = getOutputCoords();
int maxIndex = 0;
for (int i = 1; i < ${ORIENTATION_NUM_BINS}; i++) {
if (getHistogram(featureIndex, i) > getHistogram(featureIndex, maxIndex)) {
maxIndex = i;
}
}
int prev = imod(maxIndex - 1 + ${ORIENTATION_NUM_BINS}, ${ORIENTATION_NUM_BINS});
int next = imod(maxIndex + 1, ${ORIENTATION_NUM_BINS});
**
* Fit a quatratic to 3 points. The system of equations is:
*
* y0 = A*x0^2 + B*x0 + C
* y1 = A*x1^2 + B*x1 + C
* y2 = A*x2^2 + B*x2 + C
*
* This system of equations is solved for A,B,C.
*
float p10 = float(maxIndex - 1);
float p11 = getHistogram(featureIndex, prev);
float p20 = float(maxIndex);
float p21 = getHistogram(featureIndex, maxIndex);
float p30 = float(maxIndex + 1);
float p31 = getHistogram(featureIndex, next);
float d1 = (p30-p20)*(p30-p10);
float d2 = (p10-p20)*(p30-p10);
float d3 = p10-p20;
// If any of the denominators are zero then, just use maxIndex.
float fbin = float(maxIndex);
if ( abs(d1) > 0.00001 && abs(d2) > 0.00001 && abs(d3) > 0.00001) {
float a = p10*p10;
float b = p20*p20;
// Solve for the coefficients A,B,C
float A = ((p31-p21)/d1)-((p11-p21)/d2);
float B = ((p11-p21)+(A*(b-a)))/d3;
float C = p11-(A*a)-(B*p10);
fbin = -B / (2. * A);
}
float an = 2.0 *${Math.PI} * (fbin + 0.5) / ${ORIENTATION_NUM_BINS}. - ${Math.PI};
setOutput(an);
}
`
}
this.kernelCaches.computeExtremaAngles = kernel;
} */
return tf.tidy(() => {
/* const program = this.kernelCaches.computeExtremaAngles;
return this._compileAndRun(program, [histograms]); */
return tf.engine().runKernel("ComputeExtremaAngles", { histograms });
});
}
// TODO: maybe can try just using average momentum, instead of histogram method. histogram might be overcomplicated
/**
*
* @param {tf.Tensor<tf.Rank>} prunedExtremasT
* @param {tf.Tensor<tf.Rank>[]} pyramidImagesT
* @returns
*/
_computeOrientationHistograms(prunedExtremasT, pyramidImagesT) {
const oneOver2PI = 0.159154943091895;
const gaussianImagesT = [];
for (let i = 1; i < pyramidImagesT.length; i++) {
gaussianImagesT.push(pyramidImagesT[i][1]);
}
if (!this.tensorCaches.orientationHistograms) {
tf.tidy(() => {
const gwScale = -1.0 / (2 * ORIENTATION_GAUSSIAN_EXPANSION_FACTOR * ORIENTATION_GAUSSIAN_EXPANSION_FACTOR);
const radius = ORIENTATION_GAUSSIAN_EXPANSION_FACTOR * ORIENTATION_REGION_EXPANSION_FACTOR;
const radiusCeil = Math.ceil(radius);
const radialProperties = [];
for (let y = -radiusCeil; y <= radiusCeil; y++) {
for (let x = -radiusCeil; x <= radiusCeil; x++) {
const distanceSquare = x * x + y * y;
// may just assign w = 1 will do, this could be over complicated.
if (distanceSquare <= radius * radius) {
const _x = distanceSquare * gwScale;
// fast expontenial approx
let w = (720 + _x * (720 + _x * (360 + _x * (120 + _x * (30 + _x * (6 + _x)))))) * 0.0013888888;
radialProperties.push([y, x, w]);
}
}
}
this.tensorCaches.orientationHistograms = {
radialPropertiesT: tf.keep(tf.tensor(radialProperties, [radialProperties.length, 3])),
}
});
}
const { radialPropertiesT } = this.tensorCaches.orientationHistograms;
/* if (!this.kernelCaches.computeOrientationHistograms) {
const imageVariableNames = [];
for (let i = 1; i < pyramidImagesT.length; i++) {
imageVariableNames.push('image' + i);
}
let kernel1SubCodes = `float getPixel(int octave, int y, int x) {`;
for (let i = 1; i < pyramidImagesT.length; i++) {
kernel1SubCodes += `
if (octave == ${i}) {
return getImage${i}(y, x);
}
`
}
kernel1SubCodes += `}`;
const kernel1 = {
variableNames: [...imageVariableNames, 'extrema', 'radial'],
outputShape: [prunedExtremasT.shape[0], radialPropertiesT.shape[0], 2], // last dimension: [fbin, magnitude]
userCode: `
${kernel1SubCodes}
void main() {
ivec3 coords = getOutputCoords();
int featureIndex = coords[0];
int radialIndex = coords[1];
int propertyIndex = coords[2];
int radialY = int(getRadial(radialIndex, 0));
int radialX = int(getRadial(radialIndex, 1));
float radialW = getRadial(radialIndex, 2);
int octave = int(getExtrema(featureIndex, 1));
int y = int(getExtrema(featureIndex, 2));
int x = int(getExtrema(featureIndex, 3));
int xp = x + radialX;
int yp = y + radialY;
float dy = getPixel(octave, yp+1, xp) - getPixel(octave, yp-1, xp);
float dx = getPixel(octave, yp, xp+1) - getPixel(octave, yp, xp-1);
if (propertyIndex == 0) {
// be careful that atan(0, 0) gives 1.57 instead of 0 (different from js), but doesn't matter here, coz magnitude is 0
float angle = atan(dy, dx) + ${Math.PI};
float fbin = angle * ${ORIENTATION_NUM_BINS}. * ${oneOver2PI};
setOutput(fbin);
return;
}
if (propertyIndex == 1) {
float mag = sqrt(dx * dx + dy * dy);
float magnitude = radialW * mag;
setOutput(magnitude);
return;
}
}
`
}
const kernel2 = {
variableNames: ['fbinMag'],
outputShape: [prunedExtremasT.shape[0], ORIENTATION_NUM_BINS],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int featureIndex = coords[0];
int binIndex = coords[1];
float sum = 0.;
for (int i = 0; i < ${radialPropertiesT.shape[0]}; i++) {
float fbin = getFbinMag(featureIndex, i, 0);
int bin = int(floor(fbin - 0.5));
int b1 = imod(bin + ${ORIENTATION_NUM_BINS}, ${ORIENTATION_NUM_BINS});
int b2 = imod(bin + 1 + ${ORIENTATION_NUM_BINS}, ${ORIENTATION_NUM_BINS});
if (b1 == binIndex || b2 == binIndex) {
float magnitude = getFbinMag(featureIndex, i, 1);
float w2 = fbin - float(bin) - 0.5;
float w1 = w2 * -1. + 1.;
if (b1 == binIndex) {
sum += w1 * magnitude;
}
if (b2 == binIndex) {
sum += w2 * magnitude;
}
}
}
setOutput(sum);
}
`
}
this.kernelCaches.computeOrientationHistograms = [kernel1, kernel2];
} */
return tf.tidy(() => {
/* const [program1, program2] = this.kernelCaches.computeOrientationHistograms;
const result1 = this._compileAndRun(program1, [...gaussianImagesT, prunedExtremasT, radialPropertiesT]);
const result2 = this._compileAndRun(program2, [result1]);
return result2;*/
return tf.engine().runKernel('ComputeOrientationHistograms', { gaussianImagesT, prunedExtremasT, radialPropertiesT, pyramidImagesLength: pyramidImagesT.length });
});
}
// The histogram is smoothed with a Gaussian, with sigma = 1
_smoothHistograms(histograms) {
/* if (!this.kernelCaches.smoothHistograms) {
const kernel = {
variableNames: ['histogram'],
outputShape: [histograms.shape[0], ORIENTATION_NUM_BINS],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int featureIndex = coords[0];
int binIndex = coords[1];
int prevBin = imod(binIndex - 1 + ${ORIENTATION_NUM_BINS}, ${ORIENTATION_NUM_BINS});
int nextBin = imod(binIndex + 1, ${ORIENTATION_NUM_BINS});
float result = 0.274068619061197 * getHistogram(featureIndex, prevBin) + 0.451862761877606 * getHistogram(featureIndex, binIndex) + 0.274068619061197 * getHistogram(featureIndex, nextBin);
setOutput(result);
}
`
}
this.kernelCaches.smoothHistograms = kernel;
} */
return tf.tidy(() => {
return tf.engine().runKernel("SmoothHistograms", { histograms });//
/* const program = this.kernelCaches.smoothHistograms;
for (let i = 0; i < ORIENTATION_SMOOTHING_ITERATIONS; i++) {
histograms = this._compileAndRun(program, [histograms]);
}
return histograms; */
});
}
/**
*
* @param {number[][]} prunedExtremasList
* @param {tf.Tensor<tf.Rank>[]} dogPyramidImagesT
* @returns
*/
_computeLocalization(prunedExtremasList, dogPyramidImagesT) {
/* if (!this.kernelCaches.computeLocalization) {
const dogVariableNames = [];
let dogSubCodes = `float getPixel(int octave, int y, int x) {`;
for (let i = 1; i < dogPyramidImagesT.length; i++) { // extrema starts from second octave
dogVariableNames.push('image' + i);
dogSubCodes += `
if (octave == ${i}) {
return getImage${i}(y, x);
}
`;
}
dogSubCodes += `}`;
const kernel = {
variableNames: [...dogVariableNames, 'extrema'],
outputShape: [prunedExtremasList.length, 3, 3], // 3x3 pixels around the extrema
userCode: `
${dogSubCodes}
void main() {
ivec3 coords = getOutputCoords();
int featureIndex = coords[0];
float score = getExtrema(featureIndex, 0);
if (score == 0.0) {
return;
}
int dy = coords[1]-1;
int dx = coords[2]-1;
int octave = int(getExtrema(featureIndex, 1));
int y = int(getExtrema(featureIndex, 2));
int x = int(getExtrema(featureIndex, 3));
setOutput(getPixel(octave, y+dy, x+dx));
}
`
}
this.kernelCaches.computeLocalization = [kernel];
} */
return tf.tidy(() => {
//const program = this.kernelCaches.computeLocalization[0];
//const prunedExtremasT = tf.tensor(prunedExtremasList, [prunedExtremasList.length, prunedExtremasList[0].length], 'int32');
const pixelsT = tf.engine().runKernel('ComputeLocalization', { prunedExtremasList, dogPyramidImagesT });//this._compileAndRun(program, [...dogPyramidImagesT.slice(1), prunedExtremasT]);
const pixels = pixelsT.arraySync();
const result = [];
for (let i = 0; i < pixels.length; i++) {
result.push([]);
for (let j = 0; j < pixels[i].length; j++) {
result[i].push([]);
}
}
const localizedExtremas = [];
for (let i = 0; i < prunedExtremasList.length; i++) {
localizedExtremas[i] = [
prunedExtremasList[i][0],
prunedExtremasList[i][1],
prunedExtremasList[i][2],
prunedExtremasList[i][3],
];
}
for (let i = 0; i < localizedExtremas.length; i++) {
if (localizedExtremas[i][0] === 0) {
continue;
}
const pixel = pixels[i];
const dx = 0.5 * (pixel[1][2] - pixel[1][0]);
const dy = 0.5 * (pixel[2][1] - pixel[0][1]);
const dxx = pixel[1][2] + pixel[1][0] - 2 * pixel[1][1];
const dyy = pixel[2][1] + pixel[0][1] - 2 * pixel[1][1];
const dxy = 0.25 * (pixel[0][0] + pixel[2][2] - pixel[0][2] - pixel[2][0]);
const det = dxx * dyy - dxy * dxy;
const ux = (dyy * -dx + -dxy * -dy) / det;
const uy = (-dxy * -dx + dxx * -dy) / det;
const newY = localizedExtremas[i][2] + uy;
const newX = localizedExtremas[i][3] + ux;
if (Math.abs(det) < 0.0001) {
continue;
}
localizedExtremas[i][2] = newY;
localizedExtremas[i][3] = newX;
}
return tf.tensor(localizedExtremas, [localizedExtremas.length, localizedExtremas[0].length], 'float32');
});
}
// faster to do it in CPU
// if we do in gpu, we probably need to use tf.topk(), which seems to be run in CPU anyway (no gpu operation for that)
// TODO: research adapative maximum supression method
/**
*
* @param {tf.Tensor<tf.Rank>[]} extremasResultsT
* @returns
*/
_applyPrune(extremasResultsT) {
const nBuckets = NUM_BUCKETS_PER_DIMENSION * NUM_BUCKETS_PER_DIMENSION;
const nFeatures = MAX_FEATURES_PER_BUCKET;
/*
if (!this.kernelCaches.applyPrune) {
const reductionKernels = [];
// to reduce to amount of data that need to sync back to CPU by 4 times, we apply this trick:
// the fact that there is not possible to have consecutive maximum/minimum, we can safe combine 4 pixels into 1
for (let k = 0; k < extremasResultsT.length; k++) {
const extremaHeight = extremasResultsT[k].shape[0];
const extremaWidth = extremasResultsT[k].shape[1];
const kernel = {
variableNames: ['extrema'],
outputShape: [Math.floor(extremaHeight/2), Math.floor(extremaWidth/2)],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int y = coords[0] * 2;
int x = coords[1] * 2;
float location = 0.0;
float values = getExtrema(y, x);
if (getExtrema(y+1, x) != 0.0) {
location = 1.0;
values = getExtrema(y+1, x);
}
else if (getExtrema(y, x+1) != 0.0) {
location = 2.0;
values = getExtrema(y, x+1);
}
else if (getExtrema(y+1, x+1) != 0.0) {
location = 3.0;
values = getExtrema(y+1, x+1);
}
if (values < 0.0) {
setOutput(location * -1000.0 + values);
} else {
setOutput(location * 1000.0 + values);
}
}
`
}
reductionKernels.push(kernel);
}
this.kernelCaches.applyPrune = {reductionKernels};
}
*/
// combine results into a tensor of:
// nBuckets x nFeatures x [score, octave, y, x]
const curAbsScores = [];
/** @type {number[][][]} */
const result = [];
for (let i = 0; i < nBuckets; i++) {
result.push([]);
curAbsScores.push([]);
for (let j = 0; j < nFeatures; j++) {
result[i].push([0, 0, 0, 0]);
curAbsScores[i].push(0);
}
}
tf.tidy(() => {
//const {reductionKernels} = this.kernelCaches.applyPrune;
for (let k = 0; k < extremasResultsT.length; k++) {
//const program = reductionKernels[k];
//const reducedT = this._compileAndRun(program, [extremasResultsT[k]]);
const reducedT = tf.engine().runKernel('ExtremaReduction', { extremasResultT: extremasResultsT[k] });
const octave = k + 1; // extrema starts from second octave
const reduced = reducedT.arraySync();
const height = reducedT.shape[0];
const width = reducedT.shape[1];
const bucketWidth = width * 2 / NUM_BUCKETS_PER_DIMENSION;
const bucketHeight = height * 2 / NUM_BUCKETS_PER_DIMENSION;
for (let j = 0; j < height; j++) {
for (let i = 0; i < width; i++) {
const encoded = reduced[j][i];
if (encoded == 0) continue;
const score = encoded % 1000;
const loc = Math.floor(Math.abs(encoded) / 1000);
const x = i * 2 + (loc === 2 || loc === 3 ? 1 : 0);
const y = j * 2 + (loc === 1 || loc === 3 ? 1 : 0);
const bucketX = Math.floor(x / bucketWidth);
const bucketY = Math.floor(y / bucketHeight);
const bucket = bucketY * NUM_BUCKETS_PER_DIMENSION + bucketX;
const absScore = Math.abs(score);
let tIndex = nFeatures;
while (tIndex >= 1 && absScore > curAbsScores[bucket][tIndex - 1]) {
tIndex -= 1;
}
if (tIndex < nFeatures) {
for (let t = nFeatures - 1; t >= tIndex + 1; t--) {
curAbsScores[bucket][t] = curAbsScores[bucket][t - 1];
result[bucket][t][0] = result[bucket][t - 1][0];
result[bucket][t][1] = result[bucket][t - 1][1];
result[bucket][t][2] = result[bucket][t - 1][2];
result[bucket][t][3] = result[bucket][t - 1][3];
}
curAbsScores[bucket][tIndex] = absScore;
result[bucket][tIndex][0] = score;
result[bucket][tIndex][1] = octave;
result[bucket][tIndex][2] = y;
result[bucket][tIndex][3] = x;
}
}//for j<height
}//for i<width
}
});
// combine all buckets into a single list
const list = [];
for (let i = 0; i < nBuckets; i++) {
for (let j = 0; j < nFeatures; j++) {
list.push(result[i][j]);
}
}
return list;
}
_buildExtremas(image0, image1, image2) {
/* const imageHeight = image1.shape[0];
const imageWidth = image1.shape[1];
const kernelKey = 'w' + imageWidth;
if (!this.kernelCaches.buildExtremas) {
this.kernelCaches.buildExtremas = {};
}
if (!this.kernelCaches.buildExtremas[kernelKey]) {
const kernel = {
variableNames: ['image0', 'image1', 'image2'],
outputShape: [imageHeight, imageWidth],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int y = coords[0];
int x = coords[1];
float value = getImage1(y, x);
// Step 1: find local maxima/minima
if (value * value < ${LAPLACIAN_SQR_THRESHOLD}.) {
setOutput(0.);
return;
}
if (y < ${FREAK_EXPANSION_FACTOR} || y > ${imageHeight - 1 - FREAK_EXPANSION_FACTOR}) {
setOutput(0.);
return;
}
if (x < ${FREAK_EXPANSION_FACTOR} || x > ${imageWidth - 1 - FREAK_EXPANSION_FACTOR}) {
setOutput(0.);
return;
}
bool isMax = true;
bool isMin = true;
for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) {
float value0 = getImage0(y+dy, x+dx);
float value1 = getImage1(y+dy, x+dx);
float value2 = getImage2(y+dy, x+dx);
if (value < value0 || value < value1 || value < value2) {
isMax = false;
}
if (value > value0 || value > value1 || value > value2) {
isMin = false;
}
}
}
if (!isMax && !isMin) {
setOutput(0.);
return;
}
// compute edge score and reject based on threshold
float dxx = getImage1(y, x+1) + getImage1(y, x-1) - 2. * getImage1(y, x);
float dyy = getImage1(y+1, x) + getImage1(y-1, x) - 2. * getImage1(y, x);
float dxy = 0.25 * (getImage1(y-1,x-1) + getImage1(y+1,x+1) - getImage1(y-1,x+1) - getImage1(y+1,x-1));
float det = (dxx * dyy) - (dxy * dxy);
if (abs(det) < 0.0001) { // determinant undefined. no solution
setOutput(0.);
return;
}
float edgeScore = (dxx + dyy) * (dxx + dyy) / det;
if (abs(edgeScore) >= ${EDGE_HESSIAN_THRESHOLD} ) {
setOutput(0.);
return;
}
setOutput(getImage1(y,x));
}
`
};
this.kernelCaches.buildExtremas[kernelKey] = kernel;
} */
return tf.tidy(() => {
return tf.engine().runKernel('BuildExtremas', { image0, image1, image2 });
/* const program = this.kernelCaches.buildExtremas[kernelKey];
image0 = this._downsampleBilinear(image0);
image2 = this._upsampleBilinear(image2, image1); */
//this._compileAndRun(program, [image0, image1, image2]);
//return this._runWebGLProgram(program, [image0, image1, image2], 'float32');
});
}
/**
*
* @param {tf.Tensor<tf.Rank>} image1
* @param {tf.Tensor<tf.Rank>} image2
* @returns
*/
_differenceImageBinomial(image1, image2) {
return tf.tidy(() => {
return image1.sub(image2);
});
}
// 4th order binomail filter [1,4,6,4,1] X [1,4,6,4,1]
_applyFilter(image) {
/* const imageHeight = image.shape[0];
const imageWidth = image.shape[1];
const kernelKey = 'w' + imageWidth;
if (!this.kernelCaches.applyFilter) {
this.kernelCaches.applyFilter = {};
}
if (!this.kernelCaches.applyFilter[kernelKey]) {
const kernel1 = {
variableNames: ['p'],
outputShape: [imageHeight, imageWidth],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
float sum = getP(coords[0], coords[1]-2);
sum += getP(coords[0], coords[1]-1) * 4.;
sum += getP(coords[0], coords[1]) * 6.;
sum += getP(coords[0], coords[1]+1) * 4.;
sum += getP(coords[0], coords[1]+2);
setOutput(sum);
}
`
};
const kernel2 = {
variableNames: ['p'],
outputShape: [imageHeight, imageWidth],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
float sum = getP(coords[0]-2, coords[1]);
sum += getP(coords[0]-1, coords[1]) * 4.;
sum += getP(coords[0], coords[1]) * 6.;
sum += getP(coords[0]+1, coords[1]) * 4.;
sum += getP(coords[0]+2, coords[1]);
sum /= 256.;
setOutput(sum);
}
`
};
this.kernelCaches.applyFilter[kernelKey] = [kernel1, kernel2];
}
*/
return tf.tidy(() => {
/* const [program1, program2] = this.kernelCaches.applyFilter[kernelKey];
const result1 = this._compileAndRun(program1, [image]);
const result2 = this._compileAndRun(program2, [result1]);
return result2; */
return tf.engine().runKernel('BinomialFilter', { image });
});
}
/* _upsampleBilinear(image, targetImage) {
const imageHeight = image.shape[0];
const imageWidth = image.shape[1];
const kernelKey = 'w' + imageWidth;
if (!this.kernelCaches.upsampleBilinear) {
this.kernelCaches.upsampleBilinear = {};
}
if (!this.kernelCaches.upsampleBilinear[kernelKey]) {
const kernel = {
variableNames: ['p'],
outputShape: [targetImage.shape[0], targetImage.shape[1]],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int j = coords[0];
int i = coords[1];
float sj = 0.5 * float(j) - 0.25;
float si = 0.5 * float(i) - 0.25;
float sj0 = floor(sj);
float sj1 = ceil(sj);
float si0 = floor(si);
float si1 = ceil(si);
int sj0I = int(sj0);
int sj1I = int(sj1);
int si0I = int(si0);
int si1I = int(si1);
float sum = 0.0;
sum += getP(sj0I, si0I) * (si1 - si) * (sj1 - sj);
sum += getP(sj1I, si0I) * (si1 - si) * (sj - sj0);
sum += getP(sj0I, si1I) * (si - si0) * (sj1 - sj);
sum += getP(sj1I, si1I) * (si - si0) * (sj - sj0);
setOutput(sum);
}
`
};
this.kernelCaches.upsampleBilinear[kernelKey] = kernel;
}
return tf.tidy(() => {
const program = this.kernelCaches.upsampleBilinear[kernelKey];
return tf.engine().runKernel("UpsampleBilinear", { x: image, width: image.shape[1], height: image.shape[0] });//this._compileAndRun(program, [image]);
});
} */
_downsampleBilinear(image) {
/* const imageHeight = image.shape[0];
const imageWidth = image.shape[1];
const kernelKey = 'w' + imageWidth;
if (!this.kernelCaches.downsampleBilinear) {
this.kernelCaches.downsampleBilinear = {};
}
if (!this.kernelCaches.downsampleBilinear[kernelKey]) {
const kernel = {
variableNames: ['p'],
outputShape: [Math.floor(imageHeight / 2), Math.floor(imageWidth / 2)],
userCode: `
void main() {
ivec2 coords = getOutputCoords();
int y = coords[0] * 2;
int x = coords[1] * 2;
float sum = getP(y, x) * 0.25;
sum += getP(y+1,x) * 0.25;
sum += getP(y, x+1) * 0.25;
sum += getP(y+1,x+1) * 0.25;
setOutput(sum);
}
`
};
this.kernelCaches.downsampleBilinear[kernelKey] = kernel;
} */
return tf.tidy(() => {
//const program = this.kernelCaches.downsampleBilinear[kernelKey];
return tf.engine().runKernel("DownsampleBilinear", { image });//this._compileAndRun(program, [image]);
});
}
/**
*
* @param {tf.MathBackendWebGL.GPGPUProgram} program
* @param {*} inputs
* @returns
*/
_compileAndRun(program, inputs) {
const outInfo = tf.backend().compileAndRun(program, inputs);
return tf.engine().makeTensorFromDataId(outInfo.dataId, outInfo.shape, outInfo.dtype);
}
_runWebGLProgram(program, inputs, outputType) {
const outInfo = tf.backend().runWebGLProgram(program, inputs, outputType);
return tf.engine().makeTensorFromDataId(outInfo.dataId, outInfo.shape, outInfo.dtype);
}
}
export {
Detector
};