/*
 * mind-ar — web augmented reality framework
 * (listing metadata: 335 lines, 281 loc, 11.7 kB, JavaScript)
 */
import {Cumsum} from '../utils/cumsum.js';
// Radius (px) of the Step-2 window scanned for similar templates around a candidate.
const SEARCH_SIZE1 = 10;
// Inner exclusion radius in Step 2; also the search radius used by _selectFeature.
const SEARCH_SIZE2 = 2;
//const TEMPLATE_SIZE = 22 // DEFAULT
// Half-width of the square correlation template (window is 2*size+1 pixels per side).
const TEMPLATE_SIZE = 6;
// Minimum template standard deviation for a pixel to be scored in Step 2.
const TEMPLATE_SD_THRESH = 5.0;
// Step-2 early-exit bar: a candidate whose neighbourhood similarity exceeds this is not a good feature.
const MAX_SIM_THRESH = 0.95;
// Passed to _selectFeature as maxSimThresh (starting bar for the minimum-similarity search).
const MAX_THRESH = 0.9;
//const MIN_THRESH = 0.55;
// Passed to _selectFeature as minSimThresh (lower similarity bound for acceptance).
const MIN_THRESH = 0.2;
// Minimum template standard deviation enforced in _selectFeature.
const SD_THRESH = 8.0;
// Requested feature-occupancy size; NOTE: _selectFeature overrides this internally.
const OCCUPANCY_SIZE = 24 * 2 / 3;
/*
 * Extract distinctive feature points from a greyscale image.
 *
 * The input image is in grey format: the imageData array has size
 * width * height with values in [0, 255], and the pixel at row r,
 * column c is imageData[r * width + c].
 *
 * @param {Object} image
 * @param {Uint8Array} image.data greyscale pixel values
 * @param {int} image.width image width
 * @param {int} image.height image height
 * @param {number} image.scale unused here; part of the caller's image shape
 * @returns {Array<{x: number, y: number}>} selected feature coordinates
 */
const extract = (image) => {
  const {data: imageData, width, height, scale} = image;

  // Step 1 - filter out interesting points. Interesting points have strong
  // pixel value changes across neighbours.
  //
  // BUGFIX: the original `[width * height]` built a ONE-element array holding
  // the number width*height instead of a width*height-long flag array, so the
  // zeroing loop ran once and `.length` was wrong until sparse writes grew it.
  const isPixelSelected = new Array(width * height).fill(false);

  // Step 1.1 consider a pixel at position (x, y). compute:
  // dx = ((data[x+1, y-1] - data[x-1, y-1]) + (data[x+1, y] - data[x-1, y]) + (data[x+1, y+1] - data[x-1, y+1])) / 256 / 3
  // dy = ((data[x+1, y+1] - data[x+1, y-1]) + (data[x, y+1] - data[x, y-1]) + (data[x-1, y+1] - data[x-1, y-1])) / 256 / 3
  // dValue = sqrt((dx^2 + dy^2) / 2)
  const dValue = new Float32Array(imageData.length);
  // border pixels lack a full neighbourhood; mark them ineligible
  for (let i = 0; i < width; i++) {
    dValue[i] = -1;
    dValue[width * (height - 1) + i] = -1;
  }
  for (let j = 0; j < height; j++) {
    dValue[j * width] = -1;
    dValue[j * width + width - 1] = -1;
  }
  for (let i = 1; i < width - 1; i++) {
    for (let j = 1; j < height - 1; j++) {
      const pos = i + width * j;
      let dx = 0.0;
      let dy = 0.0;
      for (let k = -1; k <= 1; k++) {
        dx += (imageData[pos + width * k + 1] - imageData[pos + width * k - 1]);
        dy += (imageData[pos + width + k] - imageData[pos - width + k]);
      }
      dx /= (3 * 256);
      dy /= (3 * 256);
      dValue[pos] = Math.sqrt((dx * dx + dy * dy) / 2);
    }
  }

  // Step 1.2 - select every pixel whose dValue is larger than all of its
  // 4-neighbours as a "potential" candidate.
  // The number of selected points is still too many, so we use the value to
  // further filter (the larger the dValue, the better).
  const dValueHist = new Uint32Array(1000); // histogram of dValue scaled to [0, 1000); typed arrays start zeroed
  const neighbourOffsets = [-1, 1, -width, width];
  let allCount = 0;
  for (let i = 1; i < width - 1; i++) {
    for (let j = 1; j < height - 1; j++) {
      const pos = i + width * j;
      let isMax = true;
      for (let d = 0; d < neighbourOffsets.length; d++) {
        if (dValue[pos] <= dValue[pos + neighbourOffsets[d]]) {
          isMax = false;
          break;
        }
      }
      if (isMax) {
        let k = Math.floor(dValue[pos] * 1000);
        if (k > 999) k = 999; // k > 999 should not happen if the computation is correct
        if (k < 0) k = 0;     // k < 0 should not happen if the computation is correct
        dValueHist[k] += 1;
        allCount += 1;
        isPixelSelected[pos] = true;
      }
    }
  }

  // Reduce the number of points according to dValue: walk the histogram from
  // the top bin down until keeping everything above bin k would exceed maxPoints.
  // Actually, the whole Step 1 might be better done by sorting the dValues and
  // picking the top (0.02 * width * height) points.
  const maxPoints = 0.02 * width * height;
  let k = 999;
  let filteredCount = 0;
  while (k >= 0) {
    filteredCount += dValueHist[k];
    if (filteredCount > maxPoints) break;
    k--;
  }
  for (let i = 0; i < isPixelSelected.length; i++) {
    if (isPixelSelected[i]) {
      if (dValue[i] * 1000 < k) isPixelSelected[i] = false;
    }
  }

  // Step 2
  // Prebuild cumulative-sum matrices for fast window sum / sum-of-squares queries.
  const imageDataSqr = [];
  for (let i = 0; i < imageData.length; i++) {
    imageDataSqr[i] = imageData[i] * imageData[i];
  }
  const imageDataCumsum = new Cumsum(imageData, width, height);
  const imageDataSqrCumsum = new Cumsum(imageDataSqr, width, height);

  // featureMap holds the max similarity value computed within the SEARCH area
  // of each pixel.
  // Idea: if there is high similarity with another pixel in the nearby area,
  // then it's not a good feature point; the next step looks for pixels with
  // low similarity.
  const featureMap = new Float32Array(imageData.length);
  for (let i = 0; i < width; i++) {
    for (let j = 0; j < height; j++) {
      const pos = j * width + i;
      if (!isPixelSelected[pos]) {
        featureMap[pos] = 1.0; // 1.0 (max similarity) marks "not a candidate"
        continue;
      }
      const vlen = _templateVar({image, cx: i, cy: j, sdThresh: TEMPLATE_SD_THRESH, imageDataCumsum, imageDataSqrCumsum});
      if (vlen === null) {
        featureMap[pos] = 1.0; // template out of bounds or too flat
        continue;
      }
      let max = -1.0;
      for (let jj = -SEARCH_SIZE1; jj <= SEARCH_SIZE1; jj++) {
        for (let ii = -SEARCH_SIZE1; ii <= SEARCH_SIZE1; ii++) {
          // skip offsets inside the SEARCH_SIZE2 inner circle
          if (ii * ii + jj * jj <= SEARCH_SIZE2 * SEARCH_SIZE2) continue;
          const sim = _getSimilarity({image, cx: i + ii, cy: j + jj, vlen: vlen, tx: i, ty: j, imageDataCumsum, imageDataSqrCumsum});
          if (sim === null) continue;
          if (sim > max) {
            max = sim;
            if (max > MAX_SIM_THRESH) break; // already too similar; stop scanning
          }
        }
        if (max > MAX_SIM_THRESH) break;
      }
      featureMap[pos] = max;
    }
  }

  // Step 2.2 select features
  const coords = _selectFeature({image, featureMap, templateSize: TEMPLATE_SIZE, searchSize: SEARCH_SIZE2, occSize: OCCUPANCY_SIZE, maxSimThresh: MAX_THRESH, minSimThresh: MIN_THRESH, sdThresh: SD_THRESH, imageDataCumsum, imageDataSqrCumsum});
  return coords;
}
/**
 * Step 2.2 - pick the final feature coordinates from the featureMap.
 *
 * Repeatedly selects the pixel with the lowest recorded max-similarity score
 * (most distinctive template), validates its template contrast and local
 * similarity, then masks an occSize neighbourhood so accepted features stay
 * spread across the image.
 *
 * @param {Object} options.image {data, width, height, scale}
 * @param {Float32Array} options.featureMap per-pixel max similarity from Step 2
 * @param {number} options.templateSize half-width of the correlation template
 * @param {number} options.searchSize similarity probe radius around each candidate
 * @param {number} options.occSize requested occupancy size (overridden below)
 * @param {number} options.maxSimThresh starting bar for the minimum search
 * @param {number} options.minSimThresh lower similarity bound for acceptance
 * @param {number} options.sdThresh minimum template standard deviation
 * @param {Cumsum} options.imageDataCumsum cumulative sums of pixel values
 * @param {Cumsum} options.imageDataSqrCumsum cumulative sums of squared values
 * @returns {Array<{x: number, y: number}>} selected feature coordinates
 */
const _selectFeature = (options) => {
let {image, featureMap, templateSize, searchSize, occSize, maxSimThresh, minSimThresh, sdThresh, imageDataCumsum, imageDataSqrCumsum} = options;
const {data: imageData, width, height, scale} = image;
//console.log("params: ", templateSize, templateSize, occSize, maxSimThresh, minSimThresh, sdThresh);
//occSize *= 2;
// NOTE: the occSize argument is ignored; the effective occupancy radius is
// one tenth of the smaller image dimension.
occSize = Math.floor(Math.min(image.width, image.height) / 10);
const divSize = (templateSize * 2 + 1) * 3;
const xDiv = Math.floor(width / divSize);
const yDiv = Math.floor(height / divSize);
// cap on the number of features: occupancy-grid cells plus template-div cells
let maxFeatureNum = Math.floor(width / occSize) * Math.floor(height / occSize) + xDiv * yDiv;
//console.log("max feature num: ", maxFeatureNum);
const coords = [];
// working copy of featureMap; processed pixels are overwritten with 1.0
const image2 = new Float32Array(imageData.length);
for (let i = 0; i < image2.length; i++) {
image2[i] = featureMap[i];
}
let num = 0;
while (num < maxFeatureNum) {
// full scan for the pixel with the lowest similarity still under maxSimThresh
let minSim = maxSimThresh;
let cx = -1;
let cy = -1;
for (let j = 0; j < height; j++) {
for (let i = 0; i < width; i++) {
if (image2[j*width+i] < minSim) {
minSim = image2[j*width+i];
cx = i;
cy = j;
}
}
}
if (cx === -1) break; // no remaining candidate below the threshold
const vlen = _templateVar({image, cx: cx, cy: cy, sdThresh: 0, imageDataCumsum, imageDataSqrCumsum});
if (vlen === null) {
image2[ cy * width + cx ] = 1.0; // unusable template (out of bounds here, since sdThresh is 0); drop candidate
continue;
}
// reject templates with too little contrast (low standard deviation)
if (vlen / (templateSize * 2 + 1) < sdThresh) {
image2[ cy * width + cx ] = 1.0;
continue;
}
// probe similarity inside a circle of radius searchSize around the candidate
let min = 1.0;
let max = -1.0;
for (let j = -searchSize; j <= searchSize; j++) {
for (let i = -searchSize; i <= searchSize; i++) {
if (i*i + j*j > searchSize * searchSize) continue;
if (i === 0 && j === 0) continue;
const sim = _getSimilarity({image, vlen, cx: cx+i, cy: cy+j, tx: cx, ty:cy, imageDataCumsum, imageDataSqrCumsum});
if (sim === null) continue;
if (sim < min) {
min = sim;
if (min < minSimThresh && min < minSim) break;
}
if (sim > max) {
max = sim;
if (max > 0.99) break;
}
}
// re-check the inner-loop break condition to exit the outer loop as well
// (emulates a labeled break)
if( (min < minSimThresh && min < minSim) || max > 0.99 ) break;
}
// reject the candidate when it failed either similarity bound above
if( (min < minSimThresh && min < minSim) || max > 0.99 ) {
image2[ cy * width + cx ] = 1.0;
continue;
}
coords.push({x: cx, y: cy});
//coords.push({
//mx: 1.0 * cx / scale,
//my: 1.0 * (height - cy) / scale,
//})
num += 1;
//console.log(num, '(', cx, ',', cy, ')', minSim, 'min = ', min, 'max = ', max, 'sd = ', vlen/(templateSize*2+1));
// no other feature points within occSize square
for (let j = -occSize; j <= occSize; j++) {
for (let i = -occSize; i <= occSize; i++) {
if (cy + j < 0 || cy + j >= height || cx + i < 0 || cx + i >= width) continue;
image2[ (cy+j)*width + (cx+i) ] = 1.0;
}
}
}
return coords;
}
/**
 * Compute sqrt of the sum of squared deviations from the mean over the
 * (2*TEMPLATE_SIZE+1)^2 window centred at (cx, cy).
 *
 * @param {Object} params.image {width, height, ...}
 * @param {number} params.cx template centre x
 * @param {number} params.cy template centre y
 * @param {number} params.sdThresh reject windows whose variance is below sdThresh^2
 * @param {Cumsum} params.imageDataCumsum cumulative sums of pixel values
 * @param {Cumsum} params.imageDataSqrCumsum cumulative sums of squared values
 * @returns {number|null} sqrt(sum((p - avg)^2)), or null when the window falls
 *   outside the image or its variance is under the threshold
 */
const _templateVar = ({image, cx, cy, sdThresh, imageDataCumsum, imageDataSqrCumsum}) => {
  const x0 = cx - TEMPLATE_SIZE;
  const y0 = cy - TEMPLATE_SIZE;
  const x1 = cx + TEMPLATE_SIZE;
  const y1 = cy + TEMPLATE_SIZE;
  if (x0 < 0 || x1 >= image.width) return null;
  if (y0 < 0 || y1 >= image.height) return null;

  const templateWidth = 2 * TEMPLATE_SIZE + 1;
  const nPixels = templateWidth * templateWidth;

  const sum = imageDataCumsum.query(x0, y0, x1, y1);
  const average = sum / nPixels;

  // sum((p - avg)^2) = sum(p^2) - 2*avg*sum(p) + n*avg^2
  let vlen = imageDataSqrCumsum.query(x0, y0, x1, y1);
  vlen -= 2 * average * sum;
  vlen += nPixels * average * average;

  if (vlen / nPixels < sdThresh * sdThresh) return null;
  return Math.sqrt(vlen);
}
/**
 * Normalised cross-correlation between the template centred at (tx, ty) and
 * the window centred at (cx, cy).
 *
 * @param {Object} options.image {data, width, height}
 * @param {number} options.cx candidate window centre x
 * @param {number} options.cy candidate window centre y
 * @param {number} options.vlen precomputed template vector length (from _templateVar)
 * @param {number} options.tx template centre x
 * @param {number} options.ty template centre y
 * @param {Cumsum} options.imageDataCumsum cumulative sums of pixel values
 * @param {Cumsum} options.imageDataSqrCumsum cumulative sums of squared values
 * @returns {number|null} similarity score, or null when the window is out of
 *   bounds or has zero variance
 */
const _getSimilarity = (options) => {
  const {image, cx, cy, vlen, tx, ty, imageDataCumsum, imageDataSqrCumsum} = options;
  const {data: imageData, width, height} = image;

  const half = TEMPLATE_SIZE;
  if (cx - half < 0 || cx + half >= width) return null;
  if (cy - half < 0 || cy + half >= height) return null;

  const side = 2 * half + 1;
  const nPixels = side * side;

  // window sum and sum of squares via the prebuilt cumulative-sum matrices
  const sx = imageDataCumsum.query(cx - half, cy - half, cx + half, cy + half);
  const sxx = imageDataSqrCumsum.query(cx - half, cy - half, cx + half, cy + half);

  // Cross term: row-major walk over both windows with two moving pointers
  // (the naive double index computation per pixel was the bottleneck here).
  let sxy = 0;
  let p1 = (cy - half) * width + (cx - half);
  let p2 = (ty - half) * width + (tx - half);
  const rowSkip = width - side;
  for (let row = 0; row < side; row++) {
    for (let col = 0; col < side; col++, p1++, p2++) {
      sxy += imageData[p1] * imageData[p2];
    }
    p1 += rowSkip;
    p2 += rowSkip;
  }

  const templateAverage = imageDataCumsum.query(tx - half, ty - half, tx + half, ty + half) / nPixels;
  sxy -= templateAverage * sx;

  // vector length of the candidate window; zero variance means no correlation
  let vlen2 = sxx - sx * sx / nPixels;
  if (vlen2 == 0) return null;
  vlen2 = Math.sqrt(vlen2);

  // normalised covariance between template and candidate window
  return (1.0 * sxy) / (vlen * vlen2);
}
export {
extract
};