/*
* speedy-vision.js
* GPU-accelerated Computer Vision for JavaScript
* Copyright 2020-2022 Alexandre Martins <alemartf(at)gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* speedy-lsh.js
* GPU-based LSH tables for fast matching of binary descriptors
*/
import { SpeedyTexture, SpeedyDrawableTexture } from './speedy-texture';
import { SpeedyDescriptorDB } from './speedy-descriptordb';
import { Utils } from '../utils/utils';
import { MATCH_MAX_INDEX } from '../utils/globals';
/*
* ALE'S GPU-BASED LSH FOR APPROXIMATE KNN MATCHING
* ------------------------------------------------
*
* Here is my variant of Locality Sensitive Hashing for GPU-based KNN matching!
* Indices of keypoint descriptors are stored in several tables, each with many
* buckets of fixed capacity. In a nutshell, I create a data structure of fixed
* size to match the keypoints.
*
* Buckets in video memory may get full. Wouldn't it be cool if we could use a
* probabilistic approach to let us work within their storage capacity?
*
* Let there be n buckets in a table, each with storage capacity c (holding
* up to c elements). Buckets are numbered from 0 to n-1.
*
* We pick uniformly a random bucket to store a new element in the table. Let
* X be the chosen bucket. The probability that we'll store the new element in
* any particular bucket k is:
*
* P(X = k) = 1/n (k = 0, 1, 2, ... n-1)
*
* On average, each new element stored in the table inserts 1/n of an element
* in each bucket. If we add m new elements to the table, each bucket receives
* m/n elements, on average(*).
*
* (*) for all k, define the indicator random variable Ik as 1 if X = k and 0
* otherwise. It follows that the expected value of Ik, E(Ik), is 1/n for all k.
* In addition, the expected value of (m Ik) is m * E(Ik) = m/n.
*
* Now let Yi be the number of elements inserted in bucket i in m additions to
* the table. Strictly speaking, Yi ~ Binomial(m, 1/n); since n is large and
* buckets are picked independently, we use the Poisson approximation and model
* Yi as Poisson(m/n) - on average, m additions to the table result in m/n new
* elements being inserted in bucket i. Hence, for all i, the probability that
* we insert q elements in bucket i in m additions to the table is:
*
* P(Yi = q) = (m/n)^q * exp(-m/n) / q! (q = 0, 1, 2...)
*
* Given that each bucket has storage capacity c, we require Yi <= c with a
* high probability p (say, p = 0.99). This means that, in m additions, we
* don't want to exceed the capacity c with high probability. So, let us find
* a (large) value of m such that:
*
* P(Yi <= c) >= p
*
* Sounds good! We can find the largest matching m using binary search.
*
* I don't think we need to enforce a high probability that ALL buckets stay
* within their capacity - n is large, we need to use the available space, and
* we have multiple tables anyway.
*
* In practice, the assumption that buckets are picked uniformly doesn't hold:
* keypoints that are nearby tend to have similar descriptors and buckets are
* picked according to those descriptors. Still, this model works well enough
* in practice and it is simple! That's what I like about it!
*
* ... now, how I actually do the matching is the theme of the next episode!
*/
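/*
 * Making the requirement explicit (all quantities as defined above; see
 * cumulativePoisson() and findTableCapacity() at the end of this file):
 *
 * P(Yi <= c) = sum of (m/n)^q * exp(-m/n) / q!,  for q = 0, 1, ..., c
 *
 * This probability decreases as m grows, so a binary search over m in
 * [1, n*c] finds the largest m satisfying P(Yi <= c) >= p. That value of m
 * is what we call the capacity of a table.
 */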
/** @type {number} Default number of tables in an LSH data structure */
export const LSH_DEFAULT_NUMBER_OF_TABLES = 8;
/** @type {number} Default number of bits of a hash */
export const LSH_DEFAULT_HASH_SIZE = 15;
/** @type {number[]} Acceptable number of tables for an LSH data structure */
export const LSH_ACCEPTABLE_NUMBER_OF_TABLES = [4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32];
/** @type {number[]} Acceptable values for hashSize, in bits */
export const LSH_ACCEPTABLE_HASH_SIZES = [10,11,12,13,14,15,16,17,18,19,20];
/** @type {number[]} Acceptable sizes for keypoint descriptors, in bytes */
export const LSH_ACCEPTABLE_DESCRIPTOR_SIZES = [32,64];
/**
* @typedef {Object} LSHProfile LSH profile
* @property {string} name name of the profile
* @property {number} capacity maximum number of keypoints that can be stored in such a table
* @property {number} hashSize number of bits in a keypoint descriptor hash (at most 16)
* @property {number} tableCount number of tables, preferably a power of 2 (at most 16)
* @property {number} bucketCapacity maximum number of entries of a bucket of a table
*/
/** @type {function(number,number,number):LSHProfile[]|null} generate LSH profiles sorted by increasing capacity */
const generateLSHProfiles = (t,h,p) => !LSH_ACCEPTABLE_HASH_SIZES.includes(h) || !LSH_ACCEPTABLE_NUMBER_OF_TABLES.includes(t) ? null : [
{
name: 'x-small',
bucketCapacity: 1,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 1, p),
},
{
name: 'small',
bucketCapacity: 2,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 2, p),
},
{
name: 'small-plus',
bucketCapacity: 3,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 3, p),
},
{
name: 'medium',
bucketCapacity: 4,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 4, p),
},
{
name: 'medium-plus',
bucketCapacity: 5,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 5, p),
},
{
name: 'large',
bucketCapacity: 6,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 6, p),
},
{
name: 'x-large',
bucketCapacity: 8,
tableCount: t,
hashSize: h,
capacity: findTableCapacity(h, 8, p),
},
];
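/*
 * Note: the SpeedyLSH constructor below picks the first profile in this list
 * whose capacity covers the number of descriptors, falling back to 'x-large'
 * if none does.
 */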
//
// LSH hash sequences: random bit indices in ascending order
// We generate a few sequences (one for each table), each supporting up to LSH_SEQUENCE_MAXLEN hash bits
// We pad the unused entries at the end of each sequence with an invalid value - the valid entries are a uniform random pick, so any bit can be chosen with equal probability
//
/** @typedef {Uint32Array} BitSequences flattened array of LSH_SEQUENCE_COUNT sequences of LSH_SEQUENCE_MAXLEN elements each - each entry represents a bit index */
/** @typedef {Object<number,BitSequences>} BitSequencesIndexedByDescriptorSize */
/** @typedef {Object<number,BitSequencesIndexedByDescriptorSize>} LSHSequences */
/** @type {number} maximum number of elements of a sequence */
export const LSH_SEQUENCE_MAXLEN = Math.max(...LSH_ACCEPTABLE_HASH_SIZES);
/** @type {number} number of sequences in a BitSequences object */
export const LSH_SEQUENCE_COUNT = Math.max(...LSH_ACCEPTABLE_NUMBER_OF_TABLES);
/** @type {function(BitSequences): BitSequences} Sort subsequences of random bits in ascending order */
const partitionedSort = seq => (Utils.range(LSH_SEQUENCE_COUNT)
.forEach(i => seq.subarray(i * LSH_SEQUENCE_MAXLEN, (i+1) * LSH_SEQUENCE_MAXLEN).sort()),
seq);
/** @type {function(number, BitSequences): BitSequences} Set the last p entries of the input subsequences to an invalid value */
const padSequences = (p, seq) => (Utils.range(LSH_SEQUENCE_COUNT)
.forEach(i => seq.subarray((i+1) * LSH_SEQUENCE_MAXLEN - p, (i+1) * LSH_SEQUENCE_MAXLEN).fill(0xBADCAFE)),
seq);
/** @type {LSHSequences} the bits we pick to form the hashes, laid out in ascending order and indexed by descriptorSize and hashSize */
const LSH_SEQUENCES = (f => LSH_ACCEPTABLE_HASH_SIZES.reduce((p,o) => ((p[o]=f(o)), p), {}))(h => ({
// for 256-bit descriptors
32: partitionedSort(padSequences(LSH_SEQUENCE_MAXLEN - h, new Uint32Array([
...(Utils.shuffle(Utils.range(256))),
...(Utils.shuffle(Utils.range(256))),
...(Utils.shuffle(Utils.range(256))),
].slice(0, LSH_SEQUENCE_COUNT * LSH_SEQUENCE_MAXLEN)))),
// for 512-bit descriptors
64: partitionedSort(padSequences(LSH_SEQUENCE_MAXLEN - h, new Uint32Array([
...(Utils.shuffle(Utils.range(512))),
...(Utils.shuffle(Utils.range(512))),
].slice(0, LSH_SEQUENCE_COUNT * LSH_SEQUENCE_MAXLEN)))),
}));
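/*
 * Layout sketch (hypothetical values, for illustration only): with hashSize h,
 * each of the LSH_SEQUENCE_COUNT subsequences holds h sorted bit indices
 * followed by LSH_SEQUENCE_MAXLEN - h invalid entries, e.g.
 *
 * [ 17, 102, 230, 0xBADCAFE, ..., 0xBADCAFE ]   (h = 3, shown small for brevity)
 *
 * The padding value 0xBADCAFE is larger than any valid bit index, so it stays
 * at the end of each subsequence after the partitioned sort.
 */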
//
// Misc
//
/** @type {number} we use RGBA8 textures (32 bits per pixel) as storage */
const LSH_BYTESPERPIXEL = 4;
/** @type {function(number): number} next power of 2 */
const nextPot = x => x > 1 ? 1 << Math.ceil(Math.log2(x)) : 1;
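// e.g., nextPot(5) == 8, nextPot(4) == 4, nextPot(1) == 1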
/**
* GPU-based LSH tables for fast matching of binary descriptors
*/
export class SpeedyLSH
{
/**
* Constructor
* @param {SpeedyTexture} lshTables texture to be used as the set of LSH tables
* @param {SpeedyTexture} descriptorDB texture to be used as the descriptor database
* @param {Uint8Array[]} descriptors the binary descriptors you'll store (make sure you don't repeat them, otherwise they will just waste space)
* @param {number} [tableCount] number of LSH tables, preferably a power of two
* @param {number} [hashSize] number of bits of a hash of a descriptor
* @param {number} [probability] probability of no discard events happening in the theoretical model
*/
constructor(lshTables, descriptorDB, descriptors, tableCount = LSH_DEFAULT_NUMBER_OF_TABLES, hashSize = LSH_DEFAULT_HASH_SIZE, probability = 0.95)
{
const descriptorCount = descriptors.length;
const descriptorSize = descriptorCount > 0 ? descriptors[0].byteLength : 0;
const lshProfiles = generateLSHProfiles(tableCount, hashSize, probability);
// validate input
Utils.assert(descriptorCount > 0, `Can't build LSH tables without descriptors!`);
Utils.assert(LSH_ACCEPTABLE_DESCRIPTOR_SIZES.includes(descriptorSize), `Can't build LSH tables: unacceptable descriptor size of ${descriptorSize} bytes`);
Utils.assert(descriptors.findIndex(d => d.byteLength !== descriptorSize) < 0, `Can't build LSH tables: incorrectly sized descriptors. Expected ${descriptorSize} bytes for each`);
Utils.assert(descriptorCount < MATCH_MAX_INDEX, `Can't build LSH tables: too many descriptors (${descriptors.length})`);
Utils.assert(lshProfiles != null, `Can't build LSH tables: unacceptable number of tables (${tableCount}) x hash size (${hashSize})`);
/** @type {LSHProfile} LSH profile */
this._profile = lshProfiles.find(profile => descriptorCount <= profile.capacity) || lshProfiles[lshProfiles.length - 1];
/** @type {number} descriptor size, in bytes */
this._descriptorSize = descriptorSize;
/** @type {number} number of descriptors */
this._descriptorCount = descriptorCount;
/** @type {BitSequences} bit sequences */
this._sequences = this._pickSequences(this._descriptorSize);
/** @type {SpeedyTexture} LSH tables storing indices of descriptors */
this._tables = this._createStaticTables(lshTables, this._sequences, descriptors, descriptorSize);
/** @type {SpeedyTexture} a storage of descriptors */
this._descriptorDB = SpeedyDescriptorDB.create(descriptorDB, descriptors, descriptorSize);
}
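/*
 * Usage sketch (hypothetical: assumes tablesTexture and descriptorDBTexture
 * are SpeedyTexture objects allocated elsewhere and descriptors is a
 * non-repeating Uint8Array[] of binary descriptors, all of the same size of
 * 32 or 64 bytes):
 *
 * const lsh = new SpeedyLSH(tablesTexture, descriptorDBTexture, descriptors);
 * console.log(lsh.tableCount, lsh.bucketsPerTable, lsh.bucketCapacity);
 */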
/**
* Descriptor size, in bytes
* @returns {number}
*/
get descriptorSize()
{
return this._descriptorSize;
}
/**
* Number of descriptors stored in this LSH data structure
* @returns {number}
*/
get descriptorCount()
{
return this._descriptorCount;
}
/**
* LSH bit sequences
* @returns {BitSequences}
*/
get sequences()
{
return this._sequences;
}
/**
* Number of bits that make a hash
* @returns {number}
*/
get hashSize()
{
return this._profile.hashSize;
}
/**
* Maximum number of descriptors that can be stored in a bucket of a table
* @returns {number}
*/
get bucketCapacity()
{
return this._profile.bucketCapacity;
}
/**
* How many buckets per table do we have?
* @returns {number}
*/
get bucketsPerTable()
{
return 1 << this._profile.hashSize;
}
/**
* Number of LSH tables
* @returns {number}
*/
get tableCount()
{
return this._profile.tableCount;
}
/**
* Size of one LSH table, in bytes
* @returns {number}
*/
get tableSize()
{
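// e.g., hashSize = 15 and bucketCapacity = 4 give 32768 buckets * 4 entries * 4 bytes = 512 KiB per table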
return this.bucketsPerTable * this.bucketCapacity * LSH_BYTESPERPIXEL;
}
/**
* Size of all LSH tables combined, in bytes
* @returns {number}
*/
get totalSize()
{
// actually, the total memory in VRAM may be a bit larger than
// this value, depending on the actual size of the texture
return this.tableCount * this.tableSize;
}
/**
* LSH tables texture
* @returns {SpeedyDrawableTexture}
*/
get tables()
{
return this._tables;
}
/**
* A collection of descriptors
* @returns {SpeedyDrawableTexture}
*/
get descriptorDB()
{
return this._descriptorDB;
}
/**
* Pick the appropriate LSH sequences for a particular descriptor size
* @param {number} descriptorSize in bytes
* @returns {BitSequences}
*/
_pickSequences(descriptorSize)
{
Utils.assert(Object.prototype.hasOwnProperty.call(LSH_SEQUENCES, this.hashSize));
Utils.assert(Object.prototype.hasOwnProperty.call(LSH_SEQUENCES[this.hashSize], descriptorSize));
return LSH_SEQUENCES[this.hashSize][descriptorSize];
}
/**
* Create LSH tables
* @param {SpeedyTexture} texture output texture
* @param {BitSequences} sequences bit sequences
* @param {Uint8Array[]} descriptors non-empty array of binary descriptors, ALL HAVING THE SAME SIZE
* @param {number} descriptorSize in bytes
* @returns {SpeedyTexture} texture
*/
_createStaticTables(texture, sequences, descriptors, descriptorSize)
{
const END_OF_LIST = 0xFFFFFFFF;
const profileName = this._profile.name;
const tableCapacity = this._profile.capacity;
const tableCount = this.tableCount;
const bucketsPerTable = this.bucketsPerTable;
const bucketSize = this.bucketCapacity * LSH_BYTESPERPIXEL;
const hashSize = this.hashSize;
const numberOfPixels = this.tableCount * this.bucketsPerTable * this.bucketCapacity; // watch for overflow?
const textureWidth = Math.min(nextPot(Math.sqrt(numberOfPixels)), 4096); // 4096 is compatible with most devices according to MDN
const textureHeight = Math.ceil(numberOfPixels / textureWidth);
const numberOfDescriptors = descriptors.length;
// validate input
Utils.assert(hashSize <= LSH_SEQUENCE_MAXLEN);
Utils.assert(tableCount <= LSH_SEQUENCE_COUNT);
Utils.assert(numberOfPixels <= textureWidth * textureHeight);
// log
const MEGABYTE = 1048576;
Utils.log(
`Building ${tableCount} ${profileName} LSH tables with ${numberOfDescriptors} ` +
`${descriptorSize * 8}-bit descriptors each and hashSize = ${hashSize} bits ` +
`(${textureWidth}x${textureHeight}, with ${(this.tableSize / MEGABYTE).toFixed(2)} ` +
`MB per table and total size = ${(this.totalSize / MEGABYTE).toFixed(2)} MB).`
);
// warn the user if there are too many descriptors
if(numberOfDescriptors > tableCapacity) {
const exceedingPercentage = 100 * numberOfDescriptors / tableCapacity;
Utils.warning(`There are too many descriptors (${numberOfDescriptors}) for a ${profileName} LSH table. That's ${exceedingPercentage.toFixed(2)}% of its theoretical capacity. Consider increasing the hashSize (currently set to ${hashSize}) or reducing the number of descriptors to avoid degradation.`);
}
// create empty LSH tables
const buffer = new ArrayBuffer(textureWidth * textureHeight * LSH_BYTESPERPIXEL);
const bytes = (new Uint8Array(buffer)).fill(0xFF);
const data = new DataView(buffer);
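// memory layout: tables are stored one after another; each table holds
// bucketsPerTable buckets of bucketCapacity entries; each entry is a single
// RGBA8 pixel storing one 32-bit little-endian descriptor index, and empty
// entries are filled with END_OF_LIST (0xFFFFFFFF)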
// shuffle the descriptors...
// it seems like a good idea to handle collisions of similar descriptors,
// which may be located next to each other in the array
const permutation = Utils.shuffle(Utils.range(numberOfDescriptors));
// for each descriptor
// do everything in little-endian format!
const numberOfDiscardedDescriptorsPerTable = (new Array(tableCount)).fill(0);
for(let i = 0; i < numberOfDescriptors; i++) {
const descriptorIndex = permutation[i]; //i;
const hashes = this._hashCodes(descriptors[descriptorIndex], sequences);
// for each table
for(let table = 0; table < tableCount; table++) {
// compute hash & memory addresses
const hash = hashes[table];
const tableByteOffset = table * bucketsPerTable * bucketSize;
const bucketByteOffset = tableByteOffset + hash * bucketSize;
// find the end of the list
let index = END_OF_LIST;
for(let entryByteOffset = 0; entryByteOffset < bucketSize; entryByteOffset += LSH_BYTESPERPIXEL) {
const byteOffset = bucketByteOffset + entryByteOffset;
index = data.getUint32(byteOffset, true);
// add the keypoint
if(index == END_OF_LIST) {
data.setUint32(byteOffset, descriptorIndex, true);
break;
}
}
// note: if the bucket is full, we just discard the entry :\
// we give this event a probabilistic treatment (see above),
// so it happens with low probability
if(index != END_OF_LIST)
numberOfDiscardedDescriptorsPerTable[table]++;
}
}
// log data for further study
const numberOfDiscardedDescriptors = numberOfDiscardedDescriptorsPerTable.reduce((sum, val) => sum + val, 0);
const profile = numberOfDiscardedDescriptorsPerTable.map(d => 100 * d / numberOfDescriptors);
Utils.log(
`When building ${tableCount} ${profileName} LSH tables with ${numberOfDescriptors} ` +
`${descriptorSize * 8}-bit descriptors each and hashSize = ${hashSize} bits, ` +
`I got the following discard profile: ` + profile.map(x => x.toFixed(2) + '%').join(', ') + `. ` +
`Average: ${(100 * numberOfDiscardedDescriptors / (tableCount * numberOfDescriptors)).toFixed(2)}%. ` +
`Minimum: ${Math.min(...profile).toFixed(2)}%. ` +
`Table capacity: ${tableCapacity}.`
);
// upload the LSH tables to the GPU
texture.resize(textureWidth, textureHeight);
texture.upload(bytes);
return texture;
}
/**
* Pick bits from a binary descriptor
* @param {Uint8Array} descriptor a single descriptor
* @param {BitSequences} sequences flattened array of tableCount sequences of LSH_SEQUENCE_MAXLEN elements each
* @returns {number[]} hash code for each table
*/
_hashCodes(descriptor, sequences)
{
const tableCount = this.tableCount;
const hashSize = this.hashSize;
const bucketsPerTable = this.bucketsPerTable;
const hashes = new Array(tableCount);
//const descriptorSize = descriptor.length;
// just to be sure...
Utils.assert(
hashSize <= LSH_SEQUENCE_MAXLEN &&
sequences.length >= LSH_SEQUENCE_MAXLEN * tableCount
);
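// illustration (hypothetical values): if the subsequence assigned to a table
// begins with [ 3, 10, 17, ... ] and hashSize = 3, the hash concatenates
// descriptor bits 3, 10 and 17, with bit 3 ending up as the most significant
// bit of the hash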
// for each table
for(let table = 0; table < tableCount; table++) {
const offset = LSH_SEQUENCE_MAXLEN * table;
// pick bits [ sequences[offset] .. sequences[offset + hashSize-1] ]
let hash = 0;
for(let i = 0; i < hashSize; i++) {
let bit = sequences[offset + i];
let b = bit >>> 3;
let m = 1 << (bit & 7);
//Utils.assert(b < descriptorSize);
hash = (hash << 1) | ((descriptor[b] & m) != 0);
}
// validate & store
Utils.assert(hash >= 0 && hash < bucketsPerTable);
hashes[table] = hash;
}
// done!
return hashes;
}
}
/**
* Compute P(X <= k), where X ~ Poisson(lambda)
* @param {number} lambda positive number
* @param {number} k non-negative integer
* @returns {number}
*/
function cumulativePoisson(lambda, k)
{
const exp = Math.exp(-lambda);
let sum = 1, fat = 1, pow = 1;
// k should be small!!!
for(let i = 1; i <= k; i++)
sum += (pow *= lambda) / (fat *= i);
return sum * exp;
}
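// worked example: cumulativePoisson(1, 2) = exp(-1) * (1 + 1 + 1/2) ≈ 0.9197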
/**
* Find the maximum number of keypoint descriptors that a table can hold
* @param {number} hashSize positive integer
* @param {number} bucketCapacity positive integer
* @param {number} [probability] probability of no discard events happening in the theoretical model
* @return {number} optimal table capacity
*/
function findTableCapacity(hashSize, bucketCapacity, probability = 0.99)
{
const n = 1 << hashSize; // number of buckets
const c = bucketCapacity;
const p = probability;
let l = 1, r = n * c; // watch for overflow!
let m = 0, pm = 0;
// binary search
while(l < r) {
m = Math.floor((l + r) / 2);
pm = cumulativePoisson(m / n, c);
if(pm > p) //if(1-pm < 1-p)
l = m + 1;
else
r = m;
}
return m;
}