// speedy-vision
// Version:
// GPU-accelerated Computer Vision for JavaScript
// 494 lines (412 loc) • 23.6 kB
// JavaScript
/*
* speedy-vision.js
* GPU-accelerated Computer Vision for JavaScript
* Copyright 2020-2022 Alexandre Martins <alemartf(at)gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* keypoints.js
* Facade for various keypoint detection algorithms
*/
import { SpeedyGPU } from '../speedy-gpu';
import { SpeedyProgramGroup } from '../speedy-program-group';
import { SpeedyTexture, SpeedyDrawableTexture } from '../speedy-texture';
import { LSH_SEQUENCE_COUNT, LSH_SEQUENCE_MAXLEN, LSH_ACCEPTABLE_DESCRIPTOR_SIZES, LSH_ACCEPTABLE_HASH_SIZES } from '../speedy-lsh';
import { importShader } from '../shader-declaration';
// FAST corner detector

/** Names of the arguments accepted by the FAST shader, in order */
const fastArguments = ['corners', 'pyramid', 'lod', 'threshold'];

/** FAST shader declaration: two source files, compiled with FAST_TYPE = 916 */
const fast9_16 = importShader(
        'keypoints/fast.glsl',
        'keypoints/fast.vs.glsl'
    )
    .withDefines({ FAST_TYPE: 916 })
    .withArguments(...fastArguments);
// Harris corner detector

/** Names of the arguments accepted by every Harris shader, in order */
const harrisArguments = ['corners', 'pyramid', 'derivatives', 'lod', 'lodStep', 'gaussian'];

/** Harris shader declarations indexed by window size: harris[1], harris[3], harris[5], harris[7] */
const harris = {};
for(const windowSize of [1, 3, 5, 7]) {
    harris[windowSize] = importShader('keypoints/harris.glsl')
        .withDefines({ WINDOW_SIZE: windowSize })
        .withArguments(...harrisArguments);
}

/** Shader declared from score-findmax.glsl */
const harrisScoreFindMax = importShader('keypoints/score-findmax.glsl').withArguments(
    'corners', 'iterationNumber'
);

/** Shader declared from harris-cutoff.glsl */
const harrisScoreCutoff = importShader('keypoints/harris-cutoff.glsl').withArguments(
    'corners', 'maxScore', 'quality'
);
// Subpixel refinement

/** Names of the arguments shared by all subpixel refinement shaders, in order */
const subpixelArguments = ['pyramid', 'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength', 'maxIterations', 'epsilon'];

/**
 * Declares a fresh variant of subpixel-refinement.glsl
 * @param {number} method value of the METHOD define
 * @returns {object} what importShader(...).withDefines(...).withArguments(...) returns
 */
const importSubpixelRefinement = method =>
    importShader('keypoints/subpixel-refinement.glsl')
        .withDefines({ METHOD: method })
        .withArguments(...subpixelArguments);

const subpixelQuadratic1d = importSubpixelRefinement(0);
const subpixelTaylor2d = importSubpixelRefinement(1);
const subpixelBilinear = importSubpixelRefinement(2);
const subpixelBicubic = importSubpixelRefinement(3);
// Scale refinement

/** Arguments common to both variants of refine-scale.glsl, in order */
const refineScaleArguments = ['pyramid', 'lodStep', 'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength'];

/** refine-scale.glsl compiled with METHOD = 0 */
const refineScaleLoG = importShader('keypoints/refine-scale.glsl')
    .withDefines({ METHOD: 0 })
    .withArguments(...refineScaleArguments);

/** refine-scale.glsl compiled with METHOD = 1; takes an additional 'threshold' argument */
const refineScaleFAST916 = importShader('keypoints/refine-scale.glsl')
    .withDefines({ METHOD: 1 })
    .withArguments(...refineScaleArguments, 'threshold');
// Pixel allocation

/** Arguments shared by the two allocation shaders, in order */
const reallocationArguments = [
    'inputEncodedKeypoints', 'inputDescriptorSize', 'inputExtraSize', 'inputEncoderLength',
    'outputDescriptorSize', 'outputExtraSize', 'outputEncoderLength',
];

/** Shader declared from allocate-descriptors.glsl */
const allocateDescriptors = importShader('keypoints/allocate-descriptors.glsl')
    .withArguments(...reallocationArguments);

/** Shader declared from allocate-extra.glsl */
const allocateExtra = importShader('keypoints/allocate-extra.glsl')
    .withArguments(...reallocationArguments);

/** Shader declared from transfer-to-extra.glsl */
const transferToExtra = importShader('keypoints/transfer-to-extra.glsl').withArguments(
    'encodedData', 'strideOfEncodedData',
    'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength'
);
// ORB descriptors

/** Shader declared from orb-descriptor.glsl */
const orbDescriptor = importShader('keypoints/orb-descriptor.glsl').withArguments(
    'image',
    'encodedCorners',
    'extraSize',
    'encoderLength'
);

/** Shader declared from orb-orientation.glsl */
const orbOrientation = importShader('keypoints/orb-orientation.glsl').withArguments(
    'image',
    'encodedKeypoints',
    'descriptorSize',
    'extraSize',
    'encoderLength'
);
// Non-maximum suppression

/**
 * Declares a fresh variant of nonmax-suppression.glsl
 * @param {number} multiscale value of the MULTISCALE define
 * @returns {object} what importShader(...).withDefines(...).withArguments(...) returns
 */
const importNonMaxSuppression = multiscale =>
    importShader('keypoints/nonmax-suppression.glsl')
        .withDefines({ MULTISCALE: multiscale })
        .withArguments('image', 'lodStep');

const nonMaxSuppression = importNonMaxSuppression(0);
const multiscaleNonMaxSuppression = importNonMaxSuppression(1);

/** Shader declared from nonmax-space.glsl */
const nonmaxSpace = importShader('keypoints/nonmax-space.glsl').withArguments('corners');

/** nonmax-scale.glsl compiled with USE_LAPLACIAN = 1; takes a 'pyrLaplacian' argument */
const nonmaxScale = importShader('keypoints/nonmax-scale.glsl')
    .withDefines({ USE_LAPLACIAN: 1 })
    .withArguments('corners', 'pyramid', 'pyrLaplacian', 'lodStep');

/** nonmax-scale.glsl compiled with USE_LAPLACIAN = 0; no 'pyrLaplacian' argument */
const nonmaxScaleSimple = importShader('keypoints/nonmax-scale.glsl')
    .withDefines({ USE_LAPLACIAN: 0 })
    .withArguments('corners', 'pyramid', 'lodStep');

/** Shader declared from laplacian.glsl */
const laplacian = importShader('keypoints/laplacian.glsl').withArguments(
    'corners', 'pyramid', 'lodStep', 'lodOffset'
);
// Keypoint tracking & optical-flow

/** Names of the arguments accepted by every LK shader, in order */
const lkArguments = [
    'encodedFlow', 'prevKeypoints', 'nextPyramid', 'prevPyramid',
    'level', 'depth', 'numberOfIterations', 'discardThreshold', 'epsilon',
    'descriptorSize', 'extraSize', 'encoderLength',
];

/** LK shader declarations indexed by window size: lk[3], lk[5], ..., lk[21] */
const lk = {};
for(const windowSize of [3, 5, 7, 9, 11, 13, 15, 17, 19, 21]) {
    lk[windowSize] = importShader('keypoints/lk.glsl')
        .withDefines({ WINDOW_SIZE: windowSize })
        .withArguments(...lkArguments);
}

/** Shader declared from transfer-flow.glsl */
const transferFlow = importShader('keypoints/transfer-flow.glsl').withArguments(
    'encodedFlow', 'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength'
);
// Brute-force matching

/** knn-init.glsl compiled with ENCODE_FILTERS = 0 */
const bfMatcherInitCandidates = importShader('keypoints/knn-init.glsl')
    .withDefines({ ENCODE_FILTERS: 0 });

/** knn-init.glsl compiled with ENCODE_FILTERS = 1 */
const bfMatcherInitFilters = importShader('keypoints/knn-init.glsl')
    .withDefines({ ENCODE_FILTERS: 1 });

/** Shader declared from knn-transfer.glsl */
const bfMatcherTransfer = importShader('keypoints/knn-transfer.glsl').withArguments(
    'encodedMatches', 'encodedKthMatches', 'numberOfMatchesPerKeypoint', 'kthMatch'
);

/**
 * Declares a fresh variant of bf-knn.glsl
 * @param {number} descriptorSize value of the DESCRIPTOR_SIZE define
 * @param {number} keypointsPerPass value of the NUMBER_OF_KEYPOINTS_PER_PASS define
 * @returns {object} what importShader(...).withDefines(...).withArguments(...) returns
 */
const importBruteForceMatcher = (descriptorSize, keypointsPerPass) =>
    importShader('keypoints/bf-knn.glsl')
        .withDefines({
            DESCRIPTOR_SIZE: descriptorSize,
            NUMBER_OF_KEYPOINTS_PER_PASS: keypointsPerPass,
        })
        .withArguments(
            'encodedMatches', 'encodedFilters', 'matcherLength',
            'dbEncodedKeypoints', 'dbDescriptorSize', 'dbExtraSize', 'dbEncoderLength',
            'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength',
            'passId'
        );

const bfMatcher32 = importBruteForceMatcher(32, 16);
const bfMatcher64 = importBruteForceMatcher(64, 8);
// LSH-based KNN matching

/** knn-init.glsl compiled with ENCODE_FILTERS = 0 */
const lshKnnInitCandidates = importShader('keypoints/knn-init.glsl')
    .withDefines({ ENCODE_FILTERS: 0 });

/** knn-init.glsl compiled with ENCODE_FILTERS = 1 */
const lshKnnInitFilters = importShader('keypoints/knn-init.glsl')
    .withDefines({ ENCODE_FILTERS: 1 });

/** Names of the arguments accepted by every lsh-knn.glsl variant, in order */
const lshKnnArguments = [
    'candidates', 'filters', 'matcherLength',
    'tables', 'descriptorDB', 'tableIndex',
    'bucketCapacity', 'bucketsPerTable', 'tablesStride', 'descriptorDBStride',
    'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength',
];

/**
 * lsh-knn.glsl variants, indexed as lshKnn[descriptorSize][hashSize][level],
 * for each entry of LSH_ACCEPTABLE_DESCRIPTOR_SIZES, each entry of
 * LSH_ACCEPTABLE_HASH_SIZES and each level in { 0, 1, 2 }
 */
const lshKnn = {};
LSH_ACCEPTABLE_DESCRIPTOR_SIZES.forEach(descriptorSize => {
    const shadersByHashSize = {};

    LSH_ACCEPTABLE_HASH_SIZES.forEach(hashSize => {
        const shadersByLevel = {};

        [0, 1, 2].forEach(level => {
            shadersByLevel[level] = importShader('keypoints/lsh-knn.glsl')
                .withDefines({
                    DESCRIPTOR_SIZE: descriptorSize,
                    HASH_SIZE: hashSize,
                    LEVEL: level,
                    SEQUENCE_MAXLEN: LSH_SEQUENCE_MAXLEN,
                    SEQUENCE_COUNT: LSH_SEQUENCE_COUNT,
                })
                .withArguments(...lshKnnArguments);
        });

        shadersByHashSize[hashSize] = shadersByLevel;
    });

    lshKnn[descriptorSize] = shadersByHashSize;
});

/** Shader declared from knn-transfer.glsl */
const lshKnnTransfer = importShader('keypoints/knn-transfer.glsl').withArguments(
    'encodedMatches', 'encodedKthMatches', 'numberOfMatchesPerKeypoint', 'kthMatch'
);
// Keypoint sorting

/** All sorting stages are compiled from the same source file, selected by the STAGE define */
const SORT_KEYPOINTS_GLSL = 'keypoints/sort-keypoints.glsl';

/** sort-keypoints.glsl, STAGE = 1 */
const sortCreatePermutation = importShader(SORT_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 1 })
    .withArguments('encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength');

/** sort-keypoints.glsl, STAGE = 2 */
const sortMergePermutation = importShader(SORT_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 2 })
    .withArguments('permutation', 'blockSize', 'dblLog2BlockSize');

/** sort-keypoints.glsl, STAGE = 3 */
const sortApplyPermutation = importShader(SORT_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 3 })
    .withArguments('permutation', 'maxKeypoints', 'encodedKeypoints', 'descriptorSize', 'extraSize');
// Keypoint mixing

/** All mixing stages are compiled from the same source file, selected by the STAGE define */
const MIX_KEYPOINTS_GLSL = 'keypoints/mix-keypoints.glsl';

/** mix-keypoints.glsl, STAGE = 1 */
const mixKeypointsPreInit = importShader(MIX_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 1 })
    .withArguments(
        'encodedKeypointsA', 'encodedKeypointsB',
        'encoderLengthA', 'encoderLengthB',
        'encoderCapacityA', 'encoderCapacityB',
        'descriptorSize', 'extraSize', 'encoderLength'
    );

/** mix-keypoints.glsl, STAGE = 2 */
const mixKeypointsInit = importShader(MIX_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 2 })
    .withArguments('encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength', 'maxKeypoints');

/** mix-keypoints.glsl, STAGE = 3 */
const mixKeypointsSort = importShader(MIX_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 3 })
    .withArguments('array', 'blockSize');

/** mix-keypoints.glsl, STAGE = 5 (the "view" stage is numbered after the "apply" stage) */
const mixKeypointsView = importShader(MIX_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 5 })
    .withArguments('array');

/** mix-keypoints.glsl, STAGE = 4 */
const mixKeypointsApply = importShader(MIX_KEYPOINTS_GLSL)
    .withDefines({ STAGE: 4 })
    .withArguments('array', 'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength');
// Keypoint encoding

/** Source file shared by the three lookup table shaders */
const LOOKUP_TABLE_GLSL = 'keypoints/lookup-of-locations.glsl';

/** Run of four arguments that closes the argument list of several shaders below */
const keypointStreamArguments = ['encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength'];

/** lookup-of-locations.glsl, STAGE = 1 */
const initLookupTable = importShader(LOOKUP_TABLE_GLSL)
    .withDefines({ FS_OUTPUT_TYPE: 2, STAGE: 1 })
    .withArguments('corners');

/** lookup-of-locations.glsl, STAGE = 2; the only lookup table shader with a second source file */
const sortLookupTable = importShader(LOOKUP_TABLE_GLSL, 'keypoints/lookup-of-locations.vs.glsl')
    .withDefines({ FS_OUTPUT_TYPE: 2, FS_USE_CUSTOM_PRECISION: 1, STAGE: 2 })
    .withArguments('lookupTable', 'blockSize', 'width', 'height');

/** lookup-of-locations.glsl, STAGE = -1 */
const viewLookupTable = importShader(LOOKUP_TABLE_GLSL)
    .withDefines({ STAGE: -1 })
    .withArguments('lookupTable');

/** Shader declared from encode-keypoints.glsl */
const encodeKeypoints = importShader('keypoints/encode-keypoints.glsl').withArguments(
    'corners', 'lookupTable', 'stride',
    'descriptorSize', 'extraSize', 'encoderLength', 'encoderCapacity'
);

/** Shader declared from encode-keypoint-offsets.glsl */
const encodeKeypointSkipOffsets = importShader('keypoints/encode-keypoint-offsets.glsl')
    .withArguments('corners', 'imageSize');

/** Shader declared from encode-keypoint-long-offsets.glsl */
const encodeKeypointLongSkipOffsets = importShader('keypoints/encode-keypoint-long-offsets.glsl')
    .withDefines({ MAX_ITERATIONS: 6 }) // dependent texture reads :(
    .withArguments('offsetsImage', 'imageSize');

/** Shader declared from encode-keypoint-positions.glsl */
const encodeKeypointPositions = importShader('keypoints/encode-keypoint-positions.glsl').withArguments(
    'offsetsImage', 'imageSize', 'passId', 'numPasses', 'keypointLimit',
    ...keypointStreamArguments
);

/** Shader declared from encode-keypoint-properties.glsl */
const encodeKeypointProperties = importShader('keypoints/encode-keypoint-properties.glsl')
    .withArguments('corners', ...keypointStreamArguments);

/** Shader declared from encode-null-keypoints.glsl; it takes no arguments */
const encodeNullKeypoints = importShader('keypoints/encode-null-keypoints.glsl')
    .withArguments();

/** Shader declared from transfer-orientation.glsl */
const transferOrientation = importShader('keypoints/transfer-orientation.glsl')
    .withArguments('encodedOrientations', ...keypointStreamArguments);

/** Shader declared from upload-keypoints.glsl */
const uploadKeypoints = importShader('keypoints/upload-keypoints.glsl')
    .withDefines({
        // UBOs can hold at least 16KB of data;
        // gl.MAX_UNIFORM_BLOCK_SIZE >= 16384
        // according to the GL ES 3 reference.
        // Each keypoint uses 16 bytes (vec4)
        BUFFER_SIZE: 1024 //16384 / 16
    })
    .withArguments(
        'encodedKeypoints', 'startIndex', 'endIndex',
        'descriptorSize', 'extraSize', 'encoderLength'
    );
// Geometric transformations

/** Shader declared from apply-homography.glsl */
const applyHomography = importShader('keypoints/apply-homography.glsl').withArguments(
    'homography',
    'encodedKeypoints',
    'descriptorSize',
    'extraSize',
    'encoderLength'
);
// Keypoint filters

/** Shader declared from clip-border.glsl */
const clipBorder = importShader('keypoints/clip-border.glsl').withArguments(
    'imageWidth', 'imageHeight',
    'borderTop', 'borderRight', 'borderBottom', 'borderLeft',
    'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength'
);

/** Arguments shared by the distance filter and the Hamming distance filters, in order */
const distanceFilterArguments = [
    'encodedKeypointsA', 'encoderLengthA',
    'encodedKeypointsB', 'encoderLengthB',
    'descriptorSize', 'extraSize', 'encoderLength',
    'threshold',
];

/** Shader declared from distance-filter.glsl */
const distanceFilter = importShader('keypoints/distance-filter.glsl')
    .withArguments(...distanceFilterArguments);

/**
 * Declares a fresh variant of hamming-distance-filter.glsl
 * @param {number} descriptorSize value of the DESCRIPTOR_SIZE define
 * @returns {object} what importShader(...).withDefines(...).withArguments(...) returns
 */
const importHammingDistanceFilter = descriptorSize =>
    importShader('keypoints/hamming-distance-filter.glsl')
        .withDefines({ DESCRIPTOR_SIZE: descriptorSize })
        .withArguments(...distanceFilterArguments);

const hammingDistanceFilter32 = importHammingDistanceFilter(32);
const hammingDistanceFilter64 = importHammingDistanceFilter(64);
// Other utilities

/** shuffle.glsl compiled with PERMUTATION_MAXLEN = 2048 */
const shuffle = importShader('keypoints/shuffle.glsl')
    .withDefines({ PERMUTATION_MAXLEN: 2048 })
    .withArguments(
        'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength'
    );

/** Shader declared from clip.glsl */
const clip = importShader('keypoints/clip.glsl').withArguments(
    'encodedKeypoints', 'descriptorSize', 'extraSize', 'encoderLength', 'maxKeypoints'
);
/**
 * SpeedyProgramGroupKeypoints
 * Program group that declares, by name, the keypoint shaders defined in this file
 */
export class SpeedyProgramGroupKeypoints extends SpeedyProgramGroup
{
    /**
     * Class constructor
     * @param {SpeedyGPU} gpu
     */
    constructor(gpu)
    {
        super(gpu);

        // Rows flagged with PINGPONG are declared with a copy of the options
        // returned by this.program.usesPingpongRendering(); the other rows
        // are declared with a name and a shader only
        const PINGPONG = true;

        // [ program name, shader declaration, PINGPONG? ], in declaration order
        const catalog = [
            // FAST corner detector
            ['fast9_16', fast9_16, PINGPONG],

            // Harris corner detector
            ['harris1', harris[1], PINGPONG],
            ['harris3', harris[3], PINGPONG],
            ['harris5', harris[5], PINGPONG],
            ['harris7', harris[7], PINGPONG],
            ['harrisScoreFindMax', harrisScoreFindMax, PINGPONG],
            ['harrisScoreCutoff', harrisScoreCutoff],

            // Subpixel refinement
            ['subpixelQuadratic1d', subpixelQuadratic1d],
            ['subpixelTaylor2d', subpixelTaylor2d],
            ['subpixelBicubic', subpixelBicubic],
            ['subpixelBilinear', subpixelBilinear],

            // Scale refinement
            ['refineScaleLoG', refineScaleLoG],
            ['refineScaleFAST916', refineScaleFAST916],

            // Pixel allocation
            ['allocateDescriptors', allocateDescriptors],
            ['allocateExtra', allocateExtra],
            ['transferToExtra', transferToExtra],

            // ORB descriptors
            ['orbDescriptor', orbDescriptor],
            ['orbOrientation', orbOrientation],

            // Non-maximum suppression
            ['nonmax', nonMaxSuppression],
            ['pyrnonmax', multiscaleNonMaxSuppression],
            ['nonmaxSpace', nonmaxSpace],
            ['nonmaxScale', nonmaxScale],
            ['nonmaxScaleSimple', nonmaxScaleSimple],
            ['laplacian', laplacian],

            // LK optical-flow
            ['lk21', lk[21], PINGPONG],
            ['lk19', lk[19], PINGPONG],
            ['lk17', lk[17], PINGPONG],
            ['lk15', lk[15], PINGPONG],
            ['lk13', lk[13], PINGPONG],
            ['lk11', lk[11], PINGPONG],
            ['lk9', lk[9], PINGPONG],
            ['lk7', lk[7], PINGPONG],
            ['lk5', lk[5], PINGPONG],
            ['lk3', lk[3], PINGPONG],
            ['transferFlow', transferFlow],

            // Brute-force KNN matching
            ['bfMatcherInitCandidates', bfMatcherInitCandidates],
            ['bfMatcherInitFilters', bfMatcherInitFilters],
            ['bfMatcherTransfer', bfMatcherTransfer, PINGPONG],
            ['bfMatcher32', bfMatcher32, PINGPONG],
            ['bfMatcher64', bfMatcher64, PINGPONG],

            // LSH-based KNN matching (the lshKnn variants are declared further below)
            ['lshKnnInitCandidates', lshKnnInitCandidates],
            ['lshKnnInitFilters', lshKnnInitFilters],
            ['lshKnnTransfer', lshKnnTransfer, PINGPONG],

            // Keypoint sorting
            ['sortCreatePermutation', sortCreatePermutation],
            ['sortMergePermutation', sortMergePermutation, PINGPONG],
            ['sortApplyPermutation', sortApplyPermutation],

            // Keypoint mixing
            ['mixKeypointsPreInit', mixKeypointsPreInit],
            ['mixKeypointsInit', mixKeypointsInit],
            ['mixKeypointsSort', mixKeypointsSort, PINGPONG],
            ['mixKeypointsView', mixKeypointsView],
            ['mixKeypointsApply', mixKeypointsApply],

            // Keypoint encoders
            ['encodeNullKeypoints', encodeNullKeypoints],
            ['encodeKeypoints', encodeKeypoints],
            ['initLookupTable', initLookupTable],
            ['sortLookupTable', sortLookupTable, PINGPONG],
            ['viewLookupTable', viewLookupTable],
            ['encodeKeypointSkipOffsets', encodeKeypointSkipOffsets],
            ['encodeKeypointLongSkipOffsets', encodeKeypointLongSkipOffsets, PINGPONG],
            ['encodeKeypointPositions', encodeKeypointPositions, PINGPONG],
            ['encodeKeypointProperties', encodeKeypointProperties],
            ['transferOrientation', transferOrientation],
            ['uploadKeypoints', uploadKeypoints, PINGPONG],

            // Geometric transformations
            ['applyHomography', applyHomography],

            // Keypoint filters
            ['clipBorder', clipBorder],
            ['distanceFilter', distanceFilter],
            ['hammingDistanceFilter32', hammingDistanceFilter32],
            ['hammingDistanceFilter64', hammingDistanceFilter64],

            // Other utilities
            ['shuffle', shuffle],
            ['clip', clip],
        ];

        // The value returned by each declare() is the receiver of the next
        // one, exactly as in a chain of method calls starting at this
        let group = this;
        for(const [name, shader, pingpong = false] of catalog) {
            group = pingpong ?
                group.declare(name, shader, { ...this.program.usesPingpongRendering() }) :
                group.declare(name, shader);
        }

        //
        // LSH-based KNN matching: one program for each shader stored at
        // lshKnn[descriptorSize][hashSize][level]
        //
        for(const [descriptorSize, shadersByHashSize] of Object.entries(lshKnn)) {
            for(const [hashSize, shadersByLevel] of Object.entries(shadersByHashSize)) {
                for(const [level, shader] of Object.entries(shadersByLevel)) {
                    this.declare(`lshKnn${descriptorSize}h${hashSize}lv${level}`, shader, {
                        ...this.program.usesPingpongRendering()
                    });
                }
            }
        }
    }
}