UNPKG

speedy-vision

Version:

GPU-accelerated Computer Vision for JavaScript

374 lines (317 loc) 14 kB
/*
 * speedy-vision.js
 * GPU-accelerated Computer Vision for JavaScript
 * Copyright 2020-2022 Alexandre Martins <alemartf(at)gmail.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * speedy-texture-reader.js
 * Reads data from textures
 */

import { Utils } from '../utils/utils';
import { Settings } from '../core/settings';
import { SpeedyGPU } from './speedy-gpu';
import { SpeedyPromise } from '../core/speedy-promise';
import { SpeedyDrawableTexture } from './speedy-texture';
import { asap } from '../utils/asap';
import { IllegalOperationError, TimeoutError, GLError } from '../utils/errors';

/** @type {number} number of PBOs; used to get a performance boost in gl.readPixels() */
const DEFAULT_NUMBER_OF_BUFFERS = 2;

/**
 * Run function fn on the "next frame".
 * On Firefox a 10 ms timer is used, because requestAnimationFrame produces a
 * warning there; elsewhere requestAnimationFrame is used to reduce battery usage.
 * The typeof guard keeps this module importable where `navigator` is undefined.
 * @type {(fn: Function, ...args: any[]) => number}
 */
const runOnNextFrame = (typeof navigator !== 'undefined' && navigator.userAgent.includes('Firefox')) ?
    ((fn, ...args) => setTimeout(fn, 10, ...args)) :
    ((fn, ...args) => requestAnimationFrame(() => fn.apply(undefined, args)));

/**
 * Reads data from textures
 */
export class SpeedyTextureReader
{
    /**
     * Constructor
     * @param {number} [numberOfBuffers] number of pixel buffers / PBOs; must be positive
     */
    constructor(numberOfBuffers = DEFAULT_NUMBER_OF_BUFFERS)
    {
        Utils.assert(numberOfBuffers > 0);

        /** @type {boolean} is this object initialized? */
        this._initialized = false;

        /** @type {Uint8Array[]} pixel buffers for data transfers (each stores RGBA data) */
        this._pixelBuffer = (new Array(numberOfBuffers)).fill(null).map(() => new Uint8Array(0));

        /** @type {WebGLBuffer[]} Pixel Buffer Objects (PBOs) */
        this._pbo = (new Array(numberOfBuffers)).fill(null);

        /** @type {number} the index of the buffer that will be consumed in this frame */
        this._consumerIndex = 0;

        /** @type {number} the index of the buffer that will be produced next */
        this._producerIndex = numberOfBuffers - 1;

        /** @type {SpeedyPromise<void>[]} producer-consumer promises */
        this._promise = Array.from({ length: numberOfBuffers }, () => SpeedyPromise.resolve());

        /** @type {boolean[]} are the contents of the ith buffer being produced? */
        this._busy = (new Array(numberOfBuffers)).fill(false);

        /** @type {boolean[]} can the ith buffer be consumed? */
        this._ready = (new Array(numberOfBuffers)).fill(true);
    }

    /**
     * Initialize this object: allocates the PBOs and subscribes
     * _allocatePBOs to the gpu, so that it can be invoked again by it
     * @param {SpeedyGPU} gpu
     */
    init(gpu)
    {
        this._allocatePBOs(gpu);
        gpu.subscribe(this._allocatePBOs, this, gpu);

        this._initialized = true;
    }

    /**
     * Release resources: unsubscribes from the gpu and deletes the PBOs
     * @param {SpeedyGPU} gpu
     * @returns {null}
     */
    release(gpu)
    {
        gpu.unsubscribe(this._allocatePBOs, this);
        this._deallocatePBOs(gpu);

        this._initialized = false;
        return null;
    }

    /**
     * Read pixels from a texture, synchronously.
     * You may optionally specify a (x,y,width,height) sub-rectangle.
     * The rectangle is clamped to the boundaries of the texture.
     * The returned array is a view of an internal buffer that is reused
     * by subsequent calls.
     * @param {SpeedyDrawableTexture} texture a texture with a FBO
     * @param {number} [x]
     * @param {number} [y]
     * @param {number} [width]
     * @param {number} [height]
     * @returns {Uint8Array} pixels in the RGBA format
     */
    readPixelsSync(texture, x = 0, y = 0, width = texture.width, height = texture.height)
    {
        Utils.assert(this._initialized);

        const gl = texture.gl;
        const fbo = texture.glFbo;

        // clamp values
        width = Math.max(0, Math.min(width, texture.width));
        height = Math.max(0, Math.min(height, texture.height));
        x = Math.max(0, Math.min(x, texture.width - width));
        y = Math.max(0, Math.min(y, texture.height - height));

        // buffer allocation
        const sizeofBuffer = width * height * 4; // 4 bytes per pixel (RGBA)
        this._reallocate(sizeofBuffer);

        // lost context? return whatever is stored in the buffer
        if(gl.isContextLost())
            return this._pixelBuffer[0].subarray(0, sizeofBuffer);

        // read pixels
        gl.bindFramebuffer(gl.FRAMEBUFFER, fbo);
        gl.readPixels(x, y, width, height, gl.RGBA, gl.UNSIGNED_BYTE, this._pixelBuffer[0]);
        gl.bindFramebuffer(gl.FRAMEBUFFER, null);

        // done!
        return this._pixelBuffer[0].subarray(0, sizeofBuffer);
    }

    /**
     * Read pixels from a texture, asynchronously, with PBOs.
     * You may optionally specify a (x,y,width,height) sub-rectangle.
     * The rectangle is clamped to the boundaries of the texture.
     *
     * When useBufferedDownloads is true, a download is started on the
     * producer buffer (unless it is still busy, in which case the frame is
     * skipped) and the promise resolves to the contents of the consumer
     * buffer. In this mode the whole internal buffer is returned, so its
     * length may exceed width * height * 4. When there is more than one
     * buffer, the earliest calls resolve to buffers that haven't been
     * written yet.
     *
     * @param {SpeedyDrawableTexture} texture a texture with a FBO
     * @param {number} [x]
     * @param {number} [y]
     * @param {number} [width]
     * @param {number} [height]
     * @param {boolean} [useBufferedDownloads] accelerate downloads by returning pixels from the texture of the previous call (useful for streaming)
     * @returns {SpeedyPromise<Uint8Array>} resolves to an array of pixels in the RGBA format
     */
    readPixelsAsync(texture, x = 0, y = 0, width = texture.width, height = texture.height, useBufferedDownloads = false)
    {
        Utils.assert(this._initialized);

        const gl = texture.gl;
        const fbo = texture.glFbo;

        // clamp values
        width = Math.max(0, Math.min(width, texture.width));
        height = Math.max(0, Math.min(height, texture.height));
        x = Math.max(0, Math.min(x, texture.width - width));
        y = Math.max(0, Math.min(y, texture.height - height));

        // buffer allocation
        const sizeofBuffer = width * height * 4; // 4 bytes per pixel (RGBA)
        this._reallocate(sizeofBuffer);

        // lost context? return whatever is stored in the buffer
        if(gl.isContextLost())
            return SpeedyPromise.resolve(this._pixelBuffer[0].subarray(0, sizeofBuffer));

        // do not optimize?
        if(!useBufferedDownloads) {
            const pixelBuffer = this._pixelBuffer[0].subarray(0, sizeofBuffer);
            return SpeedyTextureReader._readPixelsViaPBO(gl, this._pbo[0], pixelBuffer, fbo, x, y, width, height).then(() =>
                pixelBuffer
            );
        }

        // Hide latency with a Producer-Consumer mechanism
        const numberOfBuffers = this._pixelBuffer.length;

        // GPU needs to produce data
        // (if the producer buffer is still busy, this frame is skipped)
        const producerIndex = this._producerIndex;

        if(!this._busy[producerIndex]) {
            const pbo = this._pbo[producerIndex];
            const pixelBuffer = this._pixelBuffer[producerIndex].subarray(0, sizeofBuffer);

            this._producerIndex = (producerIndex + 1) % numberOfBuffers;

            this._ready[producerIndex] = false;
            this._busy[producerIndex] = true;

            this._promise[producerIndex] = SpeedyTextureReader._readPixelsViaPBO(gl, pbo, pixelBuffer, fbo, x, y, width, height).then(() => {
                this._ready[producerIndex] = true;
            }).finally(() => {
                // always free the slot, even if the download has failed;
                // otherwise the producer would get stuck on this buffer
                // and skip every subsequent frame
                this._busy[producerIndex] = false;
            });
        }

        // CPU needs to consume data
        const consumerIndex = this._consumerIndex;
        this._consumerIndex = (consumerIndex + 1) % numberOfBuffers;

        if(!this._ready[consumerIndex]) {
            // the data is still being produced: wait for it
            return this._promise[consumerIndex].then(() => {
                this._ready[consumerIndex] = false;
                return this._pixelBuffer[consumerIndex];
            });
        }

        // the data is available: no need to wait
        this._ready[consumerIndex] = false;
        return SpeedyPromise.resolve(this._pixelBuffer[consumerIndex]);
    }

    /**
     * Reallocate the pixel buffers, so that they can hold the required number of bytes
     * If the pixel buffers already have the required capacity, then nothing is done
     * The buffers never shrink. The previous contents are NOT copied to the new buffers.
     * @param {number} size in bytes
     */
    _reallocate(size)
    {
        // no need to reallocate
        if(size <= this._pixelBuffer[0].byteLength)
            return;

        // reallocate all buffers (the old contents are discarded)
        for(let i = 0; i < this._pixelBuffer.length; i++)
            this._pixelBuffer[i] = new Uint8Array(size);
    }

    /**
     * Allocate PBOs
     * @param {SpeedyGPU} gpu
     */
    _allocatePBOs(gpu)
    {
        const gl = gpu.gl;

        for(let i = 0; i < this._pbo.length; i++)
            this._pbo[i] = gl.createBuffer();
    }

    /**
     * Deallocate PBOs
     * @param {SpeedyGPU} gpu
     */
    _deallocatePBOs(gpu)
    {
        const gl = gpu.gl;

        for(let i = this._pbo.length - 1; i >= 0; i--) {
            gl.deleteBuffer(this._pbo[i]);
            this._pbo[i] = null;
        }
    }

    /**
     * Read pixels to a Uint8Array, asynchronously, using a Pixel Buffer Object (PBO)
     * It's assumed that the target texture is in the RGBA8 format
     * @param {WebGL2RenderingContext} gl
     * @param {WebGLBuffer} pbo
     * @param {Uint8Array} outputBuffer with size >= width * height * 4
     * @param {WebGLFramebuffer} fbo
     * @param {GLint} x
     * @param {GLint} y
     * @param {GLsizei} width
     * @param {GLsizei} height
     * @returns {SpeedyPromise<void>} rejects with an IllegalOperationError if something goes wrong
     */
    static _readPixelsViaPBO(gl, pbo, outputBuffer, fbo, x, y, width, height)
    {
        /*

        When testing Speedy on Chrome (mobile) using about:tracing with the
        --enable-gpu-service-tracing flag, I found that A LOT of time is spent
        in TraceGLAPI::glMapBufferRange, which takes place just after
        GLES2DecoderImpl::HandleReadPixels and GLES2DecoderImpl::glReadPixels.

        Using multiple PBOs doesn't seem to impact Chrome too much. Performance
        is much better on Firefox. This suggests there is room for improvement.
        I do not yet understand clearly the cause for this lag on Chrome. It
        may be a CPU-GPU synchronization issue.

        EDIT: I have found that using gl.flush() aggressively greatly improves
              things. WebGL commands will be pushed frequently!

        See also:
        https://www.khronos.org/registry/webgl/specs/latest/2.0/#3.7.3 (Buffer objects)
        https://github.com/chromium/chromium/blob/master/docs/gpu/debugging_gpu_related_code.md

        */

        const size = width * height * 4;

        // validate outputBuffer
        Utils.assert(outputBuffer.byteLength >= size, `Invalid buffer size`);

        // read pixels into the PBO
        gl.bindBuffer(gl.PIXEL_PACK_BUFFER, pbo);
        gl.bufferData(gl.PIXEL_PACK_BUFFER, size, gl.DYNAMIC_READ);
        gl.bindFramebuffer(gl.FRAMEBUFFER, fbo);
        gl.readPixels(x, y, width, height, gl.RGBA, gl.UNSIGNED_BYTE, 0);
        gl.bindFramebuffer(gl.FRAMEBUFFER, null);
        gl.bindBuffer(gl.PIXEL_PACK_BUFFER, null);

        // create a fence
        const sync = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0);
        gl.flush(); // make sure the sync command is read

        // wait for the commands to be processed by the GPU
        return new SpeedyPromise((resolve, reject) => {
            // according to the WebGL2 spec sec 3.7.14 Sync objects,
            // "sync objects may only transition to the signaled state
            // when the user agent's event loop is not executing a task"
            // in other words, it won't be signaled in the same frame
            if(Settings.gpuPollingMode !== 'asap')
                runOnNextFrame(SpeedyTextureReader._clientWaitAsync, gl, sync, 0, resolve, reject);
            else
                asap(SpeedyTextureReader._clientWaitAsync, gl, sync, 0, resolve, reject);
        }).then(() => {
            // the GPU is done: copy the contents of the PBO to the output buffer
            gl.bindBuffer(gl.PIXEL_PACK_BUFFER, pbo);
            gl.getBufferSubData(gl.PIXEL_PACK_BUFFER, 0, outputBuffer);
            gl.bindBuffer(gl.PIXEL_PACK_BUFFER, null);
        }).catch(err => {
            throw new IllegalOperationError(`Can't getBufferSubDataAsync(): error in clientWaitAsync()`, err);
        }).finally(() => {
            // the fence is released regardless of the outcome
            gl.deleteSync(sync);
        });
    }

    /**
     * Waits for a sync object to become signaled.
     * The sync object is polled once per call of poll(). Exactly one of
     * resolve / reject is called when the polling ends.
     * @param {WebGL2RenderingContext} gl
     * @param {WebGLSync} sync
     * @param {GLbitfield} flags may be gl.SYNC_FLUSH_COMMANDS_BIT or 0
     * @param {Function} resolve called when the sync object is signaled
     * @param {Function} reject called on timeout or if the wait fails
     * @param {number} [pollInterval] in milliseconds (currently unused: polling is driven by requestAnimationFrame or asap)
     * @param {number} [remainingAttempts] for timeout
     */
    static _clientWaitAsync(gl, sync, flags, resolve, reject, pollInterval = 10, remainingAttempts = 1000)
    {
        (function poll() {
            let status;

            // poll() runs inside a scheduled callback: if clientWaitSync()
            // throws, we reject instead of leaving the promise pending forever
            try {
                status = gl.clientWaitSync(sync, flags, 0);
            }
            catch(err) {
                reject(err);
                return;
            }

            // check the status before the timeout, so that a sync object
            // that gets signaled on the last attempt is not reported as a timeout
            if(status === gl.CONDITION_SATISFIED || status === gl.ALREADY_SIGNALED) {
                resolve();
            }
            else if(status === gl.WAIT_FAILED) {
                // polling again will not help: fail right away
                reject(new IllegalOperationError(`GPU polling failed`, GLError.from(gl)));
            }
            else if(remainingAttempts-- <= 0) {
                reject(new TimeoutError(`GPU polling timeout`, GLError.from(gl)));
            }
            else {
                if(Settings.gpuPollingMode !== 'asap')
                    requestAnimationFrame(poll); // RAF is a rather unusual way to do polling at ~60 fps. Does it reduce CPU usage?
                else
                    asap(poll);
            }
        })();
    }
}