// dcp-client — core libraries for accessing the DCP network
/**
* @file lift-webgpu.js
* Copyright (c) 2023, Distributive, Ltd.
* All Rights Reserved. Licensed under the terms of the MIT License.
*
 * In short, after running this file, WebGPU usage is tracked within reasonable grounds without hooking into
 * the runtime directly *and* without using the `timestamp-query` feature, as most implementations do not
 * support that feature.
*
* WebGPU standard defines three timelines, content, queue, and device. Each of them can be thought of as
* one mode of operation. They require different strategies for timing.
*
* Content timeline is just fancy speak for JS thread, we can ignore that since it's handled somewhere
* else.
*
* Device timeline is mostly the GPU driver thread that is invisible to the user. Most functions that
* primarily operate on the device timeline do not return promises and look blocking from the perspective
* of the javascript thread. These are simple to time, no different than how you might measure any other
 * blocking functions; the only practical difference is they ought to be considered as gpu usage.
*
* Queue timeline is the meat of where GPU work occurs. They represent computations that occur on the GPU
* directly. Most functions that operate on the queue timeline return promises, as you don't know when will
* the computation finish, otherwise why bother running the computation if you already know the result.
*
* The standard is very clear on *not* providing much guarantee on what order these promises will resolve.
* See https://www.w3.org/TR/webgpu/#asynchrony. Fortunately, most of the functions don't carry much
 * information across two calls and the timing can simply be done by intercepting the resolution and
 * recording how long the promise takes to resolve.
*
* The difficult bit lies in `GPUQueue.submit`. It submits a workload onto GPU and nothing else. The GPU
* will start executing the work whenever it finds appropriate. In other words, the function solely
* exists to perform side effects and side effects make everything more ugly. However, if programs do not
* perform side effects, then they're mostly useless.
*
 * This begs the question: how are computation results observed in webGPU? It's actually no different from
 * how most computations are observed on hardware. You check a memory address *when the computation* is
* done. If you know which buffer the result will be written to and you know the last command to
* write to that buffer has been submitted. The resolution of `mapAsync` of that buffer will indicate the
* completion of that command.
*
* The strategy above should be the most accurate without using `timestamp-query`. Sadly this requires
* building a graph of dependencies from command, bind groups, and shaders to buffers and knowing when will
 * the completion of mapAsync denotes the completion of the series of commands. Instead, we take a simpler
 * strategy by relying on `onSubmittedWorkDone`.
*
* `onSubmittedWorkDone` is somewhat special among the promise returning functions in webGPU. The standard
* guarantees two things:
* 1. The resolution of `onSubmittedWorkDone` is FIFO with respect to the order of call of `submit`
 * 2. The resolution of `onSubmittedWorkDone` always guarantees the resolution of `mapAsync` that touched
* the buffer
*
* Hence the strategy is as follows:
* 1. Start a timer when user submits any work onto the GPU queue
* 2. Immediately call `onSubmittedWorkDone`, without awaiting/blocking
* 3. Upon the resolution of `onSubmittedWorkDone`, stop the timer and update our tracking
* 4. Rinse and repeat.
*
* This will be transparent to the user since we start our call immediately after they submit, when they
* call onSubmittedWorkDone later, ours will go on the micro task queue before due to the ordering of
* micro task queue.
*
* @author Liang Wang, liang@distributive.network
* @date May 2023
*/
self.wrapScriptLoading({ scriptName: 'lift-webgpu' }, function liftWebGPU$$fn(protectedStorage, ring0PostMessage) {
  /**
   * Wrap the global WebGPU API surface so GPU usage is timed and recorded on
   * the sandbox's webGPU interval tracker, using the strategy described in the
   * file header. Returns without side effects when WebGPU is unavailable.
   */
  protectedStorage.webGPUInitialization = async function webGPUInitialization()
  {
    // dcp-native lazy-loading for webgpu; bail out if the native module fails to come up.
    if (typeof initWebGPU === 'function')
    {
      const webgpuReady = await initWebGPU();
      if (!webgpuReady)
        return;
    }

    // Nothing to wrap when this environment exposes no WebGPU at all.
    if ((typeof navigator === 'undefined') || !('gpu' in navigator))
      return;

    // Determine who owns the navigator descriptor - slightly different in native vs web workers
    var navigatorOwner;
    if (Object.getOwnPropertyDescriptor(globalThis, 'navigator')) // native
      navigatorOwner = globalThis;
    else // web worker: 'navigator' lives on WorkerGlobalScope.prototype, two hops up the chain
      navigatorOwner = Object.getPrototypeOf(Object.getPrototypeOf(globalThis));

    const navigatorDescriptor = Object.getOwnPropertyDescriptor(navigatorOwner, 'navigator');
    const submitDescriptor    = Object.getOwnPropertyDescriptor(GPUQueue.prototype, 'submit');
    const GPUDescriptor       = Object.getOwnPropertyDescriptor(globalThis, 'GPU');

    // Fatal: globalThis.navigator OR globalThis.GPU are non-writable/configurable. This would prevent these scripts from being able
    // to block access to webgpu for jobs that do not explicitly require it - allowing jobs to bypass scheduling decisions based
    // on gpu availability must crash the sandbox, may want to stop the worker as well
    if (!((GPUDescriptor.writable || GPUDescriptor.configurable)
        && (navigatorDescriptor.writable || navigatorDescriptor.configurable)))
    {
      postMessage({ request: 'unrecoverable-evaluator', message: 'webgpu exists but is not wrapable' });
      close();
    }

    // Non-fatal: GPUQueue.prototype.submit is non-writable/configurable. This would prevent our gpu timing code from functioning
    // properly, so we cannot use webGPU for this sandbox, however by writing over the navigator.gpu and globalThis.GPU symbols,
    // we can fully block webGPU access, allowing the sandbox to live for CPU-compute purposes.
    if (!(submitDescriptor.writable || submitDescriptor.configurable))
    {
      protectedStorage.forceDisableWebGPU = true;
      return;
    }

    const TimeInterval   = protectedStorage.TimeInterval;
    const globalTrackers = protectedStorage.bigBrother.globalTrackers;
    const webGPUTimer    = globalTrackers.webGPUIntervals;

    /**
     * Factories to create wrappers for all webGPU functions (except submit &
     * onSubmittedWorkDone) to time them when they are run, recording the
     * duration of the function calls on the webGPU timer.
     *
     * @param {Function} fn  the underlying promise-returning webGPU function
     * @returns {Function} wrapper with identical calling convention
     */
    function webGPUAsyncTimingFactory(fn)
    {
      return function promiseWebGPUWrapper(...args)
      {
        const duration = new TimeInterval();
        const promise  = fn.apply(this, args);
        /* Record on both settle paths. We use .then(record, record) rather
         * than .finally(record): .finally() returns a derived promise which
         * re-throws on rejection, producing a spurious unhandled-rejection
         * event even when the caller handles the promise we return.
         */
        const record = () => webGPUTimer.push(duration.stop());
        promise.then(record, record);
        return promise;
      }
    }

    /**
     * As above, but for synchronous (device-timeline) webGPU functions.
     * @param {Function} fn  the underlying synchronous webGPU function
     * @returns {Function} wrapper with identical calling convention
     */
    function webGPUSyncTimingFactory(fn)
    {
      return function syncWebGPUWrapper(...args)
      {
        const duration = new TimeInterval();
        const ret = fn.apply(this, args);
        webGPUTimer.push(duration.stop());
        return ret;
      }
    }

    /**
     * Wrap various webGPU functions on one global GPU* class such that their
     * usage will be tracked. Safe to call for names which are absent from this
     * environment or which are not classes.
     *
     * @param {string} GPUClass  name of a global GPU* constructor
     */
    function liftWebGPUPrototype(GPUClass)
    {
      // the standard dictates these functions will return promises
      const promiseReturningFunctions = new Set([
        'requestAdapter',
        'requestAdapterInfo',
        'requestDevice',
        'createComputePipelineAsync',
        'createRenderPipelineAsync',
        'mapAsync', // this would overestimate in some cases, a potential discussion
        'getCompilationInfo',
        'onSubmittedWorkDone',
        'popErrorScope',
      ]);
      // TODO: consider what to do with 'destroy'
      // while they appear to be blocking, the meat of the work happens on the gpu driver thread
      const blockingFunctions = new Set([
        // GPU
        'getPreferredCanvasFormat', // fixed spelling (was 'getPrefered...', which never matched the real method)
        // GPUDevice
        'createBuffer',
        'createTexture',
        'createSampler',
        'importExternalTexture',
        'createBindGroupLayout',
        'createPipelineLayout',
        'createBindGroup',
        'createShaderModule',
        'createComputePipeline',
        'createRenderPipeline',
        'createCommandEncoder',
        'createRenderBundleEncoder',
        'createQuerySet',
        'pushErrorScope',
        // GPUBuffer
        'getMappedRange',
        'unmap',
        // GPUTexture
        'createView',
        // GPUPipelineBase
        'getBindGroupLayout',
        // GPUDebugCommandsMixin
        'pushDebugGroup',
        'popDebugGroup',
        'insertDebugMarker', // fixed spelling (was 'insertDebugWorker')
        // GPUCommandEncoder
        'beginRenderPass',
        'beginComputePass',
        'copyBufferToBuffer',
        'copyBufferToTexture',
        'copyTextureToBuffer',
        'copyTextureToTexture',
        'clearBuffer',
        'writeTimestamp',
        'resolveQuerySet',
        // GPUCommandEncoder + GPURenderBundleEncoder
        'finish',
        // GPUBindingCommandsMixin
        'setBindGroup',
        // GPUComputePassEncoder + GPURenderCommandsMixin
        'setPipeline',
        // GPUComputePassEncoder
        'dispatchWorkgroups',
        'dispatchWorkgroupsIndirect',
        // GPUComputePassEncoder + GPURenderPassEncoder
        'end',
        // GPURenderPassEncoder
        'setViewport', // fixed spelling (was 'setViewPort')
        'setScissorRect',
        'setBlendConstant',
        'setStencilReference',
        'beginOcclusionQuery',
        'endOcclusionQuery',
        'executeBundles',
        // GPURenderCommandsMixin
        'setIndexBuffer',
        'setVertexBuffer', // previously omitted alongside its siblings
        'draw',
        'drawIndexed',
        'drawIndirect',
        'drawIndexedIndirect',
        // GPUCanvasContext
        'configure',
        'unconfigure',
        // GPUQueue
        'writeBuffer',
        'writeTexture',
        'copyExternalImageToTexture',
      ]);

      // Iterating through all things 'GPU' on the global object; some names may
      // be absent in this environment or may not be classes. Skip those.
      if (!self[GPUClass] || !self[GPUClass].prototype)
        return;
      for (let prop of Object.keys(self[GPUClass].prototype))
      {
        if (promiseReturningFunctions.has(prop))
        {
          const fn = self[GPUClass].prototype[prop];
          self[GPUClass].prototype[prop] = webGPUAsyncTimingFactory(fn);
        }
        else if (blockingFunctions.has(prop))
        {
          const fn = self[GPUClass].prototype[prop];
          self[GPUClass].prototype[prop] = webGPUSyncTimingFactory(fn);
        }
      }
    }

    // Want to use the submit/onSubmittedWorkDone original functions for timing.
    const underlyingOnSubmittedWorkDone = GPUQueue.prototype.onSubmittedWorkDone;
    const underlyingSubmit = GPUQueue.prototype.submit;

    // some of them will get re-wrapped, that's fine, we always refer to the original function
    const requiredWrappingGPUClasses = [
      'GPU',
      'GPUAdapter',
      'GPUDevice',
      'GPUBuffer',
      'GPUTexture',
      'GPUShaderModule',
      'GPUComputePipeline',
      'GPURenderPipeline',
      'GPUCommandEncoder',
      'GPUComputePassEncoder',
      'GPURenderPassEncoder',
      'GPURenderBundleEncoder',
      'GPUQueue',
      'GPUQuerySet',
      'GPUCanvasContext',
    ];
    requiredWrappingGPUClasses.forEach(liftWebGPUPrototype);

    // `locked` blocks submissions after the work function has resolved;
    // `submittedDonePromises` tracks every submission still in flight.
    let locked = false;
    const submittedDonePromises = [];
    protectedStorage.webGPU = {
      lock: () => { locked = true; },
      unlock: () => { locked = false; },
      waitAllCommandToFinish: () => { return Promise.allSettled(submittedDonePromises); },
    };

    // our submit keeps a global tracker of all submissions, so we can track the time of each submission
    GPUQueue.prototype.submit = function submit(commandBuffers)
    {
      if (locked)
        throw new Error('Attempted to submit webGPU queue after work function resolved');
      underlyingSubmit.call(this, commandBuffers);
      const submitTime = performance.now();
      const submitDonePromise = underlyingOnSubmittedWorkDone.call(this).then(() => {
        /* Remove exactly this promise from the in-flight list. (Bug fix: the
         * previous splice(idx) omitted the deleteCount argument and removed
         * every entry from idx to the end, dropping other in-flight
         * submissions from waitAllCommandToFinish's view.)
         */
        const idx = submittedDonePromises.indexOf(submitDonePromise);
        if (idx !== -1)
          submittedDonePromises.splice(idx, 1);
        const completedAt = performance.now();
        const duration = new TimeInterval();
        duration.overrideInterval(submitTime, completedAt);
        webGPUTimer.push(duration);
      });
      submittedDonePromises.push(submitDonePromise);
    }
  }
});