@playcanvas/splat-transform
Version:
Library and CLI tool for 3D Gaussian splat format conversion and transformation
82 lines (81 loc) • 3.89 kB
TypeScript
/**
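* WGSL sources for the 5 compute shaders that make up the GPU dilation
* pipeline (extract → clear → dilateX → dilateZ → dilateY → compact; the
* dilateZ and dilateY stages run the same Y/Z shader with different
* `stride`/`axisLen` settings, hence six stages from five shaders),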
* plus the small block-type + Fibonacci-hash constants block that the
* extract and compact shaders share. Plain TS template-string
* composition — the constants block is interpolated into each consuming
* shader via `${dilationConstants}` rather than going through the
* engine's `#include` preprocessor.
*
* The orchestrator class lives in `gpu-dilation.ts` and imports each
* `xxxWgsl()` generator individually.
*/
/**
* Extract shader generator. Takes no arguments and returns the WGSL for the
* pass that converts a `SparseVoxelGrid` (uploaded as block types plus an
* open-addressed mask hash) directly into a row-aligned dense bit buffer for
* one outer chunk. One thread runs per source block in the chunk's outer
* block range.
*
* For MIXED blocks the shader does a Fibonacci-hash linear-probe lookup
* against the uploaded `srcKeys`/`srcLo`/`srcHi` arrays. The probe matches
* the CPU `BlockMaskMap.slot` formula bit-for-bit, so the two must be kept
* in sync if either side changes.
*
* The block's 4×4 X-row pattern lands in a single dense word at bit offset
* `(blockX*4) & 31`. Multiple blocks share the same dense word at
* non-overlapping bit positions, so the write is an `atomicOr`. Because an
* OR can only set bits, never clear them, the caller must clear the dense
* buffer before dispatching this shader.
*
* NOTE(review): "a single dense word" presumably applies per X-row (the
* compact shader's docs mention 16 dense words per block) — confirm against
* the shader source.
*
* @returns WGSL source for the extract compute shader.
*/
declare const extractWgsl: () => string;
/**
* Compact shader generator. Takes no arguments and returns the WGSL for the
* pass that converts a dilated dense bit buffer back into per-block
* `(type, lo, hi)` form for the chunk's INNER block region.
*
* One thread runs per inner block. It reads the block's 16 dense-word
* patterns to assemble the block's 64-bit mask, classifies the block as
* EMPTY/SOLID/MIXED, and writes to two parallel outputs:
*
*   - `typesOut`: packed at 2 bits per block (matches the `dst.types`
*     layout). Multiple threads write the same word, so the write is an
*     `atomicOr`; the caller must clear this buffer beforehand.
*   - `masksOut`: one `[lo, hi]` pair per inner block, indexed by the
*     inner-local block index. Always written, and non-atomic because
*     exactly one thread owns each slot.
*
* @returns WGSL source for the compact compute shader.
*/
declare const compactWgsl: () => string;
/**
* Clear shader generator. Takes no arguments and returns the WGSL for the
* pass that writes 0 to every word in the destination buffer up to
* `numWords`.
*
* The clear is dispatched in the same command encoder as the dilation
* passes so that it is ordered with them on the GPU. Using
* `queue.writeBuffer` for inter-pass clears would race: those writes are
* queued separately from encoder commands, so *all writes execute first*
* and only then does the command buffer run.
*
* @returns WGSL source for the clear compute shader.
*/
declare const clearWgsl: () => string;
/**
* X-axis dilation shader generator (per-word). Takes no arguments.
*
* Each thread produces one 32-bit output word at `(xWord, y, z)` and writes
* it directly; no atomics are needed because each output word has a single
* writer. With dilation radius `r`, the output bit at relative X position
* `b` (in `[0, 31]`) is the OR of the input bits in
* `[xWord*32 + b - r, xWord*32 + b + r]`.
*
* For each distance `d` in `[1, r]`, the shader reads the source word(s)
* containing the bits shifted by `d`, so the radius can span any number of
* 32-bit words.
*
* Bounded by the chunk's `numXWords` (= ceil(nx / 32)). Out-of-bounds
* neighbors are read as 0.
*
* @returns WGSL source for the X-axis dilation compute shader.
*/
declare const dilateXWgsl: () => string;
/**
* Y/Z-axis dilation shader generator (per-word). Takes no arguments. One
* shader source serves both axes.
*
* Each thread reads up to `2 * halfExtent + 1` input words at the same
* `xWord` along the chosen axis (Y or Z) and ORs them into one output word.
* No bit shifts are needed because words at the same `xWord` are
* bit-aligned across rows (the row stride is `numXWords` words).
*
* The caller picks the axis by setting `stride` and `axisLen`:
*
*   - Y-pass: `stride = numXWords`, `axisLen = ny`.
*   - Z-pass: `stride = numXWords * ny`, `axisLen = nz`.
*
* NOTE(review): `halfExtent` presumably plays the same role as the radius
* `r` in the X-axis shader's docs — confirm against the shader source.
*
* @returns WGSL source for the Y/Z-axis dilation compute shader.
*/
declare const dilateYZWgsl: () => string;
export { extractWgsl, compactWgsl, clearWgsl, dilateXWgsl, dilateYZWgsl };