@playcanvas/splat-transform

Version:

Library and CLI tool for 3D Gaussian splat format conversion and transformation

82 lines (81 loc) • 3.89 kB

TypeScript

/**
 * WGSL sources for the 5 compute shaders that make up the GPU dilation
 * pipeline (extract → clear → dilateX → dilateZ → dilateY → compact),
 * plus the small block-type + Fibonacci-hash constants block that the
 * extract and compact shaders share.
 *
 * The pipeline lists six stages but only five shaders are declared here:
 * the Z and Y dilation stages both use the single shader returned by
 * `dilateYZWgsl`, with the caller selecting the axis.
 *
 * Plain TS template-string composition — the constants block is
 * interpolated into each consuming shader via `${dilationConstants}` rather
 * than going through the engine's `#include` preprocessor.
 *
 * The orchestrator class lives in `gpu-dilation.ts` and imports each
 * `xxxWgsl()` generator individually.
 */

/**
 * Extract shader — converts a `SparseVoxelGrid` (uploaded as types + open-
 * addressed mask hash) directly into a row-aligned dense bit buffer for one
 * outer chunk. One thread per source block in the chunk's outer block range.
 *
 * For MIXED blocks the shader does Fibonacci-hash linear-probe lookup against
 * the uploaded `srcKeys`/`srcLo`/`srcHi` arrays (matches the CPU
 * `BlockMaskMap.slot` formula bit-for-bit). The block's 4×4 X-row pattern
 * lands in a single dense word at bit offset `(blockX*4) & 31`; multiple
 * blocks share the same dense word at non-overlapping bit positions, so the
 * write is `atomicOr`. Caller must clear the dense buffer first.
 *
 * @returns WGSL source for the extract compute shader.
 */
declare const extractWgsl: () => string;

/**
 * Compact shader — converts a dilated dense bit buffer back into per-block
 * `(type, lo, hi)` form for the chunk's INNER block region. One thread per
 * inner block; reads its 16 dense-word patterns to assemble the block's
 * 64-bit mask, classifies as EMPTY/SOLID/MIXED, and writes to two parallel
 * outputs:
 *   - `typesOut`: 2-bit-per-block packed (matches `dst.types` layout).
 *     Multiple threads write the same word, so atomicOr (caller clears).
 *   - `masksOut`: `[lo, hi]` pairs per inner block, indexed by inner-local
 *     block index. Always written (non-atomic; one thread per slot).
 *
 * @returns WGSL source for the compact compute shader.
 */
declare const compactWgsl: () => string;

/**
 * Clear shader — writes 0 to every word in the destination buffer up to
 * `numWords`. Dispatched in the same command encoder as the dilation passes
 * so it's ordered with them on the GPU; using `queue.writeBuffer` for inter-
 * pass clears would race because writes are queued separately from encoder
 * commands and execute *all writes first*, then the command buffer.
 *
 * @returns WGSL source for the clear compute shader.
 */
declare const clearWgsl: () => string;

/**
 * X-axis dilation shader — per-word.
 *
 * Each thread produces one 32-bit output word at `(xWord, y, z)` and writes
 * it directly (no atomics). The output bit at relative X position `b` (in
 * `[0, 31]`) is the OR of input bits in `[xWord*32 + b - r, xWord*32 + b + r]`.
 * For each distance `d` in `[1, r]`, the shader reads the source word(s)
 * containing bits shifted by `d`, so radii can span any number of 32-bit words.
 *
 * Bound by the chunk's `numXWords` (= ceil(nx / 32)). Out-of-bounds neighbors
 * are read as 0.
 *
 * @returns WGSL source for the X-axis dilation compute shader.
 */
declare const dilateXWgsl: () => string;

/**
 * Y/Z-axis dilation shader — per-word.
 *
 * Each thread reads up to `2 * halfExtent + 1` input words at the same
 * `xWord` along the chosen axis (Y or Z) and OR's them into one output word.
 * No bit shifts needed because words at the same `xWord` are bit-aligned
 * across rows (row stride is `numXWords` words). Caller picks the axis by
 * setting `stride` and `axisLen`:
 *   - Y-pass: `stride = numXWords`, `axisLen = ny`.
 *   - Z-pass: `stride = numXWords * ny`, `axisLen = nz`.
 *
 * NOTE(review): `halfExtent` presumably plays the same role as the radius
 * `r` in the X-axis shader's description — confirm against the shader source.
 * NOTE(review): handling of neighbors outside `[0, axisLen)` is not stated
 * here; "up to" suggests they are skipped — confirm.
 *
 * @returns WGSL source for the Y/Z-axis dilation compute shader.
 */
declare const dilateYZWgsl: () => string;

export { extractWgsl, compactWgsl, clearWgsl, dilateXWgsl, dilateYZWgsl };