@playcanvas/splat-transform

import { GraphicsDevice } from 'playcanvas';
import { type Projection } from '../render/camera';

/**
 * Settings that configure a `GpuSplatRasterizer`.
 *
 * They stay fixed for the whole lifetime of a render: `numSHBands` and the
 * group tile dimensions determine GPU buffer sizes and shader uniform
 * layouts.
 *
 * Buffer sizes are expressed through a "group" tile rectangle
 * (`groupTilesX × groupTilesY`). In a single-pass render the group spans
 * the entire image, so the buffers are exactly image-sized. The group
 * concept is kept as a hook for future subframe splitting, where each
 * subframe would be an independent group sharing the global depth sort;
 * the project shader's group-AABB cull and group-pixel-origin uniforms
 * still exercise that code path.
 */
export interface SplatRasterizerOptions {
    /** SH band count above DC, in the range 0–3. Sets the input stride. */
    numSHBands: 0 | 1 | 2 | 3;

    /**
     * Camera projection mode; specializes the project, emit-pairs and
     * rasterize-binned shaders.
     *
     * - `pinhole`: the classical perspective + EWA Jacobian path.
     * - `equirect`: spherical (atan2/asin) screen mapping, a non-linear
     *   Jacobian, radial view depth, and tile-bin / rasterize paths that
     *   wrap the X axis at the ±π longitude seam.
     *
     * NOTE(review): `pinhole` is described as the default mode, yet this
     * field is required — any defaulting presumably happens in the caller;
     * confirm.
     */
    projection: Projection;

    /** Group width in tiles (≤ imageTilesX). Sizes runningState/output. */
    groupTilesX: number;

    /** Group height in tiles (≤ imageTilesY). Sizes runningState/output. */
    groupTilesY: number;

    /**
     * Upper limit on gaussians per chunk. Sizes the input, projection and
     * pair buffers.
     */
    chunkCap: number;

    /**
     * Hard ceiling on how many tiles a single splat may cover.
     *
     * The project shader clamps
     * `coverage[i] = min(rawBboxArea, maxCoveragePerSplat)`, which bounds
     * the pair buffer by `chunkCap × maxCoveragePerSplat` regardless of
     * scene or screen size. Should the cap ever bite, the emit-pairs
     * shader walks the bbox row-major and stops after writing
     * `coverage[i]` pairs — in other words, the bbox is truncated at its
     * bottom-right corner.
     *
     * The orchestrator passes the group's full tile area here, so the
     * clamp is geometrically unreachable (any in-group bbox ≤ group area
     * ≤ cap) and truncation is a non-issue in practice. The cap remains
     * as a defensive ceiling on the pair buffer.
     */
    maxCoveragePerSplat: number;

    /** Width of the output image in pixels (constant per render). */
    imageWidth: number;

    /** Height of the output image in pixels (constant per render). */
    imageHeight: number;

    /** Distance to the near plane, in world units. */
    near: number;

    /**
     * Camera basis. The rows of the world→camera rotation are, in order,
     * (right, down, forward). This is the X component of the right row.
     */
    rightX: number;
    /** Y component of the camera basis right row. */
    rightY: number;
    /** Z component of the camera basis right row. */
    rightZ: number;

    /** X component of the camera basis down row. */
    downX: number;
    /** Y component of the camera basis down row. */
    downY: number;
    /** Z component of the camera basis down row. */
    downZ: number;

    /** X component of the camera basis forward row. */
    forwardX: number;
    /** Y component of the camera basis forward row. */
    forwardY: number;
    /** Z component of the camera basis forward row. */
    forwardZ: number;

    /** World-space camera eye position, X coordinate. */
    eyeX: number;
    /** World-space camera eye position, Y coordinate. */
    eyeY: number;
    /** World-space camera eye position, Z coordinate. */
    eyeZ: number;

    /** Focal length along X, in pixel units. */
    focalX: number;
    /** Focal length along Y, in pixel units. */
    focalY: number;

    /**
     * Camera-space Z of the focus plane, in world units. Pinhole-only:
     * ignored when `projection === 'equirect'`.
     */
    focusDistance: number;

    /**
     * Depth-of-field strength as a pixel-space scalar: the CoC radius in
     * pixels when `|1 − focusDistance/cz| = 1`. A value of `0` turns
     * defocus off. The writer derives it from `--f-stop` +
     * `--sensor-size` via the thin-lens CoC formula. Pinhole-only.
     */
    apertureScale: number;

    /** Background red channel, in [0, 1]. */
    bgR: number;
    /** Background green channel, in [0, 1]. */
    bgG: number;
    /** Background blue channel, in [0, 1]. */
    bgB: number;
    /** Background alpha channel, in [0, 1]. */
    bgA: number;
}

/**
 * Splat rasterizer that runs on the GPU.
 *
 * It owns eight compute shaders (project, prefix-sum, emit-pairs,
 * prepare-indirect, init-tile-offsets, find-boundaries, rasterize-binned
 * and finalize-pack), one shared `ComputeRadixSort` (used in indirect
 * mode with key + value), and the GPU buffers. The per-chunk pipeline is
 * fully GPU-resident: coverage, sorted keys and tile offsets are never
 * read back by the caller.
 *
 * A render proceeds as follows:
 *
 *   1. `beginGroup(...)` clears the running state and sets the uniforms
 *      for the group (which covers the whole image in a single-pass
 *      render).
 *   2. For every chunk of depth-sorted splats,
 *      `dispatchChunk(data, chunkSize)` runs the complete tile-bin +
 *      rasterize pipeline in a single submission: project + coverage →
 *      prefix-sum (writes emitOffsets + totalPairs) → emit-pairs →
 *      prepare-indirect → radix sortIndirect → init-tile-offsets →
 *      find-boundaries → rasterize-binned. There are no readbacks, and
 *      one `submit()` per chunk captures each compute's uniform state
 *      before the next chunk overwrites it.
 *   3. `finishGroup()` dispatches finalize-pack and kicks off an async
 *      readback. The returned `Promise<Uint8Array>` resolves once the GPU
 *      has finished writing the group's RGBA bytes.
 */
export declare class GpuSplatRasterizer {
    private device;
    private options;

    // Compute shaders, one per pipeline stage.
    private projectShader;
    private prefixSumShader;
    private emitPairsShader;
    private prepareIndirectShader;
    private initTileOffsetsShader;
    private findBoundariesShader;
    private rasterizeBinnedShader;
    private finalizeShader;

    // One `*BgFormat` member per shader (presumably bind-group formats —
    // TODO confirm against the implementation).
    private projectBgFormat;
    private prefixSumBgFormat;
    private emitPairsBgFormat;
    private prepareIndirectBgFormat;
    private initTileOffsetsBgFormat;
    private findBoundariesBgFormat;
    private rasterizeBinnedBgFormat;
    private finalizeBgFormat;

    private buffers;

    /**
     * The one `ComputeRadixSort` shared by the GPU tile-bin pipeline. It
     * runs in key+value mode, with tile-index keys and splat-index values.
     */
    private radixSort;

    /** `numBits` for sortIndirect; derived from numTiles (multiple of 4). */
    private sortKeyBits;

    private clearStatePattern;

    /** Tile width of the active group, as set by `beginGroup`. */
    private activeTilesX;
    /** Tile height of the active group, as set by `beginGroup`. */
    private activeTilesY;

    /**
     * Number of floats each gaussian occupies in the input buffer; depends
     * on the SH band count.
     */
    readonly inputStride: number;

    /** Group tile dimension along X. */
    readonly groupTilesX: number;

    /** Group tile dimension along Y. */
    readonly groupTilesY: number;

    /** Upper limit on gaussians per chunk. */
    readonly chunkCap: number;

    /** Group extent in pixels along X. */
    readonly groupPixelW: number;

    /** Group extent in pixels along Y. */
    readonly groupPixelH: number;

    constructor(device: GraphicsDevice, options: SplatRasterizerOptions);

    /**
     * Pushes the global uniforms (camera + image + background) to every
     * compute instance of the pipeline, together with the per-group
     * origin/extent fields.
     *
     * The group abstraction is kept as a hook for future subframe
     * rendering: once a render is split into several groups, each call
     * sets the current group's pixel rectangle so that the project
     * shader's AABB cull skips splats lying outside the group.
     *
     * @param groupX - Group index along X.
     * @param groupY - Group index along Y.
     * @param groupTilesX - Number of tiles in this group along X.
     * @param groupTilesY - Number of tiles in this group along Y.
     */
    private setUniforms;

    /**
     * Starts work on a group: clears the running state and sets the
     * uniforms.
     *
     * @param groupX - Group index along X.
     * @param groupY - Group index along Y.
     * @param groupTilesX - Number of tiles in this group along X.
     * @param groupTilesY - Number of tiles in this group along Y.
     */
    beginGroup(groupX: number, groupY: number, groupTilesX: number, groupTilesY: number): void;

    /**
     * Commits pending GPU work.
     *
     * It is called at chunk boundaries so that a chunk's uniform-buffer
     * values are captured before the following chunk overwrites them:
     * `setParameter` updates a `Compute` instance's persistent uniform
     * buffer, and a dispatch only captures the value on submit. Inside a
     * chunk every dispatch goes through a distinct `Compute` instance,
     * so no intermediate submits are required.
     */
    submit(): void;

    /**
     * Reserves a fresh pair of slots (sort + find-boundaries) for this
     * chunk in the device's indirect-dispatch buffer. The indices are
     * consumed internally by the per-chunk pipeline and are also exposed
     * for cross-cutting use (e.g. the radix sort needs the sort slot).
     *
     * NOTE(review): the consumer was previously named as
     * `dispatchTileBinChunk`, which is not declared on this class —
     * presumably `dispatchChunk` is meant; confirm.
     *
     * @returns Two fresh slot indices in the device's indirect dispatch
     * buffer: one for the radix sort's indirect dispatch and one for the
     * find-boundaries indirect dispatch.
     */
    private acquireIndirectSlots;

    /**
     * Runs the complete per-chunk tile-bin + rasterize pipeline on the
     * GPU, with zero CPU readbacks:
     *
     *   pack-and-upload → project + coverage → prefix-sum (writes
     *   emitOffsets + totalPairs) → emit-pairs (writes tileKeys +
     *   splatValues) → prepare-indirect (writes workgroup counts into
     *   the device's indirect-dispatch buffer for the sort and
     *   find-boundaries) → radix sortIndirect (key+value: tileKeys
     *   sorted, splatValues reordered) → init tile-offsets to sentinel
     *   → find-boundaries (atomicMin) → rasterize.
     *
     * All eight dispatches go through distinct `Compute` instances, so
     * their persistent uniform buffers never alias one another within a
     * chunk. A single `submit()` after the rasterize step captures
     * everything before the next chunk begins overwriting `setParameter`
     * values.
     *
     * @param chunkData - Float32Array holding `chunkSize × inputStride` floats.
     * @param chunkSize - Number of gaussians in this chunk (≤ chunkCap).
     */
    dispatchChunk(chunkData: Float32Array, chunkSize: number): void;

    /**
     * Completes the current group: dispatches finalize-pack and begins an
     * async readback of the group's RGBA8 pixel bytes.
     *
     * Both the dispatch and the readback are sized to the ACTIVE group
     * dimensions (those set by the latest `beginGroup`) rather than the
     * maximum given to the constructor, so edge sub-frames smaller than
     * the maximum don't pay for unused workgroups or readback bytes.
     *
     * @returns Promise resolving to the active group's RGBA byte buffer
     * (`activeTilesX·16 × activeTilesY·16 × 4` bytes).
     */
    finishGroup(): Promise<Uint8Array>;

    /**
     * Frees every GPU resource.
     */
    destroy(): void;
}