// @playcanvas/splat-transform
// Library and CLI tool for 3D Gaussian splat format conversion and transformation.
// Version: (not captured) • 242 lines (241 loc) • 10.5 kB • TypeScript
import { GraphicsDevice } from 'playcanvas';
import { type Projection } from '../render/camera';
/**
* Configuration for a `GpuSplatRasterizer`. Fixed across the lifetime of
* a render — `numSHBands` and the group tile dimensions determine GPU
* buffer sizes and shader uniform layouts.
*
* Sizes are expressed as a "group" tile rectangle (`groupTilesX ×
* groupTilesY`). For a single-pass render the group covers the whole
* image, so the buffers are exactly image-sized. The group abstraction
* is retained as a hook for future subframe splitting (each subframe is
* an independent group sharing the global depth sort) — the project
* shader's group-AABB cull and group-pixel-origin uniforms still
* exercise this code path.
*
* Every field is required: the interface declares no optional members,
* so callers must supply the pinhole-only fields (`focusDistance`,
* `apertureScale`) even for an equirect render.
*/
interface SplatRasterizerOptions {
/** Number of SH bands above DC (0–3). Determines input stride. */
numSHBands: 0 | 1 | 2 | 3;
/**
* Camera projection mode. Specializes the project, emit-pairs and
* rasterize-binned shaders. `pinhole` (default) uses the classical
* perspective + EWA Jacobian path; `equirect` uses spherical
* (atan2/asin) screen mapping, a non-linear Jacobian, radial view
* depth, and tile-bin / rasterize paths that wrap the X axis at the
* ±π longitude seam.
*
* NOTE(review): this field is required here, so "default" presumably
* describes what the caller/CLI passes when the user does not choose
* a mode — confirm against the orchestrator.
*/
projection: Projection;
/** Tiles per group along X (≤ imageTilesX). Sizes runningState/output. */
groupTilesX: number;
/** Tiles per group along Y (≤ imageTilesY). Sizes runningState/output. */
groupTilesY: number;
/** Max gaussians per chunk; sizes the input + projection + pair buffers. */
chunkCap: number;
/**
* Hard upper bound on per-splat tile coverage. The project shader
* clamps `coverage[i] = min(rawBboxArea, maxCoveragePerSplat)`, so
* the pair buffer is bounded by `chunkCap × maxCoveragePerSplat`
* regardless of scene/screen size. If the cap ever bites, the
* emit-pairs shader walks the bbox row-major and stops once it
* has written `coverage[i]` pairs — i.e. it truncates the bbox at
* its bottom-right corner.
*
* The orchestrator sets this to the group's full tile area so the
* clamp is geometrically unreachable (any in-group bbox ≤ group
* area ≤ cap), making truncation a non-issue in practice. The cap
* is retained as a defensive ceiling on the pair buffer.
*/
maxCoveragePerSplat: number;
/** Output image width in pixels (constant per render). */
imageWidth: number;
/** Output image height in pixels (constant per render). */
imageHeight: number;
/** Near plane distance in world units. */
near: number;
/**
* Camera basis, `right` row, X component. The three rows
* (right, down, forward) make up the world→camera rotation.
*/
rightX: number;
/** Camera basis, `right` row, Y component. */
rightY: number;
/** Camera basis, `right` row, Z component. */
rightZ: number;
/** Camera basis, `down` row, X component. */
downX: number;
/** Camera basis, `down` row, Y component. */
downY: number;
/** Camera basis, `down` row, Z component. */
downZ: number;
/** Camera basis, `forward` row, X component. */
forwardX: number;
/** Camera basis, `forward` row, Y component. */
forwardY: number;
/** Camera basis, `forward` row, Z component. */
forwardZ: number;
/** Camera eye position in world space, X coordinate. */
eyeX: number;
/** Camera eye position in world space, Y coordinate. */
eyeY: number;
/** Camera eye position in world space, Z coordinate. */
eyeZ: number;
/** Focal length along X, in pixel units. */
focalX: number;
/** Focal length along Y, in pixel units. */
focalY: number;
/**
* Camera-space Z of the focus plane, world units. Pinhole-only;
* unused when `projection === 'equirect'`.
*/
focusDistance: number;
/**
* DoF strength as a pixel-space scalar: the CoC radius in pixels when
* `|1 − focusDistance/cz| = 1`. `0` disables defocus. The writer
* derives this from `--f-stop` + `--sensor-size` using the thin-lens
* CoC formula. Pinhole-only.
*/
apertureScale: number;
/** Background red channel, in [0, 1]. */
bgR: number;
/** Background green channel, in [0, 1]. */
bgG: number;
/** Background blue channel, in [0, 1]. */
bgB: number;
/** Background alpha channel, in [0, 1]. */
bgA: number;
}
/**
* GPU-accelerated splat rasterizer.
*
* Owns eight compute shaders — project, prefix-sum, emit-pairs,
* prepare-indirect, init-tile-offsets, find-boundaries, rasterize-binned,
* finalize-pack — a shared `ComputeRadixSort` (used in indirect mode,
* key + value), and GPU buffers. The per-chunk pipeline is fully
* GPU-resident: the caller never reads back coverage, sorted keys, or
* tile offsets.
*
* Per-render flow:
* 1. `beginGroup(...)` — clears the running state and sets uniforms
* for this group (covers the whole image for a single-pass render).
* 2. For each chunk of depth-sorted splats: `dispatchChunk(data,
* chunkSize)` runs the whole tile-bin + rasterize pipeline in one
* submission — project + coverage → prefix-sum (writes emitOffsets
* + totalPairs) → emit-pairs → prepare-indirect → radix sortIndirect
* → init-tile-offsets → find-boundaries → rasterize-binned. No
* readbacks; one `submit()` per chunk to capture each compute's
* uniform state before the next chunk overwrites it.
* 3. `finishGroup()` — dispatches finalize-pack and starts an async
* readback. Returns a `Promise<Uint8Array>` resolved when the GPU has
* finished writing this group's RGBA bytes.
* 4. `destroy()` — releases all GPU resources once the rasterizer is
* no longer needed.
*
* This is an ambient declaration: private members appear without types
* because the declaration emit strips them.
*/
declare class GpuSplatRasterizer {
// Presumably the constructor arguments, retained for later use — confirm
// in the implementation.
private device;
private options;
// One member per compute shader named in the class description.
private projectShader;
private prefixSumShader;
private emitPairsShader;
private prepareIndirectShader;
private initTileOffsetsShader;
private findBoundariesShader;
private rasterizeBinnedShader;
private finalizeShader;
// One "BgFormat" per shader above — presumably the bind-group format each
// shader is created with; verify against the implementation.
private projectBgFormat;
private prefixSumBgFormat;
private emitPairsBgFormat;
private prepareIndirectBgFormat;
private initTileOffsetsBgFormat;
private findBoundariesBgFormat;
private rasterizeBinnedBgFormat;
private finalizeBgFormat;
// GPU buffers owned by this rasterizer (released by `destroy`).
private buffers;
/**
* Single shared `ComputeRadixSort` for the GPU tile-bin pipeline.
* Used in key+value mode: tile-index keys + splat-index values.
*/
private radixSort;
/** sortIndirect numBits, derived from numTiles (multiple of 4). */
private sortKeyBits;
// NOTE(review): presumably the fill pattern `beginGroup` uses to clear the
// running state — confirm in the implementation.
private clearStatePattern;
/** Active group's tile dimensions, set by `beginGroup`. */
private activeTilesX;
private activeTilesY;
/** Floats per gaussian in the input buffer (depends on SH band count). */
readonly inputStride: number;
/** Group tile dimensions (X); the constructor-provided maximum. */
readonly groupTilesX: number;
/** Group tile dimensions (Y); the constructor-provided maximum. */
readonly groupTilesY: number;
/** Max gaussians per chunk. */
readonly chunkCap: number;
/** Pixels per group axis (X). */
readonly groupPixelW: number;
/** Pixels per group axis (Y). */
readonly groupPixelH: number;
/**
* Create a rasterizer for the given device and fixed render
* configuration.
*
* @param device - Graphics device the rasterizer runs on.
* @param options - Render configuration; see `SplatRasterizerOptions`.
*/
constructor(device: GraphicsDevice, options: SplatRasterizerOptions);
/**
* Apply the global (camera + image + background) uniforms to every
* pipeline compute instance, plus the per-group origin/extent fields.
*
* The group abstraction is retained as a hook for future subframe
* rendering — when a render is split into multiple groups, each call
* sets the current group's pixel rectangle so the project shader's
* AABB cull skips splats outside the group.
*
* @param groupX - Group index along X.
* @param groupY - Group index along Y.
* @param groupTilesX - Number of tiles in this group along X.
* @param groupTilesY - Number of tiles in this group along Y.
*/
private setUniforms;
/**
* Begin processing a group. Clears running state and sets uniforms.
*
* The tile dimensions passed here become the active group dimensions
* that `finishGroup` uses to size its dispatch and readback; they may
* be smaller than the constructor-provided maximum for edge subframes.
*
* @param groupX - Group index along X.
* @param groupY - Group index along Y.
* @param groupTilesX - Number of tiles in this group along X.
* @param groupTilesY - Number of tiles in this group along Y.
*/
beginGroup(groupX: number, groupY: number, groupTilesX: number, groupTilesY: number): void;
/**
* Commit pending GPU work. Called at chunk boundaries so each chunk's
* uniform-buffer values are captured before the next chunk overwrites
* them — a `Compute` instance's persistent uniform buffer is updated
* by `setParameter`, and the dispatch only captures the value on
* submit. Within a chunk, every dispatch uses a distinct `Compute`
* instance, so no internal submits are needed.
*/
submit(): void;
/**
* Reserve a fresh sort + find-boundaries slot pair in the device's
* indirect-dispatch buffer for this chunk. The returned indices are
* consumed by the per-chunk pipeline (`dispatchChunk`) internally and
* exposed for cross-cutting use (e.g. the radix sort needs the sort
* slot).
*
* @returns Two fresh slot indices in the device's indirect dispatch
* buffer: one for the radix sort's indirect dispatch, one for the
* find-boundaries indirect dispatch.
*/
private acquireIndirectSlots;
/**
* Dispatch the entire per-chunk tile-bin + rasterize pipeline on the
* GPU with zero CPU readbacks:
*
* pack-and-upload → project + coverage → prefix-sum (writes
* emitOffsets + totalPairs) → emit-pairs (writes tileKeys +
* splatValues) → prepare-indirect (writes workgroup counts into
* the device's indirect-dispatch buffer for the sort and
* find-boundaries) → radix sortIndirect (key+value: tileKeys
* sorted, splatValues reordered) → init tile-offsets to sentinel
* → find-boundaries (atomicMin) → rasterize.
*
* All eight dispatches use distinct `Compute` instances, so their
* persistent uniform buffers don't alias each other within a chunk;
* a single `submit()` after the rasterize captures everything before
* the next chunk starts overwriting `setParameter` values.
*
* NOTE(review): the declaration does not show whether that `submit()`
* is issued inside this method or is the caller's responsibility —
* confirm before relying on either.
*
* @param chunkData - Float32Array containing `chunkSize × inputStride` floats.
* @param chunkSize - Number of gaussians in this chunk (≤ chunkCap).
*/
dispatchChunk(chunkData: Float32Array, chunkSize: number): void;
/**
* Finish processing a group. Dispatches finalize-pack and starts an
* async readback of the group's RGBA8 pixel bytes.
*
* Dispatch + readback are sized to the ACTIVE group dimensions (set
* by the most recent `beginGroup`), not the constructor-provided
* maximum, so edge subframes smaller than the max don't pay for
* unused workgroups or readback bytes.
*
* @returns Promise resolving to the active group's RGBA byte buffer
* (`activeTilesX·16 × activeTilesY·16 × 4` bytes).
*/
finishGroup(): Promise<Uint8Array>;
/**
* Release all GPU resources.
*/
destroy(): void;
}
export { GpuSplatRasterizer, type SplatRasterizerOptions };