@thi.ng/simd
Version:
WASM based SIMD vector operations for batch processing
244 lines • 9.4 kB
TypeScript
/**
* API of the WASM based SIMD vector operations, together with typed array
* views of the WASM memory these operations work on.
*
* @remarks
* Conventions, as far as they are stated by the member docs in this file:
*
* - Buffer arguments (`out`, `a`, `b`, `c`, `addr` etc.) are pointers into
*   the WASM memory, not JS arrays. Judging by the example given for
*   {@link SIMD.swizzle4_32_aos} (dest pointer `16` corresponds to `f32`
*   index `4`), pointers are byte addresses.
* - Stride arguments (`so`, `sa`, `sb`, `sc` etc.), where documented, are
*   given in elements (floats), not bytes.
* - Functions documented with a return value return the `out` (or `addr`)
*   pointer.
*
* Members without their own doc comment are not described any further in
* this file. Their semantics are presumably indicated by their names and
* by the documented sibling with a similar signature - verify against the
* WASM implementation before relying on it.
*/
export interface SIMD {
abs4_f32(out: number, a: number, num: number, so: number, sa: number): number;
add4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
addn4_f32(out: number, a: number, n: number, num: number, so: number, sa: number): number;
/**
* Takes three vec4 buffers, clamps `a` componentwise to `min(max(a, b),
* c)` and stores results in `out`. Both AOS / SOA layouts are
* supported, as long as all buffers are using the same layout.
*
* All strides must be multiples of 4. All pointers must be aligned to
* multiples of 16. Returns `out` pointer.
*
* Set `sb` and `sc` to 0 for clamping all `a` vectors against same
* bounds.
*
* @param out - result buffer pointer
* @param a - pointer to the vectors to clamp
* @param b - pointer to the lower bounds
* @param c - pointer to the upper bounds
* @param num - number of vec4
* @param so - out element stride
* @param sa - A element stride
* @param sb - B element stride
* @param sc - C element stride
*/
clamp4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sb: number, sc: number): number;
// NOTE(review): same argument list as `clamp4_f32`, but without `sb` / `sc`
// strides - presumably `b` and `c` are scalar bounds here. TODO confirm.
clampn4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number): number;
div4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
divn4_f32(out: number, a: number, n: number, num: number, so: number, sa: number): number;
/**
* Takes two densely packed vec2 AOS buffers `a` and `b`, computes their
* 2D dot products and stores results in `out`. Computes two results per
* iteration, hence `num` must be an even number or else the last vector
* will not be processed.
*
* @remarks
* `a` and `b` should be aligned to 16, `out` to multiples of 4.
*
* Unlike {@link SIMD.dot4_f32_aos}, this function does not take an
* output stride (`so`) argument. Results are presumably written as a
* densely packed buffer (TODO confirm against implementation).
*
* @param out - result buffer pointer
* @param a - pointer to first vec2 buffer
* @param b - pointer to second vec2 buffer
* @param num - number of vectors (must be even)
*/
dot2_f32_aos(out: number, a: number, b: number, num: number): number;
/**
* Takes two vec4 AOS buffers, computes their dot products and stores
* results in `out`. `so` should be 1 for a packed result buffer. `sa`
* and `sb` indicate the stride lengths (in floats) between each vector
* in each respective buffer and should be a multiple of 4.
*
* @param out - result buffer pointer
* @param a - pointer to first vec4 buffer
* @param b - pointer to second vec4 buffer
* @param num - number of vectors
* @param so - out stride (1 for a packed result buffer)
* @param sa - A vector stride (in floats, multiple of 4)
* @param sb - B vector stride (in floats, multiple of 4)
*/
dot4_f32_aos(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
/**
* Takes two vec4 SOA buffers, computes their 4D dot products and
* writes results to `out`. `sa` and `sb` indicate the element
* stride size (in floats) of the respective vectors (should be a
* multiple of 4). The results are always stored in a packed layout.
* Processes 4 vectors per iteration, hence `num` should be a
* multiple of 4 too.
*
* @param out - result buffer pointer (packed layout)
* @param a - pointer to first vec4 SOA buffer
* @param b - pointer to second vec4 SOA buffer
* @param num - number of vectors (should be a multiple of 4)
* @param sa - A element stride (in floats, multiple of 4)
* @param sb - B element stride (in floats, multiple of 4)
*/
dot4_f32_soa(out: number, a: number, b: number, num: number, sa: number, sb: number): number;
/**
* Also see {@link SIMD.sqrt4_f32}
*
* @param out -
* @param a -
* @param num -
* @param so -
* @param sa -
*/
invsqrt4_f32(out: number, a: number, num: number, so: number, sa: number): number;
/**
* Takes three vec4 buffers, computes componentwise a * b + c and stores
* results in `out`. Both AOS / SOA layouts are supported, as long as
* all buffers are using the same layout.
*
* All strides must be multiples of 4. All pointers must be aligned to
* multiples of 16. Returns `out` pointer.
*
* @param out - result buffer pointer
* @param a - pointer to A vectors
* @param b - pointer to B vectors
* @param c - pointer to C vectors
* @param num - number of vec4
* @param so - out element stride
* @param sa - A element stride
* @param sb - B element stride
* @param sc - C element stride
*/
madd4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sb: number, sc: number): number;
// NOTE(review): argument list matches `msubn4_f32` (no `sb` stride), so `b`
// is presumably a scalar factor here as well. TODO confirm.
maddn4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sc: number): number;
mag2_f32_aos(out: number, a: number, num: number): number;
magsq2_f32_aos(out: number, a: number, num: number): number;
mag4_f32_aos(out: number, a: number, num: number, so: number, sa: number): number;
magsq4_f32_aos(out: number, a: number, num: number, so: number, sa: number): number;
max4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
min4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
mix4_f32(out: number, a: number, b: number, t: number, num: number, so: number, sa: number, sb: number, st: number): number;
mixn4_f32(out: number, a: number, b: number, t: number, num: number, so: number, sa: number, sb: number): number;
mul4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
muln4_f32(out: number, a: number, n: number, num: number, so: number, sa: number): number;
mul_m22v2_aos(out: number, mat: number, vec: number, num: number): number;
mul_m23v2_aos(out: number, mat: number, vec: number, num: number): number;
mul_m44v4_aos(out: number, mat: number, vec: number, num: number, so: number, sv: number): number;
// NOTE(review): argument list matches `madd4_f32` (three buffers with
// individual strides). Presumably the all-buffer counterpart of
// `msubn4_f32`, i.e. componentwise a * b - c. TODO confirm.
msub4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sb: number, sc: number): number;
/**
* Takes two vec4 buffers `a` and `c` and a scalar factor `b`, computes
* componentwise a * b - c and stores results in `out`. Both AOS / SOA
* layouts are supported, as long as all buffers are using the same
* layout.
*
* All strides must be multiples of 4. All pointers must be aligned to
* multiples of 16. Returns `out` pointer.
*
* @param out - result buffer pointer
* @param a - pointer to A vectors
* @param b - scalar factor
* @param c - pointer to C vectors
* @param num - number of vec4
* @param so - out element stride
* @param sa - A element stride
* @param sc - C element stride
*/
msubn4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sc: number): number;
neg4_f32(out: number, a: number, num: number, so: number, sa: number): number;
normalize2_f32_aos(out: number, a: number, num: number, norm: number): number;
normalize4_f32_aos(out: number, a: number, num: number, norm: number, so: number, sa: number): number;
/**
* Sets a single float vector lane `id` in `num` items from `addr`,
* spaced by `stride`. `id` is used as start offset for `addr`. Both
* `id` and `stride` are in floats, not bytes. Returns `addr`.
*
* @example
* ```ts
* // see README for simd initialization
*
* // set Y component in AOS vec4 buffer from addr 0x1000
* simd.set_lane_f32(0x1000, 1, 1.23, 4, 4)
* ```
*
* @param addr - start address of the buffer
* @param id - lane ID / start offset (in floats)
* @param x - value to set
* @param num - number of items
* @param stride - distance between items (in floats)
*/
set_lane_f32(addr: number, id: number, x: number, num: number, stride: number): number;
/**
* Also see {@link SIMD.invsqrt4_f32}
*
* @param out -
* @param a -
* @param num -
* @param so -
* @param sa -
*/
sqrt4_f32(out: number, a: number, num: number, so: number, sa: number): number;
sub4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;
subn4_f32(out: number, a: number, n: number, num: number, so: number, sa: number): number;
sum4_f32(a: number, num: number, sa: number): number;
/**
* Swaps, reorders or replaces vector components in an AOS f32/u32 vec4
* buffer. The `x`,`y`,`z`,`w` args indicate the intended lane values
* (each 0-3).
*
* @example
* ```ts
* // see README for simd initialization
*
* simd.f32.set([10, 20, 30, 40], 0)
* simd.swizzle4_32_aos(
* 16, // dest ptr
* 0, // src ptr
* 3, 0, 1, 2, // lane IDs
* 1, // num vectors
* 4, // output stride (in f32/u32)
* 4 // input stride
* )
*
* simd.f32.slice(4, 8)
* // [40, 10, 20, 30]
* ```
*
* @param out - dest pointer
* @param a - src pointer
* @param x - source lane ID (0-3) for the X component
* @param y - source lane ID (0-3) for the Y component
* @param z - source lane ID (0-3) for the Z component
* @param w - source lane ID (0-3) for the W component
* @param num - number of vectors
* @param so - output stride (in f32/u32)
* @param sa - input stride (in f32/u32)
*/
swizzle4_32_aos(out: number, a: number, x: number, y: number, z: number, w: number, num: number, so: number, sa: number): number;
/**
* WASM memory instance given to `init()`.
*/
memory: WebAssembly.Memory;
/**
* Float64 view of WASM memory.
*/
f64: Float64Array;
/**
* Float32 view of WASM memory.
*/
f32: Float32Array;
/**
* Uint32 view of WASM memory.
*/
u32: Uint32Array;
/**
* Int32 view of WASM memory.
*/
i32: Int32Array;
/**
* Uint16 view of WASM memory.
*/
u16: Uint16Array;
/**
* Int16 view of WASM memory.
*/
i16: Int16Array;
/**
* Uint8 view of WASM memory.
*/
u8: Uint8Array;
/**
* Int8 view of WASM memory.
*/
i8: Int8Array;
}
//# sourceMappingURL=api.d.ts.map