UNPKG

gpu.js

Version:

GPU Accelerated JavaScript

496 lines (427 loc) 12.1 kB
// language=GLSL const fragmentShader = `__HEADER__; __FLOAT_TACTIC_DECLARATION__; __INT_TACTIC_DECLARATION__; __SAMPLER_2D_TACTIC_DECLARATION__; const int LOOP_MAX = __LOOP_MAX__; __PLUGINS__; __CONSTANTS__; varying vec2 vTexCoord; float acosh(float x) { return log(x + sqrt(x * x - 1.0)); } float sinh(float x) { return (pow(${Math.E}, x) - pow(${Math.E}, -x)) / 2.0; } float asinh(float x) { return log(x + sqrt(x * x + 1.0)); } float atan2(float v1, float v2) { if (v1 == 0.0 || v2 == 0.0) return 0.0; return atan(v1 / v2); } float atanh(float x) { x = (x + 1.0) / (x - 1.0); if (x < 0.0) { return 0.5 * log(-x); } return 0.5 * log(x); } float cbrt(float x) { if (x >= 0.0) { return pow(x, 1.0 / 3.0); } else { return -pow(x, 1.0 / 3.0); } } float cosh(float x) { return (pow(${Math.E}, x) + pow(${Math.E}, -x)) / 2.0; } float expm1(float x) { return pow(${Math.E}, x) - 1.0; } float fround(highp float x) { return x; } float imul(float v1, float v2) { return float(int(v1) * int(v2)); } float log10(float x) { return log2(x) * (1.0 / log2(10.0)); } float log1p(float x) { return log(1.0 + x); } float _pow(float v1, float v2) { if (v2 == 0.0) return 1.0; return pow(v1, v2); } float tanh(float x) { float e = exp(2.0 * x); return (e - 1.0) / (e + 1.0); } float trunc(float x) { if (x >= 0.0) { return floor(x); } else { return ceil(x); } } vec4 _round(vec4 x) { return floor(x + 0.5); } float _round(float x) { return floor(x + 0.5); } const int BIT_COUNT = 32; int modi(int x, int y) { return x - y * (x / y); } int bitwiseOr(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BIT_COUNT; i++) { if ((modi(a, 2) == 1) || (modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if(!(a > 0 || b > 0)) { break; } } return result; } int bitwiseXOR(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BIT_COUNT; i++) { if ((modi(a, 2) == 1) != (modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if(!(a > 0 || b > 0)) { break; } } return result; } int bitwiseAnd(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BIT_COUNT; i++) { if ((modi(a, 2) == 1) && (modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if(!(a > 0 && b > 0)) { break; } } return result; } int bitwiseNot(int a) { int result = 0; int n = 1; for (int i = 0; i < BIT_COUNT; i++) { if (modi(a, 2) == 0) { result += n; } a = a / 2; n = n * 2; } return result; } int bitwiseZeroFillLeftShift(int n, int shift) { int maxBytes = BIT_COUNT; for (int i = 0; i < BIT_COUNT; i++) { if (maxBytes >= n) { break; } maxBytes *= 2; } for (int i = 0; i < BIT_COUNT; i++) { if (i >= shift) { break; } n *= 2; } int result = 0; int byteVal = 1; for (int i = 0; i < BIT_COUNT; i++) { if (i >= maxBytes) break; if (modi(n, 2) > 0) { result += byteVal; } n = int(n / 2); byteVal *= 2; } return result; } int bitwiseSignedRightShift(int num, int shifts) { return int(floor(float(num) / pow(2.0, float(shifts)))); } int bitwiseZeroFillRightShift(int n, int shift) { int maxBytes = BIT_COUNT; for (int i = 0; i < BIT_COUNT; i++) { if (maxBytes >= n) { break; } maxBytes *= 2; } for (int i = 0; i < BIT_COUNT; i++) { if (i >= shift) { break; } n /= 2; } int result = 0; int byteVal = 1; for (int i = 0; i < BIT_COUNT; i++) { if (i >= maxBytes) break; if (modi(n, 2) > 0) { result += byteVal; } n = int(n / 2); byteVal *= 2; } return result; } vec2 integerMod(vec2 x, float y) { vec2 res = floor(mod(x, y)); return res * step(1.0 - floor(y), -res); } vec3 integerMod(vec3 x, float y) { vec3 res = floor(mod(x, y)); return res * step(1.0 - floor(y), -res); } vec4 integerMod(vec4 x, vec4 y) { vec4 res = floor(mod(x, y)); return res * step(1.0 - floor(y), -res); } float integerMod(float x, float y) { float res = floor(mod(x, y)); return res * (res > floor(y) - 1.0 ? 0.0 : 1.0); } int integerMod(int x, int y) { return x - (y * int(x / y)); } __DIVIDE_WITH_INTEGER_CHECK__; // Here be dragons! // DO NOT OPTIMIZE THIS CODE // YOU WILL BREAK SOMETHING ON SOMEBODY\'S MACHINE // LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME const vec2 MAGIC_VEC = vec2(1.0, -256.0); const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0); const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536 float decode32(vec4 texel) { __DECODE32_ENDIANNESS__; texel *= 255.0; vec2 gte128; gte128.x = texel.b >= 128.0 ? 1.0 : 0.0; gte128.y = texel.a >= 128.0 ? 1.0 : 0.0; float exponent = 2.0 * texel.a - 127.0 + dot(gte128, MAGIC_VEC); float res = exp2(_round(exponent)); texel.b = texel.b - 128.0 * gte128.x; res = dot(texel, SCALE_FACTOR) * exp2(_round(exponent-23.0)) + res; res *= gte128.y * -2.0 + 1.0; return res; } float decode16(vec4 texel, int index) { int channel = integerMod(index, 2); if (channel == 0) return texel.r * 255.0 + texel.g * 65280.0; if (channel == 1) return texel.b * 255.0 + texel.a * 65280.0; return 0.0; } float decode8(vec4 texel, int index) { int channel = integerMod(index, 4); if (channel == 0) return texel.r * 255.0; if (channel == 1) return texel.g * 255.0; if (channel == 2) return texel.b * 255.0; if (channel == 3) return texel.a * 255.0; return 0.0; } vec4 legacyEncode32(float f) { float F = abs(f); float sign = f < 0.0 ? 1.0 : 0.0; float exponent = floor(log2(F)); float mantissa = (exp2(-exponent) * F); // exponent += floor(log2(mantissa)); vec4 texel = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV; texel.rg = integerMod(texel.rg, 256.0); texel.b = integerMod(texel.b, 128.0); texel.a = exponent*0.5 + 63.5; texel.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0; texel = floor(texel); texel *= 0.003921569; // 1/255 __ENCODE32_ENDIANNESS__; return texel; } // https://github.com/gpujs/gpu.js/wiki/Encoder-details vec4 encode32(float value) { if (value == 0.0) return vec4(0, 0, 0, 0); float exponent; float mantissa; vec4 result; float sgn; sgn = step(0.0, -value); value = abs(value); exponent = floor(log2(value)); mantissa = value*pow(2.0, -exponent)-1.0; exponent = exponent+127.0; result = vec4(0,0,0,0); result.a = floor(exponent/2.0); exponent = exponent - result.a*2.0; result.a = result.a + 128.0*sgn; result.b = floor(mantissa * 128.0); mantissa = mantissa - result.b / 128.0; result.b = result.b + exponent*128.0; result.g = floor(mantissa*32768.0); mantissa = mantissa - result.g/32768.0; result.r = floor(mantissa*8388608.0); return result/255.0; } // Dragons end here int index; ivec3 threadId; ivec3 indexTo3D(int idx, ivec3 texDim) { int z = int(idx / (texDim.x * texDim.y)); idx -= z * int(texDim.x * texDim.y); int y = int(idx / texDim.x); int x = int(integerMod(idx, texDim.x)); return ivec3(x, y, z); } float get32(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + texDim.x * (y + texDim.y * z); int w = texSize.x; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; vec4 texel = texture2D(tex, st / vec2(texSize)); return decode32(texel); } float get16(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + texDim.x * (y + texDim.y * z); int w = texSize.x * 2; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; vec4 texel = texture2D(tex, st / vec2(texSize.x * 2, texSize.y)); return decode16(texel, index); } float get8(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + texDim.x * (y + texDim.y * z); int w = texSize.x * 4; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; vec4 texel = texture2D(tex, st / vec2(texSize.x * 4, texSize.y)); return decode8(texel, index); } float getMemoryOptimized32(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + texDim.x * (y + texDim.y * z); int channel = integerMod(index, 4); index = index / 4; int w = texSize.x; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; vec4 texel = texture2D(tex, st / vec2(texSize)); if (channel == 0) return texel.r; if (channel == 1) return texel.g; if (channel == 2) return texel.b; if (channel == 3) return texel.a; return 0.0; } vec4 getImage2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + texDim.x * (y + texDim.y * z); int w = texSize.x; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; return texture2D(tex, st / vec2(texSize)); } float getFloatFromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { vec4 result = getImage2D(tex, texSize, texDim, z, y, x); return result[0]; } vec2 getVec2FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { vec4 result = getImage2D(tex, texSize, texDim, z, y, x); return vec2(result[0], result[1]); } vec2 getMemoryOptimizedVec2(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + (texDim.x * (y + (texDim.y * z))); int channel = integerMod(index, 2); index = index / 2; int w = texSize.x; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; vec4 texel = texture2D(tex, st / vec2(texSize)); if (channel == 0) return vec2(texel.r, texel.g); if (channel == 1) return vec2(texel.b, texel.a); return vec2(0.0, 0.0); } vec3 getVec3FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { vec4 result = getImage2D(tex, texSize, texDim, z, y, x); return vec3(result[0], result[1], result[2]); } vec3 getMemoryOptimizedVec3(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int fieldIndex = 3 * (x + texDim.x * (y + texDim.y * z)); int vectorIndex = fieldIndex / 4; int vectorOffset = fieldIndex - vectorIndex * 4; int readY = vectorIndex / texSize.x; int readX = vectorIndex - readY * texSize.x; vec4 tex1 = texture2D(tex, (vec2(readX, readY) + 0.5) / vec2(texSize)); if (vectorOffset == 0) { return tex1.xyz; } else if (vectorOffset == 1) { return tex1.yzw; } else { readX++; if (readX >= texSize.x) { readX = 0; readY++; } vec4 tex2 = texture2D(tex, vec2(readX, readY) / vec2(texSize)); if (vectorOffset == 2) { return vec3(tex1.z, tex1.w, tex2.x); } else { return vec3(tex1.w, tex2.x, tex2.y); } } } vec4 getVec4FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { return getImage2D(tex, texSize, texDim, z, y, x); } vec4 getMemoryOptimizedVec4(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) { int index = x + texDim.x * (y + texDim.y * z); int channel = integerMod(index, 2); int w = texSize.x; vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5; vec4 texel = texture2D(tex, st / vec2(texSize)); return vec4(texel.r, texel.g, texel.b, texel.a); } vec4 actualColor; void color(float r, float g, float b, float a) { actualColor = vec4(r,g,b,a); } void color(float r, float g, float b) { color(r,g,b,1.0); } void color(sampler2D image) { actualColor = texture2D(image, vTexCoord); } float modulo(float number, float divisor) { if (number < 0.0) { number = abs(number); if (divisor < 0.0) { divisor = abs(divisor); } return -mod(number, divisor); } if (divisor < 0.0) { divisor = abs(divisor); } return mod(number, divisor); } __INJECTED_NATIVE__; __MAIN_CONSTANTS__; __MAIN_ARGUMENTS__; __KERNEL__; void main(void) { index = int(vTexCoord.s * float(uTexSize.x)) + int(vTexCoord.t * float(uTexSize.y)) * uTexSize.x; __MAIN_RESULT__; }`; module.exports = { fragmentShader };