author     Antti Määttä <antti.maatta@qt.io>    2019-12-11 13:27:07 +0200
committer  Antti Määttä <antti.maatta@qt.io>    2019-12-13 11:14:49 +0200
commit     e363af8a8b660f0268c4b649963d8bd60c393d84 (patch)
tree       26e8670f55af17a13c3fde4ad23023b6fd408eb3
parent     7846b24ed4c2e753f5bc6bf5fc72e506ce4373f9 (diff)
Support RGBE HDR images directly
Do not convert RGBE images to 16-bit floating point, but use them directly in the shaders.

Task-number: QT3DS-4031
Change-Id: Iecfc775247553d5fd8976c77c572435462b79e64
Reviewed-by: Janne Kangas <janne.kangas@qt.io>
Reviewed-by: Tomi Korpipää <tomi.korpipaa@qt.io>
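For context, RGBE8 packs an HDR texel as three 8-bit mantissas plus one shared 8-bit exponent, so the linear value of each channel is mantissa/255 * 2^(exponent - 128). The sketch below is a minimal standalone illustration of that mapping; it mirrors the M8E8 encoder and the shader decode introduced by this patch but is not the patch code itself, and the names are illustrative:

// Minimal RGBE8 round-trip sketch (assumed helper, not part of the commit).
#include <algorithm>
#include <cmath>
#include <cstdint>

struct RGBE8 { uint8_t r, g, b, e; };

inline RGBE8 encodeRGBE(float r, float g, float b)
{
    // Shared exponent is taken from the largest channel.
    const float maxC = std::max(r, std::max(g, b));
    if (maxC <= 0.0f)
        return {0, 0, 0, 0};
    const float exponent = 1.0f + std::floor(std::log2(maxC));
    const float scale = 255.0f / std::pow(2.0f, exponent);
    return { uint8_t(r * scale), uint8_t(g * scale), uint8_t(b * scale),
             uint8_t(exponent + 128.0f) };
}

inline void decodeRGBE(const RGBE8 &in, float out[3])
{
    // value = mantissa/255 * 2^(e - 128)
    const float scale = std::pow(2.0f, int(in.e) - 128) / 255.0f;
    out[0] = in.r * scale;
    out[1] = in.g * scale;
    out[2] = in.b * scale;
}

On the GLSL side the same decode appears as ret.rgb * pow(2.0, ret.a * 255.0 - 128.0), because the sampler already returns the bytes normalized to the 0..1 range.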
-rw-r--r--  res/effectlib/gles2/sampleProbe.glsllib                             224
-rw-r--r--  res/effectlib/sampleProbe.glsllib                                   224
-rw-r--r--  src/render/Qt3DSRenderBaseTypes.h                                   179
-rw-r--r--  src/render/Qt3DSRenderTexture2D.cpp                                  10
-rw-r--r--  src/render/backends/gl/Qt3DSOpenGLUtil.h                              2
-rw-r--r--  src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp        10
-rw-r--r--  src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp     39
-rw-r--r--  src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp    346
-rw-r--r--  src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h        9
9 files changed, 404 insertions, 639 deletions
diff --git a/res/effectlib/gles2/sampleProbe.glsllib b/res/effectlib/gles2/sampleProbe.glsllib
index f785918..fb202e5 100644
--- a/res/effectlib/gles2/sampleProbe.glsllib
+++ b/res/effectlib/gles2/sampleProbe.glsllib
@@ -39,6 +39,8 @@
#define QT3DS_ENABLE_IBL_FOV 0
#endif
+#define USE_RGBE
+
uniform sampler2D light_probe;
uniform vec4 light_probe_props;
uniform vec4 light_probe_rotation;
@@ -75,17 +77,9 @@ mat3 tangentFrame( vec3 N, vec3 p )
// get edge vectors of the pixel triangle
vec3 dp1 = dFdx( p );
vec3 dp2 = dFdy( p );
- // Using dPdu and dPdv would be nicer, but the nature of our materials
- // are not ones with intrinsic UVs, so we can't really go there.
-// vec2 duv1 = dFdx( uv );
-// vec2 duv2 = dFdy( uv );
-
// solve the linear system
vec3 dp2perp = cross( dp2, N );
vec3 dp1perp = cross( N, dp1 );
-// vec3 T = dp2perp * duv1.x + dp1perp * duv2.x;
-// vec3 B = dp2perp * duv1.y + dp1perp * duv2.y;
-
vec3 T = normalize(dp1perp);
vec3 B = normalize(dp2perp);
return mat3( T , B , N );
@@ -99,6 +93,16 @@ vec2 transformSample( vec2 origUV, vec4 probeRot, vec2 probeOfs )
return retUV;
}
+vec3 textureProbe(sampler2D lightProbe, vec2 coord, float lod)
+{
+#ifdef USE_RGBE
+ vec4 ret = textureLod(lightProbe, coord, lod);
+ return ret.rgb * pow(2.0, ret.a * 255.0 - 128.0);
+#else
+ return textureLod(lightProbe, coord, lod).rgb;
+#endif
+}
+
// This is broken out into its own routine so that if we get some other
// format image than a lat-long, then we can account for that by changing
// the code here alone.
@@ -142,7 +146,7 @@ vec4 getTopLayerSample( vec3 inDir, float lodShift, vec3 lodOffsets )
vec3 getProbeSample( vec3 smpDir, float lodShift, vec3 normal )
{
vec2 smpUV = getProbeSampleUV( smpDir, light_probe_rotation, light_probe_offset.xy );
- return textureLod( light_probe, smpUV , lodShift ).xyz;
+ return textureProbe( light_probe, smpUV , lodShift );
}
vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3 normal )
@@ -184,10 +188,10 @@ vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3
lodShift = max( lodShift, minLod );
- vec3 retVal = 0.4 * textureLod( light_probe, smpUV , lodShift ).xyz;
- retVal += 0.2 * textureLod( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) ).xyz;
- retVal += 0.3 * textureLod( light_probe, smpUV , lodShift+lodOffsets.y ).xyz;
- retVal += 0.1 * textureLod( light_probe, smpUV , lodShift+lodOffsets.z ).xyz;
+ vec3 retVal = 0.4 * textureProbe( light_probe, smpUV , lodShift );
+ retVal += 0.2 * textureProbe( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) );
+ retVal += 0.3 * textureProbe( light_probe, smpUV , lodShift+lodOffsets.y );
+ retVal += 0.1 * textureProbe( light_probe, smpUV , lodShift+lodOffsets.z );
#if QT3DS_ENABLE_LIGHT_PROBE_2
vec4 topSmp = getTopLayerSample( smpDir, lodShift, lodOffsets );
@@ -257,9 +261,9 @@ vec3 getProbeAnisoSample( vec3 smpDir, float roughU, float roughV, mat3 tanFrame
wt = sigma / (sigma + float(i * i));
vec2 uv0 = getProbeSampleUV(normalize(smpDir + smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy);
vec2 uv1 = getProbeSampleUV(normalize(smpDir - smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy);
- result.xyz += wt * textureLod( light_probe, uv0 , lodMin ).xyz;
+ result.xyz += wt * textureProbe( light_probe, uv0 , lodMin );
result.w += wt;
- result.xyz += wt * textureLod( light_probe, uv1 , lodMin ).xyz;
+ result.xyz += wt * textureProbe( light_probe, uv1 , lodMin );
result.w += wt;
}
@@ -272,78 +276,7 @@ vec4 sampleDiffuse( mat3 tanFrame )
if ( light_probe_props.w < 0.005 )
return vec4( 0.0 );
-// if ( light_probe_offset.w > 0.5 )
-// {
- // The LOD offset comes from the assumption that a full diffuse convolution
- // has a support of pi/2, which translates into x pixels, and the base 2 log
- // gives us this LOD... Technically, "x" pixels depends on what the original
- // texture resolution was, which is why we use light_probe_offset.w, which holds
- // the number of mip levels the texture has.
-
- return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 );
-// }
-
- /*
- // PKC -- the code below is for full-blown IBL, which we'll skip for now
-
- // Hand-calculated Hammersley points for t = 2, n = 33
- // I exclude the 0,0 first point, hence why n=33 and not 32
- // Nice thing about 2d Hammersley points is that any subset is
- // also stratified, so even if I have 1000 points and truncate
- // anywhere, I'm fine. Each of these represent the y of an xy
- // while x for the kth point is always (k+1)/n.
- float kernel[32];
- kernel[0] = 0.5; kernel[1] = 0.25;
- kernel[2] = 0.75; kernel[3] = 0.125;
- kernel[4] = 0.625; kernel[5] = 0.375;
- kernel[6] = 0.875; kernel[7] = 0.0625;
- kernel[8] = 0.5625; kernel[9] = 0.3125;
- kernel[10] = 0.8125; kernel[11] = 0.1875;
- kernel[12] = 0.6875; kernel[13] = 0.4375;
- kernel[14] = 0.9375; kernel[15] = 0.03125;
- kernel[16] = 0.53125; kernel[17] = 0.28125;
- kernel[18] = 0.78125; kernel[19] = 0.15625;
- kernel[20] = 0.65625; kernel[21] = 0.40625;
- kernel[22] = 0.90625; kernel[23] = 0.09375;
- kernel[24] = 0.59375; kernel[25] = 0.34375;
- kernel[26] = 0.84375; kernel[27] = 0.28175;
- kernel[28] = 0.71875; kernel[29] = 0.46875;
- kernel[30] = 0.96875; kernel[31] = 0.015625;
-
- float phiShift = noise1d(gl_FragCoord.xy) - 0.5;
-
- vec3 ret = vec3(0, 0, 0);
-
- int ct = 24;
- float step = 25.0;
-
- // Importance sampling a cosine-weighted distribution. Since this
- // matches the BSDF exactly, we are just going to assume that the PDF
- // and the BSDF cancel out in sampling, so we just need to accumulate
- // texture colors. The noise function puts randomized "twist" into
- // the sampled directions.
- for( int i = 0; i < ct; ++i )
- {
- vec3 localDir;
- float phi = 6.28318530718 * (kernel[i] + phiShift);
- float cosTheta = sqrt( float(i+1) / step);
- localDir.z = sqrt(1.0 - cosTheta*cosTheta);
- localDir.x = cos(phi) * cosTheta;
- localDir.y = sin(phi) * cosTheta;
- vec3 smpDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z;
-
-
- float lodShift = light_probe_offset.w - 2 + log2( 3.1415926535 / (localDir.z * step) );
- vec3 smpColor = getProbeSample( smpDir, lodShift, tanFrame[2] );
-
- // The assumption here is that the BSDF and the sampling PDF are identical
- // so they cancel out and therefore, we don't need to include it here.
- ret += smpColor;
- }
-
- ret *= aoFactor / 24.0;
- return ret;
- */
+ return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 );
}
vec4 sampleDiffuseCustomMaterial( vec3 normal, vec3 worldPos, float aoFactor )
@@ -358,114 +291,23 @@ vec4 sampleGlossyAniso( mat3 tanFrame, vec3 viewDir, float roughU, float roughV
if ( light_probe_props.w < 0.005 )
return vec4( 0.0 );
- // PKC : If we do the full IBL sampling, it's useful to square the roughnesses because
- // it makes the effect of roughness feel more linear in the low end. This isn't necessary
- // for fast IBL.
-// float sigmaU = clamp(roughU*roughU, 0.0001, 1.0);
-// float sigmaV = clamp(roughV*roughV, 0.0001, 1.0);
float sigmaU = smoothstep( 0.0, 1.0, clamp(roughU, 0.0001, 1.0) );
float sigmaV = smoothstep( 0.0, 1.0, clamp(roughV, 0.0001, 1.0) );
vec3 ret = vec3(0, 0, 0);
-// if ( light_probe_offset.w > 0.5 )
-// {
- vec3 smpDir = reflect( -viewDir, tanFrame[2] );
- float sigma = sqrt(sigmaU * sigmaV);
-
- // Compute the Geometric occlusion/self-shadowing term
- float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995);
- float k = sigma * 0.31830988618; // roughness / pi
- float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 );
-
- vec3 outColor;
-
- outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame );
-
- return vec4( light_probe_props.w * Gl * outColor, 1.0 );
-// }
-
- // PKC -- the code below is for full-blown IBL, which we'll skip for now
-
-/*
- float step = clamp( ceil(32.0 * sqrt(max(sigmaU, sigmaV))), 4.0, 32.0 );
- int actualCt = int(step);
- float phiShift = noise1d(gl_FragCoord.xy) - 0.5;
-
- // Hand-calculated Hammersley points for t = 2, n = 33
- // I exclude the 0,0 first point, hence why n=33 and not 32
- // Nice thing about 2d Hammersley points is that any subset is
- // also stratified, so even if I have 1000 points and truncate
- // anywhere, I'm fine. Each of these represent the y of an xy
- // while x for the kth point is always (k+1)/n.
- float kernel[32];
- kernel[0] = 0.5; kernel[1] = 0.25;
- kernel[2] = 0.75; kernel[3] = 0.125;
- kernel[4] = 0.625; kernel[5] = 0.375;
- kernel[6] = 0.875; kernel[7] = 0.0625;
- kernel[8] = 0.5625; kernel[9] = 0.3125;
- kernel[10] = 0.8125; kernel[11] = 0.1875;
- kernel[12] = 0.6875; kernel[13] = 0.4375;
- kernel[14] = 0.9375; kernel[15] = 0.03125;
- kernel[16] = 0.53125; kernel[17] = 0.28125;
- kernel[18] = 0.78125; kernel[19] = 0.15625;
- kernel[20] = 0.65625; kernel[21] = 0.40625;
- kernel[22] = 0.90625; kernel[23] = 0.09375;
- kernel[24] = 0.59375; kernel[25] = 0.34375;
- kernel[26] = 0.84375; kernel[27] = 0.28175;
- kernel[28] = 0.71875; kernel[29] = 0.46875;
- kernel[30] = 0.96875; kernel[31] = 0.015625;
-
- float thetaI = acos( dot(viewDir, tanFrame[2]) );
-
- // NOTE : The model I'm using here is actually based on the KGGX model used in
- // physGlossyBSDF. This is my own variation on the original GGX which uses something
- // closer to a pure Cauchy distribution in tangent space, but also supports anisotropy.
- for (int i = 0; i < actualCt; ++i)
- {
- vec3 localDir;
-
- float phi = 6.28318530718 * (kernel[i] + phiShift);
- float u = float(i + 1) / (step + 1.0);
- float rU = cos(phi) * sigmaU;
- float rV = sin(phi) * sigmaV;
- float sigma = sqrt(rU * rU + rV * rV);
-
- float boundA = atan( ((thetaI - 1.57079632679) * 0.5) / sigma );
- float boundB = atan( ((thetaI + 1.57079632679) * 0.5) / sigma );
- float t = (1.0 - u) * boundA + u * boundB;
- float thetaH = tan( t ) * sigma;
-
- float cosThetaH = cos( thetaH );
- float sinThetaH = sin( thetaH );
- localDir.z = cosThetaH;
- localDir.y = sin(phi) * sinThetaH;
- localDir.x = cos(phi) * sinThetaH;
-
- vec3 halfDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z;
- halfDir = normalize(halfDir);
- vec3 smpDir = reflect( -viewDir, halfDir );
-
- vec2 scaledXY = localDir.xy / vec2(sigmaU, sigmaV);
- float PDF = (sigmaU*sigmaV) / (sigmaU*sigmaV + dot(scaledXY, scaledXY));
- vec3 Haf = smpDir + viewDir; // We need the unnormalized half vecter as well as the normalized one
- float HdotL = dot(halfDir, smpDir);
- // normalize the PDF to compute the filter support
- // This gives us the ideal miplevel at which to sample the texture map.
- PDF *= dot(Haf, Haf) / (4.0 * dot(Haf, smpDir) * HdotL * sigmaU*sigmaV * (boundB-boundA)*(boundB-boundA));
-
- // Again assuming that the pdf and BSDF are equivalent -- that's not generally valid,
- // but it saves a lot of ALU cycles.
- float lodShift = log2( 512.0 * sigma / PDF );
-
- float k = sigma * 0.31830988618; // roughness / pi
- float Gl = clamp( (HdotL / (HdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 );
-
- vec3 smpColor = Gl * getProbeSample( smpDir, lodShift, tanFrame[2] );
- ret += smpColor;
- }
- ret /= float(actualCt);
- return vec4(ret, 1.0);
-*/
+ vec3 smpDir = reflect( -viewDir, tanFrame[2] );
+ float sigma = sqrt(sigmaU * sigmaV);
+
+ // Compute the Geometric occlusion/self-shadowing term
+ float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995);
+ float k = sigma * 0.31830988618; // roughness / pi
+ float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 );
+
+ vec3 outColor;
+
+ outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame );
+
+ return vec4( light_probe_props.w * Gl * outColor, 1.0 );
}
vec4 sampleGlossy( mat3 tanFrame, vec3 viewDir, float roughness )
diff --git a/res/effectlib/sampleProbe.glsllib b/res/effectlib/sampleProbe.glsllib
index 6556e51..361aad5 100644
--- a/res/effectlib/sampleProbe.glsllib
+++ b/res/effectlib/sampleProbe.glsllib
@@ -31,6 +31,8 @@
#ifndef SAMPLE_PROBE_GLSLLIB
#define SAMPLE_PROBE_GLSLLIB 1
+#define USE_RGBE
+
uniform sampler2D light_probe;
uniform vec4 light_probe_props;
uniform vec4 light_probe_rotation;
@@ -65,17 +67,9 @@ mat3 tangentFrame( vec3 N, vec3 p )
// get edge vectors of the pixel triangle
vec3 dp1 = dFdx( p );
vec3 dp2 = dFdy( p );
- // Using dPdu and dPdv would be nicer, but the nature of our materials
- // are not ones with intrinsic UVs, so we can't really go there.
-// vec2 duv1 = dFdx( uv );
-// vec2 duv2 = dFdy( uv );
-
// solve the linear system
vec3 dp2perp = cross( dp2, N );
vec3 dp1perp = cross( N, dp1 );
-// vec3 T = dp2perp * duv1.x + dp1perp * duv2.x;
-// vec3 B = dp2perp * duv1.y + dp1perp * duv2.y;
-
vec3 T = normalize(dp1perp);
vec3 B = normalize(dp2perp);
return mat3( T , B , N );
@@ -89,6 +83,16 @@ vec2 transformSample( vec2 origUV, vec4 probeRot, vec2 probeOfs )
return retUV;
}
+vec3 textureProbe(sampler2D lightProbe, vec2 coord, float lod)
+{
+#ifdef USE_RGBE
+ vec4 ret = textureLod(lightProbe, coord, lod);
+ return ret.rgb * pow(2.0, ret.a * 255.0 - 128.0);
+#else
+ return textureLod(lightProbe, coord, lod).rgb;
+#endif
+}
+
// This is broken out into its own routine so that if we get some other
// format image than a lat-long, then we can account for that by changing
// the code here alone.
@@ -138,7 +142,7 @@ vec4 getTopLayerSample( vec3 inDir, float lodShift, vec3 lodOffsets )
vec3 getProbeSample( vec3 smpDir, float lodShift, vec3 normal )
{
vec2 smpUV = getProbeSampleUV( smpDir, light_probe_rotation, light_probe_offset.xy );
- return textureLod( light_probe, smpUV , lodShift ).xyz;
+ return textureProbe( light_probe, smpUV , lodShift );
}
vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3 normal )
@@ -180,10 +184,10 @@ vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3
lodShift = max( lodShift, minLod );
- vec3 retVal = 0.4 * textureLod( light_probe, smpUV , lodShift ).xyz;
- retVal += 0.2 * textureLod( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) ).xyz;
- retVal += 0.3 * textureLod( light_probe, smpUV , lodShift+lodOffsets.y ).xyz;
- retVal += 0.1 * textureLod( light_probe, smpUV , lodShift+lodOffsets.z ).xyz;
+ vec3 retVal = 0.4 * textureProbe( light_probe, smpUV , lodShift );
+ retVal += 0.2 * textureProbe( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) );
+ retVal += 0.3 * textureProbe( light_probe, smpUV , lodShift+lodOffsets.y );
+ retVal += 0.1 * textureProbe( light_probe, smpUV , lodShift+lodOffsets.z );
#if QT3DS_ENABLE_LIGHT_PROBE_2
vec4 topSmp = getTopLayerSample( smpDir, lodShift, lodOffsets );
@@ -248,9 +252,9 @@ vec3 getProbeAnisoSample( vec3 smpDir, float roughU, float roughV, mat3 tanFrame
wt = sigma / (sigma + float(i * i));
vec2 uv0 = getProbeSampleUV(normalize(smpDir + smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy);
vec2 uv1 = getProbeSampleUV(normalize(smpDir - smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy);
- result.xyz += wt * textureLod( light_probe, uv0 , lodMin ).xyz;
+ result.xyz += wt * textureProbe( light_probe, uv0 , lodMin );
result.w += wt;
- result.xyz += wt * textureLod( light_probe, uv1 , lodMin ).xyz;
+ result.xyz += wt * textureProbe( light_probe, uv1 , lodMin );
result.w += wt;
}
@@ -263,78 +267,7 @@ vec4 sampleDiffuse( mat3 tanFrame )
if ( light_probe_props.w < 0.005 )
return vec4( 0.0 );
-// if ( light_probe_offset.w > 0.5 )
-// {
- // The LOD offset comes from the assumption that a full diffuse convolution
- // has a support of pi/2, which translates into x pixels, and the base 2 log
- // gives us this LOD... Technically, "x" pixels depends on what the original
- // texture resolution was, which is why we use light_probe_offset.w, which holds
- // the number of mip levels the texture has.
-
- return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 );
-// }
-
- /*
- // PKC -- the code below is for full-blown IBL, which we'll skip for now
-
- // Hand-calculated Hammersley points for t = 2, n = 33
- // I exclude the 0,0 first point, hence why n=33 and not 32
- // Nice thing about 2d Hammersley points is that any subset is
- // also stratified, so even if I have 1000 points and truncate
- // anywhere, I'm fine. Each of these represent the y of an xy
- // while x for the kth point is always (k+1)/n.
- float kernel[32];
- kernel[0] = 0.5; kernel[1] = 0.25;
- kernel[2] = 0.75; kernel[3] = 0.125;
- kernel[4] = 0.625; kernel[5] = 0.375;
- kernel[6] = 0.875; kernel[7] = 0.0625;
- kernel[8] = 0.5625; kernel[9] = 0.3125;
- kernel[10] = 0.8125; kernel[11] = 0.1875;
- kernel[12] = 0.6875; kernel[13] = 0.4375;
- kernel[14] = 0.9375; kernel[15] = 0.03125;
- kernel[16] = 0.53125; kernel[17] = 0.28125;
- kernel[18] = 0.78125; kernel[19] = 0.15625;
- kernel[20] = 0.65625; kernel[21] = 0.40625;
- kernel[22] = 0.90625; kernel[23] = 0.09375;
- kernel[24] = 0.59375; kernel[25] = 0.34375;
- kernel[26] = 0.84375; kernel[27] = 0.28175;
- kernel[28] = 0.71875; kernel[29] = 0.46875;
- kernel[30] = 0.96875; kernel[31] = 0.015625;
-
- float phiShift = noise1d(gl_FragCoord.xy) - 0.5;
-
- vec3 ret = vec3(0, 0, 0);
-
- int ct = 24;
- float step = 25.0;
-
- // Importance sampling a cosine-weighted distribution. Since this
- // matches the BSDF exactly, we are just going to assume that the PDF
- // and the BSDF cancel out in sampling, so we just need to accumulate
- // texture colors. The noise function puts randomized "twist" into
- // the sampled directions.
- for( int i = 0; i < ct; ++i )
- {
- vec3 localDir;
- float phi = 6.28318530718 * (kernel[i] + phiShift);
- float cosTheta = sqrt( float(i+1) / step);
- localDir.z = sqrt(1.0 - cosTheta*cosTheta);
- localDir.x = cos(phi) * cosTheta;
- localDir.y = sin(phi) * cosTheta;
- vec3 smpDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z;
-
-
- float lodShift = light_probe_offset.w - 2 + log2( 3.1415926535 / (localDir.z * step) );
- vec3 smpColor = getProbeSample( smpDir, lodShift, tanFrame[2] );
-
- // The assumption here is that the BSDF and the sampling PDF are identical
- // so they cancel out and therefore, we don't need to include it here.
- ret += smpColor;
- }
-
- ret *= aoFactor / 24.0;
- return ret;
- */
+ return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 );
}
vec4 sampleDiffuseCustomMaterial( vec3 normal, vec3 worldPos, float aoFactor )
@@ -349,114 +282,23 @@ vec4 sampleGlossyAniso( mat3 tanFrame, vec3 viewDir, float roughU, float roughV
if ( light_probe_props.w < 0.005 )
return vec4( 0.0 );
- // PKC : If we do the full IBL sampling, it's useful to square the roughnesses because
- // it makes the effect of roughness feel more linear in the low end. This isn't necessary
- // for fast IBL.
-// float sigmaU = clamp(roughU*roughU, 0.0001, 1.0);
-// float sigmaV = clamp(roughV*roughV, 0.0001, 1.0);
float sigmaU = smoothstep( 0.0, 1.0, clamp(roughU, 0.0001, 1.0) );
float sigmaV = smoothstep( 0.0, 1.0, clamp(roughV, 0.0001, 1.0) );
vec3 ret = vec3(0, 0, 0);
-// if ( light_probe_offset.w > 0.5 )
-// {
- vec3 smpDir = reflect( -viewDir, tanFrame[2] );
- float sigma = sqrt(sigmaU * sigmaV);
-
- // Compute the Geometric occlusion/self-shadowing term
- float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995);
- float k = sigma * 0.31830988618; // roughness / pi
- float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 );
-
- vec3 outColor;
-
- outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame );
-
- return vec4( light_probe_props.w * Gl * outColor, 1.0 );
-// }
-
- // PKC -- the code below is for full-blown IBL, which we'll skip for now
-
-/*
- float step = clamp( ceil(32.0 * sqrt(max(sigmaU, sigmaV))), 4.0, 32.0 );
- int actualCt = int(step);
- float phiShift = noise1d(gl_FragCoord.xy) - 0.5;
-
- // Hand-calculated Hammersley points for t = 2, n = 33
- // I exclude the 0,0 first point, hence why n=33 and not 32
- // Nice thing about 2d Hammersley points is that any subset is
- // also stratified, so even if I have 1000 points and truncate
- // anywhere, I'm fine. Each of these represent the y of an xy
- // while x for the kth point is always (k+1)/n.
- float kernel[32];
- kernel[0] = 0.5; kernel[1] = 0.25;
- kernel[2] = 0.75; kernel[3] = 0.125;
- kernel[4] = 0.625; kernel[5] = 0.375;
- kernel[6] = 0.875; kernel[7] = 0.0625;
- kernel[8] = 0.5625; kernel[9] = 0.3125;
- kernel[10] = 0.8125; kernel[11] = 0.1875;
- kernel[12] = 0.6875; kernel[13] = 0.4375;
- kernel[14] = 0.9375; kernel[15] = 0.03125;
- kernel[16] = 0.53125; kernel[17] = 0.28125;
- kernel[18] = 0.78125; kernel[19] = 0.15625;
- kernel[20] = 0.65625; kernel[21] = 0.40625;
- kernel[22] = 0.90625; kernel[23] = 0.09375;
- kernel[24] = 0.59375; kernel[25] = 0.34375;
- kernel[26] = 0.84375; kernel[27] = 0.28175;
- kernel[28] = 0.71875; kernel[29] = 0.46875;
- kernel[30] = 0.96875; kernel[31] = 0.015625;
-
- float thetaI = acos( dot(viewDir, tanFrame[2]) );
-
- // NOTE : The model I'm using here is actually based on the KGGX model used in
- // physGlossyBSDF. This is my own variation on the original GGX which uses something
- // closer to a pure Cauchy distribution in tangent space, but also supports anisotropy.
- for (int i = 0; i < actualCt; ++i)
- {
- vec3 localDir;
-
- float phi = 6.28318530718 * (kernel[i] + phiShift);
- float u = float(i + 1) / (step + 1.0);
- float rU = cos(phi) * sigmaU;
- float rV = sin(phi) * sigmaV;
- float sigma = sqrt(rU * rU + rV * rV);
-
- float boundA = atan( ((thetaI - 1.57079632679) * 0.5) / sigma );
- float boundB = atan( ((thetaI + 1.57079632679) * 0.5) / sigma );
- float t = (1.0 - u) * boundA + u * boundB;
- float thetaH = tan( t ) * sigma;
-
- float cosThetaH = cos( thetaH );
- float sinThetaH = sin( thetaH );
- localDir.z = cosThetaH;
- localDir.y = sin(phi) * sinThetaH;
- localDir.x = cos(phi) * sinThetaH;
-
- vec3 halfDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z;
- halfDir = normalize(halfDir);
- vec3 smpDir = reflect( -viewDir, halfDir );
-
- vec2 scaledXY = localDir.xy / vec2(sigmaU, sigmaV);
- float PDF = (sigmaU*sigmaV) / (sigmaU*sigmaV + dot(scaledXY, scaledXY));
- vec3 Haf = smpDir + viewDir; // We need the unnormalized half vecter as well as the normalized one
- float HdotL = dot(halfDir, smpDir);
- // normalize the PDF to compute the filter support
- // This gives us the ideal miplevel at which to sample the texture map.
- PDF *= dot(Haf, Haf) / (4.0 * dot(Haf, smpDir) * HdotL * sigmaU*sigmaV * (boundB-boundA)*(boundB-boundA));
-
- // Again assuming that the pdf and BSDF are equivalent -- that's not generally valid,
- // but it saves a lot of ALU cycles.
- float lodShift = log2( 512.0 * sigma / PDF );
-
- float k = sigma * 0.31830988618; // roughness / pi
- float Gl = clamp( (HdotL / (HdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 );
-
- vec3 smpColor = Gl * getProbeSample( smpDir, lodShift, tanFrame[2] );
- ret += smpColor;
- }
- ret /= float(actualCt);
- return vec4(ret, 1.0);
-*/
+ vec3 smpDir = reflect( -viewDir, tanFrame[2] );
+ float sigma = sqrt(sigmaU * sigmaV);
+
+ // Compute the Geometric occlusion/self-shadowing term
+ float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995);
+ float k = sigma * 0.31830988618; // roughness / pi
+ float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 );
+
+ vec3 outColor;
+
+ outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame );
+
+ return vec4( light_probe_props.w * Gl * outColor, 1.0 );
}
vec4 sampleGlossy( mat3 tanFrame, vec3 viewDir, float roughness )
diff --git a/src/render/Qt3DSRenderBaseTypes.h b/src/render/Qt3DSRenderBaseTypes.h
index 909077d..bfe034b 100644
--- a/src/render/Qt3DSRenderBaseTypes.h
+++ b/src/render/Qt3DSRenderBaseTypes.h
@@ -351,6 +351,7 @@ struct NVRenderRenderBufferFormats
QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(RGBA32F) \
QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(R11G11B10) \
QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(RGB9E5) \
+ QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(RGBE8) \
QT3DS_RENDER_HANDLE_COMPRESSED_TEXTURE_FORMAT(RGBA_DXT1) \
QT3DS_RENDER_HANDLE_COMPRESSED_TEXTURE_FORMAT(RGB_DXT1) \
QT3DS_RENDER_HANDLE_COMPRESSED_TEXTURE_FORMAT(RGBA_DXT3) \
@@ -511,6 +512,7 @@ struct NVRenderTextureFormats
return 4;
case R32F:
return 4;
+ case RGBE8:
case RGBA8:
return 4;
case RGB8:
@@ -571,6 +573,7 @@ struct NVRenderTextureFormats
return 1;
case R32F:
return 1;
+ case RGBE8:
case RGBA8:
return 4;
case RGB8:
@@ -619,181 +622,9 @@ struct NVRenderTextureFormats
}
static void decodeToFloat(void *inPtr, QT3DSU32 byteOfs, float *outPtr,
- NVRenderTextureFormats::Enum inFmt)
- {
- outPtr[0] = 0.0f;
- outPtr[1] = 0.0f;
- outPtr[2] = 0.0f;
- outPtr[3] = 0.0f;
- QT3DSU8 *src = reinterpret_cast<QT3DSU8 *>(inPtr);
- // float divisor; // If we want to support RGBD?
- switch (inFmt) {
- case Alpha8:
- outPtr[0] = ((float)src[byteOfs]) / 255.0f;
- break;
-
- case Luminance8:
- case LuminanceAlpha8:
- case R8:
- case RG8:
- case RGB8:
- case RGBA8:
- case SRGB8:
- case SRGB8A8:
- // NOTE : RGBD Hack here for reference. Not meant for installation.
- // divisor = (NVRenderTextureFormats::getSizeofFormat(inFmt) == 4) ?
- // ((float)src[byteOfs+3]) / 255.0f : 1.0f;
- for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) {
- float val = ((float)src[byteOfs + i]) / 255.0f;
- outPtr[i] = (i < 3) ? powf(val, 0.4545454545f) : val;
- // Assuming RGBA8 actually means RGBD (which is stupid, I know)
- // if ( NVRenderTextureFormats::getSizeofFormat(inFmt) == 4 ) { outPtr[i] /=
- // divisor; }
- }
- // outPtr[3] = divisor;
- break;
-
- case R32F:
- outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
- break;
- case RG32F:
- outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
- outPtr[1] = reinterpret_cast<float *>(src + byteOfs)[1];
- break;
- case RGBA32F:
- outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
- outPtr[1] = reinterpret_cast<float *>(src + byteOfs)[1];
- outPtr[2] = reinterpret_cast<float *>(src + byteOfs)[2];
- outPtr[3] = reinterpret_cast<float *>(src + byteOfs)[3];
- break;
- case RGB32F:
- outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
- outPtr[1] = reinterpret_cast<float *>(src + byteOfs)[1];
- outPtr[2] = reinterpret_cast<float *>(src + byteOfs)[2];
- break;
-
- case R16F:
- case RG16F:
- case RGBA16F:
- for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) {
- // NOTE : This only works on the assumption that we don't have any denormals,
- // Infs or NaNs.
- // Every pixel in our source image should be "regular"
- QT3DSU16 h = reinterpret_cast<QT3DSU16 *>(src + byteOfs)[i];
- QT3DSU32 sign = (h & 0x8000) << 16;
- QT3DSU32 exponent = (((((h & 0x7c00) >> 10) - 15) + 127) << 23);
- QT3DSU32 mantissa = ((h & 0x3ff) << 13);
- QT3DSU32 result = sign | exponent | mantissa;
-
- if (h == 0 || h == 0x8000) {
- result = 0;
- } // Special case for zero and negative zero
- qt3ds::intrinsics::memCopy(reinterpret_cast<QT3DSU32 *>(outPtr) + i, &result, 4);
- }
- break;
-
- case R11G11B10:
- // place holder
- QT3DS_ASSERT(false);
- break;
-
- default:
- outPtr[0] = 0.0f;
- outPtr[1] = 0.0f;
- outPtr[2] = 0.0f;
- outPtr[3] = 0.0f;
- break;
- }
- }
-
+ NVRenderTextureFormats::Enum inFmt);
static void encodeToPixel(float *inPtr, void *outPtr, QT3DSU32 byteOfs,
- NVRenderTextureFormats::Enum inFmt)
- {
- QT3DSU8 *dest = reinterpret_cast<QT3DSU8 *>(outPtr);
- switch (inFmt) {
- case NVRenderTextureFormats::Alpha8:
- dest[byteOfs] = QT3DSU8(inPtr[0] * 255.0f);
- break;
-
- case Luminance8:
- case LuminanceAlpha8:
- case R8:
- case RG8:
- case RGB8:
- case RGBA8:
- case SRGB8:
- case SRGB8A8:
- for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) {
- inPtr[i] = (inPtr[i] > 1.0f) ? 1.0f : inPtr[i];
- if (i < 3)
- dest[byteOfs + i] = QT3DSU8(powf(inPtr[i], 2.2f) * 255.0f);
- else
- dest[byteOfs + i] = QT3DSU8(inPtr[i] * 255.0f);
- }
- break;
-
- case R32F:
- reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
- break;
- case RG32F:
- reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
- reinterpret_cast<float *>(dest + byteOfs)[1] = inPtr[1];
- break;
- case RGBA32F:
- reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
- reinterpret_cast<float *>(dest + byteOfs)[1] = inPtr[1];
- reinterpret_cast<float *>(dest + byteOfs)[2] = inPtr[2];
- reinterpret_cast<float *>(dest + byteOfs)[3] = inPtr[3];
- break;
- case RGB32F:
- reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
- reinterpret_cast<float *>(dest + byteOfs)[1] = inPtr[1];
- reinterpret_cast<float *>(dest + byteOfs)[2] = inPtr[2];
- break;
-
- case R16F:
- case RG16F:
- case RGBA16F:
- for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) {
- // NOTE : This also has the limitation of not handling infs, NaNs and
- // denormals, but it should be
- // sufficient for our purposes.
- if (inPtr[i] > 65519.0f) {
- inPtr[i] = 65519.0f;
- }
- if (fabs(inPtr[i]) < 6.10352E-5f) {
- inPtr[i] = 0.0f;
- }
- QT3DSU32 f = reinterpret_cast<QT3DSU32 *>(inPtr)[i];
- QT3DSU32 sign = (f & 0x80000000) >> 16;
- QT3DSI32 exponent = (f & 0x7f800000) >> 23;
- QT3DSU32 mantissa = (f >> 13) & 0x3ff;
- exponent = exponent - 112;
- if (exponent > 31) {
- exponent = 31;
- }
- if (exponent < 0) {
- exponent = 0;
- }
- exponent = exponent << 10;
- reinterpret_cast<QT3DSU16 *>(dest + byteOfs)[i] =
- QT3DSU16(sign | exponent | mantissa);
- }
- break;
-
- case R11G11B10:
- // place holder
- QT3DS_ASSERT(false);
- break;
-
- default:
- dest[byteOfs] = 0;
- dest[byteOfs + 1] = 0;
- dest[byteOfs + 2] = 0;
- dest[byteOfs + 3] = 0;
- break;
- }
- }
+ NVRenderTextureFormats::Enum inFmt);
};
struct NVRenderTextureTargetType
diff --git a/src/render/Qt3DSRenderTexture2D.cpp b/src/render/Qt3DSRenderTexture2D.cpp
index 577264d..16fd5c9 100644
--- a/src/render/Qt3DSRenderTexture2D.cpp
+++ b/src/render/Qt3DSRenderTexture2D.cpp
@@ -125,7 +125,7 @@ namespace render {
}
void NVRenderTexture2D::SetTextureStorage(QT3DSU32 inLevels, QT3DSU32 width, QT3DSU32 height,
- NVRenderTextureFormats::Enum formaInternal,
+ NVRenderTextureFormats::Enum formatInternal,
NVRenderTextureFormats::Enum format,
NVDataRef<QT3DSU8> dataBuffer)
{
@@ -138,9 +138,9 @@ namespace render {
m_Width = width;
m_Height = height;
- m_Format = formaInternal;
+ m_Format = formatInternal;
if (format == NVRenderTextureFormats::Unknown)
- format = formaInternal;
+ format = formatInternal;
// get max size and check value
QT3DSU32 maxWidth, maxHeight;
@@ -157,8 +157,8 @@ namespace render {
m_MaxMipLevel = inLevels - 1; // we count from 0
// only uncompressed formats are supported and no depth
- if (NVRenderTextureFormats::isUncompressedTextureFormat(formaInternal)) {
- m_Backend->CreateTextureStorage2D(m_TextureHandle, m_TexTarget, inLevels, formaInternal,
+ if (NVRenderTextureFormats::isUncompressedTextureFormat(formatInternal)) {
+ m_Backend->CreateTextureStorage2D(m_TextureHandle, m_TexTarget, inLevels, formatInternal,
width, height);
m_Immutable = true;
diff --git a/src/render/backends/gl/Qt3DSOpenGLUtil.h b/src/render/backends/gl/Qt3DSOpenGLUtil.h
index 47f4230..bf877fb 100644
--- a/src/render/backends/gl/Qt3DSOpenGLUtil.h
+++ b/src/render/backends/gl/Qt3DSOpenGLUtil.h
@@ -969,6 +969,7 @@ namespace render {
outInternalFormat = GL_RG8;
outDataType = GL_UNSIGNED_BYTE;
return true;
+ case NVRenderTextureFormats::RGBE8:
case NVRenderTextureFormats::RGBA8:
outFormat = GL_RGBA;
outInternalFormat = GL_RGBA8;
@@ -1482,6 +1483,7 @@ namespace render {
return GL_R32UI;
case NVRenderTextureFormats::R32F:
return GL_R32F;
+ case NVRenderTextureFormats::RGBE8:
case NVRenderTextureFormats::RGBA8:
return GL_RGBA8;
case NVRenderTextureFormats::SRGB8A8:
diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp b/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp
index 20b0738..3d0781d 100644
--- a/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp
+++ b/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp
@@ -463,10 +463,12 @@ struct SBufferManager : public IBufferManager
if (inLoadedImage.data) {
qt3ds::render::NVRenderTextureFormats::Enum destFormat = inLoadedImage.format;
if (inBsdfMipmaps) {
- if (m_Context->GetRenderContextType() == render::NVRenderContextValues::GLES2)
- destFormat = qt3ds::render::NVRenderTextureFormats::RGBA8;
- else
- destFormat = qt3ds::render::NVRenderTextureFormats::RGBA16F;
+ if (inLoadedImage.format != NVRenderTextureFormats::RGBE8) {
+ if (m_Context->GetRenderContextType() == render::NVRenderContextValues::GLES2)
+ destFormat = qt3ds::render::NVRenderTextureFormats::RGBA8;
+ else
+ destFormat = qt3ds::render::NVRenderTextureFormats::RGBA16F;
+ }
}
else {
theTexture->SetTextureData(
diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp b/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp
index defff29..60e242d 100644
--- a/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp
+++ b/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp
@@ -145,18 +145,23 @@ static void decrunchScanline(FreeImageIO *io, fi_handle handle, RGBE *scanline,
static void decodeScanlineToTexture(RGBE *scanline, int width, void *outBuf, QT3DSU32 offset,
NVRenderTextureFormats::Enum inFormat)
{
- float rgbaF32[4];
-
- for (int i = 0; i < width; ++i) {
- rgbaF32[R] = convertComponent(scanline[i][E], scanline[i][R]);
- rgbaF32[G] = convertComponent(scanline[i][E], scanline[i][G]);
- rgbaF32[B] = convertComponent(scanline[i][E], scanline[i][B]);
- rgbaF32[3] = 1.0f;
-
- QT3DSU8 *target = reinterpret_cast<QT3DSU8 *>(outBuf);
- target += offset;
- NVRenderTextureFormats::encodeToPixel(
- rgbaF32, target, i * NVRenderTextureFormats::getSizeofFormat(inFormat), inFormat);
+
+ QT3DSU8 *target = reinterpret_cast<QT3DSU8 *>(outBuf);
+ target += offset;
+
+ if (inFormat == NVRenderTextureFormats::RGBE8) {
+ memcpy(target, scanline, size_t(4 * width));
+ } else {
+ float rgbaF32[4];
+ for (int i = 0; i < width; ++i) {
+ rgbaF32[R] = convertComponent(scanline[i][E], scanline[i][R]);
+ rgbaF32[G] = convertComponent(scanline[i][E], scanline[i][G]);
+ rgbaF32[B] = convertComponent(scanline[i][E], scanline[i][B]);
+ rgbaF32[3] = 1.0f;
+
+ NVRenderTextureFormats::encodeToPixel(
+ rgbaF32, target, i * NVRenderTextureFormats::getSizeofFormat(inFormat), inFormat);
+ }
}
}
@@ -242,14 +247,6 @@ SLoadedTexture *SLoadedTexture::LoadHDR(ISeekableIOStream &inStream, NVFoundatio
{
FreeImageIO theIO(inFnd.getAllocator(), inFnd);
SLoadedTexture *retval = nullptr;
- if (renderContextType == qt3ds::render::NVRenderContextValues::GLES2)
- retval = DoLoadHDR(&theIO, &inStream, NVRenderTextureFormats::RGBA8);
- else
- retval = DoLoadHDR(&theIO, &inStream, NVRenderTextureFormats::RGBA16F);
-
-
- // Let's just assume we don't support this just yet.
- // if ( retval )
- // retval->FreeImagePostProcess( inFlipY );
+ retval = DoLoadHDR(&theIO, &inStream, NVRenderTextureFormats::RGBE8);
return retval;
}
diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp
index 023964f..c9352dc 100644
--- a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp
+++ b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp
@@ -38,6 +38,213 @@ using namespace qt3ds;
using namespace qt3ds::render;
using namespace qt3ds::foundation;
+
+struct M8E8
+{
+ quint8 m;
+ quint8 e;
+ M8E8() : m(0), e(0){
+ }
+ M8E8(const float val) {
+ float l2 = 1.f + floor(log2f(val));
+ float mm = val / powf(2.f, l2);
+ m = quint8(mm * 255.f);
+ e = quint8(l2 + 128);
+ }
+ M8E8(const float val, quint8 exp) {
+ if (val <= 0) {
+ m = e = 0;
+ return;
+ }
+ float mm = val / powf(2.f, exp - 128);
+ m = quint8(mm * 255.f);
+ e = exp;
+ }
+};
+
+void NVRenderTextureFormats::decodeToFloat(void *inPtr, QT3DSU32 byteOfs, float *outPtr,
+ NVRenderTextureFormats::Enum inFmt)
+{
+ outPtr[0] = 0.0f;
+ outPtr[1] = 0.0f;
+ outPtr[2] = 0.0f;
+ outPtr[3] = 0.0f;
+ QT3DSU8 *src = reinterpret_cast<QT3DSU8 *>(inPtr);
+ switch (inFmt) {
+ case Alpha8:
+ outPtr[0] = ((float)src[byteOfs]) / 255.0f;
+ break;
+
+ case Luminance8:
+ case LuminanceAlpha8:
+ case R8:
+ case RG8:
+ case RGB8:
+ case RGBA8:
+ case SRGB8:
+ case SRGB8A8:
+ for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) {
+ float val = ((float)src[byteOfs + i]) / 255.0f;
+ outPtr[i] = (i < 3) ? powf(val, 0.4545454545f) : val;
+ }
+ break;
+ case RGBE8:
+ {
+ float pwd = powf(2.0f, int(src[byteOfs + 3]) - 128);
+ outPtr[0] = float(src[byteOfs + 0]) * pwd / 255.0;
+ outPtr[1] = float(src[byteOfs + 1]) * pwd / 255.0;
+ outPtr[2] = float(src[byteOfs + 2]) * pwd / 255.0;
+ outPtr[3] = 1.0f;
+ } break;
+
+ case R32F:
+ outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
+ break;
+ case RG32F:
+ outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
+ outPtr[1] = reinterpret_cast<float *>(src + byteOfs)[1];
+ break;
+ case RGBA32F:
+ outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
+ outPtr[1] = reinterpret_cast<float *>(src + byteOfs)[1];
+ outPtr[2] = reinterpret_cast<float *>(src + byteOfs)[2];
+ outPtr[3] = reinterpret_cast<float *>(src + byteOfs)[3];
+ break;
+ case RGB32F:
+ outPtr[0] = reinterpret_cast<float *>(src + byteOfs)[0];
+ outPtr[1] = reinterpret_cast<float *>(src + byteOfs)[1];
+ outPtr[2] = reinterpret_cast<float *>(src + byteOfs)[2];
+ break;
+
+ case R16F:
+ case RG16F:
+ case RGBA16F:
+ for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) {
+ // NOTE : This only works on the assumption that we don't have any denormals,
+ // Infs or NaNs.
+ // Every pixel in our source image should be "regular"
+ QT3DSU16 h = reinterpret_cast<QT3DSU16 *>(src + byteOfs)[i];
+ QT3DSU32 sign = (h & 0x8000) << 16;
+ QT3DSU32 exponent = (((((h & 0x7c00) >> 10) - 15) + 127) << 23);
+ QT3DSU32 mantissa = ((h & 0x3ff) << 13);
+ QT3DSU32 result = sign | exponent | mantissa;
+
+ if (h == 0 || h == 0x8000)
+ result = 0;
+ qt3ds::intrinsics::memCopy(reinterpret_cast<QT3DSU32 *>(outPtr) + i, &result, 4);
+ }
+ break;
+
+ case R11G11B10:
+ // place holder
+ QT3DS_ASSERT(false);
+ break;
+
+ default:
+ outPtr[0] = 0.0f;
+ outPtr[1] = 0.0f;
+ outPtr[2] = 0.0f;
+ outPtr[3] = 0.0f;
+ break;
+ }
+}
+
+void NVRenderTextureFormats::encodeToPixel(float *inPtr, void *outPtr, QT3DSU32 byteOfs,
+ NVRenderTextureFormats::Enum inFmt)
+{
+ QT3DSU8 *dest = reinterpret_cast<QT3DSU8 *>(outPtr);
+ switch (inFmt) {
+ case NVRenderTextureFormats::Alpha8:
+ dest[byteOfs] = QT3DSU8(inPtr[0] * 255.0f);
+ break;
+
+ case Luminance8:
+ case LuminanceAlpha8:
+ case R8:
+ case RG8:
+ case RGB8:
+ case RGBA8:
+ case SRGB8:
+ case SRGB8A8:
+ for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) {
+ inPtr[i] = (inPtr[i] > 1.0f) ? 1.0f : inPtr[i];
+ if (i < 3)
+ dest[byteOfs + i] = QT3DSU8(powf(inPtr[i], 2.2f) * 255.0f);
+ else
+ dest[byteOfs + i] = QT3DSU8(inPtr[i] * 255.0f);
+ }
+ break;
+ case RGBE8:
+ {
+ float max = qMax(inPtr[0], qMax(inPtr[1], inPtr[2]));
+ M8E8 ex(max);
+ M8E8 a(inPtr[0], ex.e);
+ M8E8 b(inPtr[1], ex.e);
+ M8E8 c(inPtr[2], ex.e);
+ quint8 *dst = reinterpret_cast<quint8 *>(outPtr) + byteOfs;
+ dst[0] = a.m;
+ dst[1] = b.m;
+ dst[2] = c.m;
+ dst[3] = ex.e;
+ } break;
+
+ case R32F:
+ reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
+ break;
+ case RG32F:
+ reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
+ reinterpret_cast<float *>(dest + byteOfs)[1] = inPtr[1];
+ break;
+ case RGBA32F:
+ reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
+ reinterpret_cast<float *>(dest + byteOfs)[1] = inPtr[1];
+ reinterpret_cast<float *>(dest + byteOfs)[2] = inPtr[2];
+ reinterpret_cast<float *>(dest + byteOfs)[3] = inPtr[3];
+ break;
+ case RGB32F:
+ reinterpret_cast<float *>(dest + byteOfs)[0] = inPtr[0];
+ reinterpret_cast<float *>(dest + byteOfs)[1] = inPtr[1];
+ reinterpret_cast<float *>(dest + byteOfs)[2] = inPtr[2];
+ break;
+
+ case R16F:
+ case RG16F:
+ case RGBA16F:
+ for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) {
+ // NOTE : This also has the limitation of not handling infs, NaNs and
+ // denormals, but it should be sufficient for our purposes.
+ if (inPtr[i] > 65519.0f)
+ inPtr[i] = 65519.0f;
+ if (fabs(inPtr[i]) < 6.10352E-5f)
+ inPtr[i] = 0.0f;
+ QT3DSU32 f = reinterpret_cast<QT3DSU32 *>(inPtr)[i];
+ QT3DSU32 sign = (f & 0x80000000) >> 16;
+ QT3DSI32 exponent = (f & 0x7f800000) >> 23;
+ QT3DSU32 mantissa = (f >> 13) & 0x3ff;
+ exponent = exponent - 112;
+ if (exponent > 31)
+ exponent = 31;
+ if (exponent < 0)
+ exponent = 0;
+ exponent = exponent << 10;
+ reinterpret_cast<QT3DSU16 *>(dest + byteOfs)[i] = QT3DSU16(sign | exponent | mantissa);
+ }
+ break;
+
+ case R11G11B10:
+ // place holder
+ QT3DS_ASSERT(false);
+ break;
+
+ default:
+ dest[byteOfs] = 0;
+ dest[byteOfs + 1] = 0;
+ dest[byteOfs + 2] = 0;
+ dest[byteOfs + 3] = 0;
+ break;
+ }
+}
+
Qt3DSRenderPrefilterTexture::Qt3DSRenderPrefilterTexture(NVRenderContext *inNVRenderContext,
QT3DSI32 inWidth, QT3DSI32 inHeight,
NVRenderTexture2D &inTexture2D,
@@ -66,7 +273,7 @@ Qt3DSRenderPrefilterTexture::Create(NVRenderContext *inNVRenderContext, QT3DSI32
NVRenderTextureFormats::Enum inDestFormat,
qt3ds::NVFoundationBase &inFnd)
{
- Qt3DSRenderPrefilterTexture *theBSDFMipMap = NULL;
+ Qt3DSRenderPrefilterTexture *theBSDFMipMap = nullptr;
if (inNVRenderContext->IsComputeSupported()) {
theBSDFMipMap = QT3DS_NEW(inFnd.getAllocator(), Qt3DSRenderPrefilterTextureCompute)(
@@ -128,14 +335,13 @@ Qt3DSRenderPrefilterTextureCPU::CreateBsdfMipLevel(STextureData &inCurMipLevel,
int newHeight = height >> 1;
newWidth = newWidth >= 1 ? newWidth : 1;
newHeight = newHeight >= 1 ? newHeight : 1;
+ const QT3DSU32 size = NVRenderTextureFormats::getSizeofFormat(inPrevMipLevel.format);
if (inCurMipLevel.data) {
retval = inCurMipLevel;
- retval.dataSizeInBytes =
- newWidth * newHeight * NVRenderTextureFormats::getSizeofFormat(inPrevMipLevel.format);
+ retval.dataSizeInBytes = newWidth * newHeight * size;
} else {
- retval.dataSizeInBytes =
- newWidth * newHeight * NVRenderTextureFormats::getSizeofFormat(inPrevMipLevel.format);
+ retval.dataSizeInBytes = newWidth * newHeight * size;
retval.format = inPrevMipLevel.format; // inLoadedImage.format;
retval.data = m_Foundation.getAllocator().allocate(
retval.dataSizeInBytes, "Bsdf Scaled Image Data", __FILE__, __LINE__);
@@ -155,26 +361,20 @@ Qt3DSRenderPrefilterTextureCPU::CreateBsdfMipLevel(STextureData &inCurMipLevel,
getWrappedCoords(sampleX, sampleY, width, height);
// Cauchy filter (this is simply because it's the easiest to evaluate, and
- // requires no complex
- // functions).
+ // requires no complex functions).
float filterPdf = 1.f / (1.f + float(sx * sx + sy * sy) * 2.f);
// With FP HDR formats, we're not worried about intensity loss so much as
// unnecessary energy gain,
// whereas with LDR formats, the fear with a continuous normalization factor is
- // that we'd lose
- // intensity and saturation as well.
- filterPdf /= (NVRenderTextureFormats::getSizeofFormat(retval.format) >= 8)
- ? 4.71238898f
- : 4.5403446f;
- // filterPdf /= 4.5403446f; // Discrete normalization factor
- // filterPdf /= 4.71238898f; // Continuous normalization factor
+ // that we'd lose intensity and saturation as well.
+ filterPdf /= (size >= 8) ? 4.71238898f : 4.5403446f;
+ // filterPdf /= 4.5403446f; // Discrete normalization factor
+ // filterPdf /= 4.71238898f; // Continuous normalization factor
float curPix[4];
- QT3DSI32 byteOffset = (sampleY * width + sampleX)
- * NVRenderTextureFormats::getSizeofFormat(retval.format);
+ QT3DSI32 byteOffset = (sampleY * width + sampleX) * size;
if (byteOffset < 0) {
sampleY = height + sampleY;
- byteOffset = (sampleY * width + sampleX)
- * NVRenderTextureFormats::getSizeofFormat(retval.format);
+ byteOffset = (sampleY * width + sampleX) * size;
}
NVRenderTextureFormats::decodeToFloat(inPrevMipLevel.data, byteOffset, curPix,
@@ -187,8 +387,7 @@ Qt3DSRenderPrefilterTextureCPU::CreateBsdfMipLevel(STextureData &inCurMipLevel,
}
}
- QT3DSU32 newIdx =
- (y * newWidth + x) * NVRenderTextureFormats::getSizeofFormat(retval.format);
+ QT3DSU32 newIdx = (y * newWidth + x) * size;
NVRenderTextureFormats::encodeToPixel(accumVal, retval.data, newIdx, retval.format);
}
@@ -205,6 +404,7 @@ void Qt3DSRenderPrefilterTextureCPU::Build(void *inTextureData, QT3DSI32 inTextu
m_SizeOfInternalFormat = NVRenderTextureFormats::getSizeofFormat(m_InternalFormat);
m_InternalNoOfComponent = NVRenderTextureFormats::getNumberOfComponent(m_InternalFormat);
+ m_Texture2D.SetMaxLevel(m_MaxMipMapLevel);
m_Texture2D.SetTextureData(NVDataRef<QT3DSU8>((QT3DSU8 *)inTextureData, inTextureDataSize), 0,
m_Width, m_Height, inFormat, m_DestinationFormat);
@@ -333,7 +533,7 @@ static const char *computeUploadShader(std::string &prog, NVRenderTextureFormats
return prog.c_str();
}
-static const char *computeWorkShader(std::string &prog, bool binESContext)
+static const char *computeWorkShader(std::string &prog, bool binESContext, bool rgbe)
{
if (binESContext) {
prog += "#version 310 es\n"
@@ -358,10 +558,32 @@ static const char *computeWorkShader(std::string &prog, bool binESContext)
" sX = wrapMod( sX, width );\n"
"}\n";
+ if (rgbe) {
+ prog += "vec4 decodeRGBE(in vec4 rgbe)\n"
+ "{\n"
+ " float f = pow(2.0, 255.0 * rgbe.a - 128.0);\n"
+ " return vec4(rgbe.rgb * f, 1.0);\n"
+ "}\n";
+ prog += "vec4 encodeRGBE(in vec4 rgba)\n"
+ "{\n"
+ " float maxMan = max(rgba.r, max(rgba.g, rgba.b));\n"
+ " float maxExp = 1.0 + floor(log2(maxMan));\n"
+ " return vec4(rgba.rgb / pow(2.0, maxExp), (maxExp + 128.0) / 255.0);\n"
+ "}\n";
+ }
+
prog += "// Set workgroup layout;\n"
- "layout (local_size_x = 16, local_size_y = 16) in;\n\n"
+ "layout (local_size_x = 16, local_size_y = 16) in;\n\n";
+ if (rgbe) {
+ prog +=
+ "layout (rgba8, binding = 1) readonly uniform image2D inputImage;\n\n"
+ "layout (rgba8, binding = 2) writeonly uniform image2D outputImage;\n\n";
+ } else {
+ prog +=
"layout (rgba16f, binding = 1) readonly uniform image2D inputImage;\n\n"
- "layout (rgba16f, binding = 2) writeonly uniform image2D outputImage;\n\n"
+ "layout (rgba16f, binding = 2) writeonly uniform image2D outputImage;\n\n";
+ }
+ prog +=
"void main()\n"
"{\n"
" int prevWidth = int(gl_NumWorkGroups.x) << 1;\n"
@@ -377,19 +599,31 @@ static const char *computeWorkShader(std::string &prog, bool binESContext)
" int sampleX = sx + (int(gl_GlobalInvocationID.x) << 1);\n"
" int sampleY = sy + (int(gl_GlobalInvocationID.y) << 1);\n"
" getWrappedCoords(sampleX, sampleY, prevWidth, prevHeight);\n"
- " if ((sampleY * prevWidth + sampleX) < 0 )\n"
+ " if ((sampleY * prevWidth + sampleX) < 0 )\n"
" sampleY = prevHeight + sampleY;\n"
" ivec2 pos = ivec2(sampleX, sampleY);\n"
- " vec4 value = imageLoad(inputImage, pos);\n"
- " float filterPdf = 1.0 / ( 1.0 + float(sx*sx + sy*sy)*2.0 );\n"
+ " vec4 value = imageLoad(inputImage, pos);\n";
+
+ if (rgbe) {
+ prog +=
+ " value = decodeRGBE(value);\n";
+ }
+
+ prog += " float filterPdf = 1.0 / ( 1.0 + float(sx*sx + sy*sy)*2.0 );\n"
" filterPdf /= 4.71238898;\n"
" accumVal[0] += filterPdf * value.r;\n"
- " accumVal[1] += filterPdf * value.g;\n"
- " accumVal[2] += filterPdf * value.b;\n"
- " accumVal[3] += filterPdf * value.a;\n"
+ " accumVal[1] += filterPdf * value.g;\n"
+ " accumVal[2] += filterPdf * value.b;\n"
+ " accumVal[3] += filterPdf * value.a;\n"
" }\n"
- " }\n"
- " imageStore( outputImage, ivec2(gl_GlobalInvocationID.xy), accumVal );\n"
+ " }\n";
+
+ if (rgbe) {
+ prog +=
+ " accumVal = encodeRGBE(accumVal);\n";
+ }
+
+ prog += " imageStore( outputImage, ivec2(gl_GlobalInvocationID.xy), accumVal );\n"
"}\n";
return prog.c_str();
@@ -422,33 +656,46 @@ Qt3DSRenderPrefilterTextureCompute::Qt3DSRenderPrefilterTextureCompute(
NVFoundationBase &inFnd)
: Qt3DSRenderPrefilterTexture(inNVRenderContext, inWidth, inHeight, inTexture2D, inDestFormat,
inFnd)
- , m_BSDFProgram(NULL)
- , m_UploadProgram_RGBA8(NULL)
- , m_UploadProgram_RGB8(NULL)
- , m_Level0Tex(NULL)
+ , m_BSDFProgram(nullptr)
+ , m_BSDF_RGBE_Program(nullptr)
+ , m_UploadProgram_RGBA8(nullptr)
+ , m_UploadProgram_RGB8(nullptr)
+ , m_Level0Tex(nullptr)
, m_TextureCreated(false)
{
}
Qt3DSRenderPrefilterTextureCompute::~Qt3DSRenderPrefilterTextureCompute()
{
- m_UploadProgram_RGB8 = NULL;
- m_UploadProgram_RGBA8 = NULL;
- m_BSDFProgram = NULL;
- m_Level0Tex = NULL;
+ m_BSDF_RGBE_Program = nullptr;
+ m_UploadProgram_RGB8 = nullptr;
+ m_UploadProgram_RGBA8 = nullptr;
+ m_BSDFProgram = nullptr;
+ m_Level0Tex = nullptr;
}
-void Qt3DSRenderPrefilterTextureCompute::createComputeProgram(NVRenderContext *context)
+NVRenderShaderProgram *Qt3DSRenderPrefilterTextureCompute::createComputeProgram(
+ NVRenderContext *context, NVRenderTextureFormats::Enum format)
{
std::string computeProg;
- if (!m_BSDFProgram) {
+ if (!m_BSDFProgram && format != NVRenderTextureFormats::RGBE8) {
m_BSDFProgram = context
->CompileComputeSource(
"Compute BSDF mipmap shader",
- toRef(computeWorkShader(computeProg, isGLESContext(context))))
+ toRef(computeWorkShader(computeProg, isGLESContext(context), false)))
+ .mShader;
+ return m_BSDFProgram;
+ }
+ if (!m_BSDF_RGBE_Program && format == NVRenderTextureFormats::RGBE8) {
+ m_BSDF_RGBE_Program = context
+ ->CompileComputeSource(
+ "Compute BSDF RGBE mipmap shader",
+ toRef(computeWorkShader(computeProg, isGLESContext(context), true)))
.mShader;
+ return m_BSDF_RGBE_Program;
}
+ return nullptr;
}
NVRenderShaderProgram *Qt3DSRenderPrefilterTextureCompute::getOrCreateUploadComputeProgram(
@@ -496,7 +743,7 @@ void Qt3DSRenderPrefilterTextureCompute::CreateLevel0Tex(void *inTextureData, QT
theWidth = (m_Width * 3) / 4;
}
- if (m_Level0Tex == NULL) {
+ if (m_Level0Tex == nullptr) {
m_Level0Tex = m_NVRenderContext->CreateTexture2D();
m_Level0Tex->SetTextureStorage(1, theWidth, m_Height, theFormat, theFormat,
NVDataRef<QT3DSU8>((QT3DSU8 *)inTextureData, inTextureDataSize));
@@ -510,6 +757,7 @@ void Qt3DSRenderPrefilterTextureCompute::Build(void *inTextureData, QT3DSI32 inT
NVRenderTextureFormats::Enum inFormat)
{
bool needMipUpload = (inFormat != m_DestinationFormat);
+ NVRenderShaderProgram *program = nullptr;
// re-upload data
if (!m_TextureCreated) {
m_Texture2D.SetTextureStorage(
@@ -519,9 +767,9 @@ void Qt3DSRenderPrefilterTextureCompute::Build(void *inTextureData, QT3DSI32 inT
m_Texture2D.addRef();
// create a compute shader (if not aloread done) which computes the BSDF mipmaps for this
// texture
- createComputeProgram(m_NVRenderContext);
+ program = createComputeProgram(m_NVRenderContext, inFormat);
- if (!m_BSDFProgram) {
+ if (!program) {
QT3DS_ASSERT(false);
return;
}
@@ -575,19 +823,19 @@ void Qt3DSRenderPrefilterTextureCompute::Build(void *inTextureData, QT3DSI32 inT
int width = m_Width >> 1;
int height = m_Height >> 1;
- m_NVRenderContext->SetActiveShader(m_BSDFProgram);
+ m_NVRenderContext->SetActiveShader(program);
for (int i = 1; i <= m_MaxMipMapLevel; ++i) {
theOutputImage->SetTextureLevel(i);
NVRenderCachedShaderProperty<NVRenderImage2D *> theCachedOutputImage("outputImage",
- *m_BSDFProgram);
+ *program);
theCachedOutputImage.Set(theOutputImage);
theInputImage->SetTextureLevel(i - 1);
NVRenderCachedShaderProperty<NVRenderImage2D *> theCachedinputImage("inputImage",
- *m_BSDFProgram);
+ *program);
theCachedinputImage.Set(theInputImage);
- m_NVRenderContext->DispatchCompute(m_BSDFProgram, width, height, 1);
+ m_NVRenderContext->DispatchCompute(program, width, height, 1);
width = width > 2 ? width >> 1 : 1;
height = height > 2 ? height >> 1 : 1;
diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h
index e633eb1..e646443 100644
--- a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h
+++ b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h
@@ -113,15 +113,16 @@ namespace render {
NVRenderTextureFormats::Enum inFormat);
NVScopedRefCounted<NVRenderShaderProgram> m_BSDFProgram;
+ NVScopedRefCounted<NVRenderShaderProgram> m_BSDF_RGBE_Program;
NVScopedRefCounted<NVRenderShaderProgram> m_UploadProgram_RGBA8;
NVScopedRefCounted<NVRenderShaderProgram> m_UploadProgram_RGB8;
NVScopedRefCounted<NVRenderTexture2D> m_Level0Tex;
bool m_TextureCreated;
- void createComputeProgram(NVRenderContext *context);
- NVRenderShaderProgram *
- getOrCreateUploadComputeProgram(NVRenderContext *context,
- NVRenderTextureFormats::Enum inFormat);
+ NVRenderShaderProgram *createComputeProgram(
+ NVRenderContext *context, NVRenderTextureFormats::Enum format);
+ NVRenderShaderProgram *getOrCreateUploadComputeProgram(
+ NVRenderContext *context, NVRenderTextureFormats::Enum inFormat);
};
}
}