From e363af8a8b660f0268c4b649963d8bd60c393d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antti=20M=C3=A4=C3=A4tt=C3=A4?= Date: Wed, 11 Dec 2019 13:27:07 +0200 Subject: Support RGBE hdr images directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not convert RGBE images to 16-bit floating point, but use them directly in the shaders. Task-number: QT3DS-4031 Change-Id: Iecfc775247553d5fd8976c77c572435462b79e64 Reviewed-by: Janne Kangas Reviewed-by: Tomi Korpipää --- res/effectlib/gles2/sampleProbe.glsllib | 224 ++----------- res/effectlib/sampleProbe.glsllib | 224 ++----------- src/render/Qt3DSRenderBaseTypes.h | 179 +---------- src/render/Qt3DSRenderTexture2D.cpp | 10 +- src/render/backends/gl/Qt3DSOpenGLUtil.h | 2 + .../resourcemanager/Qt3DSRenderBufferManager.cpp | 10 +- .../Qt3DSRenderLoadedTextureHDR.cpp | 39 ++- .../Qt3DSRenderPrefilterTexture.cpp | 346 ++++++++++++++++++--- .../resourcemanager/Qt3DSRenderPrefilterTexture.h | 9 +- 9 files changed, 404 insertions(+), 639 deletions(-) diff --git a/res/effectlib/gles2/sampleProbe.glsllib b/res/effectlib/gles2/sampleProbe.glsllib index f785918..fb202e5 100644 --- a/res/effectlib/gles2/sampleProbe.glsllib +++ b/res/effectlib/gles2/sampleProbe.glsllib @@ -39,6 +39,8 @@ #define QT3DS_ENABLE_IBL_FOV 0 #endif +#define USE_RGBE + uniform sampler2D light_probe; uniform vec4 light_probe_props; uniform vec4 light_probe_rotation; @@ -75,17 +77,9 @@ mat3 tangentFrame( vec3 N, vec3 p ) // get edge vectors of the pixel triangle vec3 dp1 = dFdx( p ); vec3 dp2 = dFdy( p ); - // Using dPdu and dPdv would be nicer, but the nature of our materials - // are not ones with intrinsic UVs, so we can't really go there. 
-// vec2 duv1 = dFdx( uv ); -// vec2 duv2 = dFdy( uv ); - // solve the linear system vec3 dp2perp = cross( dp2, N ); vec3 dp1perp = cross( N, dp1 ); -// vec3 T = dp2perp * duv1.x + dp1perp * duv2.x; -// vec3 B = dp2perp * duv1.y + dp1perp * duv2.y; - vec3 T = normalize(dp1perp); vec3 B = normalize(dp2perp); return mat3( T , B , N ); @@ -99,6 +93,16 @@ vec2 transformSample( vec2 origUV, vec4 probeRot, vec2 probeOfs ) return retUV; } +vec3 textureProbe(sampler2D lightProbe, vec2 coord, float lod) +{ +#ifdef USE_RGBE + vec4 ret = textureLod(lightProbe, coord, lod); + return ret.rgb * pow(2.0, ret.a * 255.0 - 128.0); +#else + return textureLod(lightProbe, coord, lod).rgb; +#endif +} + // This is broken out into its own routine so that if we get some other // format image than a lat-long, then we can account for that by changing // the code here alone. @@ -142,7 +146,7 @@ vec4 getTopLayerSample( vec3 inDir, float lodShift, vec3 lodOffsets ) vec3 getProbeSample( vec3 smpDir, float lodShift, vec3 normal ) { vec2 smpUV = getProbeSampleUV( smpDir, light_probe_rotation, light_probe_offset.xy ); - return textureLod( light_probe, smpUV , lodShift ).xyz; + return textureProbe( light_probe, smpUV , lodShift ); } vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3 normal ) @@ -184,10 +188,10 @@ vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3 lodShift = max( lodShift, minLod ); - vec3 retVal = 0.4 * textureLod( light_probe, smpUV , lodShift ).xyz; - retVal += 0.2 * textureLod( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) ).xyz; - retVal += 0.3 * textureLod( light_probe, smpUV , lodShift+lodOffsets.y ).xyz; - retVal += 0.1 * textureLod( light_probe, smpUV , lodShift+lodOffsets.z ).xyz; + vec3 retVal = 0.4 * textureProbe( light_probe, smpUV , lodShift ); + retVal += 0.2 * textureProbe( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) ); + retVal += 0.3 * textureProbe( light_probe, smpUV , 
lodShift+lodOffsets.y ); + retVal += 0.1 * textureProbe( light_probe, smpUV , lodShift+lodOffsets.z ); #if QT3DS_ENABLE_LIGHT_PROBE_2 vec4 topSmp = getTopLayerSample( smpDir, lodShift, lodOffsets ); @@ -257,9 +261,9 @@ vec3 getProbeAnisoSample( vec3 smpDir, float roughU, float roughV, mat3 tanFrame wt = sigma / (sigma + float(i * i)); vec2 uv0 = getProbeSampleUV(normalize(smpDir + smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy); vec2 uv1 = getProbeSampleUV(normalize(smpDir - smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy); - result.xyz += wt * textureLod( light_probe, uv0 , lodMin ).xyz; + result.xyz += wt * textureProbe( light_probe, uv0 , lodMin ); result.w += wt; - result.xyz += wt * textureLod( light_probe, uv1 , lodMin ).xyz; + result.xyz += wt * textureProbe( light_probe, uv1 , lodMin ); result.w += wt; } @@ -272,78 +276,7 @@ vec4 sampleDiffuse( mat3 tanFrame ) if ( light_probe_props.w < 0.005 ) return vec4( 0.0 ); -// if ( light_probe_offset.w > 0.5 ) -// { - // The LOD offset comes from the assumption that a full diffuse convolution - // has a support of pi/2, which translates into x pixels, and the base 2 log - // gives us this LOD... Technically, "x" pixels depends on what the original - // texture resolution was, which is why we use light_probe_offset.w, which holds - // the number of mip levels the texture has. - - return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 ); -// } - - /* - // PKC -- the code below is for full-blown IBL, which we'll skip for now - - // Hand-calculated Hammersley points for t = 2, n = 33 - // I exclude the 0,0 first point, hence why n=33 and not 32 - // Nice thing about 2d Hammersley points is that any subset is - // also stratified, so even if I have 1000 points and truncate - // anywhere, I'm fine. Each of these represent the y of an xy - // while x for the kth point is always (k+1)/n. 
- float kernel[32]; - kernel[0] = 0.5; kernel[1] = 0.25; - kernel[2] = 0.75; kernel[3] = 0.125; - kernel[4] = 0.625; kernel[5] = 0.375; - kernel[6] = 0.875; kernel[7] = 0.0625; - kernel[8] = 0.5625; kernel[9] = 0.3125; - kernel[10] = 0.8125; kernel[11] = 0.1875; - kernel[12] = 0.6875; kernel[13] = 0.4375; - kernel[14] = 0.9375; kernel[15] = 0.03125; - kernel[16] = 0.53125; kernel[17] = 0.28125; - kernel[18] = 0.78125; kernel[19] = 0.15625; - kernel[20] = 0.65625; kernel[21] = 0.40625; - kernel[22] = 0.90625; kernel[23] = 0.09375; - kernel[24] = 0.59375; kernel[25] = 0.34375; - kernel[26] = 0.84375; kernel[27] = 0.28175; - kernel[28] = 0.71875; kernel[29] = 0.46875; - kernel[30] = 0.96875; kernel[31] = 0.015625; - - float phiShift = noise1d(gl_FragCoord.xy) - 0.5; - - vec3 ret = vec3(0, 0, 0); - - int ct = 24; - float step = 25.0; - - // Importance sampling a cosine-weighted distribution. Since this - // matches the BSDF exactly, we are just going to assume that the PDF - // and the BSDF cancel out in sampling, so we just need to accumulate - // texture colors. The noise function puts randomized "twist" into - // the sampled directions. - for( int i = 0; i < ct; ++i ) - { - vec3 localDir; - float phi = 6.28318530718 * (kernel[i] + phiShift); - float cosTheta = sqrt( float(i+1) / step); - localDir.z = sqrt(1.0 - cosTheta*cosTheta); - localDir.x = cos(phi) * cosTheta; - localDir.y = sin(phi) * cosTheta; - vec3 smpDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z; - - - float lodShift = light_probe_offset.w - 2 + log2( 3.1415926535 / (localDir.z * step) ); - vec3 smpColor = getProbeSample( smpDir, lodShift, tanFrame[2] ); - - // The assumption here is that the BSDF and the sampling PDF are identical - // so they cancel out and therefore, we don't need to include it here. 
- ret += smpColor; - } - - ret *= aoFactor / 24.0; - return ret; - */ + return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 ); } vec4 sampleDiffuseCustomMaterial( vec3 normal, vec3 worldPos, float aoFactor ) @@ -358,114 +291,23 @@ vec4 sampleGlossyAniso( mat3 tanFrame, vec3 viewDir, float roughU, float roughV if ( light_probe_props.w < 0.005 ) return vec4( 0.0 ); - // PKC : If we do the full IBL sampling, it's useful to square the roughnesses because - // it makes the effect of roughness feel more linear in the low end. This isn't necessary - // for fast IBL. -// float sigmaU = clamp(roughU*roughU, 0.0001, 1.0); -// float sigmaV = clamp(roughV*roughV, 0.0001, 1.0); float sigmaU = smoothstep( 0.0, 1.0, clamp(roughU, 0.0001, 1.0) ); float sigmaV = smoothstep( 0.0, 1.0, clamp(roughV, 0.0001, 1.0) ); vec3 ret = vec3(0, 0, 0); -// if ( light_probe_offset.w > 0.5 ) -// { - vec3 smpDir = reflect( -viewDir, tanFrame[2] ); - float sigma = sqrt(sigmaU * sigmaV); - - // Compute the Geometric occlusion/self-shadowing term - float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995); - float k = sigma * 0.31830988618; // roughness / pi - float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 ); - - vec3 outColor; - - outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame ); - - return vec4( light_probe_props.w * Gl * outColor, 1.0 ); -// } - - // PKC -- the code below is for full-blown IBL, which we'll skip for now - -/* - float step = clamp( ceil(32.0 * sqrt(max(sigmaU, sigmaV))), 4.0, 32.0 ); - int actualCt = int(step); - float phiShift = noise1d(gl_FragCoord.xy) - 0.5; - - // Hand-calculated Hammersley points for t = 2, n = 33 - // I exclude the 0,0 first point, hence why n=33 and not 32 - // Nice thing about 2d Hammersley points is that any subset is - // also stratified, so even if I have 1000 points and truncate - // anywhere, I'm fine. 
Each of these represent the y of an xy - // while x for the kth point is always (k+1)/n. - float kernel[32]; - kernel[0] = 0.5; kernel[1] = 0.25; - kernel[2] = 0.75; kernel[3] = 0.125; - kernel[4] = 0.625; kernel[5] = 0.375; - kernel[6] = 0.875; kernel[7] = 0.0625; - kernel[8] = 0.5625; kernel[9] = 0.3125; - kernel[10] = 0.8125; kernel[11] = 0.1875; - kernel[12] = 0.6875; kernel[13] = 0.4375; - kernel[14] = 0.9375; kernel[15] = 0.03125; - kernel[16] = 0.53125; kernel[17] = 0.28125; - kernel[18] = 0.78125; kernel[19] = 0.15625; - kernel[20] = 0.65625; kernel[21] = 0.40625; - kernel[22] = 0.90625; kernel[23] = 0.09375; - kernel[24] = 0.59375; kernel[25] = 0.34375; - kernel[26] = 0.84375; kernel[27] = 0.28175; - kernel[28] = 0.71875; kernel[29] = 0.46875; - kernel[30] = 0.96875; kernel[31] = 0.015625; - - float thetaI = acos( dot(viewDir, tanFrame[2]) ); - - // NOTE : The model I'm using here is actually based on the KGGX model used in - // physGlossyBSDF. This is my own variation on the original GGX which uses something - // closer to a pure Cauchy distribution in tangent space, but also supports anisotropy. 
- for (int i = 0; i < actualCt; ++i) - { - vec3 localDir; - - float phi = 6.28318530718 * (kernel[i] + phiShift); - float u = float(i + 1) / (step + 1.0); - float rU = cos(phi) * sigmaU; - float rV = sin(phi) * sigmaV; - float sigma = sqrt(rU * rU + rV * rV); - - float boundA = atan( ((thetaI - 1.57079632679) * 0.5) / sigma ); - float boundB = atan( ((thetaI + 1.57079632679) * 0.5) / sigma ); - float t = (1.0 - u) * boundA + u * boundB; - float thetaH = tan( t ) * sigma; - - float cosThetaH = cos( thetaH ); - float sinThetaH = sin( thetaH ); - localDir.z = cosThetaH; - localDir.y = sin(phi) * sinThetaH; - localDir.x = cos(phi) * sinThetaH; - - vec3 halfDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z; - halfDir = normalize(halfDir); - vec3 smpDir = reflect( -viewDir, halfDir ); - - vec2 scaledXY = localDir.xy / vec2(sigmaU, sigmaV); - float PDF = (sigmaU*sigmaV) / (sigmaU*sigmaV + dot(scaledXY, scaledXY)); - vec3 Haf = smpDir + viewDir; // We need the unnormalized half vecter as well as the normalized one - float HdotL = dot(halfDir, smpDir); - // normalize the PDF to compute the filter support - // This gives us the ideal miplevel at which to sample the texture map. - PDF *= dot(Haf, Haf) / (4.0 * dot(Haf, smpDir) * HdotL * sigmaU*sigmaV * (boundB-boundA)*(boundB-boundA)); - - // Again assuming that the pdf and BSDF are equivalent -- that's not generally valid, - // but it saves a lot of ALU cycles. 
- float lodShift = log2( 512.0 * sigma / PDF ); - - float k = sigma * 0.31830988618; // roughness / pi - float Gl = clamp( (HdotL / (HdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 ); - - vec3 smpColor = Gl * getProbeSample( smpDir, lodShift, tanFrame[2] ); - ret += smpColor; - } - ret /= float(actualCt); - return vec4(ret, 1.0); -*/ + vec3 smpDir = reflect( -viewDir, tanFrame[2] ); + float sigma = sqrt(sigmaU * sigmaV); + + // Compute the Geometric occlusion/self-shadowing term + float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995); + float k = sigma * 0.31830988618; // roughness / pi + float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 ); + + vec3 outColor; + + outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame ); + + return vec4( light_probe_props.w * Gl * outColor, 1.0 ); } vec4 sampleGlossy( mat3 tanFrame, vec3 viewDir, float roughness ) diff --git a/res/effectlib/sampleProbe.glsllib b/res/effectlib/sampleProbe.glsllib index 6556e51..361aad5 100644 --- a/res/effectlib/sampleProbe.glsllib +++ b/res/effectlib/sampleProbe.glsllib @@ -31,6 +31,8 @@ #ifndef SAMPLE_PROBE_GLSLLIB #define SAMPLE_PROBE_GLSLLIB 1 +#define USE_RGBE + uniform sampler2D light_probe; uniform vec4 light_probe_props; uniform vec4 light_probe_rotation; @@ -65,17 +67,9 @@ mat3 tangentFrame( vec3 N, vec3 p ) // get edge vectors of the pixel triangle vec3 dp1 = dFdx( p ); vec3 dp2 = dFdy( p ); - // Using dPdu and dPdv would be nicer, but the nature of our materials - // are not ones with intrinsic UVs, so we can't really go there. 
-// vec2 duv1 = dFdx( uv ); -// vec2 duv2 = dFdy( uv ); - // solve the linear system vec3 dp2perp = cross( dp2, N ); vec3 dp1perp = cross( N, dp1 ); -// vec3 T = dp2perp * duv1.x + dp1perp * duv2.x; -// vec3 B = dp2perp * duv1.y + dp1perp * duv2.y; - vec3 T = normalize(dp1perp); vec3 B = normalize(dp2perp); return mat3( T , B , N ); @@ -89,6 +83,16 @@ vec2 transformSample( vec2 origUV, vec4 probeRot, vec2 probeOfs ) return retUV; } +vec3 textureProbe(sampler2D lightProbe, vec2 coord, float lod) +{ +#ifdef USE_RGBE + vec4 ret = textureLod(lightProbe, coord, lod); + return ret.rgb * pow(2.0, ret.a * 255.0 - 128.0); +#else + return textureLod(lightProbe, coord, lod).rgb; +#endif +} + // This is broken out into its own routine so that if we get some other // format image than a lat-long, then we can account for that by changing // the code here alone. @@ -138,7 +142,7 @@ vec4 getTopLayerSample( vec3 inDir, float lodShift, vec3 lodOffsets ) vec3 getProbeSample( vec3 smpDir, float lodShift, vec3 normal ) { vec2 smpUV = getProbeSampleUV( smpDir, light_probe_rotation, light_probe_offset.xy ); - return textureLod( light_probe, smpUV , lodShift ).xyz; + return textureProbe( light_probe, smpUV , lodShift ); } vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3 normal ) @@ -180,10 +184,10 @@ vec3 getProbeWeightedSample( vec3 smpDir, float lodShift, float roughness, vec3 lodShift = max( lodShift, minLod ); - vec3 retVal = 0.4 * textureLod( light_probe, smpUV , lodShift ).xyz; - retVal += 0.2 * textureLod( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) ).xyz; - retVal += 0.3 * textureLod( light_probe, smpUV , lodShift+lodOffsets.y ).xyz; - retVal += 0.1 * textureLod( light_probe, smpUV , lodShift+lodOffsets.z ).xyz; + vec3 retVal = 0.4 * textureProbe( light_probe, smpUV , lodShift ); + retVal += 0.2 * textureProbe( light_probe, smpUV , max(minLod, lodShift+lodOffsets.x) ); + retVal += 0.3 * textureProbe( light_probe, smpUV , 
lodShift+lodOffsets.y ); + retVal += 0.1 * textureProbe( light_probe, smpUV , lodShift+lodOffsets.z ); #if QT3DS_ENABLE_LIGHT_PROBE_2 vec4 topSmp = getTopLayerSample( smpDir, lodShift, lodOffsets ); @@ -248,9 +252,9 @@ vec3 getProbeAnisoSample( vec3 smpDir, float roughU, float roughV, mat3 tanFrame wt = sigma / (sigma + float(i * i)); vec2 uv0 = getProbeSampleUV(normalize(smpDir + smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy); vec2 uv1 = getProbeSampleUV(normalize(smpDir - smpDirOfs * float(i)), light_probe_rotation, light_probe_offset.xy); - result.xyz += wt * textureLod( light_probe, uv0 , lodMin ).xyz; + result.xyz += wt * textureProbe( light_probe, uv0 , lodMin ); result.w += wt; - result.xyz += wt * textureLod( light_probe, uv1 , lodMin ).xyz; + result.xyz += wt * textureProbe( light_probe, uv1 , lodMin ); result.w += wt; } @@ -263,78 +267,7 @@ vec4 sampleDiffuse( mat3 tanFrame ) if ( light_probe_props.w < 0.005 ) return vec4( 0.0 ); -// if ( light_probe_offset.w > 0.5 ) -// { - // The LOD offset comes from the assumption that a full diffuse convolution - // has a support of pi/2, which translates into x pixels, and the base 2 log - // gives us this LOD... Technically, "x" pixels depends on what the original - // texture resolution was, which is why we use light_probe_offset.w, which holds - // the number of mip levels the texture has. - - return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 ); -// } - - /* - // PKC -- the code below is for full-blown IBL, which we'll skip for now - - // Hand-calculated Hammersley points for t = 2, n = 33 - // I exclude the 0,0 first point, hence why n=33 and not 32 - // Nice thing about 2d Hammersley points is that any subset is - // also stratified, so even if I have 1000 points and truncate - // anywhere, I'm fine. Each of these represent the y of an xy - // while x for the kth point is always (k+1)/n. 
- float kernel[32]; - kernel[0] = 0.5; kernel[1] = 0.25; - kernel[2] = 0.75; kernel[3] = 0.125; - kernel[4] = 0.625; kernel[5] = 0.375; - kernel[6] = 0.875; kernel[7] = 0.0625; - kernel[8] = 0.5625; kernel[9] = 0.3125; - kernel[10] = 0.8125; kernel[11] = 0.1875; - kernel[12] = 0.6875; kernel[13] = 0.4375; - kernel[14] = 0.9375; kernel[15] = 0.03125; - kernel[16] = 0.53125; kernel[17] = 0.28125; - kernel[18] = 0.78125; kernel[19] = 0.15625; - kernel[20] = 0.65625; kernel[21] = 0.40625; - kernel[22] = 0.90625; kernel[23] = 0.09375; - kernel[24] = 0.59375; kernel[25] = 0.34375; - kernel[26] = 0.84375; kernel[27] = 0.28175; - kernel[28] = 0.71875; kernel[29] = 0.46875; - kernel[30] = 0.96875; kernel[31] = 0.015625; - - float phiShift = noise1d(gl_FragCoord.xy) - 0.5; - - vec3 ret = vec3(0, 0, 0); - - int ct = 24; - float step = 25.0; - - // Importance sampling a cosine-weighted distribution. Since this - // matches the BSDF exactly, we are just going to assume that the PDF - // and the BSDF cancel out in sampling, so we just need to accumulate - // texture colors. The noise function puts randomized "twist" into - // the sampled directions. - for( int i = 0; i < ct; ++i ) - { - vec3 localDir; - float phi = 6.28318530718 * (kernel[i] + phiShift); - float cosTheta = sqrt( float(i+1) / step); - localDir.z = sqrt(1.0 - cosTheta*cosTheta); - localDir.x = cos(phi) * cosTheta; - localDir.y = sin(phi) * cosTheta; - vec3 smpDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z; - - - float lodShift = light_probe_offset.w - 2 + log2( 3.1415926535 / (localDir.z * step) ); - vec3 smpColor = getProbeSample( smpDir, lodShift, tanFrame[2] ); - - // The assumption here is that the BSDF and the sampling PDF are identical - // so they cancel out and therefore, we don't need to include it here. 
- ret += smpColor; - } - - ret *= aoFactor / 24.0; - return ret; - */ + return vec4( light_probe_props.w * getProbeWeightedSample( tanFrame[2], light_probe_offset.w - 2.65149613, 1.0, tanFrame[2] ), 1.0 ); } vec4 sampleDiffuseCustomMaterial( vec3 normal, vec3 worldPos, float aoFactor ) @@ -349,114 +282,23 @@ vec4 sampleGlossyAniso( mat3 tanFrame, vec3 viewDir, float roughU, float roughV if ( light_probe_props.w < 0.005 ) return vec4( 0.0 ); - // PKC : If we do the full IBL sampling, it's useful to square the roughnesses because - // it makes the effect of roughness feel more linear in the low end. This isn't necessary - // for fast IBL. -// float sigmaU = clamp(roughU*roughU, 0.0001, 1.0); -// float sigmaV = clamp(roughV*roughV, 0.0001, 1.0); float sigmaU = smoothstep( 0.0, 1.0, clamp(roughU, 0.0001, 1.0) ); float sigmaV = smoothstep( 0.0, 1.0, clamp(roughV, 0.0001, 1.0) ); vec3 ret = vec3(0, 0, 0); -// if ( light_probe_offset.w > 0.5 ) -// { - vec3 smpDir = reflect( -viewDir, tanFrame[2] ); - float sigma = sqrt(sigmaU * sigmaV); - - // Compute the Geometric occlusion/self-shadowing term - float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995); - float k = sigma * 0.31830988618; // roughness / pi - float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 ); - - vec3 outColor; - - outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame ); - - return vec4( light_probe_props.w * Gl * outColor, 1.0 ); -// } - - // PKC -- the code below is for full-blown IBL, which we'll skip for now - -/* - float step = clamp( ceil(32.0 * sqrt(max(sigmaU, sigmaV))), 4.0, 32.0 ); - int actualCt = int(step); - float phiShift = noise1d(gl_FragCoord.xy) - 0.5; - - // Hand-calculated Hammersley points for t = 2, n = 33 - // I exclude the 0,0 first point, hence why n=33 and not 32 - // Nice thing about 2d Hammersley points is that any subset is - // also stratified, so even if I have 1000 points and truncate - // anywhere, I'm fine. 
Each of these represent the y of an xy - // while x for the kth point is always (k+1)/n. - float kernel[32]; - kernel[0] = 0.5; kernel[1] = 0.25; - kernel[2] = 0.75; kernel[3] = 0.125; - kernel[4] = 0.625; kernel[5] = 0.375; - kernel[6] = 0.875; kernel[7] = 0.0625; - kernel[8] = 0.5625; kernel[9] = 0.3125; - kernel[10] = 0.8125; kernel[11] = 0.1875; - kernel[12] = 0.6875; kernel[13] = 0.4375; - kernel[14] = 0.9375; kernel[15] = 0.03125; - kernel[16] = 0.53125; kernel[17] = 0.28125; - kernel[18] = 0.78125; kernel[19] = 0.15625; - kernel[20] = 0.65625; kernel[21] = 0.40625; - kernel[22] = 0.90625; kernel[23] = 0.09375; - kernel[24] = 0.59375; kernel[25] = 0.34375; - kernel[26] = 0.84375; kernel[27] = 0.28175; - kernel[28] = 0.71875; kernel[29] = 0.46875; - kernel[30] = 0.96875; kernel[31] = 0.015625; - - float thetaI = acos( dot(viewDir, tanFrame[2]) ); - - // NOTE : The model I'm using here is actually based on the KGGX model used in - // physGlossyBSDF. This is my own variation on the original GGX which uses something - // closer to a pure Cauchy distribution in tangent space, but also supports anisotropy. 
- for (int i = 0; i < actualCt; ++i) - { - vec3 localDir; - - float phi = 6.28318530718 * (kernel[i] + phiShift); - float u = float(i + 1) / (step + 1.0); - float rU = cos(phi) * sigmaU; - float rV = sin(phi) * sigmaV; - float sigma = sqrt(rU * rU + rV * rV); - - float boundA = atan( ((thetaI - 1.57079632679) * 0.5) / sigma ); - float boundB = atan( ((thetaI + 1.57079632679) * 0.5) / sigma ); - float t = (1.0 - u) * boundA + u * boundB; - float thetaH = tan( t ) * sigma; - - float cosThetaH = cos( thetaH ); - float sinThetaH = sin( thetaH ); - localDir.z = cosThetaH; - localDir.y = sin(phi) * sinThetaH; - localDir.x = cos(phi) * sinThetaH; - - vec3 halfDir = tanFrame[0]*localDir.x + tanFrame[1]*localDir.y + tanFrame[2]*localDir.z; - halfDir = normalize(halfDir); - vec3 smpDir = reflect( -viewDir, halfDir ); - - vec2 scaledXY = localDir.xy / vec2(sigmaU, sigmaV); - float PDF = (sigmaU*sigmaV) / (sigmaU*sigmaV + dot(scaledXY, scaledXY)); - vec3 Haf = smpDir + viewDir; // We need the unnormalized half vecter as well as the normalized one - float HdotL = dot(halfDir, smpDir); - // normalize the PDF to compute the filter support - // This gives us the ideal miplevel at which to sample the texture map. - PDF *= dot(Haf, Haf) / (4.0 * dot(Haf, smpDir) * HdotL * sigmaU*sigmaV * (boundB-boundA)*(boundB-boundA)); - - // Again assuming that the pdf and BSDF are equivalent -- that's not generally valid, - // but it saves a lot of ALU cycles. 
- float lodShift = log2( 512.0 * sigma / PDF ); - - float k = sigma * 0.31830988618; // roughness / pi - float Gl = clamp( (HdotL / (HdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 ); - - vec3 smpColor = Gl * getProbeSample( smpDir, lodShift, tanFrame[2] ); - ret += smpColor; - } - ret /= float(actualCt); - return vec4(ret, 1.0); -*/ + vec3 smpDir = reflect( -viewDir, tanFrame[2] ); + float sigma = sqrt(sigmaU * sigmaV); + + // Compute the Geometric occlusion/self-shadowing term + float NdotL = clamp( dot( smpDir, tanFrame[2] ), 0.0, 0.999995); + float k = sigma * 0.31830988618; // roughness / pi + float Gl = clamp( (NdotL / (NdotL*(1.0-k) + k) + (1.0 - k*k)) * 0.5, 0.0, 1.0 ); + + vec3 outColor; + + outColor = getProbeAnisoSample( smpDir, sigmaU, sigmaV, tanFrame ); + + return vec4( light_probe_props.w * Gl * outColor, 1.0 ); } vec4 sampleGlossy( mat3 tanFrame, vec3 viewDir, float roughness ) diff --git a/src/render/Qt3DSRenderBaseTypes.h b/src/render/Qt3DSRenderBaseTypes.h index 909077d..bfe034b 100644 --- a/src/render/Qt3DSRenderBaseTypes.h +++ b/src/render/Qt3DSRenderBaseTypes.h @@ -351,6 +351,7 @@ struct NVRenderRenderBufferFormats QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(RGBA32F) \ QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(R11G11B10) \ QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(RGB9E5) \ + QT3DS_RENDER_HANDLE_TEXTURE_FORMAT(RGBE8) \ QT3DS_RENDER_HANDLE_COMPRESSED_TEXTURE_FORMAT(RGBA_DXT1) \ QT3DS_RENDER_HANDLE_COMPRESSED_TEXTURE_FORMAT(RGB_DXT1) \ QT3DS_RENDER_HANDLE_COMPRESSED_TEXTURE_FORMAT(RGBA_DXT3) \ @@ -511,6 +512,7 @@ struct NVRenderTextureFormats return 4; case R32F: return 4; + case RGBE8: case RGBA8: return 4; case RGB8: @@ -571,6 +573,7 @@ struct NVRenderTextureFormats return 1; case R32F: return 1; + case RGBE8: case RGBA8: return 4; case RGB8: @@ -619,181 +622,9 @@ struct NVRenderTextureFormats } static void decodeToFloat(void *inPtr, QT3DSU32 byteOfs, float *outPtr, - NVRenderTextureFormats::Enum inFmt) - { - outPtr[0] = 0.0f; - outPtr[1] = 0.0f; - outPtr[2] = 
0.0f; - outPtr[3] = 0.0f; - QT3DSU8 *src = reinterpret_cast(inPtr); - // float divisor; // If we want to support RGBD? - switch (inFmt) { - case Alpha8: - outPtr[0] = ((float)src[byteOfs]) / 255.0f; - break; - - case Luminance8: - case LuminanceAlpha8: - case R8: - case RG8: - case RGB8: - case RGBA8: - case SRGB8: - case SRGB8A8: - // NOTE : RGBD Hack here for reference. Not meant for installation. - // divisor = (NVRenderTextureFormats::getSizeofFormat(inFmt) == 4) ? - // ((float)src[byteOfs+3]) / 255.0f : 1.0f; - for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) { - float val = ((float)src[byteOfs + i]) / 255.0f; - outPtr[i] = (i < 3) ? powf(val, 0.4545454545f) : val; - // Assuming RGBA8 actually means RGBD (which is stupid, I know) - // if ( NVRenderTextureFormats::getSizeofFormat(inFmt) == 4 ) { outPtr[i] /= - // divisor; } - } - // outPtr[3] = divisor; - break; - - case R32F: - outPtr[0] = reinterpret_cast(src + byteOfs)[0]; - break; - case RG32F: - outPtr[0] = reinterpret_cast(src + byteOfs)[0]; - outPtr[1] = reinterpret_cast(src + byteOfs)[1]; - break; - case RGBA32F: - outPtr[0] = reinterpret_cast(src + byteOfs)[0]; - outPtr[1] = reinterpret_cast(src + byteOfs)[1]; - outPtr[2] = reinterpret_cast(src + byteOfs)[2]; - outPtr[3] = reinterpret_cast(src + byteOfs)[3]; - break; - case RGB32F: - outPtr[0] = reinterpret_cast(src + byteOfs)[0]; - outPtr[1] = reinterpret_cast(src + byteOfs)[1]; - outPtr[2] = reinterpret_cast(src + byteOfs)[2]; - break; - - case R16F: - case RG16F: - case RGBA16F: - for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) { - // NOTE : This only works on the assumption that we don't have any denormals, - // Infs or NaNs. 
- // Every pixel in our source image should be "regular" - QT3DSU16 h = reinterpret_cast(src + byteOfs)[i]; - QT3DSU32 sign = (h & 0x8000) << 16; - QT3DSU32 exponent = (((((h & 0x7c00) >> 10) - 15) + 127) << 23); - QT3DSU32 mantissa = ((h & 0x3ff) << 13); - QT3DSU32 result = sign | exponent | mantissa; - - if (h == 0 || h == 0x8000) { - result = 0; - } // Special case for zero and negative zero - qt3ds::intrinsics::memCopy(reinterpret_cast(outPtr) + i, &result, 4); - } - break; - - case R11G11B10: - // place holder - QT3DS_ASSERT(false); - break; - - default: - outPtr[0] = 0.0f; - outPtr[1] = 0.0f; - outPtr[2] = 0.0f; - outPtr[3] = 0.0f; - break; - } - } - + NVRenderTextureFormats::Enum inFmt); static void encodeToPixel(float *inPtr, void *outPtr, QT3DSU32 byteOfs, - NVRenderTextureFormats::Enum inFmt) - { - QT3DSU8 *dest = reinterpret_cast(outPtr); - switch (inFmt) { - case NVRenderTextureFormats::Alpha8: - dest[byteOfs] = QT3DSU8(inPtr[0] * 255.0f); - break; - - case Luminance8: - case LuminanceAlpha8: - case R8: - case RG8: - case RGB8: - case RGBA8: - case SRGB8: - case SRGB8A8: - for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) { - inPtr[i] = (inPtr[i] > 1.0f) ? 
1.0f : inPtr[i]; - if (i < 3) - dest[byteOfs + i] = QT3DSU8(powf(inPtr[i], 2.2f) * 255.0f); - else - dest[byteOfs + i] = QT3DSU8(inPtr[i] * 255.0f); - } - break; - - case R32F: - reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; - break; - case RG32F: - reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; - reinterpret_cast(dest + byteOfs)[1] = inPtr[1]; - break; - case RGBA32F: - reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; - reinterpret_cast(dest + byteOfs)[1] = inPtr[1]; - reinterpret_cast(dest + byteOfs)[2] = inPtr[2]; - reinterpret_cast(dest + byteOfs)[3] = inPtr[3]; - break; - case RGB32F: - reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; - reinterpret_cast(dest + byteOfs)[1] = inPtr[1]; - reinterpret_cast(dest + byteOfs)[2] = inPtr[2]; - break; - - case R16F: - case RG16F: - case RGBA16F: - for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) { - // NOTE : This also has the limitation of not handling infs, NaNs and - // denormals, but it should be - // sufficient for our purposes. 
- if (inPtr[i] > 65519.0f) { - inPtr[i] = 65519.0f; - } - if (fabs(inPtr[i]) < 6.10352E-5f) { - inPtr[i] = 0.0f; - } - QT3DSU32 f = reinterpret_cast(inPtr)[i]; - QT3DSU32 sign = (f & 0x80000000) >> 16; - QT3DSI32 exponent = (f & 0x7f800000) >> 23; - QT3DSU32 mantissa = (f >> 13) & 0x3ff; - exponent = exponent - 112; - if (exponent > 31) { - exponent = 31; - } - if (exponent < 0) { - exponent = 0; - } - exponent = exponent << 10; - reinterpret_cast(dest + byteOfs)[i] = - QT3DSU16(sign | exponent | mantissa); - } - break; - - case R11G11B10: - // place holder - QT3DS_ASSERT(false); - break; - - default: - dest[byteOfs] = 0; - dest[byteOfs + 1] = 0; - dest[byteOfs + 2] = 0; - dest[byteOfs + 3] = 0; - break; - } - } + NVRenderTextureFormats::Enum inFmt); }; struct NVRenderTextureTargetType diff --git a/src/render/Qt3DSRenderTexture2D.cpp b/src/render/Qt3DSRenderTexture2D.cpp index 577264d..16fd5c9 100644 --- a/src/render/Qt3DSRenderTexture2D.cpp +++ b/src/render/Qt3DSRenderTexture2D.cpp @@ -125,7 +125,7 @@ namespace render { } void NVRenderTexture2D::SetTextureStorage(QT3DSU32 inLevels, QT3DSU32 width, QT3DSU32 height, - NVRenderTextureFormats::Enum formaInternal, + NVRenderTextureFormats::Enum formatInternal, NVRenderTextureFormats::Enum format, NVDataRef dataBuffer) { @@ -138,9 +138,9 @@ namespace render { m_Width = width; m_Height = height; - m_Format = formaInternal; + m_Format = formatInternal; if (format == NVRenderTextureFormats::Unknown) - format = formaInternal; + format = formatInternal; // get max size and check value QT3DSU32 maxWidth, maxHeight; @@ -157,8 +157,8 @@ namespace render { m_MaxMipLevel = inLevels - 1; // we count from 0 // only uncompressed formats are supported and no depth - if (NVRenderTextureFormats::isUncompressedTextureFormat(formaInternal)) { - m_Backend->CreateTextureStorage2D(m_TextureHandle, m_TexTarget, inLevels, formaInternal, + if (NVRenderTextureFormats::isUncompressedTextureFormat(formatInternal)) { + 
m_Backend->CreateTextureStorage2D(m_TextureHandle, m_TexTarget, inLevels, formatInternal, width, height); m_Immutable = true; diff --git a/src/render/backends/gl/Qt3DSOpenGLUtil.h b/src/render/backends/gl/Qt3DSOpenGLUtil.h index 47f4230..bf877fb 100644 --- a/src/render/backends/gl/Qt3DSOpenGLUtil.h +++ b/src/render/backends/gl/Qt3DSOpenGLUtil.h @@ -969,6 +969,7 @@ namespace render { outInternalFormat = GL_RG8; outDataType = GL_UNSIGNED_BYTE; return true; + case NVRenderTextureFormats::RGBE8: case NVRenderTextureFormats::RGBA8: outFormat = GL_RGBA; outInternalFormat = GL_RGBA8; @@ -1482,6 +1483,7 @@ namespace render { return GL_R32UI; case NVRenderTextureFormats::R32F: return GL_R32F; + case NVRenderTextureFormats::RGBE8: case NVRenderTextureFormats::RGBA8: return GL_RGBA8; case NVRenderTextureFormats::SRGB8A8: diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp b/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp index 20b0738..3d0781d 100644 --- a/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp +++ b/src/runtimerender/resourcemanager/Qt3DSRenderBufferManager.cpp @@ -463,10 +463,12 @@ struct SBufferManager : public IBufferManager if (inLoadedImage.data) { qt3ds::render::NVRenderTextureFormats::Enum destFormat = inLoadedImage.format; if (inBsdfMipmaps) { - if (m_Context->GetRenderContextType() == render::NVRenderContextValues::GLES2) - destFormat = qt3ds::render::NVRenderTextureFormats::RGBA8; - else - destFormat = qt3ds::render::NVRenderTextureFormats::RGBA16F; + if (inLoadedImage.format != NVRenderTextureFormats::RGBE8) { + if (m_Context->GetRenderContextType() == render::NVRenderContextValues::GLES2) + destFormat = qt3ds::render::NVRenderTextureFormats::RGBA8; + else + destFormat = qt3ds::render::NVRenderTextureFormats::RGBA16F; + } } else { theTexture->SetTextureData( diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp b/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp 
index defff29..60e242d 100644 --- a/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp +++ b/src/runtimerender/resourcemanager/Qt3DSRenderLoadedTextureHDR.cpp @@ -145,18 +145,23 @@ static void decrunchScanline(FreeImageIO *io, fi_handle handle, RGBE *scanline, static void decodeScanlineToTexture(RGBE *scanline, int width, void *outBuf, QT3DSU32 offset, NVRenderTextureFormats::Enum inFormat) { - float rgbaF32[4]; - - for (int i = 0; i < width; ++i) { - rgbaF32[R] = convertComponent(scanline[i][E], scanline[i][R]); - rgbaF32[G] = convertComponent(scanline[i][E], scanline[i][G]); - rgbaF32[B] = convertComponent(scanline[i][E], scanline[i][B]); - rgbaF32[3] = 1.0f; - - QT3DSU8 *target = reinterpret_cast(outBuf); - target += offset; - NVRenderTextureFormats::encodeToPixel( - rgbaF32, target, i * NVRenderTextureFormats::getSizeofFormat(inFormat), inFormat); + + QT3DSU8 *target = reinterpret_cast(outBuf); + target += offset; + + if (inFormat == NVRenderTextureFormats::RGBE8) { + memcpy(target, scanline, size_t(4 * width)); + } else { + float rgbaF32[4]; + for (int i = 0; i < width; ++i) { + rgbaF32[R] = convertComponent(scanline[i][E], scanline[i][R]); + rgbaF32[G] = convertComponent(scanline[i][E], scanline[i][G]); + rgbaF32[B] = convertComponent(scanline[i][E], scanline[i][B]); + rgbaF32[3] = 1.0f; + + NVRenderTextureFormats::encodeToPixel( + rgbaF32, target, i * NVRenderTextureFormats::getSizeofFormat(inFormat), inFormat); + } } } @@ -242,14 +247,6 @@ SLoadedTexture *SLoadedTexture::LoadHDR(ISeekableIOStream &inStream, NVFoundatio { FreeImageIO theIO(inFnd.getAllocator(), inFnd); SLoadedTexture *retval = nullptr; - if (renderContextType == qt3ds::render::NVRenderContextValues::GLES2) - retval = DoLoadHDR(&theIO, &inStream, NVRenderTextureFormats::RGBA8); - else - retval = DoLoadHDR(&theIO, &inStream, NVRenderTextureFormats::RGBA16F); - - - // Let's just assume we don't support this just yet. 
- // if ( retval ) - // retval->FreeImagePostProcess( inFlipY ); + retval = DoLoadHDR(&theIO, &inStream, NVRenderTextureFormats::RGBE8); return retval; } diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp index 023964f..c9352dc 100644 --- a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp +++ b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.cpp @@ -38,6 +38,213 @@ using namespace qt3ds; using namespace qt3ds::render; using namespace qt3ds::foundation; + +struct M8E8 +{ + quint8 m; + quint8 e; + M8E8() : m(0), e(0){ + } + M8E8(const float val) { + float l2 = 1.f + floor(log2f(val)); + float mm = val / powf(2.f, l2); + m = quint8(mm * 255.f); + e = quint8(l2 + 128); + } + M8E8(const float val, quint8 exp) { + if (val <= 0) { + m = e = 0; + return; + } + float mm = val / powf(2.f, exp - 128); + m = quint8(mm * 255.f); + e = exp; + } +}; + +void NVRenderTextureFormats::decodeToFloat(void *inPtr, QT3DSU32 byteOfs, float *outPtr, + NVRenderTextureFormats::Enum inFmt) +{ + outPtr[0] = 0.0f; + outPtr[1] = 0.0f; + outPtr[2] = 0.0f; + outPtr[3] = 0.0f; + QT3DSU8 *src = reinterpret_cast(inPtr); + switch (inFmt) { + case Alpha8: + outPtr[0] = ((float)src[byteOfs]) / 255.0f; + break; + + case Luminance8: + case LuminanceAlpha8: + case R8: + case RG8: + case RGB8: + case RGBA8: + case SRGB8: + case SRGB8A8: + for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) { + float val = ((float)src[byteOfs + i]) / 255.0f; + outPtr[i] = (i < 3) ? 
powf(val, 0.4545454545f) : val; + } + break; + case RGBE8: + { + float pwd = powf(2.0f, int(src[byteOfs + 3]) - 128); + outPtr[0] = float(src[byteOfs + 0]) * pwd / 255.0; + outPtr[1] = float(src[byteOfs + 1]) * pwd / 255.0; + outPtr[2] = float(src[byteOfs + 2]) * pwd / 255.0; + outPtr[3] = 1.0f; + } break; + + case R32F: + outPtr[0] = reinterpret_cast(src + byteOfs)[0]; + break; + case RG32F: + outPtr[0] = reinterpret_cast(src + byteOfs)[0]; + outPtr[1] = reinterpret_cast(src + byteOfs)[1]; + break; + case RGBA32F: + outPtr[0] = reinterpret_cast(src + byteOfs)[0]; + outPtr[1] = reinterpret_cast(src + byteOfs)[1]; + outPtr[2] = reinterpret_cast(src + byteOfs)[2]; + outPtr[3] = reinterpret_cast(src + byteOfs)[3]; + break; + case RGB32F: + outPtr[0] = reinterpret_cast(src + byteOfs)[0]; + outPtr[1] = reinterpret_cast(src + byteOfs)[1]; + outPtr[2] = reinterpret_cast(src + byteOfs)[2]; + break; + + case R16F: + case RG16F: + case RGBA16F: + for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) { + // NOTE : This only works on the assumption that we don't have any denormals, + // Infs or NaNs. 
+ // Every pixel in our source image should be "regular" + QT3DSU16 h = reinterpret_cast(src + byteOfs)[i]; + QT3DSU32 sign = (h & 0x8000) << 16; + QT3DSU32 exponent = (((((h & 0x7c00) >> 10) - 15) + 127) << 23); + QT3DSU32 mantissa = ((h & 0x3ff) << 13); + QT3DSU32 result = sign | exponent | mantissa; + + if (h == 0 || h == 0x8000) + result = 0; + qt3ds::intrinsics::memCopy(reinterpret_cast(outPtr) + i, &result, 4); + } + break; + + case R11G11B10: + // place holder + QT3DS_ASSERT(false); + break; + + default: + outPtr[0] = 0.0f; + outPtr[1] = 0.0f; + outPtr[2] = 0.0f; + outPtr[3] = 0.0f; + break; + } +} + +void NVRenderTextureFormats::encodeToPixel(float *inPtr, void *outPtr, QT3DSU32 byteOfs, + NVRenderTextureFormats::Enum inFmt) +{ + QT3DSU8 *dest = reinterpret_cast(outPtr); + switch (inFmt) { + case NVRenderTextureFormats::Alpha8: + dest[byteOfs] = QT3DSU8(inPtr[0] * 255.0f); + break; + + case Luminance8: + case LuminanceAlpha8: + case R8: + case RG8: + case RGB8: + case RGBA8: + case SRGB8: + case SRGB8A8: + for (QT3DSU32 i = 0; i < NVRenderTextureFormats::getSizeofFormat(inFmt); ++i) { + inPtr[i] = (inPtr[i] > 1.0f) ? 
1.0f : inPtr[i]; + if (i < 3) + dest[byteOfs + i] = QT3DSU8(powf(inPtr[i], 2.2f) * 255.0f); + else + dest[byteOfs + i] = QT3DSU8(inPtr[i] * 255.0f); + } + break; + case RGBE8: + { + float max = qMax(inPtr[0], qMax(inPtr[1], inPtr[2])); + M8E8 ex(max); + M8E8 a(inPtr[0], ex.e); + M8E8 b(inPtr[1], ex.e); + M8E8 c(inPtr[2], ex.e); + quint8 *dst = reinterpret_cast(outPtr) + byteOfs; + dst[0] = a.m; + dst[1] = b.m; + dst[2] = c.m; + dst[3] = ex.e; + } break; + + case R32F: + reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; + break; + case RG32F: + reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; + reinterpret_cast(dest + byteOfs)[1] = inPtr[1]; + break; + case RGBA32F: + reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; + reinterpret_cast(dest + byteOfs)[1] = inPtr[1]; + reinterpret_cast(dest + byteOfs)[2] = inPtr[2]; + reinterpret_cast(dest + byteOfs)[3] = inPtr[3]; + break; + case RGB32F: + reinterpret_cast(dest + byteOfs)[0] = inPtr[0]; + reinterpret_cast(dest + byteOfs)[1] = inPtr[1]; + reinterpret_cast(dest + byteOfs)[2] = inPtr[2]; + break; + + case R16F: + case RG16F: + case RGBA16F: + for (QT3DSU32 i = 0; i < (NVRenderTextureFormats::getSizeofFormat(inFmt) >> 1); ++i) { + // NOTE : This also has the limitation of not handling infs, NaNs and + // denormals, but it should be sufficient for our purposes. 
+ if (inPtr[i] > 65519.0f) + inPtr[i] = 65519.0f; + if (fabs(inPtr[i]) < 6.10352E-5f) + inPtr[i] = 0.0f; + QT3DSU32 f = reinterpret_cast(inPtr)[i]; + QT3DSU32 sign = (f & 0x80000000) >> 16; + QT3DSI32 exponent = (f & 0x7f800000) >> 23; + QT3DSU32 mantissa = (f >> 13) & 0x3ff; + exponent = exponent - 112; + if (exponent > 31) + exponent = 31; + if (exponent < 0) + exponent = 0; + exponent = exponent << 10; + reinterpret_cast(dest + byteOfs)[i] = QT3DSU16(sign | exponent | mantissa); + } + break; + + case R11G11B10: + // place holder + QT3DS_ASSERT(false); + break; + + default: + dest[byteOfs] = 0; + dest[byteOfs + 1] = 0; + dest[byteOfs + 2] = 0; + dest[byteOfs + 3] = 0; + break; + } +} + Qt3DSRenderPrefilterTexture::Qt3DSRenderPrefilterTexture(NVRenderContext *inNVRenderContext, QT3DSI32 inWidth, QT3DSI32 inHeight, NVRenderTexture2D &inTexture2D, @@ -66,7 +273,7 @@ Qt3DSRenderPrefilterTexture::Create(NVRenderContext *inNVRenderContext, QT3DSI32 NVRenderTextureFormats::Enum inDestFormat, qt3ds::NVFoundationBase &inFnd) { - Qt3DSRenderPrefilterTexture *theBSDFMipMap = NULL; + Qt3DSRenderPrefilterTexture *theBSDFMipMap = nullptr; if (inNVRenderContext->IsComputeSupported()) { theBSDFMipMap = QT3DS_NEW(inFnd.getAllocator(), Qt3DSRenderPrefilterTextureCompute)( @@ -128,14 +335,13 @@ Qt3DSRenderPrefilterTextureCPU::CreateBsdfMipLevel(STextureData &inCurMipLevel, int newHeight = height >> 1; newWidth = newWidth >= 1 ? newWidth : 1; newHeight = newHeight >= 1 ? 
newHeight : 1; + const QT3DSU32 size = NVRenderTextureFormats::getSizeofFormat(inPrevMipLevel.format); if (inCurMipLevel.data) { retval = inCurMipLevel; - retval.dataSizeInBytes = - newWidth * newHeight * NVRenderTextureFormats::getSizeofFormat(inPrevMipLevel.format); + retval.dataSizeInBytes = newWidth * newHeight * size; } else { - retval.dataSizeInBytes = - newWidth * newHeight * NVRenderTextureFormats::getSizeofFormat(inPrevMipLevel.format); + retval.dataSizeInBytes = newWidth * newHeight * size; retval.format = inPrevMipLevel.format; // inLoadedImage.format; retval.data = m_Foundation.getAllocator().allocate( retval.dataSizeInBytes, "Bsdf Scaled Image Data", __FILE__, __LINE__); @@ -155,26 +361,20 @@ Qt3DSRenderPrefilterTextureCPU::CreateBsdfMipLevel(STextureData &inCurMipLevel, getWrappedCoords(sampleX, sampleY, width, height); // Cauchy filter (this is simply because it's the easiest to evaluate, and - // requires no complex - // functions). + // requires no complex functions). float filterPdf = 1.f / (1.f + float(sx * sx + sy * sy) * 2.f); // With FP HDR formats, we're not worried about intensity loss so much as // unnecessary energy gain, // whereas with LDR formats, the fear with a continuous normalization factor is - // that we'd lose - // intensity and saturation as well. - filterPdf /= (NVRenderTextureFormats::getSizeofFormat(retval.format) >= 8) - ? 4.71238898f - : 4.5403446f; - // filterPdf /= 4.5403446f; // Discrete normalization factor - // filterPdf /= 4.71238898f; // Continuous normalization factor + // that we'd lose intensity and saturation as well. + filterPdf /= (size >= 8) ? 
4.71238898f : 4.5403446f; + // filterPdf /= 4.5403446f; // Discrete normalization factor + // filterPdf /= 4.71238898f; // Continuous normalization factor float curPix[4]; - QT3DSI32 byteOffset = (sampleY * width + sampleX) - * NVRenderTextureFormats::getSizeofFormat(retval.format); + QT3DSI32 byteOffset = (sampleY * width + sampleX) * size; if (byteOffset < 0) { sampleY = height + sampleY; - byteOffset = (sampleY * width + sampleX) - * NVRenderTextureFormats::getSizeofFormat(retval.format); + byteOffset = (sampleY * width + sampleX) * size; } NVRenderTextureFormats::decodeToFloat(inPrevMipLevel.data, byteOffset, curPix, @@ -187,8 +387,7 @@ Qt3DSRenderPrefilterTextureCPU::CreateBsdfMipLevel(STextureData &inCurMipLevel, } } - QT3DSU32 newIdx = - (y * newWidth + x) * NVRenderTextureFormats::getSizeofFormat(retval.format); + QT3DSU32 newIdx = (y * newWidth + x) * size; NVRenderTextureFormats::encodeToPixel(accumVal, retval.data, newIdx, retval.format); } @@ -205,6 +404,7 @@ void Qt3DSRenderPrefilterTextureCPU::Build(void *inTextureData, QT3DSI32 inTextu m_SizeOfInternalFormat = NVRenderTextureFormats::getSizeofFormat(m_InternalFormat); m_InternalNoOfComponent = NVRenderTextureFormats::getNumberOfComponent(m_InternalFormat); + m_Texture2D.SetMaxLevel(m_MaxMipMapLevel); m_Texture2D.SetTextureData(NVDataRef((QT3DSU8 *)inTextureData, inTextureDataSize), 0, m_Width, m_Height, inFormat, m_DestinationFormat); @@ -333,7 +533,7 @@ static const char *computeUploadShader(std::string &prog, NVRenderTextureFormats return prog.c_str(); } -static const char *computeWorkShader(std::string &prog, bool binESContext) +static const char *computeWorkShader(std::string &prog, bool binESContext, bool rgbe) { if (binESContext) { prog += "#version 310 es\n" @@ -358,10 +558,32 @@ static const char *computeWorkShader(std::string &prog, bool binESContext) " sX = wrapMod( sX, width );\n" "}\n"; + if (rgbe) { + prog += "vec4 decodeRGBE(in vec4 rgbe)\n" + "{\n" + " float f = pow(2.0, 255.0 * rgbe.a 
- 128.0);\n" + " return vec4(rgbe.rgb * f, 1.0);\n" + "}\n"; + prog += "vec4 encodeRGBE(in vec4 rgba)\n" + "{\n" + " float maxMan = max(rgba.r, max(rgba.g, rgba.b));\n" + " float maxExp = 1.0 + floor(log2(maxMan));\n" + " return vec4(rgba.rgb / pow(2.0, maxExp), (maxExp + 128.0) / 255.0);\n" + "}\n"; + } + prog += "// Set workgroup layout;\n" - "layout (local_size_x = 16, local_size_y = 16) in;\n\n" + "layout (local_size_x = 16, local_size_y = 16) in;\n\n"; + if (rgbe) { + prog += + "layout (rgba8, binding = 1) readonly uniform image2D inputImage;\n\n" + "layout (rgba8, binding = 2) writeonly uniform image2D outputImage;\n\n"; + } else { + prog += "layout (rgba16f, binding = 1) readonly uniform image2D inputImage;\n\n" - "layout (rgba16f, binding = 2) writeonly uniform image2D outputImage;\n\n" + "layout (rgba16f, binding = 2) writeonly uniform image2D outputImage;\n\n"; + } + prog += "void main()\n" "{\n" " int prevWidth = int(gl_NumWorkGroups.x) << 1;\n" @@ -377,19 +599,31 @@ static const char *computeWorkShader(std::string &prog, bool binESContext) " int sampleX = sx + (int(gl_GlobalInvocationID.x) << 1);\n" " int sampleY = sy + (int(gl_GlobalInvocationID.y) << 1);\n" " getWrappedCoords(sampleX, sampleY, prevWidth, prevHeight);\n" - " if ((sampleY * prevWidth + sampleX) < 0 )\n" + " if ((sampleY * prevWidth + sampleX) < 0 )\n" " sampleY = prevHeight + sampleY;\n" " ivec2 pos = ivec2(sampleX, sampleY);\n" - " vec4 value = imageLoad(inputImage, pos);\n" - " float filterPdf = 1.0 / ( 1.0 + float(sx*sx + sy*sy)*2.0 );\n" + " vec4 value = imageLoad(inputImage, pos);\n"; + + if (rgbe) { + prog += + " value = decodeRGBE(value);\n"; + } + + prog += " float filterPdf = 1.0 / ( 1.0 + float(sx*sx + sy*sy)*2.0 );\n" " filterPdf /= 4.71238898;\n" " accumVal[0] += filterPdf * value.r;\n" - " accumVal[1] += filterPdf * value.g;\n" - " accumVal[2] += filterPdf * value.b;\n" - " accumVal[3] += filterPdf * value.a;\n" + " accumVal[1] += filterPdf * value.g;\n" + " accumVal[2] += 
filterPdf * value.b;\n" + " accumVal[3] += filterPdf * value.a;\n" " }\n" - " }\n" - " imageStore( outputImage, ivec2(gl_GlobalInvocationID.xy), accumVal );\n" + " }\n"; + + if (rgbe) { + prog += + " accumVal = encodeRGBE(accumVal);\n"; + } + + prog += " imageStore( outputImage, ivec2(gl_GlobalInvocationID.xy), accumVal );\n" "}\n"; return prog.c_str(); @@ -422,33 +656,46 @@ Qt3DSRenderPrefilterTextureCompute::Qt3DSRenderPrefilterTextureCompute( NVFoundationBase &inFnd) : Qt3DSRenderPrefilterTexture(inNVRenderContext, inWidth, inHeight, inTexture2D, inDestFormat, inFnd) - , m_BSDFProgram(NULL) - , m_UploadProgram_RGBA8(NULL) - , m_UploadProgram_RGB8(NULL) - , m_Level0Tex(NULL) + , m_BSDFProgram(nullptr) + , m_BSDF_RGBE_Program(nullptr) + , m_UploadProgram_RGBA8(nullptr) + , m_UploadProgram_RGB8(nullptr) + , m_Level0Tex(nullptr) , m_TextureCreated(false) { } Qt3DSRenderPrefilterTextureCompute::~Qt3DSRenderPrefilterTextureCompute() { - m_UploadProgram_RGB8 = NULL; - m_UploadProgram_RGBA8 = NULL; - m_BSDFProgram = NULL; - m_Level0Tex = NULL; + m_BSDF_RGBE_Program = nullptr; + m_UploadProgram_RGB8 = nullptr; + m_UploadProgram_RGBA8 = nullptr; + m_BSDFProgram = nullptr; + m_Level0Tex = nullptr; } -void Qt3DSRenderPrefilterTextureCompute::createComputeProgram(NVRenderContext *context) +NVRenderShaderProgram *Qt3DSRenderPrefilterTextureCompute::createComputeProgram( + NVRenderContext *context, NVRenderTextureFormats::Enum format) { std::string computeProg; - if (!m_BSDFProgram) { + if (!m_BSDFProgram && format != NVRenderTextureFormats::RGBE8) { m_BSDFProgram = context ->CompileComputeSource( "Compute BSDF mipmap shader", - toRef(computeWorkShader(computeProg, isGLESContext(context)))) + toRef(computeWorkShader(computeProg, isGLESContext(context), false))) + .mShader; + return m_BSDFProgram; + } + if (!m_BSDF_RGBE_Program && format == NVRenderTextureFormats::RGBE8) { + m_BSDF_RGBE_Program = context + ->CompileComputeSource( + "Compute BSDF RGBE mipmap shader", + 
toRef(computeWorkShader(computeProg, isGLESContext(context), true))) .mShader; + return m_BSDF_RGBE_Program; } + return nullptr; } NVRenderShaderProgram *Qt3DSRenderPrefilterTextureCompute::getOrCreateUploadComputeProgram( @@ -496,7 +743,7 @@ void Qt3DSRenderPrefilterTextureCompute::CreateLevel0Tex(void *inTextureData, QT theWidth = (m_Width * 3) / 4; } - if (m_Level0Tex == NULL) { + if (m_Level0Tex == nullptr) { m_Level0Tex = m_NVRenderContext->CreateTexture2D(); m_Level0Tex->SetTextureStorage(1, theWidth, m_Height, theFormat, theFormat, NVDataRef((QT3DSU8 *)inTextureData, inTextureDataSize)); @@ -510,6 +757,7 @@ void Qt3DSRenderPrefilterTextureCompute::Build(void *inTextureData, QT3DSI32 inT NVRenderTextureFormats::Enum inFormat) { bool needMipUpload = (inFormat != m_DestinationFormat); + NVRenderShaderProgram *program = nullptr; // re-upload data if (!m_TextureCreated) { m_Texture2D.SetTextureStorage( @@ -519,9 +767,9 @@ void Qt3DSRenderPrefilterTextureCompute::Build(void *inTextureData, QT3DSI32 inT m_Texture2D.addRef(); // create a compute shader (if not aloread done) which computes the BSDF mipmaps for this // texture - createComputeProgram(m_NVRenderContext); + program = createComputeProgram(m_NVRenderContext, inFormat); - if (!m_BSDFProgram) { + if (!program) { QT3DS_ASSERT(false); return; } @@ -575,19 +823,19 @@ void Qt3DSRenderPrefilterTextureCompute::Build(void *inTextureData, QT3DSI32 inT int width = m_Width >> 1; int height = m_Height >> 1; - m_NVRenderContext->SetActiveShader(m_BSDFProgram); + m_NVRenderContext->SetActiveShader(program); for (int i = 1; i <= m_MaxMipMapLevel; ++i) { theOutputImage->SetTextureLevel(i); NVRenderCachedShaderProperty theCachedOutputImage("outputImage", - *m_BSDFProgram); + *program); theCachedOutputImage.Set(theOutputImage); theInputImage->SetTextureLevel(i - 1); NVRenderCachedShaderProperty theCachedinputImage("inputImage", - *m_BSDFProgram); + *program); theCachedinputImage.Set(theInputImage); - 
m_NVRenderContext->DispatchCompute(m_BSDFProgram, width, height, 1); + m_NVRenderContext->DispatchCompute(program, width, height, 1); width = width > 2 ? width >> 1 : 1; height = height > 2 ? height >> 1 : 1; diff --git a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h index e633eb1..e646443 100644 --- a/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h +++ b/src/runtimerender/resourcemanager/Qt3DSRenderPrefilterTexture.h @@ -113,15 +113,16 @@ namespace render { NVRenderTextureFormats::Enum inFormat); NVScopedRefCounted m_BSDFProgram; + NVScopedRefCounted m_BSDF_RGBE_Program; NVScopedRefCounted m_UploadProgram_RGBA8; NVScopedRefCounted m_UploadProgram_RGB8; NVScopedRefCounted m_Level0Tex; bool m_TextureCreated; - void createComputeProgram(NVRenderContext *context); - NVRenderShaderProgram * - getOrCreateUploadComputeProgram(NVRenderContext *context, - NVRenderTextureFormats::Enum inFormat); + NVRenderShaderProgram *createComputeProgram( + NVRenderContext *context, NVRenderTextureFormats::Enum format); + NVRenderShaderProgram *getOrCreateUploadComputeProgram( + NVRenderContext *context, NVRenderTextureFormats::Enum inFormat); }; } } -- cgit v1.2.3