diff options
Diffstat (limited to 'src/Runtime/ogl-runtime/res/effectlib/Fxaa3_8.glsllib')
m--------- | src/Runtime/ogl-runtime | 0 | ||||
-rw-r--r-- | src/Runtime/ogl-runtime/res/effectlib/Fxaa3_8.glsllib | 1423 |
2 files changed, 0 insertions, 1423 deletions
diff --git a/src/Runtime/ogl-runtime b/src/Runtime/ogl-runtime new file mode 160000 +Subproject 2025912174c4cf99270b7439ec3b021e1d089ae diff --git a/src/Runtime/ogl-runtime/res/effectlib/Fxaa3_8.glsllib b/src/Runtime/ogl-runtime/res/effectlib/Fxaa3_8.glsllib deleted file mode 100644 index 620f5650..00000000 --- a/src/Runtime/ogl-runtime/res/effectlib/Fxaa3_8.glsllib +++ /dev/null @@ -1,1423 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2010 - 2014 NVIDIA Corporation. -** Copyright (C) 2017 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of Qt 3D Studio. -** -** $QT_BEGIN_LICENSE:GPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 or (at your option) any later version -** approved by the KDE Free Qt Foundation. The licenses are as published by -** the Free Software Foundation and appearing in the file LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - - -/*============================================================================ - - - NVIDIA FXAA III.8 by TIMOTHY LOTTES - - ------------------------------------------------------------------------------- - INTEGRATION CHECKLIST ------------------------------------------------------------------------------- -(1.) -In the shader source, -setup defines for the desired configuration. -Example, - - #define FXAA_PC 1 - #define FXAA_HLSL_3 1 - #define FXAA_LINEAR 1 - -(2.) -Then include this file, - - - -(3.) -Then call the FXAA pixel shader from within your desired shader, - - return FxaaPixelShader(pos, posPos, tex, rcpFrame, rcpFrameOpt); - -(4.) -Insure pass prior to FXAA outputs RGBL. -See next section. - -(5.) -Setup engine to provide "rcpFrame" and "rcpFrameOpt" constants. -Not using constants will result in a performance loss. - - // {x_} = 1.0/screenWidthInPixels - // {_y} = 1.0/screenHeightInPixels - float2 rcpFrame - - // This must be from a constant/uniform. - // {x___} = 2.0/screenWidthInPixels - // {_y__} = 2.0/screenHeightInPixels - // {__z_} = 0.5/screenWidthInPixels - // {___w} = 0.5/screenHeightInPixels - float4 rcpFrameOpt - -(6.) -Have FXAA vertex shader run as a full screen triangle, -and output "pos" and "posPos" such that inputs in the pixel shader provide, - - // {xy} = center of pixel - float2 pos, - - // {xy__} = upper left of pixel - // {__zw} = lower right of pixel - float4 posPos, - -(7.) -Insure the texture sampler used by FXAA is set to bilinear filtering. - - ------------------------------------------------------------------------------- - INTEGRATION - RGBL AND COLORSPACE ------------------------------------------------------------------------------- -FXAA3 requires RGBL as input. - -RGB should be LDR (low dynamic range). -Specifically do FXAA after tonemapping. - -RGB data as returned by a texture fetch can be linear or non-linear. -Note an "sRGB format" texture counts as linear, -because the result of a texture fetch is linear data. -Regular "RGBA8" textures in the sRGB colorspace are non-linear. -If a texture fetch results linear data the following is required, - - #define FXAA_LINEAR 1 - -Otherwise, - - #define FXAA_LINEAR 0 - -Luma must be stored in the alpha channel prior to running FXAA. -This luma value must be gamma 2.0 encoded if using FXAA_LINEAR 1. -If using FXAA_LINEAR 0, luma should match the perceptual space used for RGB. - -Example pass before FXAA where output is gamma 2.0 encoded, - - color.rgb = ToneMap(color.rgb); // linear color output - color.rgb = sqrt(color.rgb); // gamma 2.0 color output - return color; - -To use FXAA, - - color.rgb = ToneMap(color.rgb); // linear color output - color.rgb = sqrt(color.rgb); // gamma 2.0 color output - color.a = dot(color.rgb, float3(0.299, 0.587, 0.114)); // compute luma - return color; - -Another example where output is linear encoded, -say for instance writing to an sRGB formated render target, -where the render target does the conversion back to sRGB after blending, - - color.rgb = ToneMap(color.rgb); // linear color output - return color; - -To use FXAA, - - color.rgb = ToneMap(color.rgb); // linear color output - color.a = sqrt(dot(color.rgb, float3(0.299, 0.587, 0.114))); // compute luma - return color; - -Getting luma correct is required for the algorithm to work correctly. - - ------------------------------------------------------------------------------- - COMPLEX INTEGRATION ------------------------------------------------------------------------------- -Q. What if the engine is blending into RGB before wanting to run FXAA? - -A. In the last opaque pass prior to FXAA, - have the pass write out luma into alpha. - Then blend into RGB only. - FXAA should be able to run ok - assuming the blending pass did not any add aliasing. - This should be the common case for particles and common blending passes. - -============================================================================*/ - -/*============================================================================ - - INTEGRATION KNOBS - -============================================================================*/ -// -// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE). -// -// 1 = Use API. -// 0 = Don't use API. -// -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_PS3 - #define FXAA_PS3 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_360 - #define FXAA_360 0 -#endif -/*==========================================================================*/ -#ifndef FXAA_PC - // - // FXAA Quality - // The high quality PC algorithm. - // - #define FXAA_PC 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_PC_CONSOLE - // - // The console algorithm for PC is included - // for developers targeting really low spec machines. - // - #define FXAA_PC_CONSOLE 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_GLSL_120 - #define FXAA_GLSL_120 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_GLSL_130 - #define FXAA_GLSL_130 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_HLSL_3 - #define FXAA_HLSL_3 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_HLSL_4 - #define FXAA_HLSL_4 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_HLSL_5 - #define FXAA_HLSL_5 0 -#endif -/*==========================================================================*/ -#ifndef FXAA_EARLY_EXIT - // - // Controls algorithm's early exit path. - // On PS3 turning this on adds 2 cycles to the shader. - // On 360 turning this off adds 10ths of a millisecond to the shader. - // Turning this off on console will result in a more blurry image. - // So this defaults to on. - // - // 1 = On. - // 0 = Off. - // - #define FXAA_EARLY_EXIT 1 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_DISCARD - // - // Only valid for PC OpenGL currently. - // - // 1 = Use discard on pixels which don't need AA. - // For APIs which enable concurrent TEX+ROP from same surface. - // 0 = Return unchanged color on pixels which don't need AA. - // - #define FXAA_DISCARD 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_LINEAR - // - // 0 = Work in non-linear color space. - // Use this for standard 32-bit RGBA formats. - // - // 1 = Work in RGB=linear, A=non-linear luma. - // Use this for sRGB and FP16 formats. - // Works with either FXAA_ALGORITHM = 1 or 0. - // - #define FXAA_LINEAR 0 -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_FAST_PIXEL_OFFSET - // - // Used for GLSL 120 only. - // - // 1 = GL API supports fast pixel offsets - // 0 = do not use fast pixel offsets - // - #ifdef GL_EXT_gpu_shader4 - #define FXAA_FAST_PIXEL_OFFSET 1 - #endif - #ifdef GL_NV_gpu_shader5 - #define FXAA_FAST_PIXEL_OFFSET 1 - #endif - #ifdef GL_ARB_gpu_shader5 - #define FXAA_FAST_PIXEL_OFFSET 1 - #endif - #ifndef FXAA_FAST_PIXEL_OFFSET - #define FXAA_FAST_PIXEL_OFFSET 0 - #endif -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_GATHER4_ALPHA - // - // 1 = API supports gather4 on alpha channel. - // 0 = API does not support gather4 on alpha channel. - // -#ifdef GL_ES - #if __VERSION__ >= 310 - #ifdef GL_EXT_gpu_shader5 - #define FXAA_GATHER4_ALPHA 1 - #endif - #endif - #ifndef FXAA_GATHER4_ALPHA - #define FXAA_GATHER4_ALPHA 0 - #endif -#else - #if (FXAA_HLSL_5 == 1) - #define FXAA_GATHER4_ALPHA 1 - #endif - #ifdef GL_ARB_gpu_shader5 - #define FXAA_GATHER4_ALPHA 1 - #endif - #ifdef GL_NV_gpu_shader5 - #define FXAA_GATHER4_ALPHA 1 - #endif - #ifdef GL_EXT_gpu_shader5 - #define FXAA_GATHER4_ALPHA 1 - #endif - #ifndef FXAA_GATHER4_ALPHA - #define FXAA_GATHER4_ALPHA 0 - #endif -#endif -#endif - -/*============================================================================ - FXAA CONSOLE - TUNING KNOBS -============================================================================*/ -#ifndef FXAA_CONSOLE__EDGE_SHARPNESS - // - // Consoles the sharpness of edges. - // - // Due to the PS3 being ALU bound, - // there are only two safe values here: 4 and 8. - // These options use the shaders ability to a free *|/ by 4|8. - // - // 8.0 is sharper - // 4.0 is softer - // - #if 1 - #define FXAA_CONSOLE__EDGE_SHARPNESS 8.0 - #else - #define FXAA_CONSOLE__EDGE_SHARPNESS 4.0 - #endif -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_CONSOLE__EDGE_THRESHOLD - // - // The minimum amount of local contrast required to apply algorithm. - // The console setting has a different mapping than the quality setting. - // - // This only applies when FXAA_EARLY_EXIT is 1. - // - // Due to the PS3 being ALU bound, - // there are only two safe values here: 0.25 and 0.125. - // These options use the shaders ability to a free *|/ by 4|8. - // - // 0.125 leaves less aliasing, but is softer - // 0.25 leaves more aliasing, and is sharper - // - #if 1 - #define FXAA_CONSOLE__EDGE_THRESHOLD 0.125 - #else - #define FXAA_CONSOLE__EDGE_THRESHOLD 0.25 - #endif -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_CONSOLE__EDGE_THRESHOLD_MIN - // - // Trims the algorithm from processing darks. - // The console setting has a different mapping than the quality setting. - // - // This only applies when FXAA_EARLY_EXIT is 1. - // - // This does not apply to PS3. - // PS3 was simplified to avoid more shader instructions. - // - #define FXAA_CONSOLE__EDGE_THRESHOLD_MIN 0.05 -#endif - -/*============================================================================ - FXAA QUALITY - TUNING KNOBS -============================================================================*/ -#ifndef FXAA_QUALITY__EDGE_THRESHOLD - // - // The minimum amount of local contrast required to apply algorithm. - // - // 1/3 - too little - // 1/4 - low quality - // 1/6 - default - // 1/8 - high quality - // 1/16 - overkill - // - #define FXAA_QUALITY__EDGE_THRESHOLD (1.0/6.0) -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_QUALITY__EDGE_THRESHOLD_MIN - // - // Trims the algorithm from processing darks. - // - // 1/32 - visible limit - // 1/16 - high quality - // 1/12 - upper limit (default, the start of visible unfiltered edges) - // - #define FXAA_QUALITY__EDGE_THRESHOLD_MIN (1.0/12.0) -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_QUALITY__SUBPIX_CAP - // - // Insures fine detail is not completely removed. - // This partly overrides FXAA_SUBPIX_TRIM. - // - // 3/4 - default amount of filtering - // 7/8 - high amount of filtering - // 1 - no capping of filtering - // - #define FXAA_QUALITY__SUBPIX_CAP (3.0/4.0) -#endif -/*--------------------------------------------------------------------------*/ -#ifndef FXAA_QUALITY__SUBPIX_TRIM - // - // Controls removal of sub-pixel aliasing, - // - // 1/2 - low removal (sharper but more sub-pixel aliasing) - // 1/3 - medium removal - // 1/4 - default removal - // 1/8 - high removal - // 0 - complete removal (softer but less sub-pixel aliasing) - // - #define FXAA_QUALITY__SUBPIX_TRIM (1.0/4.0) -#endif - - -/*============================================================================ - - API PORTING - -============================================================================*/ -#if FXAA_GLSL_120 - // Requires, - // #version 120 - // And at least, - // #extension GL_EXT_gpu_shader4 : enable - // (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9) - #define half float - #define half2 vec2 - #define half3 vec3 - #define half4 vec4 - #define int2 ivec2 - #define float2 vec2 - #define float3 vec3 - #define float4 vec4 - #define FxaaInt2 ivec2 - #define FxaaFloat2 vec2 - #define FxaaFloat3 vec3 - #define FxaaFloat4 vec4 - #define FxaaDiscard discard - #define FxaaDot3(a, b) dot(a, b) - #define FxaaSat(x) clamp(x, 0.0, 1.0) - #define FxaaLerp(x,y,s) mix(x,y,s) - #define FxaaTex sampler2D - #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0) - #if (FXAA_FAST_PIXEL_OFFSET == 1) - #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o) - #else - #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0) - #endif - #if (FXAA_GATHER4_ALPHA == 1) - // use #extension GL_ARB_gpu_shader5 : enable - #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3) - #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3) - #endif -#endif -/*--------------------------------------------------------------------------*/ -#if FXAA_GLSL_130 - // Requires "#version 130" or better - #define half float - #define half2 vec2 - #define half3 vec3 - #define half4 vec4 - #define int2 ivec2 - #define float2 vec2 - #define float3 vec3 - #define float4 vec4 - #define FxaaInt2 ivec2 - #define FxaaFloat2 vec2 - #define FxaaFloat3 vec3 - #define FxaaFloat4 vec4 - #define FxaaDiscard discard - #define FxaaDot3(a, b) dot(a, b) - #define FxaaSat(x) clamp(x, 0.0, 1.0) - #define FxaaLerp(x,y,s) mix(x,y,s) - #define FxaaTex sampler2D - #define FxaaTexTop(t, p) textureLod(t, p, 0.0) - #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o) - #if (FXAA_GATHER4_ALPHA == 1) - // use #extension GL_ARB_gpu_shader5 : enable - #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3) - #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3) - #endif -#endif -/*--------------------------------------------------------------------------*/ -#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) - #define int2 float2 - #define FxaaInt2 float2 - #define FxaaFloat2 float2 - #define FxaaFloat3 float3 - #define FxaaFloat4 float4 - #define FxaaDiscard clip(-1) - #define FxaaDot3(a, b) dot(a, b) - #define FxaaSat(x) saturate(x) - #define FxaaLerp(x,y,s) lerp(x,y,s) - #define FxaaTex sampler2D - #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0)) - #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0)) -#endif -/*--------------------------------------------------------------------------*/ -#if FXAA_HLSL_4 - #define FxaaInt2 int2 - #define FxaaFloat2 float2 - #define FxaaFloat3 float3 - #define FxaaFloat4 float4 - #define FxaaDiscard clip(-1) - #define FxaaDot3(a, b) dot(a, b) - #define FxaaSat(x) saturate(x) - #define FxaaLerp(x,y,s) lerp(x,y,s) - struct FxaaTex { SamplerState smpl; Texture2D tex; }; - #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) - #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) -#endif -/*--------------------------------------------------------------------------*/ -#if FXAA_HLSL_5 - #define FxaaInt2 int2 - #define FxaaFloat2 float2 - #define FxaaFloat3 float3 - #define FxaaFloat4 float4 - #define FxaaDiscard clip(-1) - #define FxaaDot3(a, b) dot(a, b) - #define FxaaSat(x) saturate(x) - #define FxaaLerp(x,y,s) lerp(x,y,s) - struct FxaaTex { SamplerState smpl; Texture2D tex; }; - #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) - #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) - #define FxaaTexAlpha4(t, p, r) t.tex.GatherAlpha(t.smpl, p) - #define FxaaTexOffAlpha4(t, p, o, r) t.tex.GatherAlpha(t.smpl, p, o) -#endif - - - -/*============================================================================ - - FXAA3 CONSOLE - 360 PIXEL SHADER - ------------------------------------------------------------------------------- -Might be some optimizations left here, -as of this latest change didn't have a PIX dump to verify if TEX bound. -============================================================================*/ -#if (FXAA_360 == 1) -/*--------------------------------------------------------------------------*/ -half4 FxaaPixelShader( - // {xy} = center of pixel - float2 pos, - // {xy__} = upper left of pixel - // {__zw} = lower right of pixel - float4 posPos, - // {rgb_} = color in linear or perceptual color space - // {___a} = alpha output is junk value - FxaaTex tex, - // This must be from a constant/uniform. - // {xy} = rcpFrame not used on PC version of FXAA Console - float2 rcpFrame, - // This must be from a constant/uniform. - // {x___} = 2.0/screenWidthInPixels - // {_y__} = 2.0/screenHeightInPixels - // {__z_} = 0.5/screenWidthInPixels - // {___w} = 0.5/screenHeightInPixels - float4 rcpFrameOpt -) { -/*--------------------------------------------------------------------------*/ - half4 lumaNwNeSwSe; - lumaNwNeSwSe.x = FxaaTexTop(tex, posPos.xy).w; - lumaNwNeSwSe.y = FxaaTexTop(tex, posPos.zy).w; - lumaNwNeSwSe.z = FxaaTexTop(tex, posPos.xw).w; - lumaNwNeSwSe.w = FxaaTexTop(tex, posPos.zw).w; -/*--------------------------------------------------------------------------*/ - half4 rgbyM = FxaaTexTop(tex, pos.xy); -/*--------------------------------------------------------------------------*/ - lumaNwNeSwSe.y += 1.0/384.0; -/*--------------------------------------------------------------------------*/ - half2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw); - half2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw); -/*--------------------------------------------------------------------------*/ - half lumaMin = min(lumaMinTemp.x, lumaMinTemp.y); - half lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y); -/*--------------------------------------------------------------------------*/ - half lumaMinM = min(lumaMin, rgbyM.w); - half lumaMaxM = max(lumaMax, rgbyM.w); - if((lumaMaxM - lumaMinM) < max(FXAA_CONSOLE__EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE__EDGE_THRESHOLD)) - #if (FXAA_DISCARD == 1) - FxaaDiscard; - #else - return rgbyM; - #endif -/*--------------------------------------------------------------------------*/ - half2 dir; - dir.x = dot(lumaNwNeSwSe, float4(-1.0, -1.0, 1.0, 1.0)); - dir.y = dot(lumaNwNeSwSe, float4( 1.0, -1.0, 1.0,-1.0)); -/*--------------------------------------------------------------------------*/ - half2 dir1; - dir1 = normalize(dir.xy); -/*--------------------------------------------------------------------------*/ - half dirAbsMinTimesC = min(abs(dir1.x), abs(dir1.y)) * FXAA_CONSOLE__EDGE_SHARPNESS; - half2 dir2; - dir2 = clamp(dir1.xy / dirAbsMinTimesC, -2.0, 2.0); -/*--------------------------------------------------------------------------*/ - half4 rgbyN1 = FxaaTexTop(tex, pos.xy - dir1 * rcpFrameOpt.zw); - half4 rgbyP1 = FxaaTexTop(tex, pos.xy + dir1 * rcpFrameOpt.zw); - half4 rgbyN2 = FxaaTexTop(tex, pos.xy - dir2 * rcpFrameOpt.xy); - half4 rgbyP2 = FxaaTexTop(tex, pos.xy + dir2 * rcpFrameOpt.xy); -/*--------------------------------------------------------------------------*/ - half4 rgbyA = rgbyN1 * 0.5 + rgbyP1 * 0.5; - half4 rgbyB = rgbyN2 * 0.25 + rgbyP2 * 0.25 + rgbyA * 0.5; -/*--------------------------------------------------------------------------*/ - bool twoTap = (rgbyB.w < lumaMin) || (rgbyB.w > lumaMax); - if(twoTap) rgbyB.xyz = rgbyA.xyz; - return rgbyB; -} -/*==========================================================================*/ -#endif - - - -/*============================================================================ - - FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT) - -============================================================================== -The code below does not exactly match the assembly. -I have a feeling that 12 cycles is possible, but was not able to get there. -Might have to increase register count to get full performance. -Note this shader does not use perspective interpolation. - -Use the following cgc options, - - --fenable-bx2 --fastmath --fastprecision --nofloatbindings - ------------------------------------------------------------------------------- - NVSHADERPERF OUTPUT ------------------------------------------------------------------------------- -For reference and to aid in debug, output of NVShaderPerf should match this, - -Shader to schedule: - 0: texpkb h0.w(TRUE), v5.zyxx, #0 - 2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x - 4: texpkb h0.w(TRUE), v5.xwxx, #0 - 6: addh h0.z(TRUE), -h2, h0.w - 7: texpkb h1.w(TRUE), v5, #0 - 9: addh h0.x(TRUE), h0.z, -h1.w - 10: addh h3.w(TRUE), h0.z, h1 - 11: texpkb h2.w(TRUE), v5.zwzz, #0 - 13: addh h0.z(TRUE), h3.w, -h2.w - 14: addh h0.x(TRUE), h2.w, h0 - 15: nrmh h1.xz(TRUE), h0_n - 16: minh_m8 h0.x(TRUE), |h1|, |h1.z| - 17: maxh h4.w(TRUE), h0, h1 - 18: divx h2.xy(TRUE), h1_n.xzzw, h0_n - 19: movr r1.zw(TRUE), v4.xxxy - 20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww - 22: minh h5.w(TRUE), h0, h1 - 23: texpkb h0(TRUE), r2.xzxx, #0 - 25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1 - 27: maxh h4.x(TRUE), h2.z, h2.w - 28: texpkb h1(TRUE), r0.zwzz, #0 - 30: addh_d2 h1(TRUE), h0, h1 - 31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz - 33: texpkb h0(TRUE), r0, #0 - 35: minh h4.z(TRUE), h2, h2.w - 36: fenct TRUE - 37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz - 39: texpkb h2(TRUE), r1, #0 - 41: addh_d2 h0(TRUE), h0, h2 - 42: maxh h2.w(TRUE), h4, h4.x - 43: minh h2.x(TRUE), h5.w, h4.z - 44: addh_d2 h0(TRUE), h0, h1 - 45: slth h2.x(TRUE), h0.w, h2 - 46: sgth h2.w(TRUE), h0, h2 - 47: movh h0(TRUE), h0 - 48: addx.c0 rc(TRUE), h2, h2.w - 49: movh h0(c0.NE.x), h1 - -IPU0 ------ Simplified schedule: -------- -Pass | Unit | uOp | PC: Op ------+--------+------+------------------------- - 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; - | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; - | SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-; - | | | - 2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0; - | TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0; - | SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-; - | | | - 3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0; - | TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0; - | SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---; - | SCB1 | add | 10: ADDh h3.w, h0.---z, h1; - | | | - 4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; - | TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; - | SCB0 | add | 14: ADDh h0.x, h2.w---, h0; - | SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-; - | | | - 5 | SCT1 | mov | 15: NRMh h1.xz, h0; - | SRB | nrm | 15: NRMh h1.xz, h0; - | SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|; - | SCB1 | max | 17: MAXh h4.w, h0, h1; - | | | - 6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0; - | SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy; - | SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-; - | SCB1 | min | 22: MINh h5.w, h0, h1; - | | | - 7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0; - | TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0; - | SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---; - | SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1; - | | | - 8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0; - | TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0; - | SCB0/1 | add | 30: ADDh/2 h1, h0, h1; - | | | - 9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--; - | SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0; - | TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0; - | SCB1 | min | 35: MINh h4.z, h2, h2.--w-; - | | | - 10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--; - | SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0; - | TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0; - | SCB0/1 | add | 41: ADDh/2 h0, h0, h2; - | | | - 11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---; - | SCT1 | max | 42: MAXh h2.w, h4, h4.---x; - | SCB0/1 | add | 44: ADDh/2 h0, h0, h1; - | | | - 12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2; - | SCT1 | set | 46: SGTh h2.w, h0, h2; - | SCB0/1 | mul | 47: MOVh h0, h0; - | | | - 13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---; - | SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1; - -Pass SCT TEX SCB - 1: 0% 100% 25% - 2: 0% 100% 25% - 3: 0% 100% 50% - 4: 0% 100% 50% - 5: 0% 0% 50% - 6: 100% 0% 75% - 7: 0% 100% 75% - 8: 0% 100% 100% - 9: 0% 100% 25% - 10: 0% 100% 100% - 11: 50% 0% 100% - 12: 50% 0% 100% - 13: 25% 0% 100% - -MEAN: 17% 61% 67% - -Pass SCT0 SCT1 TEX SCB0 SCB1 - 1: 0% 0% 100% 0% 100% - 2: 0% 0% 100% 0% 100% - 3: 0% 0% 100% 100% 100% - 4: 0% 0% 100% 100% 100% - 5: 0% 0% 0% 100% 100% - 6: 100% 100% 0% 100% 100% - 7: 0% 0% 100% 100% 100% - 8: 0% 0% 100% 100% 100% - 9: 0% 0% 100% 0% 100% - 10: 0% 0% 100% 100% 100% - 11: 100% 100% 0% 100% 100% - 12: 100% 100% 0% 100% 100% - 13: 100% 0% 0% 100% 100% - -MEAN: 30% 23% 61% 76% 100% -Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5 -Results 13 cycles, 3 r regs, 923,076,923 pixels/s -============================================================================*/ -#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0) -/*--------------------------------------------------------------------------*/ -#pragma disablepc all -#pragma option O3 -#pragma option OutColorPrec=fp16 -#pragma texformat default RGBA8 -/*==========================================================================*/ -half4 FxaaPixelShader( - // {xy} = center of pixel - float2 pos, - // {xy__} = upper left of pixel - // {__zw} = lower right of pixel - float4 posPos, - // {rgb_} = color in linear or perceptual color space - // {___a} = luma in perceptual color space (not linear) - sampler2D tex, - // This must be from a constant/uniform. - // {xy} = rcpFrame not used on PS3 - float2 rcpFrame, - // This must be from a constant/uniform. - // {x___} = 2.0/screenWidthInPixels - // {_y__} = 2.0/screenHeightInPixels - // {__z_} = 0.5/screenWidthInPixels - // {___w} = 0.5/screenHeightInPixels - float4 rcpFrameOpt -) { -/*--------------------------------------------------------------------------*/ -// (1) - half4 dir; - half4 lumaNe = h4tex2Dlod(tex, half4(posPos.zy, 0, 0)); - lumaNe.w += half(1.0/512.0); - dir.x = -lumaNe.w; - dir.z = -lumaNe.w; -/*--------------------------------------------------------------------------*/ -// (2) - half4 lumaSw = h4tex2Dlod(tex, half4(posPos.xw, 0, 0)); - dir.x += lumaSw.w; - dir.z += lumaSw.w; -/*--------------------------------------------------------------------------*/ -// (3) - half4 lumaNw = h4tex2Dlod(tex, half4(posPos.xy, 0, 0)); - dir.x -= lumaNw.w; - dir.z += lumaNw.w; -/*--------------------------------------------------------------------------*/ -// (4) - half4 lumaSe = h4tex2Dlod(tex, half4(posPos.zw, 0, 0)); - dir.x += lumaSe.w; - dir.z -= lumaSe.w; -/*--------------------------------------------------------------------------*/ -// (5) - half4 dir1_pos; - dir1_pos.xy = normalize(dir.xyz).xz; - half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__EDGE_SHARPNESS); -/*--------------------------------------------------------------------------*/ -// (6) - half4 dir2_pos; - dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0)); - dir1_pos.zw = pos.xy; - dir2_pos.zw = pos.xy; - half4 temp1N; - temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw; -/*--------------------------------------------------------------------------*/ -// (7) - temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0)); - half4 rgby1; - rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw; -/*--------------------------------------------------------------------------*/ -// (8) - rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0)); - rgby1 = (temp1N + rgby1) * 0.5; -/*--------------------------------------------------------------------------*/ -// (9) - half4 temp2N; - temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy; - temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0)); -/*--------------------------------------------------------------------------*/ -// (10) - half4 rgby2; - rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy; - rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0)); - rgby2 = (temp2N + rgby2) * 0.5; -/*--------------------------------------------------------------------------*/ -// (11) - // compilier moves these scalar ops up to other cycles - half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w)); - half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w)); - rgby2 = (rgby2 + rgby1) * 0.5; -/*--------------------------------------------------------------------------*/ -// (12) - bool twoTapLt = rgby2.w < lumaMin; - bool twoTapGt = rgby2.w > lumaMax; -/*--------------------------------------------------------------------------*/ -// (13) - if(twoTapLt || twoTapGt) rgby2 = rgby1; -/*--------------------------------------------------------------------------*/ - return rgby2; } -/*==========================================================================*/ -#endif - - - -/*============================================================================ - - FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT) - -============================================================================== -The code mostly matches the assembly. -I have a feeling that 14 cycles is possible, but was not able to get there. -Might have to increase register count to get full performance. -Note this shader does not use perspective interpolation. - -Use the following cgc options, - - --fenable-bx2 --fastmath --fastprecision --nofloatbindings - ------------------------------------------------------------------------------- - NVSHADERPERF OUTPUT ------------------------------------------------------------------------------- -For reference and to aid in debug, output of NVShaderPerf should match this, - -Shader to schedule: - 0: texpkb h0.w(TRUE), v5.zyxx, #0 - 2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x - 4: texpkb h1.w(TRUE), v5.xwxx, #0 - 6: addh h0.x(TRUE), h1.w, -h2.y - 7: texpkb h2.w(TRUE), v5.zwzz, #0 - 9: minh h4.w(TRUE), h2.y, h2 - 10: maxh h5.x(TRUE), h2.y, h2.w - 11: texpkb h0.w(TRUE), v5, #0 - 13: addh h3.w(TRUE), -h0, h0.x - 14: addh h0.x(TRUE), h0.w, h0 - 15: addh h0.z(TRUE), -h2.w, h0.x - 16: addh h0.x(TRUE), h2.w, h3.w - 17: minh h5.y(TRUE), h0.w, h1.w - 18: nrmh h2.xz(TRUE), h0_n - 19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z| - 20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w - 21: movr r1.zw(TRUE), v4.xxxy - 22: maxh h2.w(TRUE), h0, h1 - 23: fenct TRUE - 24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz - 26: texpkb h0(TRUE), r0, #0 - 28: maxh h5.x(TRUE), h2.w, h5 - 29: minh h5.w(TRUE), h5.y, h4 - 30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz - 32: texpkb h2(TRUE), r1, #0 - 34: addh_d2 h2(TRUE), h0, h2 - 35: texpkb h1(TRUE), v4, #0 - 37: maxh h5.y(TRUE), h5.x, h1.w - 38: minh h4.w(TRUE), h1, h5 - 39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz - 41: texpkb h0(TRUE), r0, #0 - 43: addh_m8 h5.z(TRUE), h5.y, -h4.w - 44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz - 46: texpkb h3(TRUE), r2, #0 - 48: addh_d2 h0(TRUE), h0, h3 - 49: addh_d2 h3(TRUE), h0, h2 - 50: movh h0(TRUE), h3 - 51: slth h3.x(TRUE), h3.w, h5.w - 52: sgth h3.w(TRUE), h3, h5.x - 53: addx.c0 rc(TRUE), h3.x, h3 - 54: slth.c0 rc(TRUE), h5.z, h5 - 55: movh h0(c0.NE.w), h2 - 56: movh h0(c0.NE.x), h1 - -IPU0 ------ Simplified schedule: -------- -Pass | Unit | uOp | PC: Op ------+--------+------+------------------------- - 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; - | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; - | SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--; - | | | - 2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0; - | TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0; - | SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---; - | | | - 3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; - | TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; - | SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---; - | SCB1 | min | 9: MINh h4.w, h2.---y, h2; - | | | - 4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0; - | TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0; - | SCB0 | add | 14: ADDh h0.x, h0.w---, h0; - | SCB1 | add | 13: ADDh h3.w,-h0, h0.---x; - | | | - 5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---; - | SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-; - | SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--; - | | | - 6 | SCT1 | mov | 18: NRMh h2.xz, h0; - | SRB | nrm | 18: NRMh h2.xz, h0; - | SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|; - | | | - 7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--; - | SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy; - | SCB1 | max | 22: MAXh h2.w, h0, h1; - | | | - 8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--; - | SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0; - | TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0; - | SCB0 | max | 28: MAXh h5.x, h2.w---, h5; - | SCB1 | min | 29: MINh h5.w, h5.---y, h4; - | | | - 9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--; - | SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0; - | TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0; - | SCB0/1 | add | 34: ADDh/2 h2, h0, h2; - | | | - 10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0; - | TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0; - | SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--; - | SCB1 | min | 38: MINh h4.w, h1, h5; - | | | - 11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--; - | SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0; - | TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0; - | SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--; - | SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-; - | | | - 12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0; - | TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0; - | SCB0/1 | add | 48: ADDh/2 h0, h0, h3; - | | | - 13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2; - | SCB0/1 | mul | 50: MOVh h0, h3; - | | | - 14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---; - | SCT1 | set | 52: SGTh h3.w, h3, h5.---x; - | SCB0 | set | 54: SLThc0 rc, h5.z---, h5; - | SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3; - | | | - 15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2; - | SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1; - -Pass SCT TEX SCB - 1: 0% 100% 25% - 2: 0% 100% 25% - 3: 0% 100% 50% - 4: 0% 100% 50% - 5: 50% 0% 25% - 6: 0% 0% 25% - 7: 100% 0% 25% - 8: 0% 100% 50% - 9: 0% 100% 100% - 10: 0% 100% 50% - 11: 0% 100% 75% - 12: 0% 100% 100% - 13: 100% 0% 100% - 14: 50% 0% 50% - 15: 100% 0% 100% - -MEAN: 26% 60% 56% - -Pass SCT0 SCT1 TEX SCB0 SCB1 - 1: 0% 0% 100% 100% 0% - 2: 0% 0% 100% 100% 0% - 3: 0% 0% 100% 100% 100% - 4: 0% 0% 100% 100% 100% - 5: 100% 100% 0% 100% 0% - 6: 0% 0% 0% 0% 100% - 7: 100% 100% 0% 0% 100% - 8: 0% 0% 100% 100% 100% - 9: 0% 0% 100% 100% 100% - 10: 0% 0% 100% 100% 100% - 11: 0% 0% 100% 100% 100% - 12: 0% 0% 100% 100% 100% - 13: 100% 100% 0% 100% 100% - 14: 100% 100% 0% 100% 100% - 15: 100% 100% 0% 100% 100% - -MEAN: 33% 33% 60% 86% 80% -Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5 -Results 15 cycles, 3 r regs, 800,000,000 pixels/s -============================================================================*/ -#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1) -/*--------------------------------------------------------------------------*/ -#pragma disablepc all -#pragma option O2 -#pragma option OutColorPrec=fp16 -#pragma texformat default RGBA8 -/*==========================================================================*/ -half4 FxaaPixelShader( - // {xy} = center of pixel - float2 pos, - // {xy__} = upper left of pixel - // {__zw} = lower right of pixel - float4 posPos, - // {rgb_} = color in linear or perceptual color space - // {___a} = luma in perceptual color space (not linear) - sampler2D tex, - // This must be from a constant/uniform. - // {xy} = rcpFrame not used on PS3 - float2 rcpFrame, - // This must be from a constant/uniform. - // {x___} = 2.0/screenWidthInPixels - // {_y__} = 2.0/screenHeightInPixels - // {__z_} = 0.5/screenWidthInPixels - // {___w} = 0.5/screenHeightInPixels - float4 rcpFrameOpt -) { -/*--------------------------------------------------------------------------*/ -// (1) - half4 rgbyNe = h4tex2Dlod(tex, half4(posPos.zy, 0, 0)); - half lumaNe = rgbyNe.w + half(1.0/512.0); -/*--------------------------------------------------------------------------*/ -// (2) - half4 lumaSw = h4tex2Dlod(tex, half4(posPos.xw, 0, 0)); - half lumaSwNegNe = lumaSw.w - lumaNe; -/*--------------------------------------------------------------------------*/ -// (3) - half4 lumaNw = h4tex2Dlod(tex, half4(posPos.xy, 0, 0)); - half lumaMaxNwSw = max(lumaNw.w, lumaSw.w); - half lumaMinNwSw = min(lumaNw.w, lumaSw.w); -/*--------------------------------------------------------------------------*/ -// (4) - half4 lumaSe = h4tex2Dlod(tex, half4(posPos.zw, 0, 0)); - half dirZ = lumaNw.w + lumaSwNegNe; - half dirX = -lumaNw.w + lumaSwNegNe; -/*--------------------------------------------------------------------------*/ -// (5) - half3 dir; - dir.y = 0.0; - dir.x = lumaSe.w + dirX; - dir.z = -lumaSe.w + dirZ; - half lumaMinNeSe = min(lumaNe, lumaSe.w); -/*--------------------------------------------------------------------------*/ -// (6) - half4 dir1_pos; - dir1_pos.xy = normalize(dir).xz; - half dirAbsMinTimes8 = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__EDGE_SHARPNESS); -/*--------------------------------------------------------------------------*/ -// (7) - half4 dir2_pos; - dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimes8, half(-2.0), half(2.0)); - dir1_pos.zw = pos.xy; - dir2_pos.zw = pos.xy; - half lumaMaxNeSe = max(lumaNe, lumaSe.w); -/*--------------------------------------------------------------------------*/ -// (8) - half4 temp1N; - temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw; - temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0)); - half lumaMax = max(lumaMaxNwSw, lumaMaxNeSe); - half lumaMin = min(lumaMinNwSw, lumaMinNeSe); -/*--------------------------------------------------------------------------*/ -// (9) - half4 rgby1; - rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw; - rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0)); - rgby1 = (temp1N + rgby1) * 0.5; -/*--------------------------------------------------------------------------*/ -// (10) - half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0)); - half lumaMaxM = max(lumaMax, rgbyM.w); - half lumaMinM = min(lumaMin, rgbyM.w); -/*--------------------------------------------------------------------------*/ -// (11) - half4 temp2N; - temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy; - temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0)); - half4 rgby2; - rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy; - half lumaRangeM = (lumaMaxM - lumaMinM) / FXAA_CONSOLE__EDGE_THRESHOLD; -/*--------------------------------------------------------------------------*/ -// (12) - rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0)); - rgby2 = (temp2N + rgby2) * 0.5; -/*--------------------------------------------------------------------------*/ -// (13) - rgby2 = (rgby2 + rgby1) * 0.5; -/*--------------------------------------------------------------------------*/ -// (14) - bool twoTapLt = rgby2.w < lumaMin; - bool twoTapGt = rgby2.w > lumaMax; - bool earlyExit = lumaRangeM < lumaMax; - bool twoTap = twoTapLt || twoTapGt; -/*--------------------------------------------------------------------------*/ -// (15) - if(twoTap) rgby2 = rgby1; - if(earlyExit) rgby2 = rgbyM; -/*--------------------------------------------------------------------------*/ - return rgby2; } -/*==========================================================================*/ -#endif - - - -/*============================================================================ - - FXAA3 CONSOLE - PC PIXEL SHADER - ------------------------------------------------------------------------------- -Using a modified version of the PS3 version here to best target old hardware. -============================================================================*/ -#if (FXAA_PC_CONSOLE == 1) -/*--------------------------------------------------------------------------*/ -half4 FxaaPixelShader( - // {xy} = center of pixel - float2 pos, - // {xy__} = upper left of pixel - // {__zw} = lower right of pixel - float4 posPos, - // {rgb_} = color in linear or perceptual color space - // {___a} = alpha output is junk value - FxaaTex tex, - // This must be from a constant/uniform. - // {xy} = rcpFrame not used on PC version of FXAA Console - float2 rcpFrame, - // This must be from a constant/uniform. - // {x___} = 2.0/screenWidthInPixels - // {_y__} = 2.0/screenHeightInPixels - // {__z_} = 0.5/screenWidthInPixels - // {___w} = 0.5/screenHeightInPixels - float4 rcpFrameOpt -) { -/*--------------------------------------------------------------------------*/ - half4 dir; - dir.y = 0.0; - half4 lumaNe = FxaaTexTop(tex, posPos.zy); - lumaNe.w += half(1.0/384.0); - dir.x = -lumaNe.w; - dir.z = -lumaNe.w; -/*--------------------------------------------------------------------------*/ - half4 lumaSw = FxaaTexTop(tex, posPos.xw); - dir.x += lumaSw.w; - dir.z += lumaSw.w; -/*--------------------------------------------------------------------------*/ - half4 lumaNw = FxaaTexTop(tex, posPos.xy); - dir.x -= lumaNw.w; - dir.z += lumaNw.w; -/*--------------------------------------------------------------------------*/ - half4 lumaSe = FxaaTexTop(tex, posPos.zw); - dir.x += lumaSe.w; - dir.z -= lumaSe.w; -/*==========================================================================*/ - #if (FXAA_EARLY_EXIT == 1) - half4 rgbyM = FxaaTexTop(tex, pos.xy); -/*--------------------------------------------------------------------------*/ - half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w)); - half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w)); -/*--------------------------------------------------------------------------*/ - half lumaMinM = min(lumaMin, rgbyM.w); - half lumaMaxM = max(lumaMax, rgbyM.w); -/*--------------------------------------------------------------------------*/ - if((lumaMaxM - lumaMinM) < max(FXAA_CONSOLE__EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE__EDGE_THRESHOLD)) - #if (FXAA_DISCARD == 1) - FxaaDiscard; - #else - return rgbyM; - #endif - #endif -/*==========================================================================*/ - half4 dir1_pos; - dir1_pos.xy = normalize(dir.xyz).xz; - half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__EDGE_SHARPNESS); -/*--------------------------------------------------------------------------*/ - half4 dir2_pos; - dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0)); - dir1_pos.zw = pos.xy; - dir2_pos.zw = pos.xy; - half4 temp1N; - temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw; -/*--------------------------------------------------------------------------*/ - temp1N = FxaaTexTop(tex, temp1N.xy); - half4 rgby1; - rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw; -/*--------------------------------------------------------------------------*/ - rgby1 = FxaaTexTop(tex, rgby1.xy); - rgby1 = (temp1N + rgby1) * 0.5; -/*--------------------------------------------------------------------------*/ - half4 temp2N; - temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy; - temp2N = FxaaTexTop(tex, temp2N.xy); -/*--------------------------------------------------------------------------*/ - half4 rgby2; - rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy; - rgby2 = FxaaTexTop(tex, rgby2.xy); - rgby2 = (temp2N + rgby2) * 0.5; -/*--------------------------------------------------------------------------*/ - #if (FXAA_EARLY_EXIT == 0) - half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w)); - half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w)); - #endif - rgby2 = (rgby2 + rgby1) * 0.5; -/*--------------------------------------------------------------------------*/ - bool twoTapLt = rgby2.w < lumaMin; - bool twoTapGt = rgby2.w > lumaMax; -/*--------------------------------------------------------------------------*/ - if(twoTapLt || twoTapGt) rgby2 = rgby1; -/*--------------------------------------------------------------------------*/ - return rgby2; } -/*==========================================================================*/ -#endif - - - -/*============================================================================ - - FXAA3 QUALITY - PC - -============================================================================*/ -#if (FXAA_PC == 1) -/*--------------------------------------------------------------------------*/ -float4 FxaaPixelShader( - // {xy} = center of pixel - float2 pos, - // {xyzw} = not used on FXAA3 Quality - float4 posPos, - // {rgb_} = color in linear or perceptual color space - // {___a} = luma in perceptual color space (not linear) - FxaaTex tex, - // This must be from a constant/uniform. - // {x_} = 1.0/screenWidthInPixels - // {_y} = 1.0/screenHeightInPixels - float2 rcpFrame, - // {xyzw} = not used on FXAA3 Quality - float4 rcpFrameOpt -) { -/*--------------------------------------------------------------------------*/ - #if (FXAA_GATHER4_ALPHA == 1) - float4 luma4A = FxaaTexOffAlpha4(tex, pos.xy, FxaaInt2(-1, -1), rcpFrame.xy); - #if (FXAA_DISCARD == 0) - float4 rgbyM = FxaaTexTop(tex, pos.xy); - #endif - float4 luma4B = FxaaTexAlpha4(tex, pos.xy, rcpFrame.xy); - float lumaNE = FxaaTexOff(tex, pos.xy, FxaaInt2(1, -1), rcpFrame.xy).w; - float lumaSW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1, 1), rcpFrame.xy).w; - float lumaNW = luma4A.w; - float lumaN = luma4A.z; - float lumaW = luma4A.x; - float lumaM = luma4A.y; - float lumaE = luma4B.z; - float lumaS = luma4B.x; - float lumaSE = luma4B.y; - #else - float lumaN = FxaaTexOff(tex, pos.xy, FxaaInt2(0, -1), rcpFrame.xy).w; - float lumaW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1, 0), rcpFrame.xy).w; - float4 rgbyM = FxaaTexTop(tex, pos.xy); - float lumaE = FxaaTexOff(tex, pos.xy, FxaaInt2( 1, 0), rcpFrame.xy).w; - float lumaS = FxaaTexOff(tex, pos.xy, FxaaInt2( 0, 1), rcpFrame.xy).w; - float lumaM = rgbyM.w; - #endif -/*--------------------------------------------------------------------------*/ - float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE))); - float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE))); - float range = rangeMax - rangeMin; -/*--------------------------------------------------------------------------*/ - if(range < max(FXAA_QUALITY__EDGE_THRESHOLD_MIN, rangeMax * FXAA_QUALITY__EDGE_THRESHOLD)) - #if (FXAA_DISCARD == 1) - FxaaDiscard; - #else - return rgbyM; - #endif -/*--------------------------------------------------------------------------*/ - #if (FXAA_GATHER4_ALPHA == 0) - float lumaNW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1,-1), rcpFrame.xy).w; - float lumaNE = FxaaTexOff(tex, pos.xy, FxaaInt2( 1,-1), rcpFrame.xy).w; - float lumaSW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1, 1), rcpFrame.xy).w; - float lumaSE = FxaaTexOff(tex, pos.xy, FxaaInt2( 1, 1), rcpFrame.xy).w; - #endif -/*--------------------------------------------------------------------------*/ - #define FXAA_QUALITY__SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_QUALITY__SUBPIX_TRIM)) -/*--------------------------------------------------------------------------*/ - float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25; - float rangeL = abs(lumaL - lumaM); - float blendL = FxaaSat((rangeL / range) - FXAA_QUALITY__SUBPIX_TRIM) * FXAA_QUALITY__SUBPIX_TRIM_SCALE; - blendL = min(FXAA_QUALITY__SUBPIX_CAP, blendL); -/*--------------------------------------------------------------------------*/ - float edgeVert = - abs(lumaNW + (-2.0 * lumaN) + lumaNE) + - 2.0 * abs(lumaW + (-2.0 * lumaM) + lumaE ) + - abs(lumaSW + (-2.0 * lumaS) + lumaSE); - float edgeHorz = - abs(lumaNW + (-2.0 * lumaW) + lumaSW) + - 2.0 * abs(lumaN + (-2.0 * lumaM) + lumaS ) + - abs(lumaNE + (-2.0 * lumaE) + lumaSE); - bool horzSpan = edgeHorz >= edgeVert; -/*--------------------------------------------------------------------------*/ - float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x; - if(!horzSpan) lumaN = lumaW; - if(!horzSpan) lumaS = lumaE; - float gradientN = abs(lumaN - lumaM); - float gradientS = abs(lumaS - lumaM); - lumaN = (lumaN + lumaM) * 0.5; - lumaS = (lumaS + lumaM) * 0.5; -/*--------------------------------------------------------------------------*/ - bool pairN = gradientN >= gradientS; - if(!pairN) lumaN = lumaS; - if(!pairN) gradientN = gradientS; - if(!pairN) lengthSign *= -1.0; - float2 posN; - posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5); - posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0); -/*--------------------------------------------------------------------------*/ - #define FXAA_SEARCH_STEPS 6 - #define FXAA_SEARCH_THRESHOLD (1.0/4.0) -/*--------------------------------------------------------------------------*/ - gradientN *= FXAA_SEARCH_THRESHOLD; -/*--------------------------------------------------------------------------*/ - float2 posP = posN; - float2 offNP = horzSpan ? - FxaaFloat2(rcpFrame.x, 0.0) : - FxaaFloat2(0.0, rcpFrame.y); - float lumaEndN; - float lumaEndP; - bool doneN = false; - bool doneP = false; - posN += offNP * (-1.5); - posP += offNP * ( 1.5); - for(int i = 0; i < FXAA_SEARCH_STEPS; i++) { - lumaEndN = FxaaTexTop(tex, posN.xy).w; - lumaEndP = FxaaTexTop(tex, posP.xy).w; - bool doneN2 = abs(lumaEndN - lumaN) >= gradientN; - bool doneP2 = abs(lumaEndP - lumaN) >= gradientN; - if(doneN2 && !doneN) posN += offNP; - if(doneP2 && !doneP) posP -= offNP; - if(doneN2 && doneP2) break; - doneN = doneN2; - doneP = doneP2; - if(!doneN) posN -= offNP * 2.0; - if(!doneP) posP += offNP * 2.0; } -/*--------------------------------------------------------------------------*/ - float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y; - float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y; -/*--------------------------------------------------------------------------*/ - bool directionN = dstN < dstP; - lumaEndN = directionN ? lumaEndN : lumaEndP; -/*--------------------------------------------------------------------------*/ - if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0)) - lengthSign = 0.0; -/*--------------------------------------------------------------------------*/ - float spanLength = (dstP + dstN); - dstN = directionN ? dstN : dstP; - float subPixelOffset = 0.5 + (dstN * (-1.0/spanLength)); - subPixelOffset += blendL * (1.0/8.0); - subPixelOffset *= lengthSign; - float3 rgbF = FxaaTexTop(tex, FxaaFloat2( - pos.x + (horzSpan ? 0.0 : subPixelOffset), - pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz; -/*--------------------------------------------------------------------------*/ - #if (FXAA_LINEAR == 1) - lumaL *= lumaL; - #endif - float lumaF = dot(rgbF, float3(0.299, 0.587, 0.114)) + (1.0/(65536.0*256.0)); - float lumaB = FxaaLerp(lumaF, lumaL, blendL); - float scale = min(4.0, lumaB/lumaF); - rgbF *= scale; - return float4(rgbF, lumaM); } -/*==========================================================================*/ -#endif - - - |