1301 lines
40 KiB
HLSL
1301 lines
40 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
PostProcessAmbientOcclusionMobile.usf
|
|
=============================================================================*/
|
|
|
|
#include "Common.ush"
|
|
#include "ScreenPass.ush"
|
|
#include "PostProcessCommon.ush"
|
|
|
|
// -----------------------------------------------------------------------------------------------------------------------------
|
|
// SHADER_QUALITY 0-4
|
|
#if SHADER_QUALITY == 0
|
|
// very low
|
|
#define USE_SAMPLESET 1
|
|
#define SAMPLE_STEPS 1
|
|
#define QUAD_MESSAGE_PASSING_BLUR 0
|
|
#elif SHADER_QUALITY == 1
|
|
// low
|
|
#define USE_SAMPLESET 1
|
|
#define SAMPLE_STEPS 1
|
|
#define QUAD_MESSAGE_PASSING_BLUR 2
|
|
#elif SHADER_QUALITY == 2
|
|
// medium
|
|
#define USE_SAMPLESET 2
|
|
#define SAMPLE_STEPS 1
|
|
#define QUAD_MESSAGE_PASSING_BLUR 2
|
|
#elif SHADER_QUALITY == 3
|
|
// high
|
|
#define USE_SAMPLESET 1
|
|
#define SAMPLE_STEPS 3
|
|
#define QUAD_MESSAGE_PASSING_BLUR 0
|
|
#else // SHADER_QUALITY == 4
|
|
// very high
|
|
#define USE_SAMPLESET 3
|
|
#define SAMPLE_STEPS 3
|
|
#define QUAD_MESSAGE_PASSING_BLUR 0
|
|
#endif
|
|
|
|
#if QUAD_MESSAGE_PASSING_BLUR == 0
|
|
#define QUAD_MESSAGE_PASSING_NORMAL 0
|
|
#define QUAD_MESSAGE_PASSING_DEPTH 0
|
|
#elif QUAD_MESSAGE_PASSING_BLUR == 1
|
|
#define QUAD_MESSAGE_PASSING_NORMAL 0
|
|
#define QUAD_MESSAGE_PASSING_DEPTH 0
|
|
#elif QUAD_MESSAGE_PASSING_BLUR == 2
|
|
#define QUAD_MESSAGE_PASSING_NORMAL 1
|
|
#define QUAD_MESSAGE_PASSING_DEPTH 0
|
|
#elif QUAD_MESSAGE_PASSING_BLUR == 3
|
|
#define QUAD_MESSAGE_PASSING_NORMAL 1
|
|
#define QUAD_MESSAGE_PASSING_DEPTH 1
|
|
#endif
|
|
|
|
// 0:4 samples, 1:9 samples (only really noticable with dither usage ??)
|
|
//#define AO_UPSAMPLE_QUALITY
|
|
|
|
// full resolution is expensive, do lower quality
|
|
#define AO_SAMPLE_QUALITY 3
|
|
#define AO_UPSAMPLE_QUALITY 0
|
|
|
|
// 0: 1 point (for testing)
|
|
// 1: 3 points
|
|
// 2: more evenly spread (5 points - slightly faster, stronger effect, better with multiple levels?)
|
|
// 3: near the surface very large, softly fading out (6 points)
|
|
#if USE_SAMPLESET == 0
|
|
#define SAMPLESET_ARRAY_SIZE 1
|
|
static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
|
|
{
|
|
// one sample, for testing
|
|
float2(0.500, 0.500),
|
|
};
|
|
#elif USE_SAMPLESET == 1
|
|
#define SAMPLESET_ARRAY_SIZE 3
|
|
static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
|
|
{
|
|
// 3 points distributed on the unit disc, spiral order and distance
|
|
float2(0, -1.0f) * 0.43f,
|
|
float2(0.58f, 0.814f) * 0.7f,
|
|
float2(-0.58f, 0.814f)
|
|
};
|
|
#elif USE_SAMPLESET == 2
|
|
#define SAMPLESET_ARRAY_SIZE 5
|
|
static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
|
|
{
|
|
// 5 points distributed on a ring
|
|
float2(0.156434, 0.987688),
|
|
float2(0.987688, 0.156434)*0.9,
|
|
float2(0.453990, -0.891007)*0.8,
|
|
float2(-0.707107, -0.707107)*0.7,
|
|
float2(-0.891006, 0.453991)*0.65,
|
|
};
|
|
#else // USE_SAMPLESET == 3
|
|
#define SAMPLESET_ARRAY_SIZE 6
|
|
static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
|
|
{
|
|
// 6 points distributed on the unit disc, spiral order and distance
|
|
float2(0.000, 0.200),
|
|
float2(0.325, 0.101),
|
|
float2(0.272, -0.396),
|
|
float2(-0.385, -0.488),
|
|
float2(-0.711, 0.274),
|
|
float2(0.060, 0.900)
|
|
};
|
|
#endif // USE_SAMPLESET
|
|
|
|
|
|
// 0: classic with weighted sample, 1: don't normalize and adjust the formula to be simpler and faster - can look better and is cheaper (Alchemy like?)
|
|
#define OPTIMIZATION_O1 1
|
|
|
|
// 0:off / 1:show samples on the right side of the screen
|
|
#define DEBUG_LOOKUPS 0
|
|
|
|
// 0:off / 1:take into account scene normals in the computations
|
|
#define USE_NORMALS 1
|
|
|
|
// -----------------------------------------------------------------------------------------------------------------------------
|
|
|
|
#include "PostProcessAmbientOcclusionCommon.ush"
|
|
|
|
// -----------------------------------------------------------------------------------------------------------------------------
|
|
// GTAO
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER || HORIZONSEARCH_INTEGRAL_COMPUTE_SHADER || SPATIALFILTER_COMPUTE_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER || SPATIALFILTER_PIXEL_SHADER
|
|
|
|
float4 FadeRadiusMulAdd_FadeDistance_AttenFactor;
|
|
float4 WorldRadiusAdj_SinDeltaAngle_CosDeltaAngle_Thickness;
|
|
|
|
float4 Power_Intensity_ScreenPixelsToSearch;
|
|
float4 DepthBufferSizeAndInvSize;
|
|
float4 BufferSizeAndInvSize;
|
|
float4 ViewSizeAndInvSize;
|
|
float4 ViewRectMin;
|
|
|
|
half2 TexturePosToBufferUV(int2 TexturePos)
|
|
{
|
|
#if HORIZONSEARCH_INTEGRAL_PIXEL_SHADER || SPATIALFILTER_PIXEL_SHADER
|
|
half2 BufferUV = (TexturePos + half2(0.5f, 0.5f)) * BufferSizeAndInvSize.zw;
|
|
#else
|
|
half2 BufferUV = (TexturePos + half2(0.5f, 0.5f) + ViewRectMin.xy) * BufferSizeAndInvSize.zw;
|
|
#endif
|
|
|
|
return BufferUV;
|
|
}
|
|
|
|
half4 EncodeFloatRGBA(half v)
|
|
{
|
|
float4 enc = float4(1.0, 255.0, 65025.0, 16581375.0) * v;
|
|
half4 encValue = frac(enc);
|
|
encValue -= encValue.yzww * half4(0.0039215686275f, 0.0039215686275f, 0.0039215686275f, 0.0f);
|
|
return encValue;
|
|
}
|
|
|
|
half DecodeFloatRGBA(half4 rgba)
|
|
{
|
|
return dot(rgba, half4(1.0, 0.0039215686275f, 1.53787e-5f, 6.03086294e-8f));
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER || HORIZONSEARCH_INTEGRAL_COMPUTE_SHADER || SPATIALFILTER_COMPUTE_SHADER
|
|
|
|
const static int MAX_THREAD_GROUP_SIZE = THREADGROUP_SIZEX * THREADGROUP_SIZEY;
|
|
#define ARRAY_SIZE (MAX_THREAD_GROUP_SIZE * 2) //THREADGROUP_SIZEX and THREADGROUP_SIZEY should be as larger as possible and at least 16
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER
|
|
const static int2 DEPTH_GROUP_THREAD_OFFSET = int2(3, 3);
|
|
const static int DEPTH_THREADPOS_OFFSET = THREADGROUP_SIZEX + 6; // Spatial Filter pass needs extra 3 and HorizonSearchIntegrate pass needs extra 2
|
|
const static int MAX_DEPTH_THREADS = (THREADGROUP_SIZEX + 6) * (THREADGROUP_SIZEY + 6);
|
|
#elif HORIZONSEARCH_INTEGRAL_COMPUTE_SHADER
|
|
const static int2 DEPTH_GROUP_THREAD_OFFSET = int2(1, 1);
|
|
const static int DEPTH_THREADPOS_OFFSET = THREADGROUP_SIZEX + 2;
|
|
const static int MAX_DEPTH_THREADS = (THREADGROUP_SIZEX + 2) * (THREADGROUP_SIZEY + 2);
|
|
#elif SPATIALFILTER_COMPUTE_SHADER
|
|
const static int2 DEPTH_GROUP_THREAD_OFFSET = int2(2, 2);
|
|
const static int DEPTH_THREADPOS_OFFSET = THREADGROUP_SIZEX + 4;
|
|
const static int MAX_DEPTH_THREADS = (THREADGROUP_SIZEX + 4) * (THREADGROUP_SIZEY + 4);
|
|
#endif
|
|
|
|
groupshared half DeviceZArray[ARRAY_SIZE];
|
|
|
|
void SetZVal(half DeviceZ, int Index)
|
|
{
|
|
DeviceZArray[Index] = DeviceZ;
|
|
}
|
|
|
|
half GetDeviceZFromSharedMemory(int2 ThreadPos)
|
|
{
|
|
return DeviceZArray[ThreadPos.x + (ThreadPos.y * DEPTH_THREADPOS_OFFSET)];
|
|
}
|
|
|
|
float GetSceneDepthFromSharedMemory(int2 ThreadPos)
|
|
{
|
|
return ConvertFromDeviceZ(GetDeviceZFromSharedMemory(ThreadPos));
|
|
}
|
|
|
|
const static int2 AO_GROUP_THREAD_OFFSET = int2(2, 2);
|
|
const static int AO_THREADPOS_OFFSET = THREADGROUP_SIZEX + 4;
|
|
const static int MAX_AO_THREADS = (THREADGROUP_SIZEX + 4) * (THREADGROUP_SIZEY + 4);
|
|
|
|
groupshared half AOArray[ARRAY_SIZE];
|
|
|
|
void SetAOVal(half AO, int Index)
|
|
{
|
|
AOArray[Index] = AO;
|
|
}
|
|
|
|
half GetAOValueFromSharedMemory(int2 ThreadPos)
|
|
{
|
|
ThreadPos += AO_GROUP_THREAD_OFFSET;
|
|
return AOArray[ThreadPos.x + (ThreadPos.y * AO_THREADPOS_OFFSET)];
|
|
}
|
|
|
|
RWTexture2D<half4> OutTexture;
|
|
|
|
#endif
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER || HORIZONSEARCH_INTEGRAL_COMPUTE_SHADER || SPATIALFILTER_COMPUTE_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER || SPATIALFILTER_PIXEL_SHADER
|
|
|
|
#if SHADER_QUALITY == 0
|
|
// very low
|
|
#define GTAO_NUMTAPS 4
|
|
#define GTAO_BIASMIPLEVEL 2
|
|
#define GTAO_MAX_PIXEL_SCREEN_RADIUS 256.0f
|
|
#elif SHADER_QUALITY == 1
|
|
// low
|
|
#define GTAO_NUMTAPS 6
|
|
#define GTAO_BIASMIPLEVEL 1
|
|
#define GTAO_MAX_PIXEL_SCREEN_RADIUS 256.0f
|
|
#elif SHADER_QUALITY == 2
|
|
// medium
|
|
#define GTAO_NUMTAPS 8
|
|
#define GTAO_BIASMIPLEVEL 0
|
|
#define GTAO_MAX_PIXEL_SCREEN_RADIUS 256.0f
|
|
#elif SHADER_QUALITY == 3
|
|
// high
|
|
#define GTAO_NUMTAPS 12
|
|
#define GTAO_BIASMIPLEVEL 0
|
|
#define GTAO_MAX_PIXEL_SCREEN_RADIUS 256.0f
|
|
#else // SHADER_QUALITY == 4
|
|
// very high
|
|
#define GTAO_NUMTAPS 20
|
|
#define GTAO_BIASMIPLEVEL 0
|
|
#define GTAO_MAX_PIXEL_SCREEN_RADIUS 256.0f
|
|
#endif
|
|
|
|
const static half PI_HALF = (PI*0.5);
|
|
const static half LUTSize = 16;
|
|
|
|
Texture2D AOInputTexture;
|
|
SamplerState AOInputSampler;
|
|
Texture2D SceneDepthTexture;
|
|
SamplerState SceneDepthSampler;
|
|
Texture2D NormalTexture;
|
|
SamplerState NormalSampler;
|
|
|
|
#if PREINTEGRATED_LUT_TYPE == 2
|
|
Texture3D GTAOPreIntegrated3D;
|
|
#elif PREINTEGRATED_LUT_TYPE == 1
|
|
Texture2D GTAOPreIntegrated2D;
|
|
#endif
|
|
|
|
#if PREINTEGRATED_LUT_TYPE != 0
|
|
SamplerState GTAOPreIntegratedSampler;
|
|
#endif
|
|
|
|
half InterleavedGradientNoise(int2 TexturePos)
|
|
{
|
|
return frac(52.9829189f * frac((TexturePos.x * 0.06711056f) + (TexturePos.y * 0.00583715f)));
|
|
}
|
|
|
|
half3 GetRandomVector(int2 TexturePos)
|
|
{
|
|
TexturePos.y = 16384 - TexturePos.y;
|
|
|
|
half3 RandomVec = half3(0, 0, 0);
|
|
half3 RandomTexVec = half3(0, 0, 0);
|
|
half ScaleOffset;
|
|
|
|
const half TemporalCos = 0.8660253882f;
|
|
const half TemporalSin = 0.50f;
|
|
|
|
half GradientNoise = InterleavedGradientNoise(TexturePos);
|
|
|
|
RandomTexVec.x = cos((GradientNoise*PI));
|
|
RandomTexVec.y = sin((GradientNoise*PI));
|
|
|
|
ScaleOffset = (1.0 / 4.0) * ((TexturePos.y - TexturePos.x) & 3);
|
|
// ScaleOffset = (1.0/5.0) * (( TexturePos.y - TexturePos.x) % 5);
|
|
|
|
RandomVec.x = dot(RandomTexVec.xy, half2(TemporalCos, -TemporalSin));
|
|
RandomVec.y = dot(RandomTexVec.xy, half2(TemporalSin, TemporalCos));
|
|
RandomVec.z = frac(ScaleOffset + 0.025f);
|
|
|
|
return RandomVec;
|
|
}
|
|
|
|
half GetDeviceZFromAOInput(half2 TextureUV)
|
|
{
|
|
return Texture2DSample(SceneDepthTexture, SceneDepthSampler, TextureUV).r;
|
|
}
|
|
|
|
float GetSceneDepthFromAOInput(half2 TextureUV)
|
|
{
|
|
return ConvertFromDeviceZ(GetDeviceZFromAOInput(TextureUV));
|
|
}
|
|
|
|
float3 GetViewSpacePosFromAOInput(half2 UV)
|
|
{
|
|
float SceneDepth = GetSceneDepthFromAOInput(UV);
|
|
|
|
return ScreenToViewPos(UV, SceneDepth);
|
|
}
|
|
|
|
half3 GetNormal(half2 UV, int2 ThreadPos, float3 ViewSpacePosMid)
|
|
{
|
|
half3 ViewSpaceNormal;
|
|
|
|
#if USE_NORMALBUFFER
|
|
|
|
// Get the normal from the normal buffer
|
|
half3 WorldNormal = Texture2DSample(NormalTexture, NormalSampler, UV).xyz;
|
|
ViewSpaceNormal = normalize(mul(WorldNormal, (half3x3)ResolvedView.TranslatedWorldToView));
|
|
|
|
#else
|
|
// Get the normal derived from the depth buffer
|
|
float2 XOffset = float2(BufferSizeAndInvSize.z, 0.0f);
|
|
float2 YOffset = float2(0.0f, BufferSizeAndInvSize.w);
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_PIXEL_SHADER || SPATIALFILTER_PIXEL_SHADER
|
|
float DeviceZ = GetDeviceZFromAOInput(UV);
|
|
float DeviceZLeft = GetDeviceZFromAOInput(UV - XOffset);
|
|
float DeviceZTop = GetDeviceZFromAOInput(UV - YOffset);
|
|
float DeviceZRight = GetDeviceZFromAOInput(UV + XOffset);
|
|
float DeviceZBottom = GetDeviceZFromAOInput(UV + YOffset);
|
|
#else
|
|
int2 iXOffset = int2(1, 0);
|
|
int2 iYOffset = int2(0, 1);
|
|
int2 ThreadOffsetPos = ThreadPos + DEPTH_GROUP_THREAD_OFFSET;
|
|
float DeviceZ = GetDeviceZFromSharedMemory(ThreadOffsetPos);
|
|
float DeviceZLeft = GetDeviceZFromSharedMemory(ThreadOffsetPos - iXOffset);
|
|
float DeviceZTop = GetDeviceZFromSharedMemory(ThreadOffsetPos - iYOffset);
|
|
float DeviceZRight = GetDeviceZFromSharedMemory(ThreadOffsetPos + iXOffset);
|
|
float DeviceZBottom = GetDeviceZFromSharedMemory(ThreadOffsetPos + iYOffset);
|
|
#endif
|
|
|
|
float DeviceZDdx = TakeSmallerAbsDelta(DeviceZLeft, DeviceZ, DeviceZRight);
|
|
float DeviceZDdy = TakeSmallerAbsDelta(DeviceZTop, DeviceZ, DeviceZBottom);
|
|
|
|
float ZRight = ConvertFromDeviceZ(DeviceZ + DeviceZDdx);
|
|
float ZDown = ConvertFromDeviceZ(DeviceZ + DeviceZDdy);
|
|
|
|
float3 Right = ScreenToViewPos(UV + XOffset, ZRight) - ViewSpacePosMid;
|
|
float3 Down = ScreenToViewPos(UV + YOffset, ZDown) - ViewSpacePosMid;
|
|
|
|
ViewSpaceNormal = normalize(cross(Right, Down));
|
|
#endif
|
|
|
|
return ViewSpaceNormal;
|
|
}
|
|
|
|
// max absolute error 9.0x10^-3
|
|
// Eberly's polynomial degree 1 - respect bounds
|
|
// 4 VGPR, 12 FR (8 FR, 1 QR), 1 scalar
|
|
// input [-1, 1] and output [0, PI]
|
|
half acosFast_Half(half inX)
|
|
{
|
|
half x = abs(inX);
|
|
half res = -0.156583f * x + (0.5f * PI);
|
|
res *= sqrt(1.0f - x);
|
|
return (inX >= 0) ? res : PI - res;
|
|
}
|
|
|
|
half4 UnwrappedTexture3DSamplePoint(Texture2D Texture, SamplerState Sampler, half3 UVW, half Size)
|
|
{
|
|
// a volume texture 16x16x16 would be unwrapped to a 2d texture 256x16
|
|
|
|
half IntW = floor(UVW.z * (Size - 1) + 0.5f);
|
|
|
|
half U = (UVW.x + IntW) / Size;
|
|
half V = UVW.y;
|
|
|
|
half4 RG0 = Texture2DSample(Texture, Sampler, half2(U, V));
|
|
|
|
return RG0;
|
|
}
|
|
|
|
half2 SearchForLargestAngleDual(half2 BaseUV, half2 ScreenDir, float SearchRadius, half InitialOffset, float3 ViewPos, half3 ViewDir, float AttenFactor)
|
|
{
|
|
half OOLen, Ang, FallOff;
|
|
float3 V;
|
|
float LenSq;
|
|
float2 SceneDepths = 0;
|
|
|
|
half2 BestAng = half2(-1, -1);
|
|
half Thickness = WorldRadiusAdj_SinDeltaAngle_CosDeltaAngle_Thickness.w;
|
|
|
|
for (uint i = 0; i < GTAO_NUMTAPS; i++)
|
|
{
|
|
half fi = (half)i;
|
|
|
|
half2 UVOffset = ScreenDir * max(SearchRadius * (fi + InitialOffset), (fi + 1));
|
|
UVOffset.y *= -1;
|
|
half4 UV2 = BaseUV.xyxy + half4(UVOffset.xy, -UVOffset.xy);
|
|
|
|
// Positive Direction
|
|
SceneDepths.x = GetSceneDepthFromAOInput(UV2.xy);
|
|
SceneDepths.y = GetSceneDepthFromAOInput(UV2.zw);
|
|
|
|
V = ScreenToViewPos(UV2.xy, SceneDepths.x) - ViewPos;
|
|
LenSq = dot(V, V);
|
|
OOLen = rsqrt(LenSq + 0.0001);
|
|
Ang = dot(V, ViewDir) * OOLen;
|
|
|
|
FallOff = saturate(LenSq * AttenFactor);
|
|
Ang = lerp(Ang, BestAng.x, FallOff);
|
|
BestAng.x = (Ang > BestAng.x) ? Ang : lerp(Ang, BestAng.x, Thickness);
|
|
|
|
// Negative Direction
|
|
V = ScreenToViewPos(UV2.zw, SceneDepths.y) - ViewPos;
|
|
LenSq = dot(V, V);
|
|
OOLen = rsqrt(LenSq + 0.0001);
|
|
Ang = dot(V, ViewDir) * OOLen;
|
|
|
|
FallOff = saturate(LenSq * AttenFactor);
|
|
Ang = lerp(Ang, BestAng.y, FallOff);
|
|
BestAng.y = (Ang > BestAng.y) ? Ang : lerp(Ang, BestAng.y, Thickness);
|
|
}
|
|
|
|
#if PREINTEGRATED_LUT_TYPE == 0
|
|
BestAng.x = acosFast(clamp(BestAng.x, -1.0, 1.0));
|
|
BestAng.y = acosFast(clamp(BestAng.y, -1.0, 1.0));
|
|
#endif
|
|
|
|
return BestAng;
|
|
}
|
|
|
|
half ComputeInnerIntegral(half2 Angles, half3 ScreenDir, half3 ViewDir, half3 ViewSpaceNormal, half SceneDepth)
|
|
{
|
|
// Given the angles found in the search plane we need to project the View Space GBuffer Normal onto the plane defined by the search axis and the View Direction and perform the inner integrate
|
|
half3 PlaneNormal = normalize(cross(ScreenDir, ViewDir));
|
|
half3 Perp = cross(ViewDir, PlaneNormal);
|
|
half3 ProjNormal = ViewSpaceNormal - PlaneNormal * dot(ViewSpaceNormal, PlaneNormal);
|
|
|
|
half LenProjNormal = length(ProjNormal) + 0.000001f;
|
|
half RecipMag = 1.0f / (LenProjNormal);
|
|
|
|
half CosAng = dot(ProjNormal, Perp) * RecipMag;
|
|
|
|
#if PREINTEGRATED_LUT_TYPE == 2
|
|
|
|
half3 UVW = half3(Angles, CosAng)*0.5f + 0.5f;
|
|
half AO = (LenProjNormal) * Texture3DSample(GTAOPreIntegrated3D, GTAOPreIntegratedSampler, UVW).r;
|
|
|
|
#elif PREINTEGRATED_LUT_TYPE == 1
|
|
|
|
half3 UVW = half3(Angles, CosAng)*0.5f + 0.5f;
|
|
half AO = (LenProjNormal) * UnwrappedTexture3DSamplePoint(GTAOPreIntegrated2D, GTAOPreIntegratedSampler, UVW, LUTSize).r;
|
|
|
|
#else
|
|
|
|
half Gamma = acosFast_Half(CosAng) - PI_HALF;
|
|
half CosGamma = dot(ProjNormal, ViewDir) * RecipMag;
|
|
half SinGamma = CosAng * -2.0f;
|
|
|
|
// clamp to normal hemisphere
|
|
Angles.x = Gamma + max(-Angles.x - Gamma, -(PI_HALF));
|
|
Angles.y = Gamma + min(Angles.y - Gamma, (PI_HALF));
|
|
|
|
half AO = ((LenProjNormal) * 0.25f *
|
|
((Angles.x * SinGamma + CosGamma - cos((2.0 * Angles.x) - Gamma)) +
|
|
(Angles.y * SinGamma + CosGamma - cos((2.0 * Angles.y) - Gamma))));
|
|
|
|
#endif
|
|
|
|
return AO;
|
|
}
|
|
|
|
half CalculateGTAO(half2 TextureUV, int2 TexturePos, int2 ThreadPos)
|
|
{
|
|
TextureUV += DepthBufferSizeAndInvSize.zw*0.125;
|
|
#if HORIZONSEARCH_INTEGRAL_PIXEL_SHADER || SPATIALFILTER_PIXEL_SHADER
|
|
float SceneDepth = GetSceneDepthFromAOInput(TextureUV);
|
|
#else
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER
|
|
ThreadPos -= AO_GROUP_THREAD_OFFSET;
|
|
#endif
|
|
float SceneDepth = GetSceneDepthFromSharedMemory(ThreadPos + DEPTH_GROUP_THREAD_OFFSET);
|
|
#endif
|
|
|
|
if (SceneDepth > FadeRadiusMulAdd_FadeDistance_AttenFactor.z)
|
|
{
|
|
return 1.0f;
|
|
}
|
|
else
|
|
{
|
|
float3 ViewSpacePos = ScreenToViewPos(TextureUV, SceneDepth);
|
|
half3 ViewSpaceNormal = GetNormal(TextureUV, ThreadPos, ViewSpacePos);
|
|
half3 ViewDir = -normalize(ViewSpacePos.xyz);
|
|
|
|
float WorldRadiusAdj = WorldRadiusAdj_SinDeltaAngle_CosDeltaAngle_Thickness.x;
|
|
|
|
float PixelRadius = max(min(WorldRadiusAdj / ViewSpacePos.z, GTAO_MAX_PIXEL_SCREEN_RADIUS), (half)GTAO_NUMTAPS);
|
|
float StepRadius = PixelRadius / ((half)GTAO_NUMTAPS + 1);
|
|
float AttenFactor = FadeRadiusMulAdd_FadeDistance_AttenFactor.w;
|
|
|
|
half3 RandomAndOffset = GetRandomVector(TexturePos);
|
|
half2 RandomVec = RandomAndOffset.xy;
|
|
half Offset = RandomAndOffset.z;
|
|
|
|
half Sum = 0.0;
|
|
|
|
const uint NumAngles = 2;
|
|
|
|
half SinDeltaAngle = WorldRadiusAdj_SinDeltaAngle_CosDeltaAngle_Thickness.y;
|
|
half CosDeltaAngle = WorldRadiusAdj_SinDeltaAngle_CosDeltaAngle_Thickness.z;
|
|
|
|
half3 ScreenDir = half3(RandomVec.x, RandomVec.y, 0.0);
|
|
|
|
for (uint Angle = 0; Angle < 2; Angle++)
|
|
{
|
|
half2 Angles = SearchForLargestAngleDual(TextureUV, ScreenDir.xy * View.BufferSizeAndInvSize.zw, StepRadius,
|
|
Offset, ViewSpacePos, ViewDir, AttenFactor);
|
|
|
|
Sum += ComputeInnerIntegral(Angles, ScreenDir, ViewDir, ViewSpaceNormal, SceneDepth);
|
|
|
|
// Rotate for the next angle
|
|
half2 TempScreenDir = ScreenDir.xy;
|
|
ScreenDir.x = (TempScreenDir.x * CosDeltaAngle) + (TempScreenDir.y * -SinDeltaAngle);
|
|
ScreenDir.y = (TempScreenDir.x * SinDeltaAngle) + (TempScreenDir.y * CosDeltaAngle);
|
|
Offset = frac(Offset + 0.617);
|
|
}
|
|
|
|
half AO = Sum;
|
|
|
|
AO = AO * 0.5f;
|
|
AO *= 2.0 * 0.3183098861f;
|
|
|
|
// Fade out based on user defined distance
|
|
AO = lerp(AO, 1, saturate(SceneDepth * FadeRadiusMulAdd_FadeDistance_AttenFactor.x + FadeRadiusMulAdd_FadeDistance_AttenFactor.y));
|
|
|
|
return AO;
|
|
}
|
|
}
|
|
|
|
half2 GetDeviceZAndAO(half2 TextureUV)
|
|
{
|
|
half2 DeviceZAndAO;
|
|
half4 EncodeDeviceZAndAO = Texture2DSample(AOInputTexture, AOInputSampler, TextureUV);
|
|
DeviceZAndAO.y = EncodeDeviceZAndAO.a;
|
|
EncodeDeviceZAndAO.a = 0.0f;
|
|
DeviceZAndAO.x = DecodeFloatRGBA(EncodeDeviceZAndAO);
|
|
|
|
return DeviceZAndAO;
|
|
}
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER || HORIZONSEARCH_INTEGRAL_COMPUTE_SHADER || SPATIALFILTER_COMPUTE_SHADER
|
|
void CacheZVal(int2 FullGroupOriginDepth, uint pixIdx)
|
|
{
|
|
int2 ThreadPos;
|
|
|
|
ThreadPos.x = pixIdx % DEPTH_THREADPOS_OFFSET;
|
|
ThreadPos.y = pixIdx / DEPTH_THREADPOS_OFFSET;
|
|
|
|
int2 TexturePos = FullGroupOriginDepth + ThreadPos;
|
|
|
|
half2 TextureUV = TexturePosToBufferUV(TexturePos);
|
|
|
|
TextureUV += DepthBufferSizeAndInvSize.zw*0.125;
|
|
|
|
SetZVal(GetDeviceZFromAOInput(TextureUV.xy).r, pixIdx);
|
|
}
|
|
|
|
void CacheAOVal(int2 FullGroupOriginAO, uint pixIdx)
|
|
{
|
|
int2 ThreadPos;
|
|
|
|
ThreadPos.x = pixIdx % AO_THREADPOS_OFFSET;
|
|
ThreadPos.y = pixIdx / AO_THREADPOS_OFFSET;
|
|
|
|
int2 TexturePos = FullGroupOriginAO + ThreadPos;
|
|
|
|
half2 TextureUV = TexturePosToBufferUV(TexturePos);
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER
|
|
SetAOVal(CalculateGTAO(TextureUV.xy, TexturePos.xy, ThreadPos.xy), pixIdx);
|
|
#elif SPATIALFILTER_COMPUTE_SHADER
|
|
half2 DeviceZAndAO = GetDeviceZAndAO(TextureUV);
|
|
SetAOVal(DeviceZAndAO.y, pixIdx);
|
|
SetZVal(DeviceZAndAO.x, pixIdx);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
#if SPATIALFILTER_PIXEL_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER
|
|
half4 GTAOSpatialFilter(half2 TextureUV, int2 ThreadPos, int2 PixelPos)
|
|
#else
|
|
void GTAOSpatialFilter(int2 ThreadPos, int2 PixelPos)
|
|
#endif
|
|
{
|
|
if (any(PixelPos >= int2(ViewRectMin.xy + ViewSizeAndInvSize.xy)))
|
|
{
|
|
#if SPATIALFILTER_PIXEL_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER
|
|
return 1.0f;
|
|
#else
|
|
return;
|
|
#endif
|
|
}
|
|
|
|
half2 ZDiff;
|
|
|
|
// Get the ZDiffs array
|
|
#if SPATIALFILTER_PIXEL_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER
|
|
half ThisZ = GetDeviceZAndAO(TextureUV).x;
|
|
|
|
{
|
|
half2 X2Offset = half2(2 * BufferSizeAndInvSize.z, 0);
|
|
half2 X1Offset = half2(BufferSizeAndInvSize.z, 0);
|
|
|
|
half XM2Z = GetDeviceZAndAO(TextureUV - X2Offset).x;
|
|
half XM1Z = GetDeviceZAndAO(TextureUV - X1Offset).x;
|
|
half XP1Z = GetDeviceZAndAO(TextureUV + X1Offset).x;
|
|
half XP2Z = GetDeviceZAndAO(TextureUV + X2Offset).x;
|
|
|
|
// Get extrapolated point either side
|
|
half C1 = abs((XM1Z + (XM1Z - XM2Z)) - ThisZ);
|
|
half C2 = abs((XP1Z + (XP1Z - XP2Z)) - ThisZ);
|
|
|
|
if (C1 < C2)
|
|
{
|
|
ZDiff.x = XM1Z - XM2Z;
|
|
}
|
|
else
|
|
{
|
|
ZDiff.x = XP2Z - XP1Z;
|
|
}
|
|
}
|
|
|
|
{
|
|
half2 Y2Offset = half2(0, 2 * BufferSizeAndInvSize.w);
|
|
half2 Y1Offset = half2(0, BufferSizeAndInvSize.w);
|
|
|
|
half YM2Z = GetDeviceZAndAO(TextureUV - Y2Offset).x;
|
|
half YM1Z = GetDeviceZAndAO(TextureUV - Y1Offset).x;
|
|
half YP1Z = GetDeviceZAndAO(TextureUV + Y1Offset).x;
|
|
half YP2Z = GetDeviceZAndAO(TextureUV + Y2Offset).x;
|
|
|
|
// Get extrapolated point either side
|
|
half C1 = abs((YM1Z + (YM1Z - YM2Z)) - ThisZ);
|
|
half C2 = abs((YP1Z + (YP1Z - YP2Z)) - ThisZ);
|
|
|
|
if (C1 < C2)
|
|
{
|
|
ZDiff.y = YM1Z - YM2Z;
|
|
}
|
|
else
|
|
{
|
|
ZDiff.y = YP2Z - YP1Z;
|
|
}
|
|
}
|
|
#else
|
|
int2 ThreadOffsetPos = ThreadPos + DEPTH_GROUP_THREAD_OFFSET;
|
|
half ThisZ = GetDeviceZFromSharedMemory(ThreadOffsetPos);
|
|
|
|
{
|
|
int2 X2Offset = int2(2, 0);
|
|
int2 X1Offset = int2(1, 0);
|
|
|
|
half XM2Z = GetDeviceZFromSharedMemory(ThreadOffsetPos - X2Offset);
|
|
half XM1Z = GetDeviceZFromSharedMemory(ThreadOffsetPos - X1Offset);
|
|
half XP1Z = GetDeviceZFromSharedMemory(ThreadOffsetPos + X1Offset);
|
|
half XP2Z = GetDeviceZFromSharedMemory(ThreadOffsetPos + X2Offset);
|
|
|
|
// Get extrapolated point either side
|
|
half C1 = abs((XM1Z + (XM1Z - XM2Z)) - ThisZ);
|
|
half C2 = abs((XP1Z + (XP1Z - XP2Z)) - ThisZ);
|
|
|
|
if (C1 < C2)
|
|
{
|
|
ZDiff.x = XM1Z - XM2Z;
|
|
}
|
|
else
|
|
{
|
|
ZDiff.x = XP2Z - XP1Z;
|
|
}
|
|
}
|
|
|
|
{
|
|
int2 Y2Offset = int2(0, 2);
|
|
int2 Y1Offset = int2(0, 1);
|
|
|
|
half YM2Z = GetDeviceZFromSharedMemory(ThreadOffsetPos - Y2Offset);
|
|
half YM1Z = GetDeviceZFromSharedMemory(ThreadOffsetPos - Y1Offset);
|
|
half YP1Z = GetDeviceZFromSharedMemory(ThreadOffsetPos + Y1Offset);
|
|
half YP2Z = GetDeviceZFromSharedMemory(ThreadOffsetPos + Y2Offset);
|
|
|
|
// Get extrapolated point either side
|
|
half C1 = abs((YM1Z + (YM1Z - YM2Z)) - ThisZ);
|
|
half C2 = abs((YP1Z + (YP1Z - YP2Z)) - ThisZ);
|
|
|
|
if (C1 < C2)
|
|
{
|
|
ZDiff.y = YM1Z - YM2Z;
|
|
}
|
|
else
|
|
{
|
|
ZDiff.y = YP2Z - YP1Z;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
half SumAO = 0;
|
|
half SumWeight = 0;
|
|
|
|
int x, y;
|
|
|
|
// Get the Z Value to compare against
|
|
|
|
half DepthBase = ThisZ - (ZDiff.x * 2) - (ZDiff.y * 2);
|
|
|
|
for (y = -2; y <= 2; y++)
|
|
{
|
|
half PlaneZ = DepthBase;
|
|
|
|
for (x = -2; x <= 2; x++)
|
|
{
|
|
// Get value and see how much it compares to the centre with the gradients
|
|
half XDiff = abs(x);
|
|
|
|
#if SPATIALFILTER_PIXEL_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER
|
|
half2 CurrentTextureUV = TextureUV + half2(x, y) * BufferSizeAndInvSize.zw;
|
|
half2 SampleZAndAO = GetDeviceZAndAO(CurrentTextureUV);
|
|
#else
|
|
int2 SamplePos = ThreadPos + int2(x, y);
|
|
half2 SampleZAndAO;
|
|
SampleZAndAO.y = GetAOValueFromSharedMemory(SamplePos);
|
|
SampleZAndAO.x = GetDeviceZFromSharedMemory(SamplePos + DEPTH_GROUP_THREAD_OFFSET);
|
|
#endif
|
|
half Weight = 1.0f;
|
|
// if ((x == 0) && (y == 0)) //Need do profile to see whether disble branch is more efficent
|
|
// {
|
|
// Weight = 1.0f;
|
|
// }
|
|
// else
|
|
{
|
|
// Get the bilateral weight. This is a function of the difference in height between the plane equation and the base depth
|
|
// Compare the Z at this sample with the gradients
|
|
half SampleZDiff = abs(PlaneZ - SampleZAndAO.x);
|
|
|
|
Weight = 1.0f - saturate(SampleZDiff*1000.0f);
|
|
}
|
|
|
|
SumAO += SampleZAndAO.y * Weight;
|
|
SumWeight += Weight;
|
|
|
|
PlaneZ += ZDiff.x;
|
|
}
|
|
DepthBase += ZDiff.y;
|
|
}
|
|
SumAO /= SumWeight;
|
|
|
|
SumAO *= (PI * 0.5f);
|
|
|
|
// user adjust AO
|
|
half AmbientOcclusionIntensity = Power_Intensity_ScreenPixelsToSearch.y;
|
|
half AmbientOcclusionPower = Power_Intensity_ScreenPixelsToSearch.x;
|
|
SumAO = 1 - (1 - pow(abs(SumAO), AmbientOcclusionPower)) * AmbientOcclusionIntensity;
|
|
|
|
#if SPATIALFILTER_PIXEL_SHADER || HORIZONSEARCH_INTEGRAL_PIXEL_SHADER
|
|
return SumAO;
|
|
#else
|
|
OutTexture[PixelPos + ViewRectMin.xy] = SumAO;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_SPATIALFILTER_COMPUTE_SHADER
|
|
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
|
|
void GTAOHorizonSearchIntegralSpatialFilterCS(
|
|
int GroupIndex : SV_GroupIndex,
|
|
uint2 GroupId : SV_GroupID,
|
|
uint2 DispatchThreadId : SV_DispatchThreadID,
|
|
uint2 GroupThreadId : SV_GroupThreadID
|
|
)
|
|
{
|
|
int2 FullGroupOrigin = int2(GroupId.x * THREADGROUP_SIZEX, GroupId.y * THREADGROUP_SIZEY);
|
|
uint pixIdx = GroupIndex;
|
|
|
|
// Cache SceneDepth in Group Shared memory for calculating the normal from depth.
|
|
{
|
|
int2 FullGroupOriginDepth = FullGroupOrigin.xy - DEPTH_GROUP_THREAD_OFFSET;
|
|
|
|
pixIdx = GroupIndex * 2;
|
|
if (pixIdx < MAX_DEPTH_THREADS)
|
|
{
|
|
CacheZVal(FullGroupOriginDepth, pixIdx);
|
|
|
|
CacheZVal(FullGroupOriginDepth, pixIdx + 1);
|
|
}
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
|
|
// Cache AO in shared group memory for spatial filter
|
|
{
|
|
int2 FullGroupOriginAO = FullGroupOrigin.xy - AO_GROUP_THREAD_OFFSET;
|
|
|
|
pixIdx = GroupIndex * 2;
|
|
|
|
if (pixIdx < MAX_AO_THREADS)
|
|
{
|
|
CacheAOVal(FullGroupOriginAO, pixIdx);
|
|
|
|
CacheAOVal(FullGroupOriginAO, pixIdx + 1);
|
|
}
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
|
|
GTAOSpatialFilter(GroupThreadId, DispatchThreadId);
|
|
}
|
|
#endif
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_COMPUTE_SHADER
|
|
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
|
|
void GTAOHorizonSearchIntegralCS(
|
|
int GroupIndex : SV_GroupIndex,
|
|
uint2 GroupId : SV_GroupID,
|
|
uint2 DispatchThreadId : SV_DispatchThreadID,
|
|
uint2 GroupThreadId : SV_GroupThreadID
|
|
)
|
|
{
|
|
int2 FullGroupOrigin = int2(GroupId.x * THREADGROUP_SIZEX, GroupId.y * THREADGROUP_SIZEY);
|
|
int pixIdx = GroupIndex;
|
|
|
|
// Cache SceneDepth in Group Shared memory for calculating the normal from depth.
|
|
{
|
|
int2 FullGroupOriginDepth = FullGroupOrigin.xy - DEPTH_GROUP_THREAD_OFFSET;
|
|
|
|
pixIdx = GroupIndex * 2;
|
|
if (pixIdx < MAX_DEPTH_THREADS)
|
|
{
|
|
CacheZVal(FullGroupOriginDepth, pixIdx);
|
|
|
|
CacheZVal(FullGroupOriginDepth, pixIdx + 1);
|
|
}
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
|
|
int2 ThreadPos = int2(GroupThreadId);
|
|
int2 TexturePos = int2(DispatchThreadId);
|
|
|
|
half2 TextureUV = TexturePosToBufferUV(TexturePos);
|
|
|
|
half GTAO = CalculateGTAO(TextureUV.xy, TexturePos.xy, ThreadPos.xy);
|
|
half DeviceZ = GetDeviceZFromSharedMemory(ThreadPos + DEPTH_GROUP_THREAD_OFFSET);
|
|
|
|
half4 EncodeZ = EncodeFloatRGBA(DeviceZ);
|
|
EncodeZ.a = GTAO;
|
|
OutTexture[TexturePos + ViewRectMin.xy] = EncodeZ;
|
|
}
|
|
#endif
|
|
|
|
#if SPATIALFILTER_COMPUTE_SHADER
|
|
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
|
|
void GTAOSpatialFilterCS(
|
|
int GroupIndex : SV_GroupIndex,
|
|
uint2 GroupId : SV_GroupID,
|
|
uint2 DispatchThreadId : SV_DispatchThreadID,
|
|
uint2 GroupThreadId : SV_GroupThreadID)
|
|
{
|
|
int2 FullGroupOrigin = int2(GroupId.x * THREADGROUP_SIZEX, GroupId.y * THREADGROUP_SIZEY);
|
|
uint pixIdx = GroupIndex;
|
|
|
|
// Cache AO and DeviceZ in shared group memory for spatial filter
|
|
{
|
|
int2 FullGroupOriginAO = FullGroupOrigin.xy - AO_GROUP_THREAD_OFFSET;
|
|
|
|
pixIdx = GroupIndex * 2;
|
|
|
|
if (pixIdx < MAX_AO_THREADS)
|
|
{
|
|
CacheAOVal(FullGroupOriginAO, pixIdx);
|
|
|
|
CacheAOVal(FullGroupOriginAO, pixIdx + 1);
|
|
}
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
|
|
GTAOSpatialFilter(GroupThreadId, DispatchThreadId);
|
|
}
|
|
#endif
|
|
|
|
#if HORIZONSEARCH_INTEGRAL_PIXEL_SHADER
|
|
void GTAOHorizonSearchIntegralPS(
|
|
float4 InUVPos : TEXCOORD0,
|
|
in float4 SvPosition : SV_Position,
|
|
out HALF4_TYPE OutColor : SV_Target0
|
|
)
|
|
{
|
|
ResolvedView = ResolveView();
|
|
|
|
int2 TexturePos = SvPosition.xy - 0.5f;
|
|
half2 TextureUV = TexturePosToBufferUV(TexturePos);
|
|
|
|
half GTAO = CalculateGTAO(TextureUV, TexturePos, 0);
|
|
TextureUV += DepthBufferSizeAndInvSize.zw*0.125;
|
|
OutColor = EncodeFloatRGBA(GetDeviceZFromAOInput(TextureUV));
|
|
OutColor.a = GTAO;
|
|
}
|
|
#endif
|
|
|
|
#if SPATIALFILTER_PIXEL_SHADER
|
|
void GTAOSpatialFilterPS(
|
|
float4 InUVPos : TEXCOORD0,
|
|
in float4 SvPosition : SV_Position,
|
|
out HALF4_TYPE OutColor : SV_Target0)
|
|
{
|
|
ResolvedView = ResolveView();
|
|
|
|
int2 TexturePos = SvPosition.xy - 0.5f;
|
|
half2 TextureUV = TexturePosToBufferUV(TexturePos);
|
|
|
|
OutColor = GTAOSpatialFilter(TextureUV, 0, TexturePos);
|
|
}
|
|
#endif
|
|
|
|
// --------------------------------------------------------------------------------------------------------------------
|
|
// SSAO
|
|
|
|
float3 PackSceneDepth(float InSceneDepth)
|
|
{
|
|
return UnpackRGBA8(PackR24F(InSceneDepth)).rgb;
|
|
}
|
|
|
|
float UnpackSceneDepth(float3 InPackedSceneDepth)
|
|
{
|
|
return UnpackR24F(PackRGBA8(float4(InPackedSceneDepth, 0)));
|
|
}
|
|
|
|
#if SSAO
|
|
|
|
void MainPSandCS(in float4 UVAndScreenPos, float4 SvPosition, out float4 OutColor)
|
|
{
|
|
OutColor = 0;
|
|
|
|
// the following constants as set up on C++ side
|
|
float AmbientOcclusionPower = ScreenSpaceAOParams[0].x;
|
|
float Ratio = ScreenSpaceAOParams[1].w;
|
|
float AORadiusInShader = ScreenSpaceAOParams[1].z;
|
|
float InvAmbientOcclusionDistance = ScreenSpaceAOParams[0].z;
|
|
float AmbientOcclusionIntensity = ScreenSpaceAOParams[0].w;
|
|
float2 ViewportUVToRandomUV = ScreenSpaceAOParams[1].xy;
|
|
float AmbientOcclusionBias = ScreenSpaceAOParams[0].y;
|
|
float ScaleFactor = ScreenSpaceAOParams[2].x;
|
|
float ScaleRadiusInWorldSpace = ScreenSpaceAOParams[2].z;
|
|
|
|
float2 UV = UVAndScreenPos.xy;
|
|
float2 ScreenPos = UVAndScreenPos.zw;
|
|
|
|
float InvTanHalfFov = ScreenSpaceAOParams[3].w;
|
|
float3 FovFix = float3(InvTanHalfFov, Ratio * InvTanHalfFov, 1);
|
|
float3 InvFovFix = 1.0f / FovFix;
|
|
|
|
float4 ModifiedSvPosition = float4(SvPosition.xy * SSAO_SvPositionScaleBias.xx + SSAO_SvPositionScaleBias.yy, SvPosition.zw);
|
|
|
|
float SceneDepth = GetDepthFromAOInput(UV);
|
|
float3 WorldNormal = GetWorldSpaceNormalFromAOInput(UV, ModifiedSvPosition);
|
|
|
|
// can be NaN if WorldNormal=0,0,0 which happens when !USE_NORMALS
|
|
float3 ViewSpaceNormal = normalize(mul(WorldNormal, (float3x3)View.TranslatedWorldToView));
|
|
|
|
float3 ViewSpacePosition = ReconstructCSPos(SceneDepth, ScreenPos);
|
|
|
|
float ActualAORadius = AORadiusInShader * lerp(SceneDepth, 1, ScaleRadiusInWorldSpace);
|
|
|
|
// Add bias after fixup (causes minor banding - not needed with larger radius)
|
|
if (USE_NORMALS)
|
|
{
|
|
ViewSpacePosition += AmbientOcclusionBias * SceneDepth * ScaleFactor * (ViewSpaceNormal * FovFix);
|
|
}
|
|
|
|
float2 WeightAccumulator = 0.0001f;
|
|
|
|
#if AO_SAMPLE_QUALITY != 0
|
|
// no SSAO in this pass, only upsampling
|
|
|
|
#if AO_SAMPLE_QUALITY == 1
|
|
// no 4x4 randomization
|
|
float2 RandomVec = float2(0, 1) * ActualAORadius;
|
|
{
|
|
#elif AO_SAMPLE_QUALITY == 2
|
|
// extract one of 16 base vectors (rotation and scale) from a texture that repeats 4x4
|
|
float2 RandomVec = (Texture2DSample(RandomNormalTexture, RandomNormalTextureSampler, UV * ViewportUVToRandomUV).rg * 2 - 1) * ActualAORadius;
|
|
{
|
|
#else // AO_SAMPLE_QUALITY == 3
|
|
// extract one of 16 base vectors (rotation and scale) from a texture that repeats 4x4, changing over time if TemporalAA is enabled
|
|
|
|
// jitter each frame a bit to get higher quality over multiple frames (only if TemporalAA is enabled), can cause ghosting effects
|
|
const float2 TemporalOffset = ScreenSpaceAOParams[3].xy;
|
|
|
|
// if the feature is enabled and right side of screen
|
|
const bool bDebugLookups = DEBUG_LOOKUPS && ViewSpacePosition.x > 0;
|
|
|
|
float2 RandomVec = (Texture2DSample(RandomNormalTexture, RandomNormalTextureSampler, TemporalOffset + UV * ViewportUVToRandomUV).rg * 2 - 1) * ActualAORadius;
|
|
{
|
|
#endif // AO_SAMPLE_QUALITY ==
|
|
|
|
if(bDebugLookups && ViewSpacePosition.y > 0)
|
|
{
|
|
// top sample are not per pixel rotated
|
|
RandomVec = float2(0, 1) * ActualAORadius;
|
|
}
|
|
|
|
float2 FovFixXY = FovFix.xy * (1.0f / ViewSpacePosition.z);
|
|
float4 RandomBase = float4(RandomVec, -RandomVec.y, RandomVec.x) * float4(FovFixXY, FovFixXY);
|
|
|
|
float2 ScreenSpacePos = ViewSpacePosition.xy / ViewSpacePosition.z;
|
|
|
|
// to debug the input depth
|
|
// OutColor = GetDepthForSSAO(ScreenSpacePos, 0); return;
|
|
// to debug the reconstructed normal
|
|
// OutColor = ReconstructedViewSpaceNormal.z; return;
|
|
|
|
// .x means for very anisotropic viewports we scale by x
|
|
float InvHaloSize = 1.0f / (ActualAORadius * FovFixXY.x * 2);
|
|
|
|
float3 ScaledViewSpaceNormal = ViewSpaceNormal;
|
|
|
|
#if OPTIMIZATION_O1
|
|
ScaledViewSpaceNormal *= 0.08f * lerp(SceneDepth, 1000, ScaleRadiusInWorldSpace);
|
|
#endif
|
|
|
|
UNROLL for(int i = 0; i < SAMPLESET_ARRAY_SIZE; ++i)
|
|
{
|
|
// -1..1
|
|
float2 UnrotatedRandom = OcclusionSamplesOffsets[i].xy;
|
|
|
|
float2 LocalRandom = (UnrotatedRandom.x * RandomBase.xy + UnrotatedRandom.y * RandomBase.zw);
|
|
|
|
if (bDebugLookups)
|
|
{
|
|
UNROLL for(uint step = 0; step < SAMPLE_STEPS; ++step)
|
|
{
|
|
float Scale = (step + 1) / (float)SAMPLE_STEPS;
|
|
float MipLevel = ComputeMipLevel(i, step);
|
|
float2 ScaledLocalRandom = Scale * LocalRandom;
|
|
|
|
WeightAccumulator += float2(ComputeSampleDebugMask(ScreenSpacePos + ScaledLocalRandom, MipLevel), 1.0f);
|
|
WeightAccumulator += float2(ComputeSampleDebugMask(ScreenSpacePos - ScaledLocalRandom, MipLevel), 1.0f);
|
|
}
|
|
}
|
|
else if (USE_NORMALS)
|
|
{
|
|
float3 LocalAccumulator = 0;
|
|
|
|
UNROLL for(uint step = 0; step < SAMPLE_STEPS; ++step)
|
|
{
|
|
// constant at run time
|
|
float Scale = (step + 1) / (float)SAMPLE_STEPS;
|
|
// constant at run time (higher is better for texture cache / performance, lower is better quality
|
|
float MipLevel = ComputeMipLevel(i, step);
|
|
|
|
float3 StepSample = WedgeWithNormal(ScreenSpacePos, Scale * LocalRandom, InvFovFix, ViewSpacePosition, ScaledViewSpaceNormal, InvHaloSize, MipLevel);
|
|
|
|
// combine horizon samples
|
|
LocalAccumulator = lerp(LocalAccumulator, float3(max(LocalAccumulator.xy, StepSample.xy), 1), StepSample.z);
|
|
}
|
|
|
|
// Square(): the area scales quadratic with the angle - it gets a bit darker
|
|
WeightAccumulator += float2(Square(1 - LocalAccumulator.x) * LocalAccumulator.z, LocalAccumulator.z);
|
|
WeightAccumulator += float2(Square(1 - LocalAccumulator.y) * LocalAccumulator.z, LocalAccumulator.z);
|
|
// cheaper? Could move 1 - out
|
|
// WeightAccumulator += float2(1 - LocalAccumulator.x, LocalAccumulator.y);
|
|
}
|
|
else // Case with no normals
|
|
{
|
|
float2 LocalAccumulator = 0;
|
|
|
|
UNROLL for(uint step = 0; step < SAMPLE_STEPS; ++step)
|
|
{
|
|
// constant at run time
|
|
float Scale = (step + 1) / (float)SAMPLE_STEPS;
|
|
// constant at run time (higher is better for texture cache / performance, lower is better quality
|
|
float MipLevel = ComputeMipLevel(i, step);
|
|
|
|
float2 StepSample = WedgeNoNormal(ScreenSpacePos, Scale * LocalRandom, InvFovFix, ViewSpacePosition, InvHaloSize, MipLevel);
|
|
|
|
// combine horizon samples
|
|
LocalAccumulator = lerp(LocalAccumulator, float2(max(LocalAccumulator.x, StepSample.x), 1), StepSample.y);
|
|
}
|
|
|
|
// Square(): the area scales quadratic with the angle - it gets a bit darker
|
|
WeightAccumulator += float2(Square(1 - LocalAccumulator.x) * LocalAccumulator.y, LocalAccumulator.y);
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif // #if AO_SAMPLE_QUALITY == 0
|
|
|
|
|
|
OutColor.r = WeightAccumulator.x / WeightAccumulator.y;
|
|
OutColor.gb = float2(0, 0);
|
|
|
|
if(!bDebugLookups)
|
|
{
|
|
#if COMPUTE_SHADER || FORWARD_SHADING || SHADING_PATH_MOBILE
|
|
// In compute, Input1 and Input2 are not necessarily valid.
|
|
float4 Filtered = 1;
|
|
#else
|
|
float4 Filtered = ComputeUpsampleContribution(SceneDepth, UV, WorldNormal);
|
|
#endif
|
|
// recombined result from multiple resolutions
|
|
OutColor.r = lerp(OutColor.r, Filtered.r, ComputeLerpFactor());
|
|
}
|
|
|
|
if(!bDebugLookups)
|
|
{
|
|
// full res
|
|
|
|
// soft fade out AO in the distance
|
|
{
|
|
float Mul = ScreenSpaceAOParams[4].x;
|
|
float Add = ScreenSpaceAOParams[4].y;
|
|
OutColor.r = lerp(OutColor.r, 1, saturate(SceneDepth * Mul + Add));
|
|
}
|
|
|
|
// user adjust AO
|
|
// abs() to prevent shader warning
|
|
OutColor.r = 1 - (1 - pow(abs(OutColor.r), AmbientOcclusionPower)) * AmbientOcclusionIntensity;
|
|
|
|
// we output in a single alpha channel
|
|
OutColor = OutColor.r;
|
|
}
|
|
else
|
|
{
|
|
OutColor.r = pow(1 - OutColor.r, 16); // constnt is tweaked with radius and sample count
|
|
}
|
|
|
|
// we don't support ddx_fine() for SM4 and ES3.1
|
|
#if QUAD_MESSAGE_PASSING_BLUR > 0 && (SWITCH_PROFILE || SWITCH_PROFILE_FORWARD || FEATURE_LEVEL >= FEATURE_LEVEL_SM5)
|
|
{ // .x: AO output, .y:SceneDepth .zw:view space normal
|
|
float4 CenterPixel = float4(OutColor.r, SceneDepth, normalize(ViewSpaceNormal).xy);
|
|
|
|
float4 dX = ddx_fine(CenterPixel);
|
|
float4 dY = ddy_fine(CenterPixel);
|
|
|
|
int2 Mod = (uint2)(SvPosition.xy) % 2;
|
|
|
|
float4 PixA = CenterPixel;
|
|
float4 PixB = CenterPixel - dX * (Mod.x * 2 - 1);
|
|
float4 PixC = CenterPixel - dY * (Mod.y * 2 - 1);
|
|
|
|
float WeightA = 1.0f;
|
|
float WeightB = 1.0f;
|
|
float WeightC = 1.0f;
|
|
|
|
#if QUAD_MESSAGE_PASSING_NORMAL
|
|
const float NormalTweak = 4.0f;
|
|
float3 NormalA = ReconstructNormal(PixA.zw);
|
|
float3 NormalB = ReconstructNormal(PixB.zw);
|
|
float3 NormalC = ReconstructNormal(PixC.zw);
|
|
WeightB *= saturate(pow(saturate(dot(NormalA, NormalB)), NormalTweak));
|
|
WeightC *= saturate(pow(saturate(dot(NormalA, NormalC)), NormalTweak));
|
|
#endif
|
|
|
|
#if QUAD_MESSAGE_PASSING_DEPTH
|
|
const float DepthTweak = 1;
|
|
float InvDepth = 1.0f / PixA.y;
|
|
WeightB *= 1 - saturate(abs(1 - PixB.y * InvDepth) * DepthTweak);
|
|
WeightC *= 1 - saturate(abs(1 - PixC.y * InvDepth) * DepthTweak);
|
|
#endif
|
|
|
|
// + 1.0f to avoid div by 0
|
|
float InvWeightABC = 1.0f / (WeightA + WeightB + WeightC);
|
|
|
|
WeightA *= InvWeightABC;
|
|
WeightB *= InvWeightABC;
|
|
WeightC *= InvWeightABC;
|
|
|
|
OutColor = WeightA * PixA.x + WeightB * PixB.x + WeightC * PixC.x;
|
|
// visualize where we don't want to fade
|
|
// OutColor = (WeightA - 0.333f) / 0.666f;
|
|
}
|
|
#endif
|
|
|
|
#if OUTPUT_DEPTH
|
|
OutColor.gba = PackSceneDepth(SceneDepth);
|
|
#endif
|
|
}
|
|
|
|
void MainPS(in noperspective float4 UVAndScreenPos : TEXCOORD0, float4 SvPosition : SV_POSITION, out float4 OutColor : SV_Target0)
|
|
{
|
|
MainPSandCS(UVAndScreenPos, SvPosition, OutColor);
|
|
}
|
|
|
|
#endif // SSAO
|
|
|
|
|
|
#ifdef UPSAMPLE_PASS
|
|
|
|
Texture2D AOTexture;
|
|
SamplerState AOSampler;
|
|
|
|
float UpsampleSSAO(float2 BufferUV)
|
|
{
|
|
#if UPSAMPLE_QUALITY == 0
|
|
// Bilinear sample.
|
|
return Texture2DSampleLevel(AOTexture, AOSampler, BufferUV, 0).x;
|
|
|
|
#elif UPSAMPLE_QUALITY == 1
|
|
// 4 sample bilinear with depth weighting.
|
|
float2 LowResBufferSize = floor(View.BufferSizeAndInvSize.xy / 2);
|
|
float2 LowResTexelSize = 1.0f / LowResBufferSize;
|
|
|
|
float2 Corner00UV = floor(BufferUV * LowResBufferSize - 0.5f) / LowResBufferSize + 0.5f * LowResTexelSize;
|
|
float2 BilinearWeights = (BufferUV - Corner00UV) * LowResBufferSize;
|
|
|
|
float4 AOValues00 = Texture2DSampleLevel(AOTexture, AOSampler, Corner00UV, 0);
|
|
float4 AOValues10 = Texture2DSampleLevel(AOTexture, AOSampler, Corner00UV + float2(LowResTexelSize.x, 0), 0);
|
|
float4 AOValues01 = Texture2DSampleLevel(AOTexture, AOSampler, Corner00UV + float2(0, LowResTexelSize.y), 0);
|
|
float4 AOValues11 = Texture2DSampleLevel(AOTexture, AOSampler, Corner00UV + LowResTexelSize, 0);
|
|
|
|
float4 CornerWeights = float4(
|
|
(1 - BilinearWeights.y) * (1 - BilinearWeights.x),
|
|
(1 - BilinearWeights.y) * BilinearWeights.x,
|
|
BilinearWeights.y * (1 - BilinearWeights.x),
|
|
BilinearWeights.y * BilinearWeights.x);
|
|
|
|
float SceneDepth00 = UnpackSceneDepth(AOValues00.gba);
|
|
float SceneDepth10 = UnpackSceneDepth(AOValues10.gba);
|
|
float SceneDepth01 = UnpackSceneDepth(AOValues01.gba);
|
|
float SceneDepth11 = UnpackSceneDepth(AOValues11.gba);
|
|
|
|
float MaxDepth = max(max(max(SceneDepth00, SceneDepth10), SceneDepth01), SceneDepth11);
|
|
float MinDepth = min(min(min(SceneDepth00, SceneDepth10), SceneDepth01), SceneDepth11);
|
|
|
|
if (MaxDepth / MinDepth > 1.02f)
|
|
{
|
|
float Epsilon = 0.0001f;
|
|
float SceneDepth = GetDepthFromAOInput(BufferUV);
|
|
float4 CornerDepths = abs(float4(SceneDepth00, SceneDepth10, SceneDepth01, SceneDepth11));
|
|
float4 DepthWeights = 1.0f / (abs(CornerDepths - SceneDepth.xxxx) + Epsilon);
|
|
CornerWeights *= DepthWeights;
|
|
}
|
|
|
|
float InterpolatedResult =
|
|
(CornerWeights.x * AOValues00.r
|
|
+ CornerWeights.y * AOValues10.r
|
|
+ CornerWeights.z * AOValues01.r
|
|
+ CornerWeights.w * AOValues11.r)
|
|
/ dot(CornerWeights, 1);
|
|
|
|
return InterpolatedResult;
|
|
|
|
#elif UPSAMPLE_QUALITY == 2
|
|
// 9 sample with depth weighting.
|
|
float2 LowResBufferSize = floor(View.BufferSizeAndInvSize.xy * 0.5);
|
|
float2 LowResTexelSize = 1.0f / LowResBufferSize;
|
|
|
|
float4 AOValues[9];
|
|
AOValues[0] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(-1, -1) * LowResTexelSize, 0);
|
|
AOValues[1] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(0, -1) * LowResTexelSize, 0);
|
|
AOValues[2] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(1, -1) * LowResTexelSize, 0);
|
|
AOValues[3] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(-1, 0) * LowResTexelSize, 0);
|
|
AOValues[4] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(0, 0) * LowResTexelSize, 0);
|
|
AOValues[5] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(1, 0) * LowResTexelSize, 0);
|
|
AOValues[6] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(-1, 1) * LowResTexelSize, 0);
|
|
AOValues[7] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(0, 1) * LowResTexelSize, 0);
|
|
AOValues[8] = Texture2DSampleLevel(AOTexture, AOSampler, BufferUV + float2(1, 1) * LowResTexelSize, 0);
|
|
|
|
float SceneDepth = GetDepthFromAOInput(BufferUV);
|
|
float Epsilon = 0.0001f;
|
|
|
|
float TotalSum = 0;
|
|
float TotalWeight = 0;
|
|
UNROLL for (int i = 0; i < 9; ++i)
|
|
{
|
|
float SampleValue = AOValues[i].r;
|
|
float SampleDepth = UnpackSceneDepth(AOValues[i].gba);
|
|
|
|
float Weight = 1.0f / (abs(SceneDepth - SampleDepth) + Epsilon);
|
|
TotalWeight += Weight;
|
|
TotalSum += SampleValue * Weight;
|
|
}
|
|
|
|
return TotalSum / TotalWeight;
|
|
#endif
|
|
}
|
|
|
|
void AmbientOcclusionUpsamplePS(
|
|
in float4 SvPosition : SV_POSITION,
|
|
out float4 OutColor : SV_Target0)
|
|
{
|
|
float2 BufferUV = SvPositionToBufferUV(SvPosition);
|
|
OutColor = UpsampleSSAO(BufferUV).xxxx;
|
|
}
|
|
|
|
#endif // UPSAMPLE_PASS
|