Files
UnrealEngine/Engine/Shaders/Private/PostProcessAmbientOcclusionCommon.ush
2025-05-18 13:04:45 +08:00

391 lines
14 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessAmbientOcclusionCommon.ush: Common code (mostly SSAO) used by
PostProcessAmbientOcclusion.usf and PostProcessAmbientOcclusionMobile.usf
=============================================================================*/
// Packed SSAO constants, the layout of each element is:
// [0]: .x:AmbientOcclusionPower, .y:AmbientOcclusionBias/BiasDistance, .z:1/AmbientOcclusionDistance, .w:AmbientOcclusionIntensity
// [1]: .xy:ViewportUVToRandomUV, .z:AORadiusInShader, .w:Ratio
// [2]: .x:ScaleFactor(e.g. 4 if current RT is a quarter in size), .y:InvThreshold, .z:ScaleRadiusInWorldSpace(0:VS/1:WS), .w:MipBlend
// [3]: .xy:TemporalAARandomOffset, .z:StaticFraction, .w: InvTanHalfFov
// [4]: .x:Multiplier for FadeDistance/Radius, .y:Additive for FadeDistance/Radius, .z:clamped HzbStepMipLevelFactorValue .w: unused
float4 ScreenSpaceAOParams[5];
SCREEN_PASS_TEXTURE_VIEWPORT(AOViewport)
SCREEN_PASS_TEXTURE_VIEWPORT(AOSceneViewport)
// Needed to prevent an AO seam near the 16 bit float maximum. This factor pushes the problem far out and
// does not seem to cause a visual degradation nearby.
// Depth read from the alpha channel of the setup / normals textures is multiplied by this scale.
const static float Constant_Float16F_Scale = 4096.0f * 32.0f;
// only for MainSetupPS()
float ThresholdInverse;
float2 InputExtentInverse;
/** RGBA8 linear texture containing random normals */
Texture2D RandomNormalTexture;
SamplerState RandomNormalTextureSampler;
//----------------------------------------------------------------------------------------------------------------------
#include "HZB.ush"
// Reads the hierarchical Z buffer at a screen position.
// @param ScreenPos screen position, mapped to HZB UV through ScreenPosToHZBUVScaleBias
// @param MipLevel HZB mip level to sample
// @return red channel of the HZB sample after ConvertFromDeviceZ()
float GetHZBDepth(float2 ScreenPos, float MipLevel)
{
	const float2 SampleUV = ApplyScreenTransform(ScreenPos, ScreenPosToHZBUVScaleBias);
	const float DeviceZ = Texture2DSampleLevel(HZBTexture, HZBSampler, SampleUV, MipLevel).r;

	return ConvertFromDeviceZ(DeviceZ);
}
//----------------------------------------------------------------------------------------------------------------------
// Read as AO input when USE_AO_SETUP_AS_INPUT is set: .xyz is decoded as a normal with *2-1,
// .a is multiplied by Constant_Float16F_Scale to get depth.
Texture2D SSAO_SetupTexture;
// Read while upsampling: .xyz is decoded as a normal with *2-1, .a is multiplied by Constant_Float16F_Scale to get depth.
Texture2D SSAO_NormalsTexture;
// AO data that ComputeUpsampleContribution() filters.
Texture2D SSAO_DownsampledAO;
// Sampler used for all three SSAO_* textures above.
SamplerState SSAO_Sampler;
// Scales the tap offsets of the upsample kernel.
// NOTE(review): presumably 1 / size of SSAO_DownsampledAO in texels - confirm on the C++ side.
float2 SSAO_DownsampledAOInverseSize;
// UV range the upsample taps are clamped to (the clamp is skipped when SHADING_PATH_MOBILE is set).
float2 SSAO_DownsampledAOUVViewportMin;
float2 SSAO_DownsampledAOUVViewportMax;
// Not referenced in the visible part of this file.
// NOTE(review): presumably a scale/bias applied to SvPosition - confirm against the users.
float2 SSAO_SvPositionScaleBias;
// could be moved to a more central spot
// Builds a position from a depth and a screen position by scaling the screen position with the depth.
// @param SceneDepth depth, stored unchanged in .z of the result
// @param ScreenPos -1 .. 1
// @return float3(ScreenPos * SceneDepth, SceneDepth)
float3 ReconstructCSPos(float SceneDepth, float2 ScreenPos)
{
	float3 Position;

	Position.xy = ScreenPos * SceneDepth;
	Position.z = SceneDepth;

	return Position;
}
// Rates how close two depth values are.
// @param TweakScale scales the absolute depth difference before it is subtracted from 1
// @return 0: not similar .. 1: very similar
float ComputeDepthSimilarity(float DepthA, float DepthB, float TweakScale)
{
	const float DepthDistance = abs(DepthA - DepthB);

	return saturate(1 - DepthDistance * TweakScale);
}
// Picks the one-sided difference around mid that has the smaller magnitude.
// @return (mid - left) if its magnitude is strictly smaller than that of (right - mid), otherwise (right - mid)
float TakeSmallerAbsDelta(float left, float mid, float right)
{
	const float DeltaFromLeft = mid - left;
	const float DeltaToRight = right - mid;

	// a tie resolves to the right side difference
	float SmallerDelta = DeltaToRight;

	if (abs(DeltaFromLeft) < abs(DeltaToRight))
	{
		SmallerDelta = DeltaFromLeft;
	}

	return SmallerDelta;
}
// Derives a normal from the depth buffer using the pixel and its four direct neighbors.
// ddx/ddy could be used instead but that would have less quality and would not work for compute shaders.
// @param SvPosition pixel position, .xy is offset by one pixel to reach the neighbors
// @return not normalized normal in world space
float3 ReconstructNormalFromDepthBuffer(float4 SvPosition)
{
	const float4 PixelStepX = float4(1, 0, 0, 0);
	const float4 PixelStepY = float4(0, 1, 0, 0);

	// could use a modified version of GatherSceneDepth later on
	float CenterZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition));
	float LeftZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition - PixelStepX));
	float TopZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition - PixelStepY));
	float RightZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition + PixelStepX));
	float BottomZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition + PixelStepY));

	// Favor the surface we are looking at by using the smaller one-sided depth difference per axis.
	// Similar to: http://www.humus.name/index.php?page=3D&ID=84
	float DeltaZX = TakeSmallerAbsDelta(LeftZ, CenterZ, RightZ);
	float DeltaZY = TakeSmallerAbsDelta(TopZ, CenterZ, BottomZ);

	// can be optimized, is not fully centered but that should not matter much
	float3 CenterPos = SvPositionToTranslatedWorld(float4(SvPosition.xy, CenterZ, 1));
	float3 NeighborPosX = SvPositionToTranslatedWorld(float4(SvPosition.xy + PixelStepX.xy, CenterZ + DeltaZX, 1));
	float3 NeighborPosY = SvPositionToTranslatedWorld(float4(SvPosition.xy + PixelStepY.xy, CenterZ + DeltaZY, 1));

	return cross(NeighborPosX - CenterPos, NeighborPosY - CenterPos);
}
// Fetches the world space normal the AO pass uses for a pixel. The source depends on the permutation:
// the setup texture (USE_AO_SETUP_AS_INPUT), a reconstruction from the depth buffer
// (COMPUTE_SHADER / FORWARD_SHADING / SHADING_PATH_MOBILE), the Substrate top layer or the GBuffer.
// @param UV used for the setup texture and GBuffer lookups
// @param SvPosition used for the depth buffer reconstruction and the Substrate lookups
// @return can be 0,0,0 if we don't have a good input normal (USE_NORMALS is off or the pixel is hair)
float3 GetWorldSpaceNormalFromAOInput(float2 UV, float4 SvPosition)
{
	float3 WorldNormal = 0;

	if (USE_NORMALS)
	{
#if USE_AO_SETUP_AS_INPUT
		// Low resolution normal computed in the setup (downscaled) pass.
		// UV is passed through unmodified, the same way GetDepthFromAOInput() reads this texture, so the
		// sampler alone decides the addressing. Wrapping with frac() would fetch the opposite border for
		// coordinates at or beyond 1.
		WorldNormal = Texture2DSample(SSAO_SetupTexture, SSAO_Sampler, UV).xyz * 2 - 1;
#elif COMPUTE_SHADER || FORWARD_SHADING || SHADING_PATH_MOBILE
		// Async compute and forward shading don't have access to the gbuffer.
		WorldNormal = ReconstructNormalFromDepthBuffer(SvPosition);
#else
	#if SUBTRATE_GBUFFER_FORMAT==1
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(SvPosition.xy, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
		FSubstratePixelHeader SubstrateHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
		// hair keeps the 0,0,0 normal
		if (SubstrateHeader.SubstrateGetBSDFType() != SUBSTRATE_BSDF_TYPE_HAIR)
		{
			const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(Substrate.TopLayerTexture.Load(uint3(SvPosition.xy, 0)));
			WorldNormal = TopLayerData.WorldNormal;
		}
	#else
		// Otherwise sample Gbuffer normals if the shader model has normals.
		FGBufferData GBuffer = GetGBufferData(UV, false);
		// hair keeps the 0,0,0 normal
		if (GBuffer.ShadingModelID != SHADINGMODELID_HAIR)
		{
			// reuse the data that was already fetched instead of decoding the GBuffer a second time
			WorldNormal = GBuffer.WorldNormal;
		}
	#endif
#endif
	}

	return WorldNormal;
}
// Filters SSAO_DownsampledAO around InUV, weighting each tap by how well its depth (and, with USE_NORMALS,
// its normal) matches the center pixel.
// @param SceneDepth depth of the center pixel, compared against the depth stored in SSAO_NormalsTexture.a
// @param InUV center of the filter kernel (4 taps when AO_UPSAMPLE_QUALITY is 0, otherwise 3x3 taps)
// @param CenterWorldNormal normal of the center pixel, only used with USE_NORMALS
// @return .rb: weighted average of the taps, .g: smallest .g of all taps, .a: sum of the weights divided by
//         the biased weight sum. If every weight is 0, .r evaluates to 1 and .ba to 0.
float4 ComputeUpsampleContribution(float SceneDepth, float2 InUV, float3 CenterWorldNormal)
{
	// can be optimized
#if AO_UPSAMPLE_QUALITY == 0
	const int SampleCount = 4;
	float2 UV[SampleCount];

	UV[0] = InUV + float2(-0.5f,  0.5f) * SSAO_DownsampledAOInverseSize;
	UV[1] = InUV + float2( 0.5f,  0.5f) * SSAO_DownsampledAOInverseSize;
	UV[2] = InUV + float2(-0.5f, -0.5f) * SSAO_DownsampledAOInverseSize;
	UV[3] = InUV + float2( 0.5f, -0.5f) * SSAO_DownsampledAOInverseSize;
#else // AO_UPSAMPLE_QUALITY == 0
	const int SampleCount = 9;
	float2 UV[SampleCount];

	UV[0] = InUV + float2(-1, -1) * SSAO_DownsampledAOInverseSize;
	UV[1] = InUV + float2( 0, -1) * SSAO_DownsampledAOInverseSize;
	UV[2] = InUV + float2( 1, -1) * SSAO_DownsampledAOInverseSize;
	UV[3] = InUV + float2(-1,  0) * SSAO_DownsampledAOInverseSize;
	UV[4] = InUV + float2( 0,  0) * SSAO_DownsampledAOInverseSize;
	UV[5] = InUV + float2( 1,  0) * SSAO_DownsampledAOInverseSize;
	UV[6] = InUV + float2(-1,  1) * SSAO_DownsampledAOInverseSize;
	UV[7] = InUV + float2( 0,  1) * SSAO_DownsampledAOInverseSize;
	UV[8] = InUV + float2( 1,  1) * SSAO_DownsampledAOInverseSize;
#endif // AO_UPSAMPLE_QUALITY == 0

	// to avoid division by 0
	float SmallValue = 0.0001f;

	// we could weight the samples better but tests didn't show much difference
	float WeightSum = SmallValue;
	float4 Ret = float4(SmallValue,0,0,0);

	float MinIteration = 1.0f;

	UNROLL for(int i = 0; i < SampleCount; ++i)
	{
#if SHADING_PATH_MOBILE
		float2 ClampedUV = UV[i];
#else
		// keep the taps inside the viewport of the downsampled AO
		float2 ClampedUV = clamp(UV[i], SSAO_DownsampledAOUVViewportMin, SSAO_DownsampledAOUVViewportMax);
#endif

		float4 SampleValue = Texture2DSample(SSAO_DownsampledAO, SSAO_Sampler, ClampedUV);

		// .g is not filtered, the smallest value of all taps is passed on
		MinIteration = min(MinIteration, SampleValue.g);

		float4 NormalAndSampleDepth = Texture2DSample(SSAO_NormalsTexture, SSAO_Sampler, ClampedUV);
		float SampleDepth = NormalAndSampleDepth.a * Constant_Float16F_Scale;

		// when tweaking this constant look for crawling pattern at edges
		float Weight = ComputeDepthSimilarity(SampleDepth, SceneDepth, 0.003f);

		if (USE_NORMALS)
		{
			float3 LocalWorldNormal = NormalAndSampleDepth.xyz*2-1;

			// taps facing away from the center normal get no weight
			Weight *= saturate(dot(LocalWorldNormal, CenterWorldNormal));
		}

		// todo: 1 can be put into the input to save an instruction
		Ret += float4(SampleValue.rgb, 1) * Weight;
		WeightSum += Weight;
	}

	Ret /= WeightSum;
	Ret.g = MinIteration;

	return Ret;
}
// Factor to blend between upsampled and current pass data.
// @return 0 when USE_UPSAMPLE is 0, otherwise 1 when AO_SAMPLE_QUALITY is 0, otherwise MipBlend
float ComputeLerpFactor()
{
#if USE_UPSAMPLE == 0
	// if there is no former pass we cannot use the data
	return 0.0f;
#elif AO_SAMPLE_QUALITY == 0
	// we have no AO, we only use the upsampled data
	return 1.0f;
#else
	// MipBlend, set up on C++ side
	return ScreenSpaceAOParams[2].w;
#endif
}
// Cheap stand-in for acos(x) / PI.
// @return NormAngle means 0..1 is actually 0..PI
float acosApproxNormAngle(float x)
{
	// correct would be: acos(x) / PI
	// linear approximation of it
	const float LinearNormAngle = (1 - x) * 0.5f;

	// todo: expose
	// 1: is a good linear approximation, 0.9f seems to look good
	const float ContrastTweak = 0.9f;

	// pretty good approximation with contrast tweak
	return saturate(LinearNormAngle * ContrastTweak);
}
// Evaluates a mirrored pair of HZB taps around the center against the surface normal.
// @param ScreenSpacePosCenter center of the tap pair, the taps are at +/- InLocalRandom
// @param InvFovFix scales the deltas from the shaded position to the taps
// @param ViewSpacePosition position the taps are compared against
// @param ScaledViewSpaceNormal normal the deltas are projected onto
// @param InvHaloSize scales the tap distance for the weight, unused with OPTIMIZATION_O1
// @param MipLevel HZB mip level to sample
// @return float3(InvNormAngleL, InvNormAngleR, Weight)
float3 WedgeWithNormal(float2 ScreenSpacePosCenter, float2 InLocalRandom, float3 InvFovFix, float3 ViewSpacePosition, float3 ScaledViewSpaceNormal, float InvHaloSize, float MipLevel)
{
	const float2 TapScreenPosL = ScreenSpacePosCenter + InLocalRandom;
	const float2 TapScreenPosR = ScreenSpacePosCenter - InLocalRandom;

	const float TapDepthL = GetHZBDepth(TapScreenPosL, MipLevel);
	const float TapDepthR = GetHZBDepth(TapScreenPosR, MipLevel);

	const float3 TapPosL = ReconstructCSPos(TapDepthL, TapScreenPosL);
	const float3 TapPosR = ReconstructCSPos(TapDepthR, TapScreenPosR);

	const float3 ToTapL = (TapPosL - ViewSpacePosition) * InvFovFix;
	const float3 ToTapR = (TapPosR - ViewSpacePosition) * InvFovFix;

	// how far each tap rises along the normal
	const float AlongNormalL = dot(ToTapL, ScaledViewSpaceNormal);
	const float AlongNormalR = dot(ToTapR, ScaledViewSpaceNormal);

#if OPTIMIZATION_O1
	// divides by the squared length and skips the distance weight
	const float InvNormAngleL = saturate(AlongNormalL / dot(ToTapL, ToTapL));
	const float InvNormAngleR = saturate(AlongNormalR / dot(ToTapR, ToTapR));
	const float Weight = 1;
#else
	const float InvNormAngleL = saturate(AlongNormalL * rsqrt(dot(ToTapL, ToTapL)));
	const float InvNormAngleR = saturate(AlongNormalR * rsqrt(dot(ToTapR, ToTapR)));

	// the weight fades to 0 as either tap gets further away
	const float Weight =
		  saturate(1.0f - length(ToTapL) * InvHaloSize)
		* saturate(1.0f - length(ToTapR) * InvHaloSize);
#endif

	return float3(InvNormAngleL, InvNormAngleR, Weight);
}
// Evaluates a mirrored pair of HZB taps around the center without using a surface normal: the occlusion
// comes from the angle between the two deltas pointing from the shaded position to the taps.
// @param ScreenSpacePosCenter center of the tap pair, the taps are at +/- InLocalRandom
// @param InvFovFix scales the deltas from the shaded position to the taps
// @param ViewSpacePosition position the taps are compared against
// @param InvHaloSize not used by the result, kept so the signature matches the existing callers
// @param MipLevel HZB mip level to sample
// @return float2(InvNormAngle, Weight), InvNormAngle is already divided by (Weight + 0.001f)
float2 WedgeNoNormal(float2 ScreenSpacePosCenter, float2 InLocalRandom, float3 InvFovFix, float3 ViewSpacePosition, float InvHaloSize, float MipLevel)
{
	float2 ScreenSpacePosL = ScreenSpacePosCenter + InLocalRandom;
	float2 ScreenSpacePosR = ScreenSpacePosCenter - InLocalRandom;

	float AbsL = GetHZBDepth(ScreenSpacePosL, MipLevel);
	float AbsR = GetHZBDepth(ScreenSpacePosR, MipLevel);

	float3 SamplePositionL = ReconstructCSPos(AbsL, ScreenSpacePosL);
	float3 SamplePositionR = ReconstructCSPos(AbsR, ScreenSpacePosR);

	float3 DeltaL = (SamplePositionL - ViewSpacePosition) * InvFovFix;
	float3 DeltaR = (SamplePositionR - ViewSpacePosition) * InvFovFix;

	// The per side halo weights (saturate(1.0f - length(Delta) * InvHaloSize)) that used to be computed here
	// never reached the return value, so they are no longer evaluated.

	float FlatSurfaceBias = 5.0f;

	// how much closer than the shaded position each tap is
	float left = ViewSpacePosition.z - AbsL;
	float right = ViewSpacePosition.z - AbsR;

	// OptionA: accurate angle computation
	float NormAngle = acosApproxNormAngle( dot(DeltaL, DeltaR) / sqrt(length2(DeltaL) * length2(DeltaR)));
	// OptionB(fade out in near distance): float NormAngle = acosApproxNormAngle( (- left - right) * 20);
	// OptionC(look consistent but more noisy, should be much faster): float NormAngle = 0;

	// not 100% correct but simple
	// bias is needed to avoid flickering on almost perfectly flat surfaces
	if(left + right < FlatSurfaceBias)
	{
		// fix concave case
		NormAngle = 1;
	}

	// to avoid halos around objects
	float Weight = 1;

	float InvAmbientOcclusionDistance = ScreenSpaceAOParams[0].z;
	float ViewDepthAdd = 1.0f - ViewSpacePosition.z * InvAmbientOcclusionDistance;

	// each factor is 1 for a tap at or behind the shaded depth and reaches 0 once the tap is
	// 1 / InvAmbientOcclusionDistance in front of it
	Weight *= saturate(SamplePositionL.z * InvAmbientOcclusionDistance + ViewDepthAdd);
	Weight *= saturate(SamplePositionR.z * InvAmbientOcclusionDistance + ViewDepthAdd);

	// the small bias avoids a division by 0
	return float2((1-NormAngle) / (Weight + 0.001f), Weight);
}
// Rebuilds a normal from its .xy components, choosing the non negative .z that gives it unit length.
// @param In .xy of the normal
// @return float3(In, sqrt(1 - dot(In, In))), .z is 0 if In is longer than 1
float3 ReconstructNormal(float2 In)
{
	// Clamp the radicand: an input that is slightly longer than 1 (e.g. after quantization or filtering)
	// would make it negative and sqrt() would return NaN.
	return float3(In, sqrt(saturate(1 - dot(In, In))));
}
// Debug helper: masks a disc around the center whose radius doubles with each mip level.
// @param ScreenSpacePos -1..1
// @param MipLevel scales the disc radius by exp2(MipLevel)
// @return 1 if inside the center, 0 if outside
float ComputeSampleDebugMask(float2 ScreenSpacePos, float MipLevel)
{
	// only y is wrapped with frac() before both axes are shifted by half a unit
	const float2 ShiftedPos = float2(ScreenSpacePos.x - 0.5f, frac(ScreenSpacePos.y) - 0.5f);

	// truncated to whole pixels
	const int2 PixelOffsetToCenter = int2(ShiftedPos * AOViewport_ViewportSize * 0.5f);
	const float DistanceInPixels = length(PixelOffsetToCenter);

	// revisit this
	const float BaseRadius = 12.0f;

	// hard edge, a soft alternative is: saturate(1 - DistanceInPixels / (BaseRadius * exp2(MipLevel)))
	return DistanceInPixels < BaseRadius * exp2(MipLevel);
}
// Picks the HZB mip level for a sample: later steps and higher sample ids get a larger value.
// @param sampleid index of the sample, normalized with SAMPLESET_ARRAY_SIZE
// @param step index of the step, normalized with SAMPLE_STEPS
// @return log2 of the factor from ScreenSpaceAOParams[4].z scaled by the normalized step and sample position
float ComputeMipLevel(int sampleid, int step)
{
	// clamped HzbStepMipLevelFactorValue, a constant (e.g. 0.5f or 1) would give better performance
	const float MipLevelFactor = ScreenSpaceAOParams[4].z;

	// center of the sample slot in 0..1
	const float NormalizedSamplePos = (sampleid + 0.5f) / SAMPLESET_ARRAY_SIZE;

	// the last step maps to 1
	const float NormalizedStep = (step + 1) / (float)SAMPLE_STEPS;

	// alternative: log2(1.0f + MipLevelFactor * NormalizedStep * NormalizedSamplePos)
	return log2(MipLevelFactor * NormalizedStep * NormalizedSamplePos);
}
// Fetches the depth the AO pass works with.
// @param UV lookup position
// @return depth from the setup texture (USE_AO_SETUP_AS_INPUT) or from CalcSceneDepth()
float GetDepthFromAOInput(float2 UV)
{
	float Depth;

#if USE_AO_SETUP_AS_INPUT
	// low resolution, the alpha channel is scaled back with Constant_Float16F_Scale
	Depth = Texture2DSample(SSAO_SetupTexture, SSAO_Sampler, UV).a * Constant_Float16F_Scale;
#else
	// full resolution
	Depth = CalcSceneDepth(UV);
#endif

	return Depth;
}