// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	PostprocessAmbientOcclusion.ush: Common code (mostly SSAO) used by PostProcessAmbientOcclusion.usf and PostProcessAmbientOcclusionMobile.usf
=============================================================================*/

// [0]: .x:AmbientOcclusionPower, .y:AmbientOcclusionBias/BiasDistance, .z:1/AmbientOcclusionDistance, .w:AmbientOcclusionIntensity
// [1]: .xy:ViewportUVToRandomUV, .z:AORadiusInShader, .w:Ratio
// [2]: .x:ScaleFactor(e.g. 4 if current RT is a quarter in size), .y:InvThreshold, .z:ScaleRadiusInWorldSpace(0:VS/1:WS), .w:MipBlend
// [3]: .xy:TemporalAARandomOffset, .z:StaticFraction, .w: InvTanHalfFov
// [4]: .x:Multiplier for FadeDistance/Radius, .y:Additive for FadeDistance/Radius, .z:clamped HzbStepMipLevelFactorValue .w: unused
float4 ScreenSpaceAOParams[5];

SCREEN_PASS_TEXTURE_VIEWPORT(AOViewport)
SCREEN_PASS_TEXTURE_VIEWPORT(AOSceneViewport)

// needed to prevent AO seam near 16 bit float maximum, this factor pushes the problem far out and it seems to not have a visual degradation nearby
const static float Constant_Float16F_Scale = 4096.0f * 32.0f;

// only for MainSetupPS()
float ThresholdInverse;
float2 InputExtentInverse;

/** RGBA8 linear texture containing random normals */
Texture2D RandomNormalTexture;
SamplerState RandomNormalTextureSampler;

//----------------------------------------------------------------------------------------------------------------------

#include "HZB.ush"

// Samples the HZB at the given screen position / mip and converts the stored device Z with ConvertFromDeviceZ().
// @param ScreenPos mapped to HZB UV through ScreenPosToHZBUVScaleBias
// @param MipLevel HZB mip to sample
// @return ConvertFromDeviceZ() of the .r channel of the HZB sample
float GetHZBDepth(float2 ScreenPos, float MipLevel)
{
	float2 HZBUV = ApplyScreenTransform(ScreenPos, ScreenPosToHZBUVScaleBias);
	float HZBDepth = Texture2DSampleLevel(HZBTexture, HZBSampler, HZBUV, MipLevel).r;
	return ConvertFromDeviceZ(HZBDepth);
}

//----------------------------------------------------------------------------------------------------------------------

Texture2D SSAO_SetupTexture;
Texture2D SSAO_NormalsTexture;
Texture2D SSAO_DownsampledAO;
SamplerState SSAO_Sampler;
float2 SSAO_DownsampledAOInverseSize;
float2 SSAO_DownsampledAOUVViewportMin;
float2 SSAO_DownsampledAOUVViewportMax;
float2 SSAO_SvPositionScaleBias;

// could be moved to a more central spot
// @param ScreenPos -1 .. 1
// @return float3(ScreenPos * SceneDepth, SceneDepth)
float3 ReconstructCSPos(float SceneDepth, float2 ScreenPos)
{
	return float3(ScreenPos * SceneDepth, SceneDepth);
}

// 0: not similar .. 1:very similar
// @param TweakScale scales the absolute depth difference before it is subtracted from 1
float ComputeDepthSimilarity(float DepthA, float DepthB, float TweakScale)
{
	return saturate(1 - abs(DepthA - DepthB) * TweakScale);
}

// @return the one-sided difference (mid - left or right - mid) with the smaller magnitude, on a tie the right one
float TakeSmallerAbsDelta(float left, float mid, float right)
{
	float a = mid - left;
	float b = right - mid;

	return (abs(a) < abs(b)) ? a : b;
}

// could use ddx,ddy but that would have less quality and would not work for ComputeShaders
// @return not normalized normal in world space
float3 ReconstructNormalFromDepthBuffer(float4 SvPosition)
{
	// could use a modified version of GatherSceneDepth later on
	float DeviceZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4(0, 0, 0, 0)));
	float DeviceZLeft = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4(-1, 0, 0, 0)));
	float DeviceZTop = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4(0, -1, 0, 0)));
	float DeviceZRight = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4(1, 0, 0, 0)));
	float DeviceZBottom = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4(0, 1, 0, 0)));

	// Favor the surface we are looking at. Similar to: http://www.humus.name/index.php?page=3D&ID=84
	float DeviceZDdx = TakeSmallerAbsDelta(DeviceZLeft, DeviceZ, DeviceZRight);
	float DeviceZDdy = TakeSmallerAbsDelta(DeviceZTop, DeviceZ, DeviceZBottom);

	// can be optimized, is not fully centered but that should not matter much
	float3 Mid = SvPositionToTranslatedWorld(float4(SvPosition.xy + float2(0, 0), DeviceZ, 1));
	float3 Right = SvPositionToTranslatedWorld(float4(SvPosition.xy + float2(1, 0), DeviceZ + DeviceZDdx, 1)) - Mid;
	float3 Down = SvPositionToTranslatedWorld(float4(SvPosition.xy + float2(0, 1), DeviceZ + DeviceZDdy, 1)) - Mid;

	return cross(Right, Down);
}

// Picks the normal source depending on the compile time configuration:
// setup texture, depth buffer reconstruction, Substrate top layer or GBuffer.
// @return can be 0,0,0 if we don't have a good input normal
float3 GetWorldSpaceNormalFromAOInput(float2 UV, float4 SvPosition)
{
	float3 WorldNormal = 0;

	if (USE_NORMALS)
	{
	#if USE_AO_SETUP_AS_INPUT
		// Low resolution normal computed in the setup (downscaled) pass.
		WorldNormal = Texture2DSample(SSAO_SetupTexture, SSAO_Sampler, frac(UV)).xyz * 2 - 1;
	#elif COMPUTE_SHADER || FORWARD_SHADING || SHADING_PATH_MOBILE
		// Async compute and forward shading don't have access to the gbuffer.
		WorldNormal = ReconstructNormalFromDepthBuffer(SvPosition);
	#else
		#if SUBTRATE_GBUFFER_FORMAT==1
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(SvPosition.xy, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
		FSubstratePixelHeader SubstrateHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
		// hair pixels keep the 0,0,0 normal
		if (SubstrateHeader.SubstrateGetBSDFType() != SUBSTRATE_BSDF_TYPE_HAIR)
		{
			const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(Substrate.TopLayerTexture.Load(uint3(SvPosition.xy, 0)));
			WorldNormal = TopLayerData.WorldNormal;
		}
		#else
		// Otherwise sample Gbuffer normals if the shader model has normals.
		FGBufferData GBuffer = GetGBufferData(UV, false);
		// hair pixels keep the 0,0,0 normal
		if (GBuffer.ShadingModelID != SHADINGMODELID_HAIR)
		{
			// reuse the data decoded above instead of fetching the GBuffer a second time
			WorldNormal = GBuffer.WorldNormal;
		}
		#endif
	#endif
	}

	return WorldNormal;
}

// Depth (and optionally normal) aware upsample of the lower resolution AO pass.
// @param SceneDepth depth of the current pixel, compared against SSAO_NormalsTexture.a * Constant_Float16F_Scale
// @param InUV center UV in the downsampled AO texture
// @param CenterWorldNormal normal of the current pixel, only used if USE_NORMALS
// @return .rb: weighted average of the downsampled samples, .g: minimum of the samples' .g, .a: accumulated weight / WeightSum
float4 ComputeUpsampleContribution(float SceneDepth, float2 InUV, float3 CenterWorldNormal)
{
	// can be optimized
#if AO_UPSAMPLE_QUALITY == 0
	const int SampleCount = 4;
	float2 UV[SampleCount];

	UV[0] = InUV + float2(-0.5f,  0.5f) * SSAO_DownsampledAOInverseSize;
	UV[1] = InUV + float2( 0.5f,  0.5f) * SSAO_DownsampledAOInverseSize;
	UV[2] = InUV + float2(-0.5f, -0.5f) * SSAO_DownsampledAOInverseSize;
	UV[3] = InUV + float2( 0.5f, -0.5f) * SSAO_DownsampledAOInverseSize;
#else // AO_UPSAMPLE_QUALITY == 0
	const int SampleCount = 9;
	float2 UV[SampleCount];

	UV[0] = InUV + float2(-1, -1) * SSAO_DownsampledAOInverseSize;
	UV[1] = InUV + float2( 0, -1) * SSAO_DownsampledAOInverseSize;
	UV[2] = InUV + float2( 1, -1) * SSAO_DownsampledAOInverseSize;
	UV[3] = InUV + float2(-1,  0) * SSAO_DownsampledAOInverseSize;
	UV[4] = InUV + float2( 0,  0) * SSAO_DownsampledAOInverseSize;
	UV[5] = InUV + float2( 1,  0) * SSAO_DownsampledAOInverseSize;
	UV[6] = InUV + float2(-1,  1) * SSAO_DownsampledAOInverseSize;
	UV[7] = InUV + float2( 0,  1) * SSAO_DownsampledAOInverseSize;
	UV[8] = InUV + float2( 1,  1) * SSAO_DownsampledAOInverseSize;
#endif // AO_UPSAMPLE_QUALITY == 0

	// to avoid division by 0
	float SmallValue = 0.0001f;

	// we could weight the samples better but tests didn't show much difference
	float WeightSum = SmallValue;
	float4 Ret = float4(SmallValue, 0, 0, 0);

	float MinIteration = 1.0f;

	UNROLL
	for (int i = 0; i < SampleCount; ++i)
	{
#if SHADING_PATH_MOBILE
		float2 ClampedUV = UV[i];
#else
		// keep the taps inside the downsampled AO viewport
		float2 ClampedUV = clamp(UV[i], SSAO_DownsampledAOUVViewportMin, SSAO_DownsampledAOUVViewportMax);
#endif
		float4 SampleValue = Texture2DSample(SSAO_DownsampledAO, SSAO_Sampler, ClampedUV);

		MinIteration = min(MinIteration, SampleValue.g);

		float4 NormalAndSampleDepth = Texture2DSample(SSAO_NormalsTexture, SSAO_Sampler, ClampedUV);
		float SampleDepth = NormalAndSampleDepth.a * Constant_Float16F_Scale;

		// when tweaking this constant look for crawling pattern at edges
		float Weight = ComputeDepthSimilarity(SampleDepth, SceneDepth, 0.003f);

		if (USE_NORMALS)
		{
			float3 LocalWorldNormal = NormalAndSampleDepth.xyz * 2 - 1;
			Weight *= saturate(dot(LocalWorldNormal, CenterWorldNormal));
		}

		// todo: 1 can be put into the input to save an instruction
		Ret += float4(SampleValue.rgb, 1) * Weight;
		WeightSum += Weight;
	}

	Ret /= WeightSum;
	// .g is not averaged, it carries the minimum over all taps
	Ret.g = MinIteration;

	return Ret;
}

// to blend between upsampled and current pass data
// @return MipBlend, forced to 1 if AO_SAMPLE_QUALITY == 0 and (with priority) to 0 if USE_UPSAMPLE == 0
float ComputeLerpFactor()
{
	// set up on C++ side
	float MipBlend = ScreenSpaceAOParams[2].w;

	float AOLerpFactor = MipBlend;

#if AO_SAMPLE_QUALITY == 0
	// we have no AO, we only use the upsampled data
	AOLerpFactor = 1.0f;
#endif

#if USE_UPSAMPLE == 0
	// if there is no former pass we cannot use the data
	AOLerpFactor = 0.0f;
#endif

	return AOLerpFactor;
}

// @return NormAngle means 0..1 is actually 0..PI
float acosApproxNormAngle(float x)
{
	// todo: expose
	// 1: is a good linear approximation, 0.9f seems to look good
	float ContrastTweak = 0.9f;

	// correct: acos(x) / PI
	// linear approximation: saturate((1 - x) * 0.5f);
	// pretty good approximation with contrast tweak
	return saturate((1 - x) * 0.5f * ContrastTweak);
}

// Evaluates a pair of samples mirrored around the center (Center + InLocalRandom, Center - InLocalRandom) against the surface normal.
// @param ScreenSpacePosCenter screen position of the shaded pixel
// @param InLocalRandom screen space offset of the sample pair
// @param InvFovFix per component scale applied to the view space deltas
// @param ViewSpacePosition position of the shaded pixel, in the space produced by ReconstructCSPos()
// @param ScaledViewSpaceNormal normal the sample deltas are projected on
// @param InvHaloSize scales the delta length for the distance fade, ignored if OPTIMIZATION_O1
// @param MipLevel HZB mip used for the depth lookups
// @return float3(InvNormAngleL, InvNormAngleR, Weight)
float3 WedgeWithNormal(float2 ScreenSpacePosCenter, float2 InLocalRandom, float3 InvFovFix, float3 ViewSpacePosition, float3 ScaledViewSpaceNormal, float InvHaloSize, float MipLevel)
{
	float2 ScreenSpacePosL = ScreenSpacePosCenter + InLocalRandom;
	float2 ScreenSpacePosR = ScreenSpacePosCenter - InLocalRandom;

	float AbsL = GetHZBDepth(ScreenSpacePosL, MipLevel);
	float AbsR = GetHZBDepth(ScreenSpacePosR, MipLevel);

	float3 SamplePositionL = ReconstructCSPos(AbsL, ScreenSpacePosL);
	float3 SamplePositionR = ReconstructCSPos(AbsR, ScreenSpacePosR);

	float3 DeltaL = (SamplePositionL - ViewSpacePosition) * InvFovFix;
	float3 DeltaR = (SamplePositionR - ViewSpacePosition) * InvFovFix;

#if OPTIMIZATION_O1
	// divides by the squared length instead of the length and skips the distance fade
	float InvNormAngleL = saturate(dot(DeltaL, ScaledViewSpaceNormal) / dot(DeltaL, DeltaL));
	float InvNormAngleR = saturate(dot(DeltaR, ScaledViewSpaceNormal) / dot(DeltaR, DeltaR));
	float Weight = 1;
#else
	float InvNormAngleL = saturate(dot(DeltaL, ScaledViewSpaceNormal) * rsqrt(dot(DeltaL, DeltaL)));
	float InvNormAngleR = saturate(dot(DeltaR, ScaledViewSpaceNormal) * rsqrt(dot(DeltaR, DeltaR)));

	// fades to 0 once length(Delta) * InvHaloSize reaches 1 on either side
	float Weight =
		  saturate(1.0f - length(DeltaL) * InvHaloSize)
		* saturate(1.0f - length(DeltaR) * InvHaloSize);
#endif

	return float3(InvNormAngleL, InvNormAngleR, Weight);
}

// Evaluates a pair of samples mirrored around the center without using a surface normal.
// @param ScreenSpacePosCenter screen position of the shaded pixel
// @param InLocalRandom screen space offset of the sample pair
// @param InvFovFix per component scale applied to the view space deltas
// @param ViewSpacePosition position of the shaded pixel, in the space produced by ReconstructCSPos()
// @param InvHaloSize currently unused (only the disabled per side halo weights read it), kept for interface compatibility
// @param MipLevel HZB mip used for the depth lookups
// @return float2(InvNormAngle, Weight)
float2 WedgeNoNormal(float2 ScreenSpacePosCenter, float2 InLocalRandom, float3 InvFovFix, float3 ViewSpacePosition, float InvHaloSize, float MipLevel)
{
	float2 ScreenSpacePosL = ScreenSpacePosCenter + InLocalRandom;
	float2 ScreenSpacePosR = ScreenSpacePosCenter - InLocalRandom;

	float AbsL = GetHZBDepth(ScreenSpacePosL, MipLevel);
	float AbsR = GetHZBDepth(ScreenSpacePosR, MipLevel);

	float3 SamplePositionL = ReconstructCSPos(AbsL, ScreenSpacePosL);
	float3 SamplePositionR = ReconstructCSPos(AbsR, ScreenSpacePosR);

	float3 DeltaL = (SamplePositionL - ViewSpacePosition) * InvFovFix;
	float3 DeltaR = (SamplePositionR - ViewSpacePosition) * InvFovFix;

	float FlatSurfaceBias = 5.0f;

	float left = ViewSpacePosition.z - AbsL;
	float right = ViewSpacePosition.z - AbsR;

	// OptionA: accurate angle computation
	float NormAngle = acosApproxNormAngle(dot(DeltaL, DeltaR) / sqrt(length2(DeltaL) * length2(DeltaR)));
	// OptionB(fade out in near distance): float NormAngle = acosApproxNormAngle( (- left - right) * 20);
	// OptionC(look consistent but more noisy, should be much faster): float NormAngle = 0;

	// not 100% correct but simple
	// bias is needed to avoid flickering on almost perfectly flat surfaces
	//	if((leftAbs + rightAbs) * 0.5f > SceneDepth - 0.0001f)
	if (left + right < FlatSurfaceBias)
	{
		// fix concave case
		NormAngle = 1;
	}

	// to avoid halos around objects
	float Weight = 1;

	float InvAmbientOcclusionDistance = ScreenSpaceAOParams[0].z;
	float ViewDepthAdd = 1.0f - ViewSpacePosition.z * InvAmbientOcclusionDistance;

	Weight *= saturate(SamplePositionL.z * InvAmbientOcclusionDistance + ViewDepthAdd);
	Weight *= saturate(SamplePositionR.z * InvAmbientOcclusionDistance + ViewDepthAdd);

	// disabled variant, used per side halo weights saturate(1.0f - length(Delta) * InvHaloSize) (1 if OPTIMIZATION_O1):
	//	return float2(1 - NormAngle, (WeightLeft + WeightRight) * 0.5f);
	return float2((1 - NormAngle) / (Weight + 0.001f), Weight);
}

// Rebuilds the third component of a unit length vector from its first two, z is chosen >= 0.
// @param In xy of the vector, expected to have a length <= 1
// @return float3(In, z), z is 0 (instead of NaN) if the length of In exceeds 1 e.g. through quantization
float3 ReconstructNormal(float2 In)
{
	// saturate() keeps the radicand >= 0, for valid input (length <= 1) it is a no-op
	return float3(In, sqrt(saturate(1 - dot(In, In))));
}

// @param ScreenSpacePos -1..1
// @return 1 if inside the center, 0 if outside
float ComputeSampleDebugMask(float2 ScreenSpacePos, float MipLevel)
{
	ScreenSpacePos.x -= 0.5f;
	ScreenSpacePos.y = frac(ScreenSpacePos.y) - 0.5f;

	float2 ViewPortSize = AOViewport_ViewportSize;

	int2 PixelOffsetToCenter = int2(ScreenSpacePos * ViewPortSize * 0.5f);

	float d = length(PixelOffsetToCenter);

	// revisit this
	float radius = 12.0f;

	// hard
	return d < radius * exp2(MipLevel);
	// soft
	//	return saturate(1 - d / (radius * exp2(MipLevel)));
}

// @param sampleid index of the sample, mapped to (sampleid + 0.5) / SAMPLESET_ARRAY_SIZE
// @param step index of the step, mapped to (step + 1) / SAMPLE_STEPS
// @return log2(HzbStepMipLevelFactorValue * Scale * SamplePos), negative if the product is below 1
float ComputeMipLevel(int sampleid, int step)
{
	float SamplePos = (sampleid + 0.5f) / SAMPLESET_ARRAY_SIZE;

	float HzbStepMipLevelFactorValue = ScreenSpaceAOParams[4].z;
	// use a constant to get better performance
	//	float HzbStepMipLevelFactorValue = 0.5f;
	//	float HzbStepMipLevelFactorValue = 1;

	float Scale = (step + 1) / (float)SAMPLE_STEPS;

	//	return log2(1.0f + HzbStepMipLevelFactorValue * Scale * SamplePos);
	return log2(HzbStepMipLevelFactorValue * Scale * SamplePos);
}

// @return depth for the AO computation, either from the setup texture (.a * Constant_Float16F_Scale) or from CalcSceneDepth()
float GetDepthFromAOInput(float2 UV)
{
#if USE_AO_SETUP_AS_INPUT
	// low resolution
	return Texture2DSample(SSAO_SetupTexture, SSAO_Sampler, UV).a * Constant_Float16F_Scale;
#else
	// full resolution
	return CalcSceneDepth(UV);
#endif
}