391 lines
14 KiB
HLSL
391 lines
14 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
PostprocessAmbientOcclusion.ush: Common code (mostly SSAO) used by
|
|
PostProcessAmbientOcclusion.usf and PostProcessAmbientOcclusionMobile.usf
|
|
=============================================================================*/
|
|
|
|
// [0]: .x:AmbientOcclusionPower, .y:AmbientOcclusionBias/BiasDistance, .z:1/AmbientOcclusionDistance, .w:AmbientOcclusionIntensity
|
|
// [1]: .xy:ViewportUVToRandomUV, .z:AORadiusInShader, .w:Ratio
|
|
// [2]: .x:ScaleFactor(e.g. 4 if current RT is a quarter in size), .y:InvThreshold, .z:ScaleRadiusInWorldSpace(0:VS/1:WS), .w:MipBlend
|
|
// [3]: .xy:TemporalAARandomOffset, .z:StaticFraction, .w: InvTanHalfFov
|
|
// [4]: .x:Multiplier for FadeDistance/Radius, .y:Additive for FadeDistance/Radius, .z:clamped HzbStepMipLevelFactorValue .w: unused
|
|
float4 ScreenSpaceAOParams[5];
|
|
|
|
// Two viewport parameter sets, named AOViewport and AOSceneViewport. The macro itself is defined outside this file.
// NOTE(review): it presumably declares prefixed members such as AOViewport_ViewportSize (read by ComputeSampleDebugMask()) - confirm.
SCREEN_PASS_TEXTURE_VIEWPORT(AOViewport)
|
|
SCREEN_PASS_TEXTURE_VIEWPORT(AOSceneViewport)
|
|
|
|
// needed to prevent an AO seam near the 16 bit float maximum, this factor pushes the problem far out and it does not seem to cause a visual degradation nearby
|
|
const static float Constant_Float16F_Scale = 4096.0f * 32.0f;
|
|
|
|
// only for MainSetupPS()
|
|
float ThresholdInverse;
|
|
float2 InputExtentInverse;
|
|
|
|
/** RGBA8 linear texture containing random normals */
|
|
Texture2D RandomNormalTexture;
|
|
SamplerState RandomNormalTextureSampler;
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------
|
|
|
|
#include "HZB.ush"
|
|
|
|
// Samples the HZB at the given screen position and mip and returns the value after ConvertFromDeviceZ().
// @param ScreenPos screen position, mapped to HZB UV space through ScreenPosToHZBUVScaleBias
// @param MipLevel HZB mip to sample
float GetHZBDepth(float2 ScreenPos, float MipLevel)
{
	const float2 SampleUV = ApplyScreenTransform(ScreenPos, ScreenPosToHZBUVScaleBias);
	const float SampledDeviceZ = Texture2DSampleLevel(HZBTexture, HZBSampler, SampleUV, MipLevel).r;

	return ConvertFromDeviceZ(SampledDeviceZ);
}
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------
|
|
|
|
// Setup pass output: .xyz is read as a normal (* 2 - 1), .a as depth (* Constant_Float16F_Scale), see GetWorldSpaceNormalFromAOInput() / GetDepthFromAOInput()
Texture2D SSAO_SetupTexture;
|
|
// Read by ComputeUpsampleContribution(): .xyz is a normal (* 2 - 1), .a is depth (* Constant_Float16F_Scale)
Texture2D SSAO_NormalsTexture;
|
|
// AO data that ComputeUpsampleContribution() gathers and blends
Texture2D SSAO_DownsampledAO;
|
|
// Sampler shared by all three SSAO_* textures above
SamplerState SSAO_Sampler;
|
|
|
|
// Scales the per sample UV offsets in ComputeUpsampleContribution()
float2 SSAO_DownsampledAOInverseSize;
|
|
// Lower UV bound the upsample sample positions are clamped to (non mobile paths only)
float2 SSAO_DownsampledAOUVViewportMin;
|
|
// Upper UV bound the upsample sample positions are clamped to (non mobile paths only)
float2 SSAO_DownsampledAOUVViewportMax;
|
|
// Not referenced by the functions in this file
float2 SSAO_SvPositionScaleBias;
|
|
|
|
// could be moved to a more central spot
// Builds a position from a depth and a screen position: xy is the screen position scaled by the depth, z is the depth.
// @param SceneDepth depth that becomes the z component
// @param ScreenPos -1 .. 1
float3 ReconstructCSPos(float SceneDepth, float2 ScreenPos)
{
	float3 Position;
	Position.xy = ScreenPos * SceneDepth;
	Position.z = SceneDepth;

	return Position;
}
|
|
|
|
// Linear falloff on the absolute depth difference.
// @param TweakScale scales the depth difference, larger values reject samples sooner
// @return 0: not similar .. 1:very similar
float ComputeDepthSimilarity(float DepthA, float DepthB, float TweakScale)
{
	const float ScaledDifference = abs(DepthA - DepthB) * TweakScale;

	return saturate(1 - ScaledDifference);
}
|
|
|
|
// Picks the one sided difference (mid - left or right - mid) with the smaller magnitude.
// If both magnitudes are equal the right hand difference is returned.
float TakeSmallerAbsDelta(float left, float mid, float right)
{
	const float DeltaFromLeft = mid - left;
	const float DeltaToRight = right - mid;

	if (abs(DeltaFromLeft) < abs(DeltaToRight))
	{
		return DeltaFromLeft;
	}

	return DeltaToRight;
}
|
|
|
|
// could use ddx,ddy but that would have less quality and would not work for ComputeShaders
// Reads the device Z of the pixel and its 4 direct neighbors and builds a normal from the reconstructed positions.
// @param SvPosition pixel position, neighbors are looked up at +-1 in x and y
// @return not normalized normal in world space
float3 ReconstructNormalFromDepthBuffer(float4 SvPosition)
{
	// could use a modified version of GatherSceneDepth later on
	const float CenterZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition));
	const float LeftZ   = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4(-1,  0, 0, 0)));
	const float TopZ    = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4( 0, -1, 0, 0)));
	const float RightZ  = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4( 1,  0, 0, 0)));
	const float BottomZ = LookupDeviceZ(SvPositionToBufferUV(SvPosition + float4( 0,  1, 0, 0)));

	// Favor the surface we are looking at. Similar to: http://www.humus.name/index.php?page=3D&ID=84
	const float SlopeX = TakeSmallerAbsDelta(LeftZ, CenterZ, RightZ);
	const float SlopeY = TakeSmallerAbsDelta(TopZ, CenterZ, BottomZ);

	// can be optimized, is not fully centered but that should not matter much
	const float3 Origin = SvPositionToTranslatedWorld(float4(SvPosition.xy, CenterZ, 1));
	const float3 EdgeX  = SvPositionToTranslatedWorld(float4(SvPosition.xy + float2(1, 0), CenterZ + SlopeX, 1)) - Origin;
	const float3 EdgeY  = SvPositionToTranslatedWorld(float4(SvPosition.xy + float2(0, 1), CenterZ + SlopeY, 1)) - Origin;

	return cross(EdgeX, EdgeY);
}
|
|
|
|
// Fetches the world space normal for the AO pass from the source the current shader permutation provides:
// the AO setup texture, a reconstruction from the depth buffer, the Substrate top layer or the GBuffer.
// @param UV buffer UV used for the setup texture and GBuffer lookups
// @param SvPosition pixel position used for the depth based reconstruction and the Substrate lookups
// @return can be 0,0,0 if we don't have a good input normal (USE_NORMALS is off or the pixel is hair);
//         the depth buffer reconstruction path returns a not normalized normal
float3 GetWorldSpaceNormalFromAOInput(float2 UV, float4 SvPosition)
{
	float3 WorldNormal = 0;

	if (USE_NORMALS)
	{
	#if USE_AO_SETUP_AS_INPUT
		// Low resolution normal computed in the setup (downscaled) pass.
		// UV is used unmodified (no frac()) so the normal comes from the same texel GetDepthFromAOInput() reads
		// its depth from and UVs at the border are not wrapped to the opposite edge.
		WorldNormal = Texture2DSample(SSAO_SetupTexture, SSAO_Sampler, UV).xyz * 2 - 1;
	#elif COMPUTE_SHADER || FORWARD_SHADING || SHADING_PATH_MOBILE
		// Async compute and forward shading don't have access to the gbuffer.
		WorldNormal = ReconstructNormalFromDepthBuffer(SvPosition);
	#else
		// NOTE(review): the macro is spelled SUBTRATE_GBUFFER_FORMAT here; kept as is because it is defined outside this file.
		#if SUBTRATE_GBUFFER_FORMAT==1
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(SvPosition.xy, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
		FSubstratePixelHeader SubstrateHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
		// hair keeps the 0,0,0 "no good normal" value
		if (SubstrateHeader.SubstrateGetBSDFType() != SUBSTRATE_BSDF_TYPE_HAIR)
		{
			const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(Substrate.TopLayerTexture.Load(uint3(SvPosition.xy, 0)));
			WorldNormal = TopLayerData.WorldNormal;
		}
		#else
		// Otherwise sample Gbuffer normals if the shader model has normals.
		FGBufferData GBuffer = GetGBufferData(UV, false);
		// hair keeps the 0,0,0 "no good normal" value
		if( GBuffer.ShadingModelID != SHADINGMODELID_HAIR )
		{
			// reuse the already decoded GBuffer instead of fetching and decoding it a second time
			WorldNormal = GBuffer.WorldNormal;
		}
		#endif
	#endif
	}

	return WorldNormal;
}
|
|
|
|
// Depth (and with USE_NORMALS also normal) weighted gather of SSAO_DownsampledAO around InUV.
// AO_UPSAMPLE_QUALITY == 0 uses 4 samples at half texel offsets, otherwise a 3x3 neighborhood is used.
// @param SceneDepth depth of the current pixel, compared against SSAO_NormalsTexture.a * Constant_Float16F_Scale
// @param InUV center of the gather in the UV space of the downsampled AO texture
// @param CenterWorldNormal normal of the current pixel, only used with USE_NORMALS
// @return weighted average of the samples, .g is replaced by the smallest .g of all samples,
//         .a is the accumulated weight divided by the (biased) weight sum
float4 ComputeUpsampleContribution(float SceneDepth, float2 InUV, float3 CenterWorldNormal)
{
	// can be optimized
#if AO_UPSAMPLE_QUALITY == 0
	const int SampleCount = 4;
	const float2 TexelOffsets[SampleCount] =
	{
		float2(-0.5f,  0.5f),
		float2( 0.5f,  0.5f),
		float2(-0.5f, -0.5f),
		float2( 0.5f, -0.5f)
	};
#else // AO_UPSAMPLE_QUALITY == 0
	const int SampleCount = 9;
	const float2 TexelOffsets[SampleCount] =
	{
		float2(-1, -1), float2(0, -1), float2(1, -1),
		float2(-1,  0), float2(0,  0), float2(1,  0),
		float2(-1,  1), float2(0,  1), float2(1,  1)
	};
#endif // AO_UPSAMPLE_QUALITY == 0

	// to avoid division by 0
	const float Epsilon = 0.0001f;

	// we could weight the samples better but tests didn't show much difference
	float TotalWeight = Epsilon;
	float4 Accumulated = float4(Epsilon, 0, 0, 0);

	// read but not used by the weighting below
	float InvThreshold = ScreenSpaceAOParams[2].y;

	float SmallestG = 1.0f;

	UNROLL for (int SampleIndex = 0; SampleIndex < SampleCount; ++SampleIndex)
	{
		float2 SampleUV = InUV + TexelOffsets[SampleIndex] * SSAO_DownsampledAOInverseSize;
	#if !SHADING_PATH_MOBILE
		// the mobile path samples without clamping to the viewport
		SampleUV = clamp(SampleUV, SSAO_DownsampledAOUVViewportMin, SSAO_DownsampledAOUVViewportMax);
	#endif

		const float4 LowResAO = Texture2DSample(SSAO_DownsampledAO, SSAO_Sampler, SampleUV);
		SmallestG = min(SmallestG, LowResAO.g);

		const float4 LowResNormalDepth = Texture2DSample(SSAO_NormalsTexture, SSAO_Sampler, SampleUV);
		const float LowResDepth = LowResNormalDepth.a * Constant_Float16F_Scale;

		// when tweaking this constant look for crawling pattern at edges
		float SampleWeight = ComputeDepthSimilarity(LowResDepth, SceneDepth, 0.003f);

		if (USE_NORMALS)
		{
			const float3 LowResWorldNormal = LowResNormalDepth.xyz * 2 - 1;
			SampleWeight *= saturate(dot(LowResWorldNormal, CenterWorldNormal));
		}

		// todo: 1 can be put into the input to save an instruction
		Accumulated += float4(LowResAO.rgb, 1) * SampleWeight;
		TotalWeight += SampleWeight;
	}

	Accumulated /= TotalWeight;
	Accumulated.g = SmallestG;

	return Accumulated;
}
|
|
|
|
// to blend between upsampled and current pass data
// @return 0 without a former pass (USE_UPSAMPLE == 0), otherwise 1 if this pass computes no AO
//         (AO_SAMPLE_QUALITY == 0), otherwise the MipBlend value set up on the C++ side
float ComputeLerpFactor()
{
#if USE_UPSAMPLE == 0
	// if there is no former pass we cannot use the data, this wins over every other setting
	return 0.0f;
#elif AO_SAMPLE_QUALITY == 0
	// we have no AO, we only use the upsampled data
	return 1.0f;
#else
	// set up on C++ side
	return ScreenSpaceAOParams[2].w;
#endif
}
|
|
|
|
// Cheap stand-in for acos(x) / PI.
// @return NormAngle means 0..1 is actually 0..PI
float acosApproxNormAngle(float x)
{
	// todo: expose
	// 1: is a good linear approximation, 0.9f seems to look good
	const float ContrastTweak = 0.9f;

	// correct: acos(x) / PI
	// linear approximation: saturate((1 - x) * 0.5f);
	const float LinearTerm = (1 - x) * 0.5f;

	// pretty good approximation with contrast tweak
	return saturate(LinearTerm * ContrastTweak);
}
|
|
|
|
// Evaluates one sample pair (center +- InLocalRandom) against the surface normal.
// @param ScreenSpacePosCenter screen position of the shaded pixel
// @param InLocalRandom screen space offset, sampled at +offset (L) and -offset (R)
// @param InvFovFix per component scale applied to the view space deltas
// @param ViewSpacePosition position of the shaded pixel the deltas are taken against
// @param ScaledViewSpaceNormal normal the deltas are projected onto
// @param InvHaloSize scales the delta length for the distance fade (unused with OPTIMIZATION_O1)
// @param MipLevel HZB mip to sample
// @return float3(InvNormAngleL, InvNormAngleR, Weight)
float3 WedgeWithNormal(float2 ScreenSpacePosCenter, float2 InLocalRandom, float3 InvFovFix, float3 ViewSpacePosition, float3 ScaledViewSpaceNormal, float InvHaloSize, float MipLevel)
{
	const float2 PosL = ScreenSpacePosCenter + InLocalRandom;
	const float2 PosR = ScreenSpacePosCenter - InLocalRandom;

	const float DepthL = GetHZBDepth(PosL, MipLevel);
	const float DepthR = GetHZBDepth(PosR, MipLevel);

	const float3 ToSampleL = (ReconstructCSPos(DepthL, PosL) - ViewSpacePosition) * InvFovFix;
	const float3 ToSampleR = (ReconstructCSPos(DepthR, PosR) - ViewSpacePosition) * InvFovFix;

	const float ProjectedL = dot(ToSampleL, ScaledViewSpaceNormal);
	const float ProjectedR = dot(ToSampleR, ScaledViewSpaceNormal);

	const float LengthSqL = dot(ToSampleL, ToSampleL);
	const float LengthSqR = dot(ToSampleR, ToSampleR);

	float3 Result;

#if OPTIMIZATION_O1
	// cheaper variant: divide by the squared length and skip the distance fade
	Result.x = saturate(ProjectedL / LengthSqL);
	Result.y = saturate(ProjectedR / LengthSqR);
	Result.z = 1;
#else
	Result.x = saturate(ProjectedL * rsqrt(LengthSqL));
	Result.y = saturate(ProjectedR * rsqrt(LengthSqR));

	// both samples fade out with their distance to the center
	Result.z =
		  saturate(1.0f - length(ToSampleL) * InvHaloSize)
		* saturate(1.0f - length(ToSampleR) * InvHaloSize);
#endif

	return Result;
}
|
|
|
|
|
|
|
|
// Evaluates one sample pair (center +- InLocalRandom) without using a surface normal.
// @param ScreenSpacePosCenter screen position of the shaded pixel
// @param InLocalRandom screen space offset, sampled at +offset (L) and -offset (R)
// @param InvFovFix per component scale applied to the view space deltas
// @param ViewSpacePosition position of the shaded pixel, .z is compared against the sampled depths
// @param InvHaloSize currently not used by this function, kept so existing callers do not need to change
// @param MipLevel HZB mip to sample
// @return float2(InvNormAngle, Weight) where InvNormAngle is (1 - NormAngle) / (Weight + 0.001f)
float2 WedgeNoNormal(float2 ScreenSpacePosCenter, float2 InLocalRandom, float3 InvFovFix, float3 ViewSpacePosition, float InvHaloSize, float MipLevel)
{
	float2 ScreenSpacePosL = ScreenSpacePosCenter + InLocalRandom;
	float2 ScreenSpacePosR = ScreenSpacePosCenter - InLocalRandom;

	float AbsL = GetHZBDepth(ScreenSpacePosL, MipLevel);
	float AbsR = GetHZBDepth(ScreenSpacePosR, MipLevel);

	float3 SamplePositionL = ReconstructCSPos(AbsL, ScreenSpacePosL);
	float3 SamplePositionR = ReconstructCSPos(AbsR, ScreenSpacePosR);

	float3 DeltaL = (SamplePositionL - ViewSpacePosition) * InvFovFix;
	float3 DeltaR = (SamplePositionR - ViewSpacePosition) * InvFovFix;

	// The per side halo weights (saturate(1.0f - length(Delta) * InvHaloSize), 1 with OPTIMIZATION_O1) that used to be
	// computed here only fed a disabled alternative return value: float2(1 - NormAngle, average of both weights).
	// They and two never assigned SamplePosition locals were dead code and have been removed.

	float FlatSurfaceBias = 5.0f;

	// how far each sample is in front of the shaded pixel
	float left = ViewSpacePosition.z - AbsL;
	float right = ViewSpacePosition.z - AbsR;

	// OptionA: accurate angle computation
	float NormAngle = acosApproxNormAngle( dot(DeltaL, DeltaR) / sqrt(length2(DeltaL) * length2(DeltaR)));
	// OptionB(fade out in near distance): float NormAngle = acosApproxNormAngle( (- left - right) * 20);
	// OptionC(look consistent but more noisy, should be much faster): float NormAngle = 0;

	// not 100% correct but simple
	// bias is needed to avoid flickering on almost perfectly flat surfaces
	// if((leftAbs + rightAbs) * 0.5f > SceneDepth - 0.0001f)
	if(left + right < FlatSurfaceBias)
	{
		// fix concave case
		NormAngle = 1;
	}

	// to avoid halos around objects
	float Weight = 1;

	float InvAmbientOcclusionDistance = ScreenSpaceAOParams[0].z;
	float ViewDepthAdd = 1.0f - ViewSpacePosition.z * InvAmbientOcclusionDistance;

	// each factor is saturate(1 + (SampleZ - ViewZ) * InvAmbientOcclusionDistance): samples in front of the pixel fade out
	Weight *= saturate(SamplePositionL.z * InvAmbientOcclusionDistance + ViewDepthAdd);
	Weight *= saturate(SamplePositionR.z * InvAmbientOcclusionDistance + ViewDepthAdd);

	// the small constant avoids a division by 0 when both samples are fully faded out
	return float2((1-NormAngle) / (Weight + 0.001f), Weight);
}
|
|
|
|
// Rebuilds the z component of a unit length vector from its xy components (z is always >= 0).
// @param In xy of the vector, expected to have a length of at most 1
// @return float3(In, z); z is 0 if In is longer than 1
float3 ReconstructNormal(float2 In)
{
	// Quantized or interpolated input can be slightly longer than 1 which would make the radicand negative
	// and the result NaN, so clamp before taking the square root.
	return float3(In, sqrt(saturate(1 - dot(In, In))));
}
|
|
|
|
|
|
// Debug helper: mask for a disc whose radius doubles with every mip level.
// @param ScreenSpacePos -1..1
// @param MipLevel the disc radius is 12 * 2^MipLevel pixels
// @return 1 if inside the center, 0 if outside
float ComputeSampleDebugMask(float2 ScreenSpacePos, float MipLevel)
{
	// x is only shifted, y is wrapped into 0..1 before the shift
	const float2 Recentered = float2(ScreenSpacePos.x - 0.5f, frac(ScreenSpacePos.y) - 0.5f);

	const float2 ViewportExtent = AOViewport_ViewportSize;

	// truncated to whole pixels
	const int2 PixelOffsetToCenter = int2(Recentered * ViewportExtent * 0.5f);
	const float DistanceInPixels = length(PixelOffsetToCenter);

	// revisit this
	const float BaseRadius = 12.0f;

	// hard
	return DistanceInPixels < BaseRadius * exp2(MipLevel);
	// soft
	// return saturate(1 - DistanceInPixels / (BaseRadius * exp2(MipLevel)));
}
|
|
|
|
// Picks the HZB mip for a sample.
// @param sampleid index of the sample, normalized with SAMPLESET_ARRAY_SIZE
// @param step index of the step, normalized with SAMPLE_STEPS
// @return log2 of (HzbStepMipLevelFactorValue * step fraction * sample position)
float ComputeMipLevel(int sampleid, int step)
{
	// center of the sample's slot in 0..1
	const float NormalizedSamplePos = (sampleid + 0.5f) / SAMPLESET_ARRAY_SIZE;

	// (step + 1) / SAMPLE_STEPS, reaches 1 at the last step
	const float StepFraction = (step + 1) / (float)SAMPLE_STEPS;

	// a constant (e.g. 0.5f or 1) instead of the parameter would give better performance
	const float MipFactor = ScreenSpaceAOParams[4].z;

	// alternative: log2(1.0f + MipFactor * StepFraction * NormalizedSamplePos)
	return log2(MipFactor * StepFraction * NormalizedSamplePos);
}
|
|
|
|
// Fetches the depth the AO pass works with.
// @param UV buffer UV of the pixel
// @return depth from the setup texture with USE_AO_SETUP_AS_INPUT, otherwise the result of CalcSceneDepth()
float GetDepthFromAOInput(float2 UV)
{
#if USE_AO_SETUP_AS_INPUT
	// low resolution, the alpha channel is scaled back up with Constant_Float16F_Scale
	const float StoredDepth = Texture2DSample(SSAO_SetupTexture, SSAO_Sampler, UV).a;
	return StoredDepth * Constant_Float16F_Scale;
#else
	// full resolution
	return CalcSceneDepth(UV);
#endif
}
|