Files
UnrealEngine/Engine/Shaders/Private/TemporalSuperResolution/TSRClosestOccluder.ush
2025-05-18 13:04:45 +08:00

332 lines
13 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "TSRCommon.ush"
//------------------------------------------------------- DEFINITIONS
// Compiles the orthographic code paths (the IsOrthoProjection() branches and the second texture slice) in this file.
#define CONFIG_SUPPORT_ORTHO_VIEW 1
// Total number of bits of an encoded hole filling velocity (angle bits + length bits).
#define VELOCITY_HOLE_FILLING_BITS 18
// Number of low bits of the encoded velocity that store the quantized length.
#define VELOCITY_HOLE_FILLING_LENGTH_BITS 13
// Remaining high bits of the encoded velocity store the quantized angle (18 - 13 = 5 bits).
#define VELOCITY_HOLE_FILLING_ANGLE_BITS (VELOCITY_HOLE_FILLING_BITS - VELOCITY_HOLE_FILLING_LENGTH_BITS)
// Scale applied to the velocity length before it is converted to an integer: 2^5 = 32 quantization steps per unit of length.
#define VELOCITY_HOLE_FILLING_LENGTH_PRECISION (pow(2.0, 5))
//------------------------------------------------------- PARAMETERS
// Exclusive lower / upper bounds tested against the reprojected buffer UV in ScatterClosestOccluder():
// a pixel only scatters when its previous-frame UV is strictly inside (Min, Max) on both axes.
// NOTE(review): the names say "output buffer" while the tested UV is built from InputInfo_* -- confirm the intended space on the C++ side.
float2 PrevOutputBufferUVMin;
float2 PrevOutputBufferUVMax;
//------------------------------------------------------- HOLE FILLING VELOCITY ENCODING
// Converts a 2D velocity to polar coordinates.
//  PixelVelocity:       velocity to convert.
//  PixelVelocityAngle:  (out) angle of the velocity, as returned by atan2Fast(y, x).
//  PixelVelocityLength: (out) euclidean length of the velocity.
CALL_SITE_DEBUGLOC
void PolarCoordinateHoleFillingVelocity(float2 PixelVelocity, out float PixelVelocityAngle, out float PixelVelocityLength)
{
	PixelVelocityLength = length(PixelVelocity);
	PixelVelocityAngle = atan2Fast(PixelVelocity.y, PixelVelocity.x);
}
// Converts a polar velocity (angle, length) back to a 2D vector: length * (cos(angle), sin(angle)).
// Inverse of PolarCoordinateHoleFillingVelocity().
CALL_SITE_DEBUGLOC
float2 CartesianHoleFillingVelocity(float PixelVelocityAngle, float PixelVelocityLength)
{
	float SinAngle;
	float CosAngle;
	sincos(PixelVelocityAngle, /* out */ SinAngle, /* out */ CosAngle);

	return float2(CosAngle, SinAngle) * PixelVelocityLength;
}
// Rescales a velocity into the (still fractional) units of the hole filling encoding.
//  EncodedAngle:  (out) the polar angle remapped so that [-PI, PI] lands on [0, 2^VELOCITY_HOLE_FILLING_ANGLE_BITS].
//  EncodedLength: (out) the polar length multiplied by VELOCITY_HOLE_FILLING_LENGTH_PRECISION.
// No rounding or clamping happens here; see EncodeHoleFillingVelocity() for the integer conversion.
CALL_SITE_DEBUGLOC
void QuantizeHoleFillingVelocity(float2 PixelVelocity, out float EncodedAngle, out float EncodedLength)
{
	const uint AngleStepCount = 1 << VELOCITY_HOLE_FILLING_ANGLE_BITS;

	float Angle;
	float Length;
	PolarCoordinateHoleFillingVelocity(PixelVelocity, /* out */ Angle, /* out */ Length);

	EncodedLength = Length * VELOCITY_HOLE_FILLING_LENGTH_PRECISION;
	EncodedAngle = Angle * (0.5 * AngleStepCount / PI) + (0.5 * AngleStepCount);
}
// Packs a velocity into VELOCITY_HOLE_FILLING_BITS bits:
//  - high VELOCITY_HOLE_FILLING_ANGLE_BITS bits: angle, rounded to nearest and wrapped around the full turn;
//  - low VELOCITY_HOLE_FILLING_LENGTH_BITS bits: length, rounded up and saturated to the largest code.
CALL_SITE_DEBUGLOC
uint EncodeHoleFillingVelocity(float2 PixelVelocity)
{
	const uint AngleStepCount = 1 << VELOCITY_HOLE_FILLING_ANGLE_BITS;
	const uint LengthStepCount = 1 << VELOCITY_HOLE_FILLING_LENGTH_BITS;

	float QuantizedAngle;
	float QuantizedLength;
	QuantizeHoleFillingVelocity(PixelVelocity, /* out */ QuantizedAngle, /* out */ QuantizedLength);

	// The mask makes the top of the angle range wrap back onto code 0.
	const uint AngleCode = uint(round(QuantizedAngle)) & (AngleStepCount - 1);

	// Lengths too long for the encoding saturate to the last code.
	const uint LengthCode = fastClamp(uint(ceil(QuantizedLength)), 0u, LengthStepCount - 1u);

	return AngleCode * LengthStepCount + LengthCode;
}
// Unpacks a velocity produced by EncodeHoleFillingVelocity() into polar coordinates.
// Only the low VELOCITY_HOLE_FILLING_BITS bits of EncodedHoleFillingVelocity are read, any higher bit is ignored.
//  PixelAngle:  (out) angle in radians, in [-PI, PI).
//  PixelLength: (out) length, in the same units that were given to the encoder.
CALL_SITE_DEBUGLOC
void DecodeHoleFillingVelocity(uint EncodedHoleFillingVelocity, out float PixelAngle, out float PixelLength)
{
	const uint AngleStepCount = 1 << VELOCITY_HOLE_FILLING_ANGLE_BITS;
	const uint LengthStepCount = 1 << VELOCITY_HOLE_FILLING_LENGTH_BITS;

	// Both step counts are powers of two, so masking is the same as taking the remainder.
	const uint LengthCode = EncodedHoleFillingVelocity & (LengthStepCount - 1u);
	const uint AngleCode = (EncodedHoleFillingVelocity >> VELOCITY_HOLE_FILLING_LENGTH_BITS) & (AngleStepCount - 1u);

	PixelLength = float(LengthCode) * rcp(VELOCITY_HOLE_FILLING_LENGTH_PRECISION);
	PixelAngle = float(AngleCode) * (PI * 2.0 / float(AngleStepCount)) - PI;
}
// Returns the velocity length threshold below which a decoded length is considered encodable:
// 1.5 quantization steps under the number of length codes, converted back to velocity units.
CALL_SITE_DEBUGLOC
float GetMaxEncodableHoleFillingPixelLength()
{
	const uint LengthStepCount = 1 << VELOCITY_HOLE_FILLING_LENGTH_BITS;

	return (float(LengthStepCount) - 1.5) / VELOCITY_HOLE_FILLING_LENGTH_PRECISION;
}
//------------------------------------------------------- CLOSEST OCCLUDER SCATTERING
// Scatters the depth and the encoded velocity of one pixel onto the four bilinear taps surrounding its
// reprojected (previous frame) position, using InterlockedMax so that the largest packed value wins per texel.
//  PrevAtomicOutput:    texture array receiving the packed values (slice 0 always, slice 1 only in the ortho path).
//  bValidOutputPixel:   when false nothing is scattered for this pixel.
//  ScreenPos:           screen position of the pixel.
//  FinalScreenVelocity: screen space velocity; the previous position is ScreenPos - FinalScreenVelocity.
//  PrevClosestDeviceZ:  device Z to scatter.
void ScatterClosestOccluder(globallycoherent RWTexture2DArray<uint> PrevAtomicOutput, bool bValidOutputPixel, float2 ScreenPos, float2 FinalScreenVelocity, float PrevClosestDeviceZ)
{
// Bilinear weights are quantized to this many steps; a tap whose quantized weight rounds to 0 is skipped.
const float PrevUseCountQuantization = 63.0;
float2 PrevScreenPos = ScreenPos - FinalScreenVelocity.xy;
float2 PrevInputBufferUV = (InputInfo_ScreenPosToViewportScale * PrevScreenPos + InputInfo_ScreenPosToViewportBias) * InputInfo_ExtentInverse;
// The velocity is encoded in pixels rather than in screen space units.
uint EncodedHoleFillingVelocity = EncodeHoleFillingVelocity(FinalScreenVelocity.xy * ScreenVelocityToInputPixelVelocity);
// The reprojected UV must be strictly inside (PrevOutputBufferUVMin, PrevOutputBufferUVMax) to scatter.
bool bValidHistoryCoord = bValidOutputPixel && all(and(PrevInputBufferUV > PrevOutputBufferUVMin, PrevInputBufferUV < PrevOutputBufferUVMax));
FBilinearSampleInfos BilinearInter = GetBilinearSampleLevelInfos(PrevInputBufferUV, InputInfo_Extent, InputInfo_ExtentInverse);
#if CONFIG_SUPPORT_ORTHO_VIEW
BRANCH
if (IsOrthoProjection())
{
// Ortho layout: device Z quantized to 23 bits in the upper bits, 9 bits of encoded velocity in the lower bits.
// The 18 bit velocity is split in two halves: high half in slice 0, low half in slice 1.
uint Depth = clamp(uint(round(PrevClosestDeviceZ * 8388607.0)), 0, 8388607) << 9;
uint PrevClosestDepth0 = Depth | ((EncodedHoleFillingVelocity >> 9u) & 0x1FFu);
uint PrevClosestDepth1 = Depth | ((EncodedHoleFillingVelocity >> 0u) & 0x1FFu);
UNROLL_N(4)
for (uint i = 0; i < 4; i++)
{
float Weight = GetSampleWeight(BilinearInter, i);
uint2 PixelPos = GetSamplePixelCoord(BilinearInter, i);
uint FinalAdd = uint(round(Weight * PrevUseCountQuantization));
// Rejected taps get an all-ones X coordinate instead of branching around the atomic.
// NOTE(review): presumably this relies on out-of-bounds UAV writes being discarded -- confirm for every target platform.
PixelPos.x = ((FinalAdd > 0) && bValidHistoryCoord) ? PixelPos.x : (~0);
uint PreviousDepth0;
InterlockedMax(PrevAtomicOutput[tsr_ushort3(PixelPos, 0)], PrevClosestDepth0, /* out */ PreviousDepth0);
// Slice 1 is only written when this thread raised the value stored in slice 0.
PixelPos.x = select(PreviousDepth0 < PrevClosestDepth0, PixelPos.x, ~0u);
InterlockedMax(PrevAtomicOutput[tsr_ushort3(PixelPos, 1)], PrevClosestDepth1);
}
}
else
#endif // CONFIG_SUPPORT_ORTHO_VIEW
{
// Perspective layout: half precision device Z above the VELOCITY_HOLE_FILLING_BITS bits of encoded velocity,
// so a single InterlockedMax orders by depth first and by velocity code second.
uint PrevClosestDepth = (f32tof16(PrevClosestDeviceZ) << VELOCITY_HOLE_FILLING_BITS) | EncodedHoleFillingVelocity;
UNROLL_N(4)
for (uint i = 0; i < 4; i++)
{
float Weight = GetSampleWeight(BilinearInter, i);
uint2 PixelPos = GetSamplePixelCoord(BilinearInter, i);
uint FinalAdd = uint(round(Weight * PrevUseCountQuantization));
// Same rejection trick as the ortho path: an all-ones X coordinate for taps that must not be written.
PixelPos.x = ((FinalAdd > 0) && bValidHistoryCoord) ? PixelPos.x : (~0);
InterlockedMax(PrevAtomicOutput[tsr_ushort3(PixelPos, 0)], PrevClosestDepth);
}
}
} // ScatterClosestOccluder()
// Loads the raw packed values of the four bilinear taps surrounding PrevScreenPos.
//  PrevAtomicTextureArray:        texture array to read from.
//  PrevScreenPos:                 screen position to fetch the taps around.
//  HistoryClosestDeviceZSamples0: (out) the four taps of slice 0.
//  HistoryClosestDeviceZSamples1: (out) the four taps of slice 1 when CONFIG_SUPPORT_ORTHO_VIEW is set, zeros otherwise.
// The values are returned undecoded; ProcessPrevAtomicTexturesSamples() interprets them.
void LoadPrevAtomicTexturesSamples(
	Texture2DArray<uint> PrevAtomicTextureArray,
	float2 PrevScreenPos,
	out uint HistoryClosestDeviceZSamples0[4],
	out uint HistoryClosestDeviceZSamples1[4])
{
	float2 PrevInputBufferUV = (InputInfo_ScreenPosToViewportScale * PrevScreenPos + InputInfo_ScreenPosToViewportBias) * InputInfo_ExtentInverse;
	// ProcessPrevAtomicTexturesSamples() marks the same UV expression INVARIANT too.
	// NOTE(review): presumably so both functions resolve to the exact same taps -- confirm.
	PrevInputBufferUV = INVARIANT(PrevInputBufferUV);

	FBilinearSampleInfos BilinearInter = GetBilinearSampleLevelInfos(PrevInputBufferUV, InputInfo_Extent, InputInfo_ExtentInverse);

	// Only the tap coordinates are needed here; the bilinear weights are not used by the load.
	UNROLL_N(4)
	for (uint i = 0; i < 4; i++)
	{
		uint2 PixelPos = GetSamplePixelCoord(BilinearInter, i);

		HistoryClosestDeviceZSamples0[i] = PrevAtomicTextureArray[uint3(PixelPos, 0)];
		#if CONFIG_SUPPORT_ORTHO_VIEW
			HistoryClosestDeviceZSamples1[i] = PrevAtomicTextureArray[uint3(PixelPos, 1)];
		#else
			HistoryClosestDeviceZSamples1[i] = 0;
		#endif
	}
} // LoadPrevAtomicTexturesSamples()
// Interprets the four closest-occluder taps loaded by LoadPrevAtomicTexturesSamples() to decide whether the
// reprojected pixel is a parallax disocclusion, and to extract the velocity usable for hole filling.
//  HistoryClosestDeviceZSamples0: packed taps of slice 0.
//  HistoryClosestDeviceZSamples1: packed taps of slice 1 (only read by the ortho path).
//  ScreenPos / ScreenVelocity:    the taps are located around ScreenPos - ScreenVelocity.
//  PrevDeviceZ:                   device Z compared against the taps.
//  DeviceZError:                  device Z tolerance, converted to a world depth tolerance.
//  bIsOffScreen:                  when true, bIsParallaxDisocclusion is forced to false.
//  bIsParallaxDisocclusion:       (out) true when the weighted depth agreement is below 0.5 and not off screen.
//  HoleFillingPixelVelocity:      (out) decoded velocity of the closest tap inside the viewport.
//  bCanHoleFill:                  (out) true when at least one tap is inside the viewport and the decoded
//                                 length is below GetMaxEncodableHoleFillingPixelLength().
void ProcessPrevAtomicTexturesSamples(
	uint HistoryClosestDeviceZSamples0[4],
	uint HistoryClosestDeviceZSamples1[4],
	float2 ScreenPos,
	float2 ScreenVelocity,
	float PrevDeviceZ,
	float DeviceZError,
	bool bIsOffScreen,
	out bool bIsParallaxDisocclusion,
	out float2 HoleFillingPixelVelocity,
	out bool bCanHoleFill)
{
	float WorldDepth = ConvertFromDeviceZ(PrevDeviceZ);
	float WorldDepthError = abs(WorldDepth - ConvertFromDeviceZ(PrevDeviceZ + DeviceZError));

	float2 PrevScreenPos = ScreenPos - ScreenVelocity;
	float2 PrevInputBufferUV = (InputInfo_ScreenPosToViewportScale * PrevScreenPos + InputInfo_ScreenPosToViewportBias) * InputInfo_ExtentInverse;
	PrevInputBufferUV = INVARIANT(PrevInputBufferUV);

	FBilinearSampleInfos BilinearInter = GetBilinearSampleLevelInfos(PrevInputBufferUV, InputInfo_Extent, InputInfo_ExtentInverse);

	// Sum of bilinear weight * depth agreement over the taps that are inside the viewport.
	tsr_half ParallaxRejectionMask = tsr_half(0.0);
	bool bIsValidHoleFillingPixelVelocity = false;
	uint EncodedHoleFillingVelocity = 0;

	#if CONFIG_SUPPORT_ORTHO_VIEW
	BRANCH
	if (IsOrthoProjection())
	{
		// Largest device Z seen so far among the valid taps.
		float ClosestDeviceZ = 0.0f;

		UNROLL_N(4)
		for (uint i = 0; i < 4; i++)
		{
			float BilinearWeight = GetSampleWeight(BilinearInter, i);
			uint2 PixelPos = GetSamplePixelCoord(BilinearInter, i);
			bool bValidPixelPos = all(and(PixelPos >= InputInfo_ViewportMin, PixelPos < InputInfo_ViewportMax));

			uint SampleHistoryClosestDeviceZ0 = HistoryClosestDeviceZSamples0[i];
			uint SampleHistoryClosestDeviceZ1 = HistoryClosestDeviceZSamples1[i];

			// Ortho layout: 23 bits of depth above 9 bits of velocity; slice 0 has the high velocity half, slice 1 the low half.
			uint SampleEncodedHoleFillingVelocity = ((SampleHistoryClosestDeviceZ0 & 0x1FFu) << 9) | (SampleHistoryClosestDeviceZ1 & 0x1FFu);
			float HistoryClosestDeviceZ = float((SampleHistoryClosestDeviceZ0 & 0xFFFFFE00u) >> 9u) * rcp(8388607.0);
			float HistoryClosestWorldDepth = ConvertFromDeviceZ(HistoryClosestDeviceZ);

			// 1 when the depths agree within the tolerance, fading linearly to 0 at twice the tolerance.
			tsr_half DepthRejection;
			{
				const float PixelDepthError = 3.0;
				const float RadiusToDiameter = 2.0;
				float WorldDepthEpsilon = GetDepthPixelRadiusForProjectionType(HistoryClosestWorldDepth) * PixelDepthError * RadiusToDiameter;
				WorldDepthEpsilon += WorldDepthError;
				float DeltaDepth = abs(HistoryClosestWorldDepth - WorldDepth);
				DepthRejection = saturate(tsr_half(2.0) - tsr_half(DeltaDepth / WorldDepthEpsilon));
			}

			bIsValidHoleFillingPixelVelocity = bIsValidHoleFillingPixelVelocity || bValidPixelPos;

			// Keep the velocity of the tap with the largest device Z, matching the max() selection of the
			// perspective path. The running maximum has to be recorded, otherwise every non-empty valid tap
			// passes the test and the last one wins regardless of its depth.
			FLATTEN
			if (bValidPixelPos && HistoryClosestDeviceZ > ClosestDeviceZ)
			{
				ClosestDeviceZ = HistoryClosestDeviceZ;
				EncodedHoleFillingVelocity = SampleEncodedHoleFillingVelocity;
			}

			ParallaxRejectionMask = select(bValidPixelPos, ParallaxRejectionMask + tsr_half(BilinearWeight) * DepthRejection, ParallaxRejectionMask);
		}
	}
	else
	#endif // CONFIG_SUPPORT_ORTHO_VIEW
	{
		UNROLL_N(4)
		for (uint i = 0; i < 4; i++)
		{
			float BilinearWeight = GetSampleWeight(BilinearInter, i);
			uint2 PixelPos = GetSamplePixelCoord(BilinearInter, i);
			bool bValidPixelPos = all(and(PixelPos >= InputInfo_ViewportMin, PixelPos < InputInfo_ViewportMax));

			// Perspective layout: half precision device Z above VELOCITY_HOLE_FILLING_BITS bits of velocity.
			uint SampleHistoryClosestDeviceZ = HistoryClosestDeviceZSamples0[i];
			float HistoryClosestDeviceZ = f16tof32(SampleHistoryClosestDeviceZ >> VELOCITY_HOLE_FILLING_BITS);
			float HistoryClosestWorldDepth = ConvertFromDeviceZ(HistoryClosestDeviceZ);

			// 1 when the depths agree within the tolerance, fading linearly to 0 at twice the tolerance.
			tsr_half DepthRejection;
			{
				const float PixelDepthError = 3.0;
				const float RadiusToDiameter = 2.0;
				float WorldDepthEpsilon = GetDepthPixelRadiusForProjectionType(HistoryClosestWorldDepth) * PixelDepthError * RadiusToDiameter;
				WorldDepthEpsilon += WorldDepthError;
				float DeltaDepth = abs(HistoryClosestWorldDepth - WorldDepth);
				DepthRejection = saturate(tsr_half(2.0) - tsr_half(DeltaDepth / WorldDepthEpsilon));
			}

			bIsValidHoleFillingPixelVelocity = bIsValidHoleFillingPixelVelocity || bValidPixelPos;

			// max() on the whole packed value selects the tap with the largest depth bits; the depth bits left
			// in the result are ignored by DecodeHoleFillingVelocity().
			EncodedHoleFillingVelocity = select(bValidPixelPos, max(EncodedHoleFillingVelocity, SampleHistoryClosestDeviceZ), EncodedHoleFillingVelocity);
			ParallaxRejectionMask = select(bValidPixelPos, ParallaxRejectionMask + tsr_half(BilinearWeight) * DepthRejection, ParallaxRejectionMask);
		}
	}

	float HoleFillingPixelAngle;
	float HoleFillingPixelLength;
	DecodeHoleFillingVelocity(EncodedHoleFillingVelocity, /* out */ HoleFillingPixelAngle, /* out */ HoleFillingPixelLength);

	bool bIsEncodablePixelLength = HoleFillingPixelLength < GetMaxEncodableHoleFillingPixelLength();

	HoleFillingPixelVelocity = CartesianHoleFillingVelocity(HoleFillingPixelAngle, HoleFillingPixelLength);
	bCanHoleFill = bIsValidHoleFillingPixelVelocity && bIsEncodablePixelLength;

	// Dim down the parallax rejection mask if the hole fill velocity matches
	FLATTEN
	if (bIsEncodablePixelLength)
	{
		// Size of one angle quantization step, in radians.
		const float AnglePrecision = 2.0 * PI * pow(0.5, VELOCITY_HOLE_FILLING_ANGLE_BITS);
		const float LengthPixelPrecision = VELOCITY_HOLE_FILLING_LENGTH_PRECISION;

		float2 PixelVelocity = ScreenVelocity * ScreenVelocityToInputPixelVelocity;

		float PixelVelocityAngle, PixelVelocityLength;
		PolarCoordinateHoleFillingVelocity(PixelVelocity, /* out */ PixelVelocityAngle, /* out */ PixelVelocityLength);

		// Arc length covered by one angle step at this velocity length.
		float CartesianCoordinatePrecision = AnglePrecision * PixelVelocityLength;

		float PixelLengthDiff = abs(HoleFillingPixelLength - PixelVelocityLength) - 2.0;

		// Shortest angular distance, taking the wrap around into account.
		float PixelAngleDiff = abs(PixelVelocityAngle - HoleFillingPixelAngle);
		PixelAngleDiff = min(PixelAngleDiff, 2.0 * PI - PixelAngleDiff);

		float VelocityDiff = length(HoleFillingPixelVelocity - PixelVelocity);

		float PixelAngleCompare = saturate(2.0 - PixelAngleDiff / AnglePrecision);
		float PixelLengthCompare = saturate(1.0 + rcp(LengthPixelPrecision) - PixelLengthDiff);

		float MinParallaxRejectionMaskPolar = min(PixelLengthCompare, PixelAngleCompare);
		float MinParallaxRejectionMaskCartesian = saturate(1.0 + CartesianCoordinatePrecision - VelocityDiff);

		// The cartesian comparison is used when the shorter of the two velocities is at most 2 in length,
		// blending to the polar comparison as it grows from 2 to 3.
		float MinParallaxRejectionMask = lerp(MinParallaxRejectionMaskCartesian, MinParallaxRejectionMaskPolar, saturate(min(HoleFillingPixelLength, PixelVelocityLength) - 2.0));

		ParallaxRejectionMask = max(ParallaxRejectionMask, tsr_half(MinParallaxRejectionMask));
	}

	bIsParallaxDisocclusion = !bIsOffScreen && ParallaxRejectionMask < tsr_half(0.5);
} // ProcessPrevAtomicTexturesSamples()