Files
UnrealEngine/Engine/Shaders/Private/TemporalSuperResolution/TSRSpatialAntiAliasing.usf
2025-05-18 13:04:45 +08:00

236 lines
6.5 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "TSRSpatialAntiAliasing.ush"
//------------------------------------------------------- CONFIG
// Width and height, in pixels, of the square tile covered by one thread group
// (used for both the numthreads() size and the per-group pixel origin in MainCS).
#define TILE_SIZE 8
// CONFIG_ITERATIONS is forwarded as the "Iterations" argument of
// BrowseNeighborhoodBilinearOptimized(); it is selected by DIM_QUALITY_PRESET.
// Any preset other than 1 or 2 is rejected at compile time.
#if DIM_QUALITY_PRESET == 1
#define CONFIG_ITERATIONS 3
#elif DIM_QUALITY_PRESET == 2
#define CONFIG_ITERATIONS 8
#else
#error unknown DIM_QUALITY_PRESET
#endif
// Dual pixel vectorization (DPV): when enabled, each thread handles two pixels
// and the entry point launches half as many threads per group.
#if CONFIG_COMPILE_FP16
// Take advantage of RDNA's v_pk_*_{uif}16 instructions
#define CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION 1
#else
#define CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION 0
#endif
//------------------------------------------------------- PARAMETERS
// Per-pixel mask: MainCS runs the anti-aliaser for a pixel only when its value is greater than zero.
Texture2D<tsr_ushort> AntiAliasMaskTexture;
// Luma texture the 3x3 neighborhood is read from and that is handed to the edge browsing.
// NOTE(review): the name suggests LDR-encoded scene color luma -- confirm against the pass producing it.
Texture2D<tsr_half> InputSceneColorLdrLumaTexture;
// Output: .x holds the value returned by EncodeSpatialAntiAliasingOffset(),
// .y holds the noise filtering factor multiplied by 255 and rounded.
RWTexture2D<tsr_ushort2> AntiAliasingOutput;
//------------------------------------------------------- FUNCTIONS
// Maps a thread index within the group to the pixel coordinate(s) it handles inside the tile.
//
// With dual pixel vectorization the thread index bits are distributed as:
//   bit 0     -> y bit 0
//   bits 1..2 -> x bits 1..2 (x is always even)
//   bits 3..4 -> y bits 1..2
// and the second pixel of the pair is the right-hand neighbor (x + 1).
// Without it, the mapping is delegated to Map8x8Tile2x2Lane() and the single
// coordinate is wrapped with dpv_interleave_mono_registers().
tsr_ushort2x2 Map8x8Tile2x2LaneDPV(uint GroupThreadIndex)
{
#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
	const tsr_ushort LaneIndex = tsr_ushort(GroupThreadIndex);
	const tsr_ushort EvenCoordMask = tsr_ushort(0x03 << 1);

	tsr_ushort2 LoPixel;
	LoPixel.x = ((LaneIndex >> tsr_ushort(1 - 1)) & EvenCoordMask);
	LoPixel.y = ((LaneIndex >> tsr_ushort(0)) & tsr_ushort(0x01)) | ((LaneIndex >> tsr_ushort(3 - 1)) & EvenCoordMask);

	// The high pixel sits immediately to the right of the low pixel.
	return dpv_interleave_registers(LoPixel, LoPixel + tsr_ushort2(1, 0));
#else
	tsr_ushort2 LanePixel = Map8x8Tile2x2Lane(GroupThreadIndex);
	return dpv_interleave_mono_registers(LanePixel);
#endif
}
// Fetches the input luma at KernelCenter + Offset for a single pixel.
// The offset is first passed through ClampPixelOffset() together with
// InputPixelPosMin / InputPixelPosMax before being applied.
tsr_half SampleInputLDRLuma(tsr_short2 KernelCenter, tsr_short2 Offset)
{
	const tsr_short2 ClampedPixelPos = KernelCenter + ClampPixelOffset(KernelCenter, Offset, Offset, InputPixelPosMin, InputPixelPosMax);

	return InputSceneColorLdrLumaTexture[ClampedPixelPos];
}
// Dual pixel variant: fetches the input luma at KernelCenter + Offset for both
// pixels of the pair, applying the same Offset to each. The offset goes through
// ClampPixelOffset() with InputPixelPosMin / InputPixelPosMax before being applied.
tsr_half2 SampleInputLDRLuma(tsr_short2x2 KernelCenter, tsr_short2 Offset)
{
	const tsr_short2x2 ClampedPixelPos = KernelCenter + ClampPixelOffset(
		KernelCenter,
		dpv_interleave_mono_registers(Offset), Offset,
		InputPixelPosMin, InputPixelPosMax);

	// One texture fetch per pixel of the pair, then pack both lumas together.
	const tsr_half LumaLo = InputSceneColorLdrLumaTexture[dpv_lo(ClampedPixelPos)];
	const tsr_half LumaHi = InputSceneColorLdrLumaTexture[dpv_hi(ClampedPixelPos)];

	tsr_half2 PackedLuma = dpv_interleave_registers(LumaLo, LumaHi);
	return PackedLuma;
}
//------------------------------------------------------- ENTRY POINT
// Spatial anti-aliasing entry point.
//
// Each thread group covers one TILE_SIZE x TILE_SIZE tile of input pixels. With
// dual pixel vectorization every thread processes two horizontally adjacent
// pixels, so only half as many threads are launched.
//
// Per pixel (pair):
//  1. Read AntiAliasMaskTexture; if neither pixel is flagged, write zeros.
//  2. Otherwise gather the 3x3 luma neighborhood, find the edge direction,
//     browse along the edge and turn the result into a texel offset.
//  3. Write the encoded texel offset and the noise filtering factor to AntiAliasingOutput.
//
// GroupId          - thread group index, scaled by TILE_SIZE to get the tile origin.
// GroupThreadIndex - flattened thread index within the group, mapped to a pixel by Map8x8Tile2x2LaneDPV().
#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
[numthreads(TILE_SIZE * TILE_SIZE / 2, 1, 1)]
#else
[numthreads(TILE_SIZE * TILE_SIZE, 1, 1)]
#endif
void MainCS(
	uint2 GroupId : SV_GroupID,
	uint GroupThreadIndex : SV_GroupIndex)
{
	// NOTE(review): GroupWaveIndex is not read anywhere in this entry point, and the
	// GroupSize passed here is TILE_SIZE * TILE_SIZE even though the DPV path launches
	// half that many threads -- confirm whether this is intentional.
	uint GroupWaveIndex = GetGroupWaveIndex(GroupThreadIndex, /* GroupSize = */ TILE_SIZE * TILE_SIZE);

	// Only consumed when DEBUG_OUTPUT is enabled.
	float4x2 Debug = 0.0;

	// Pixel position(s) of this thread: viewport min + tile origin + position within the tile.
	tsr_short2x2 InputPixelPos = dpv_add(
		tsr_short2(InputPixelPosMin) + tsr_short2(GroupId) * tsr_short2(TILE_SIZE, TILE_SIZE),
		tsr_short2x2(Map8x8Tile2x2LaneDPV(GroupThreadIndex)));

	//Debug[0] = 1.0;

	bool2 bAntiAlias;
	bAntiAlias[0] = AntiAliasMaskTexture[dpv_lo(InputPixelPos)] > tsr_ushort(0);
	bAntiAlias[1] = AntiAliasMaskTexture[dpv_hi(InputPixelPos)] > tsr_ushort(0);

	// Skip the anti-aliaser entirely when none of this thread's pixels is flagged in the mask.
	bool bSkipAntiAliaser = all(!bAntiAlias);

	tsr_half2x2 TexelOffset;
	tsr_half2 NoiseFiltering;

	BRANCH
	if (bSkipAntiAliaser)
	{
		TexelOffset = tsr_half(0.0);
		NoiseFiltering = tsr_half(0.0);
	}
	else
	{
		#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION && 1
			// The two pixels of a pair are horizontal neighbors, so their 3x3 neighborhoods
			// overlap: a single 4x3 fetch around the low pixel covers both of them.
			tsr_half InputCache[3 * 4];
			UNROLL_N(3)
			for (int cy = 0; cy < 3; cy++)
			{
				UNROLL_N(4)
				for (int cx = 0; cx < 4; cx++)
				{
					const uint ci = cx + 4 * cy;
					InputCache[ci] = SampleInputLDRLuma(dpv_lo(InputPixelPos), tsr_short2(cx - 1, cy - 1));
				}
			}

			// Low pixel reads column (x + 1), high pixel reads the next column (x + 2).
			#define ReadCache(x, y) tsr_half2(InputCache[((x) + 1) + 4 * ((y) + 1)], InputCache[((x) + 2) + 4 * ((y) + 1)])
		#else
			// No trailing semicolon: the macro must expand to a plain expression,
			// exactly like the cached variant above.
			#define ReadCache(x, y) SampleInputLDRLuma(InputPixelPos, tsr_short2((x), (y)))
		#endif

		// 3x3 luma neighborhood (C = center, N/S/E/W and diagonals).
		tsr_half2 InputC = ReadCache( 0,  0);
		tsr_half2 InputN = ReadCache( 0, -1);
		tsr_half2 InputS = ReadCache( 0, +1);
		tsr_half2 InputE = ReadCache(+1,  0);
		tsr_half2 InputW = ReadCache(-1,  0);
		tsr_half2 InputNE = ReadCache(+1, -1);
		tsr_half2 InputNW = ReadCache(-1, -1);
		tsr_half2 InputSE = ReadCache(+1, +1);
		tsr_half2 InputSW = ReadCache(-1, +1);

		tsr_short2x2 BrowseDirection;
		tsr_short2x2 EdgeSide;
		tsr_half2 EdgeLuma;
		FindBrowsingDirection(
			InputC,
			InputN, InputS, InputE, InputW,
			InputNE, InputNW, InputSE, InputSW,
			/* out */ NoiseFiltering,
			/* out */ BrowseDirection,
			/* out */ EdgeSide,
			/* out */ EdgeLuma);

		// Half of the luma difference across the edge decides whether the edge is worth
		// browsing (WorthBrowsing) and how strongly the resulting offset is applied (BrowseDelta).
		tsr_half2 LumaDelta = abs(EdgeLuma - InputC) * 0.5;
		bool2 WorthBrowsing = LumaDelta > SPATIAL_ANTI_ALIASER_MIN_LUMIMANCE;
		tsr_half2 BrowseDelta = saturate(LumaDelta * rcp(tsr_half(SPATIAL_ANTI_ALIASER_MIN_LUMIMANCE)));

		tsr_ushort2 EdgeLengthP, EdgeLengthN;
		bool2 bEdgeStopedByIncrementP, bEdgeStopedByIncrementN;
		bool2 bEdgeStopedByDecrementP, bEdgeStopedByDecrementN;
		BrowseNeighborhoodBilinearOptimized(
			/* Iterations = */ CONFIG_ITERATIONS,
			/* Texture = */ InputSceneColorLdrLumaTexture,
			InputC,
			EdgeLuma,
			InputPixelPos,
			BrowseDirection,
			EdgeSide,
			/* out */ EdgeLengthP,
			/* out */ EdgeLengthN,
			/* out */ bEdgeStopedByIncrementP,
			/* out */ bEdgeStopedByIncrementN,
			/* out */ bEdgeStopedByDecrementP,
			/* out */ bEdgeStopedByDecrementN);

		// ComputeDistanceToEdge() is evaluated once per pixel of the pair.
		tsr_half TexelOffsetLo = ComputeDistanceToEdge(
			bEdgeStopedByIncrementN[0],
			bEdgeStopedByIncrementP[0],
			bEdgeStopedByDecrementN[0],
			bEdgeStopedByDecrementP[0],
			dpv_lo(EdgeLengthN),
			dpv_lo(EdgeLengthP));
		tsr_half TexelOffsetHi = ComputeDistanceToEdge(
			bEdgeStopedByIncrementN[1],
			bEdgeStopedByIncrementP[1],
			bEdgeStopedByDecrementN[1],
			bEdgeStopedByDecrementP[1],
			dpv_hi(EdgeLengthN),
			dpv_hi(EdgeLengthP));

		// Edges with too little contrast produce no offset.
		FLATTEN
		if (!WorthBrowsing[0])
			TexelOffsetLo = 0.0;
		FLATTEN
		if (!WorthBrowsing[1])
			TexelOffsetHi = 0.0;

		// Final offset: EdgeSide scaled by the contrast weight and the per-pixel distance.
		TexelOffset = dpv_scale(tsr_half2x2(EdgeSide), BrowseDelta * dpv_interleave_registers(TexelOffsetLo, TexelOffsetHi));
	}

	uint2 EncodedTexelOffset = EncodeSpatialAntiAliasingOffset(TexelOffset);

	{
		tsr_short2x2 OutputPixelPos = InputPixelPos;
		// NOTE(review): presumably redirects positions beyond InputInfo_ViewportMax so that
		// their writes are discarded -- confirm against InvalidateOutputPixelPos().
		OutputPixelPos = InvalidateOutputPixelPos(OutputPixelPos, InputInfo_ViewportMax);

		AntiAliasingOutput[dpv_lo(OutputPixelPos)] = tsr_ushort2(dpv_lo(EncodedTexelOffset), round(dpv_lo(NoiseFiltering) * tsr_half(255.0)));
		#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
			AntiAliasingOutput[dpv_hi(OutputPixelPos)] = tsr_ushort2(dpv_hi(EncodedTexelOffset), round(dpv_hi(NoiseFiltering) * tsr_half(255.0)));
		#endif

		#if DEBUG_OUTPUT
		{
			DebugOutput[tsr_short3(dpv_lo(OutputPixelPos), 0)] = dpv_lo(Debug);
			#if CONFIG_ENABLE_DUAL_PIXEL_VECTORIZATION
				DebugOutput[tsr_short3(dpv_hi(OutputPixelPos), 0)] = dpv_hi(Debug);
			#endif
		}
		#endif
	}
}