124 lines
5.4 KiB
HLSL
124 lines
5.4 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "VRSShadingRateCommon.ush"
|
|
#include "../Common.ush"
|
|
#include "../SceneTexturesCommon.ush"
|
|
|
|
// Size of the input shading rate image (SRI); source texel coordinates are clamped against it.
float2 InputSRIDimensions;

// Size of the scaled output SRI; threads whose dispatch ID falls outside it do no work.
float2 ScaledSRIDimensions;

// Multiplies a UV to get a texel coordinate (also scales ScaledUVOffset into an output texel offset).
float2 TextureDimensions;

// UV size of one texel; used to turn a dispatch thread ID into a UV.
float2 InvTextureDimensions;

// UV-space offset added to the depth lookup UV and, scaled by TextureDimensions, to the output write coordinate.
float2 ScaledUVOffset;

// Factor applied when converting a UV into a texel coordinate of the source (pre-scaled) SRI.
float InvDynamicResolutionScale;

// Source SRI. Each texel carries the shading rate in its low bits and the conservative
// shading rate shifted up by CONSERVATIVE_SHADING_RATE_SHIFT.
Texture2D<uint> InputSRITexture;

// Output: rescaled shading rate.
RWTexture2D<uint> ScaledSRITexture;

// Output: rescaled conservative shading rate.
RWTexture2D<uint> ScaledConservativeSRITexture;

// One device-Z sample per 2x2 block of threads in the group, written by the block's
// top-left thread and shared with the other three.
groupshared float CachedDepth[THREADGROUP_SIZE / 2][THREADGROUP_SIZE / 2];
|
|
|
|
// Reprojects a buffer UV from the current frame into the previous frame.
//
// This is deliberately a coarse approximation of reprojection, trading accuracy for performance:
//   1. Only camera motion is considered, not velocity vectors, so the result is incorrect for
//      dynamic objects. In most cases motion blur hides this entirely, and no perceptible
//      artifacts were found even with motion blur off.
//   2. Only one depth texture fetch is done, in the center of each 16x16 pixel block.
//
// Both approximations are acceptable because VRS does not need much accuracy: the side effect of
// being "wrong" is just lower resolution in that area, rather than the kind of ghosting bad
// reprojection causes in something like TAA. In practice reprojection is only required in a small
// set of scenarios since VRS already preserves silhouettes anyway. The main artifact observed was
// materials with a strip of metal that is not on the edge of a mesh, which can cause noticeable
// aliasing when it has a bright specular highlight.
//
// DispatchThreadId - global thread ID; used to derive the depth lookup UV.
// GroupThreadId    - thread ID within the group; selects the shared depth cache slot.
// BufferUV         - UV to reproject.
// bValidThread     - false for threads outside the output; they still reach the barrier but
//                    neither sample depth nor reproject.
//
// Returns the reprojected UV for valid threads, or BufferUV unchanged otherwise.
float2 ReprojectBufferUV(uint3 DispatchThreadId, uint3 GroupThreadId, float2 BufferUV, bool bValidThread)
{
	// Depth is read only once, by the top-left thread of each 2x2 block of thread IDs.
	// This shader is primarily bound on reading from the depth buffer on Scarlett. At 4k,
	// sharing a single depth tap across each 2x2 block of threads makes the shader almost 4x faster.
	const uint2 BlockSlot = GroupThreadId.xy >> 1u;
	const bool bIsBlockLeader = all((GroupThreadId.xy & 1u) == 0u);

	if (bValidThread && bIsBlockLeader)
	{
		// One texel past the leader's corner puts the UV in the middle of the 2x2 block.
		const float2 BlockCenterTexel = float2(DispatchThreadId.xy) + float2(1, 1);
		const float2 DepthUV = ScaledUVOffset + BlockCenterTexel * InvTextureDimensions;
		CachedDepth[BlockSlot.x][BlockSlot.y] = LookupDeviceZ(DepthUV);
	}

	// Every thread in the group must reach this barrier, including invalid ones.
	GroupMemoryBarrierWithGroupSync();

	float2 ReprojectedUV = BufferUV;
	if (bValidThread)
	{
		const float SharedDeviceZ = CachedDepth[BlockSlot.x][BlockSlot.y];

		// Current frame clip position -> previous frame clip position -> back to buffer UV.
		const float2 CurrentScreenPos = ViewportUVToScreenPos(BufferUVToViewportUV(BufferUV));
		const float4 PrevClip = mul(float4(CurrentScreenPos, SharedDeviceZ, 1), View.ClipToPrevClip);
		const float2 PrevScreenPos = PrevClip.xy / PrevClip.w;

		ReprojectedUV = ViewportUVToBufferUV(ScreenPosToViewportUV(PrevScreenPos));
	}

	return ReprojectedUV;
}
|
|
|
|
// Compute entry point: produces one texel of the scaled shading rate image (and of the scaled
// conservative shading rate image) per thread.
//
// Each valid thread:
//   1. reprojects the UV of its output texel center via ReprojectBufferUV,
//   2. maps the reprojected texel footprint into texel space of the source (pre-scaled) SRI,
//   3. ANDs together every source SRI texel the footprint touches,
//   4. writes the shading rate and the conservative shading rate to their respective outputs.
// Footprints that land entirely outside the source SRI get 1x1 (no VRS).
//
// DispatchThreadId - global thread ID, i.e. the output texel before ScaledUVOffset is applied.
// GroupThreadId    - thread ID within the group, forwarded to ReprojectBufferUV.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void RescaleVariableRateShading(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const bool bValidThread = DispatchThreadId.x < ScaledSRIDimensions.x && DispatchThreadId.y < ScaledSRIDimensions.y;

	const float2 PixelUVSize = InvTextureDimensions;
	const float2 HalfPixelOffset = float2(0.5, 0.5);

	float2 CenterUV = (float2(DispatchThreadId.xy) + HalfPixelOffset) * PixelUVSize;
	CenterUV = ReprojectBufferUV(DispatchThreadId, GroupThreadId, CenterUV, bValidThread);

	// FXC doesn't allow a thread to terminate before GroupMemoryBarrierWithGroupSync so
	// this return isn't allowed until right after ReprojectBufferUV
	if (!bValidThread)
	{
		return;
	}

	// Footprint of this texel around the reprojected center, pulled in by a small epsilon so the
	// corners do not spill into neighbouring texels.
	const float Epsilon = 0.0001;
	const float2 UVStart = CenterUV - PixelUVSize * 0.5 + Epsilon;
	const float2 UVEnd = CenterUV + PixelUVSize * 0.5 - Epsilon;

	// Convert from UV space to texel space of the source (pre-scaled) image.
	int2 InputCoordStart = int2(UVStart * InvDynamicResolutionScale * TextureDimensions.xy);
	int2 InputCoordEnd = int2(UVEnd * InvDynamicResolutionScale * TextureDimensions.xy);

	// Clamp start to the top-left texel of the input SRI.
	InputCoordStart = max(InputCoordStart, int2(0, 0));

	// Clamp end to the bottom-right texel of the input SRI. The loops below are inclusive, so the
	// limit is the last valid texel (Dimensions - 1); clamping to Dimensions itself would read one
	// texel out of bounds along the right and bottom edges.
	const int2 LastInputCoord = int2(InputSRIDimensions) - int2(1, 1);
	InputCoordEnd = min(InputCoordEnd, LastInputCoord);

	// Start is only ever clamped against the top-left and end only against the bottom-right, so
	// start can exceed end on an axis only when both corners were out of view on the same side
	// and just one of them got clamped past the other.
	const bool bOnScreen = all(InputCoordStart <= InputCoordEnd);

	uint ShadingRate;
	if (bOnScreen)
	{
		// Start from the coarsest rate and AND in every covered source texel. Both the regular
		// rate (low bits) and the conservative rate (shifted bits) are combined in one pass.
		// NOTE(review): assumes the D3D12 shading rate encoding, where AND yields the finest
		// rate per axis - confirm against VRSShadingRateCommon.ush.
		ShadingRate = D3D12_SHADING_RATE_2X2 | (D3D12_SHADING_RATE_2X2 << CONSERVATIVE_SHADING_RATE_SHIFT);

		// Signed counters: the bounds are signed and are guaranteed to satisfy 0 <= start <= end here.
		for (int X = InputCoordStart.x; X <= InputCoordEnd.x; X++)
		{
			for (int Y = InputCoordStart.y; Y <= InputCoordEnd.y; Y++)
			{
				ShadingRate &= InputSRITexture[uint2(X, Y)];
			}
		}
	}
	else
	{
		// Don't apply VRS to anything coming offscreen
		ShadingRate = D3D12_SHADING_RATE_1X1 | (D3D12_SHADING_RATE_1X1 << CONSERVATIVE_SHADING_RATE_SHIFT);
	}

	// Destination texel, shared by both outputs: the thread's texel shifted by the scaled UV offset.
	const uint2 OutputCoord = uint2(float2(DispatchThreadId.xy) + ScaledUVOffset * TextureDimensions);

	ScaledSRITexture[OutputCoord] = ShadingRate & D3D12_SHADING_RATE_2X2;
	ScaledConservativeSRITexture[OutputCoord] = (ShadingRate >> CONSERVATIVE_SHADING_RATE_SHIFT) & D3D12_SHADING_RATE_2X2;
}
|