206 lines
6.3 KiB
HLSL
206 lines
6.3 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "VRSShadingRateCommon.ush"
|
|
#include "../Common.ush"
|
|
#include "../ColorSpace.ush"
|
|
|
|
// View rectangle in luminance-texture pixels: xy is added to the thread coordinate when sampling,
// and zw - xy is used as the dimensions of the region to process.
float4 ViewRect;
|
|
// Sobel magnitude above which the 2x1 (X gradient) or 1x2 (Y gradient) shading rate is ruled out.
float EdgeThreshold;
|
|
// Same test as EdgeThreshold, but its result is recorded in the bits shifted up by
// CONSERVATIVE_SHADING_RATE_SHIFT.
float ConservativeEdgeThreshold;
|
|
|
|
// Single-channel luminance input that the Sobel kernels are evaluated on.
Texture2D<float> LuminanceTexture;
|
|
// Output written once per thread group (indexed by group ID) when OUTPUT_HARDWARE_IMAGE is set.
RWTexture2D<uint> HardwareShadingRateImage;
|
|
// Output written once per 2x2 pixel quad when OUTPUT_SOFTWARE_IMAGE is set.
RWTexture2D<uint> SoftwareShadingRateImage;
|
|
|
|
// Per-group copy of the tile's luminance; only filled and read when OUTPUT_SOFTWARE_IMAGE is off.
groupshared half LumaMatrix[HARDWARE_TILE_SIZE][HARDWARE_TILE_SIZE];
|
|
// Per-group accumulator: every thread ANDs the rates it supports into this value.
groupshared uint TileSupportedShadingRate;
|
|
|
|
// Width and height of the Sobel kernels.
#define SOBEL_DIMENSION 3
|
|
// Enables merging the per-lane rates of a quad with QuadReadAcrossX/Y in the software path.
#define USE_WAVE_OP COMPILER_SUPPORTS_WAVE_BIT_ORAND
|
|
|
|
// Evaluates horizontal and vertical 3x3 Sobel filters on the luminance around one pixel and turns
// the result into shading-rate bits:
//  - a strong X gradient rules out the 2x1 rate, a strong Y gradient rules out the 1x2 rate;
//  - EdgeThreshold drives the regular bits, ConservativeEdgeThreshold drives the bits shifted by
//    CONSERVATIVE_SHADING_RATE_SHIFT.
// The per-pixel results are then merged per 2x2 quad (software image) and/or per thread group
// (hardware image), depending on OUTPUT_SOFTWARE_IMAGE / OUTPUT_HARDWARE_IMAGE.
//
// DispatchThreadId: pixel position relative to ViewRect.xy.
// GroupThreadId:    pixel position inside the HARDWARE_TILE_SIZE x HARDWARE_TILE_SIZE tile.
// GroupId:          tile index, used as the destination texel of HardwareShadingRateImage.
void CalculateShadingRateImageInner(
	uint2 DispatchThreadId,
	uint2 GroupThreadId,
	uint2 GroupId)
{
	const uint VRSBitMask = D3D12_SHADING_RATE_2X2 | (D3D12_SHADING_RATE_2X2 << CONSERVATIVE_SHADING_RATE_SHIFT);
	const uint SobelWidth = SOBEL_DIMENSION / 2;

	uint2 LuminanceTextureCoord = uint2(int(ViewRect.x + DispatchThreadId.x), int(ViewRect.y + DispatchThreadId.y));

#if !OUTPUT_SOFTWARE_IMAGE
	// Hardware-only mode: cache the tile's luminance so the Sobel taps never touch the texture again.
	LumaMatrix[GroupThreadId.y][GroupThreadId.x] = LuminanceTexture[LuminanceTextureCoord];
#endif

	// Every thread stores the same "everything supported" value; the barrier below publishes it
	// (and LumaMatrix) to the whole group before anyone reads or ANDs into it.
	TileSupportedShadingRate = VRSBitMask;
	GroupMemoryBarrierWithGroupSync();

	// Exclude pixels whose 3x3 neighbourhood is not fully available.
	float2 Dimensions = ViewRect.zw - ViewRect.xy;
	bool InvalidThread = (
#if !OUTPUT_SOFTWARE_IMAGE
		// When in HW only mode, we only read from the groupshared LumaMatrix, and so skip sobel operations on pixels at the edge of a group
		GroupThreadId.x == HARDWARE_TILE_SIZE - 1 ||
		GroupThreadId.y == HARDWARE_TILE_SIZE - 1 ||
		GroupThreadId.x == 0 ||
		GroupThreadId.y == 0 ||
#else
		// When sampling the texture directly, the first row/column would read texels at
		// ViewRect.xy - 1, i.e. outside the view rect (or at a wrapped unsigned coordinate when
		// ViewRect.xy is 0). Skip them, mirroring the right/bottom test below.
		DispatchThreadId.x == 0 ||
		DispatchThreadId.y == 0 ||
#endif
		(float)DispatchThreadId.x >= Dimensions.x - 1 ||
		(float)DispatchThreadId.y >= Dimensions.y - 1);

	// If Sobel X is over a certain threshold, we rule out 2x1 rates. If Sobel Y is over that threshold, we rule out 1x2 rates.
	// Later, taking the inverse will give us the roughest supported rate for the current lane.
	uint UnsupportedShadingRate = 0x0;

	if (!InvalidThread)
	{
		// Kernel weights, indexed [row][column].
		const float SobelX[SOBEL_DIMENSION][SOBEL_DIMENSION] =
		{
			{-1.0, 0.0, 1.0},
			{-2.0, 0.0, 2.0},
			{-1.0, 0.0, 1.0}
		};

		const float SobelY[SOBEL_DIMENSION][SOBEL_DIMENSION] =
		{
			{-1.0, -2.0, -1.0},
			{ 0.0,  0.0,  0.0},
			{ 1.0,  2.0,  1.0}
		};

		float SobelXSum = 0.0;
		float SobelYSum = 0.0;

		for (int x = 0; x < SOBEL_DIMENSION; x++)
		{
			for (int y = 0; y < SOBEL_DIMENSION; y++)
			{
#if OUTPUT_SOFTWARE_IMAGE
				uint2 LumaCoord = LuminanceTextureCoord + int2(x - SobelWidth, y - SobelWidth);
				float LumaValue = LuminanceTexture[LumaCoord];
#else
				uint2 LumaCoord = GroupThreadId + int2(x - SobelWidth, y - SobelWidth);
				float LumaValue = LumaMatrix[LumaCoord.y][LumaCoord.x];
#endif
				SobelXSum += SobelX[y][x] * LumaValue;
				SobelYSum += SobelY[y][x] * LumaValue;
			}
		}

		if (abs(SobelXSum) > EdgeThreshold)
		{
			UnsupportedShadingRate |= D3D12_SHADING_RATE_2X1;
		}

		if (abs(SobelXSum) > ConservativeEdgeThreshold)
		{
			UnsupportedShadingRate |= (D3D12_SHADING_RATE_2X1 << CONSERVATIVE_SHADING_RATE_SHIFT);
		}

		if (abs(SobelYSum) > EdgeThreshold)
		{
			UnsupportedShadingRate |= D3D12_SHADING_RATE_1X2;
		}

		if (abs(SobelYSum) > ConservativeEdgeThreshold)
		{
			UnsupportedShadingRate |= (D3D12_SHADING_RATE_1X2 << CONSERVATIVE_SHADING_RATE_SHIFT);
		}
	}

#if OUTPUT_SOFTWARE_IMAGE
	uint QuadSupportedShadingRate = ~UnsupportedShadingRate;

#if USE_WAVE_OP
	// Combine the four lanes of the quad: a rate survives only if every pixel supports it.
	QuadSupportedShadingRate &= QuadReadAcrossX(QuadSupportedShadingRate);
	QuadSupportedShadingRate &= QuadReadAcrossY(QuadSupportedShadingRate);
#endif
	// NOTE(review): without USE_WAVE_OP only the top-left lane's own result reaches the image -
	// confirm that this fallback is intended.

	if (GroupThreadId.x % 2 == 0 && GroupThreadId.y % 2 == 0)
	{
		// Software VRS always uses a 2x2 tile size
		SoftwareShadingRateImage[DispatchThreadId.xy >> 1] = VRSBitMask & QuadSupportedShadingRate;
	}
#endif

#if OUTPUT_HARDWARE_IMAGE
	// Tile size may exceed wave size (and always will for 16x16 tiles), so WaveActiveBitAnd is insufficient
	uint Unused;
	InterlockedAnd(TileSupportedShadingRate, ~UnsupportedShadingRate, Unused);
	GroupMemoryBarrierWithGroupSync();

	// A single thread per group publishes the merged result.
	if (GroupThreadId.x == 1 && GroupThreadId.y == 1)
	{
		// Generation is always done in one pass for a view family, so no need to account for stereo offsets
		HardwareShadingRateImage[GroupId.xy] = VRSBitMask & TileSupportedShadingRate;
	}
#endif
}
|
|
|
|
|
|
|
|
// Maps a linear lane index inside the thread group to a 2D position inside the tile such that
// each run of four consecutive lanes forms a 2x2 pixel quad, just like pixel shaders do, so the
// 4 lane cross bar can be used.
//   [0 1][2 3] -> [0 1][4 5]
//   [4 5][6 7] -> [2 3][6 7]
//
// Lane order inside every quad:
//   0 1
//   2 3
//
// Quad order for an 8x8 group (one number per four-lane quad):
//   0  2  4  6
//   1  3  5  7
//   8 10 12 14
//   9 11 13 15
//
// Quad order for a 16x16 group (rectangular):
//   0  8 16 24 32 40 48 56
//   1  9 17 25 33 41 49 57
//   2 10 18 26 34 42 50 58
//   3 11 19 27 35 43 51 59
//   4 12 20 28 36 44 52 60
//   5 13 21 29 37 45 53 61
//   6 14 22 30 38 46 54 62
//   7 15 23 31 39 47 55 63
uint2 LinearToSwizzled(uint LinearIndex)
{
	const uint MaskX = BitFieldMaskU32(1u, 0u);

#if HARDWARE_TILE_SIZE == 8
	// Quads are laid out in squares (step in Y, then X, then X, then Y).
	// Source bits: 5-4-3-2-1-0
	//   X <- 4-3-0
	//   Y <- 5-2-1
	const uint MaskY = BitFieldMaskU32(2u, 0u);
	const uint UpperX = BitFieldExtractU32(LinearIndex, 3u, 2u);
	const uint LowerY = BitFieldExtractU32(LinearIndex, 2u, 1u);
	const uint UpperY = BitFieldExtractU32(LinearIndex, 3u, 3u);

	const uint2 Swizzled = uint2(
		BitFieldInsertU32(MaskX, LinearIndex, UpperX),
		BitFieldInsertU32(MaskY, LowerY, UpperY));
#else
	// Quads run down a column first, then move right.
	// Source bits: 7-6-5-4-3-2-1-0
	//   X <- 7-6-5-0
	//   Y <- 4-3-2-1
	const uint UpperX = BitFieldExtractU32(LinearIndex, 4u, 4u);

	const uint2 Swizzled = uint2(
		BitFieldInsertU32(MaskX, LinearIndex, UpperX),
		BitFieldExtractU32(LinearIndex, 4u, 1u));
#endif

	return Swizzled;
}
|
|
|
|
// One thread per pixel of a hardware tile, launched as a flat 1D group.
#define CALCULATE_SHADING_RATE_IMAGE_THREADS (HARDWARE_TILE_SIZE * HARDWARE_TILE_SIZE)

// Compute entry point. The group is dispatched linearly and each lane is re-mapped to a 2D
// position so that four neighbouring lanes (e.g. 0,1,2,3) cover one pixel quad of the luma
// texture, which the QuadReadAcross functions require.
[numthreads(CALCULATE_SHADING_RATE_IMAGE_THREADS, 1, 1)]
void CalculateShadingRateImage(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 GroupId : SV_GroupID,
	uint GroupIndex : SV_GroupIndex)
{
	// Position of this lane inside its tile after the quad swizzle.
	const uint2 SwizzledLocalId = LinearToSwizzled(GroupIndex);

	// First pixel covered by this group's tile.
	const uint2 TileOrigin = GroupId.xy * uint2(HARDWARE_TILE_SIZE, HARDWARE_TILE_SIZE);

	CalculateShadingRateImageInner(TileOrigin + SwizzledLocalId, SwizzledLocalId, GroupId.xy);
}