Files
UnrealEngine/Engine/Shaders/Private/DefaultSSRTiles.usf
2025-05-18 13:04:45 +08:00

318 lines
10 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "Common.ush"
#include "DeferredShadingCommon.ush"
#include "BRDF.ush"
#include "ScreenSpaceReflectionTileCommons.ush"
#include "Substrate/Substrate.ush"
#include "Substrate/SubstrateEvaluation.ush"
// Tiling for pixels that require SSR
// The tile categorisation code below is compiled in by default; it is only skipped when
// TILE_CATERGORISATION_SHADER was already defined to 0 before this point.
#ifndef TILE_CATERGORISATION_SHADER
#define TILE_CATERGORISATION_SHADER 1
#endif
#if TILE_CATERGORISATION_SHADER
#if USE_SSR_PRE_PASS_STENCIL
// Depth/stencil input. Bit 0 of its stencil component is tested by DoesPixelContainSSR to decide whether a pixel needs SSR.
Texture2D<uint2> SSRDepthStencilTexture;
#endif
// Pixels whose roughness is above this value are rejected by DoesPixelContainSSR.
float MaxRoughness;
// Pixels whose specular is below this value are rejected by DoesPixelContainSSR.
float MinSpecular;
// When zero, two sided foliage pixels are rejected by DoesPixelContainSSR; when non-zero they are kept.
int bEnableTwoSidedFoliage;
// Size of the tile grid in tiles. x is used as the row stride when linearising a tile coordinate into a bit index.
int2 TiledViewRes;
// Output bit mask with one bit per tile, packed 32 tiles per uint.
RWStructuredBuffer<uint> TileMaskBufferOut;
#if COMPILER_SUPPORTS_WAVE_VOTE
// Incremented once per wave that contains at least one SSR pixel; non-zero means the tile must be written.
groupshared uint bAnySSRPixels;
#else
// One flag per thread of the group, merged with a shared memory reduction when wave vote is unavailable.
groupshared bool ContainsSSR[SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY];
#endif
/**
 * Returns true when the pixel at PixelPos should be processed by screen space reflections.
 *
 * PixelPos is a pixel coordinate inside the scene buffers (callers add View.ViewRectMin before calling).
 * Three mutually exclusive code paths exist:
 *  - USE_SSR_PRE_PASS_STENCIL: the decision is read from bit 0 of the stencil.
 *  - SUBTRATE_GBUFFER_FORMAT==1: the decision is derived from the Substrate material data.
 *  - otherwise: the decision is derived from the legacy GBuffer.
 * The material paths keep a pixel when its roughness is <= MaxRoughness and its specular is >= MinSpecular,
 * excluding hair, single layer water, (optionally) two sided foliage and the roughness==0 && specular==0 case.
 * Clear coat is always kept.
 */
bool DoesPixelContainSSR(uint2 PixelPos)
{
#if USE_SSR_PRE_PASS_STENCIL
	// Only bit 0 of the stencil value is relevant here.
	uint Stencil = SSRDepthStencilTexture.Load(uint3(PixelPos, 0)) STENCIL_COMPONENT_SWIZZLE;
	return (Stencil & 1U) != 0U;
#else // !USE_SSR_PRE_PASS_STENCIL
#if SUBTRATE_GBUFFER_FORMAT==1
	FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelPos, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
	FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
	const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(SubstratePixelHeader.PackedTopLayerData);

	// Default verdict, used as is for pixels with several closures: any material that is smooth enough qualifies.
	const bool bRoughnessIsValid = TopLayerData.Roughness <= MaxRoughness;
	bool bContainValidPixels = SubstratePixelHeader.ClosureCount > 0 && bRoughnessIsValid;

	BRANCH
	if (SubstratePixelHeader.ClosureCount == 1) // Only non complex materials will be interpreted for exclusion for now when it comes to Specular values or legacy shading models.
	{
		const bool bSkipSSSMaterialOverride = true; // We do not want the material data to be affected by any override.
		FSubstrateBSDF BSDF = UnpackSubstrateBSDFIn(Substrate.MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader, bSkipSSSMaterialOverride);

		// Specular is derived from the largest F0 channel so it can be compared against MinSpecular.
		const float BSDFSpecular = F0ToDielectricSpecular(max3(SLAB_F0(BSDF).x, SLAB_F0(BSDF).y, SLAB_F0(BSDF).z));

		bContainValidPixels =
			bRoughnessIsValid
			&& BSDFSpecular >= MinSpecular
			&& !SubstratePixelHeader.IsHair()
			&& !SubstratePixelHeader.IsSingleLayerWater();

		// Ignore two sided foliage unless it has been explicitly enabled.
		bContainValidPixels = bContainValidPixels && (bEnableTwoSidedFoliage ? true : (BSDF_GETSSSTYPE(BSDF) != SSS_TYPE_TWO_SIDED_WRAP));

		const uint BSDFType = BSDF_GETTYPE(BSDF);
		switch (BSDFType)
		{
		case SUBSTRATE_BSDF_TYPE_SLAB:
			{
				// Cull special case where specular=0, and roughness=0
				if (SLAB_ROUGHNESS(BSDF) == 0 && BSDFSpecular == 0)
				{
					bContainValidPixels = false;
				}

				// Clearcoat: if the material is clear coat it contains valid pixel.
				// This intentionally overrides every rejection above, matching the legacy GBuffer path.
				const bool bHaziness = BSDF_GETHASHAZINESS(BSDF);
				if (bHaziness)
				{
					const FHaziness Haziness = UnpackHaziness(SLAB_HAZINESS(BSDF));
					const bool bHazeAsSimpleClearCoat = Haziness.bSimpleClearCoat;
					bContainValidPixels = bContainValidPixels || bHazeAsSimpleClearCoat;
				}
				break;
			}
		}
	}

	return bContainValidPixels;
#else
	// Legacy GBuffer path: sample the GBuffer at the pixel center.
	float2 BufferUV = (PixelPos + 0.5f) * View.BufferSizeAndInvSize.zw;
	FGBufferData GBuffer = GetGBufferDataFromSceneTextures(BufferUV);

	bool bContainValidPixels =
		GBuffer.Roughness <= MaxRoughness
		&& GBuffer.Specular >= MinSpecular
		&& (GBuffer.ShadingModelID != SHADINGMODELID_UNLIT)
		&& (GBuffer.ShadingModelID != SHADINGMODELID_HAIR)
		&& (GBuffer.ShadingModelID != SHADINGMODELID_SINGLELAYERWATER);

	// Ignore two sided foliage
	bContainValidPixels = bContainValidPixels && (bEnableTwoSidedFoliage ? true : (GBuffer.ShadingModelID != SHADINGMODELID_TWOSIDED_FOLIAGE));

	// Cull special case where specular=0, and roughness=0
	if (GBuffer.Roughness == 0 && GBuffer.Specular == 0)
	{
		bContainValidPixels = false;
	}

	// Clearcoat: if the material is clear coat it contains valid pixel.
	// This is applied last so it overrides every rejection above.
	bContainValidPixels = bContainValidPixels || (GBuffer.ShadingModelID == SHADINGMODELID_CLEAR_COAT);

	return bContainValidPixels;
#endif
#endif // USE_SSR_PRE_PASS_STENCIL
}
/**
* Check all pixels (one thread per pixel, one thread group per tile) and set 1 bit in TileMaskBufferOut
* for any SSR_TILE_SIZE_XY x SSR_TILE_SIZE_XY tile which contains at least one pixel requiring SSR.
* The per-pixel decision is made by DoesPixelContainSSR; the per-tile merge uses wave vote when
* COMPILER_SUPPORTS_WAVE_VOTE is set and a shared memory reduction otherwise.
*/
[numthreads(SSR_TILE_SIZE_XY, SSR_TILE_SIZE_XY, 1)]
void SSRTileCategorisationMarkCS(uint3 ThreadId : SV_DispatchThreadID, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID)
{
#if 0
// Slow reference path
// Disabled. Kept for reference only: a single thread per group scans the pixels itself and appends
// the tile straight to the tile list. It uses DrawIndirectDataUAV, DispatchIndirectDataUAV and
// SSRTileListDataUAV, which are declared after this function in this file.
const int LinearIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;
if (LinearIndex < 1)
{
bool bContainsSSR = false;
for (uint i = 0; i < SSR_TILE_SIZE_XY; ++i)
{
for (uint j = 0; j < SSR_TILE_SIZE_XY; ++j)
{
bContainsSSR = bContainsSSR || DoesPixelContainSSR((ThreadId.xy + uint2(i, j)) + View.ViewRectMin.xy);
}
}
if (bContainsSSR)
{
uint WriteToIndex;
InterlockedAdd(DrawIndirectDataUAV[1], 1, WriteToIndex);
InterlockedAdd(DispatchIndirectDataUAV[0], 1);
// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
SSRTileListDataUAV[WriteToIndex] = PackTileCoord12bits(GroupId.xy);
}
}
#else
// Per-pixel classification. Threads outside of the bounds keep bContainsSSR == false but still
// take part in the group wide merge below, so every thread reaches every barrier.
bool bContainsSSR = false;
// NOTE(review): ThreadId is compared against the buffer size but is offset by View.ViewRectMin afterwards,
// so it looks view relative. Confirm whether the view size is the intended bound here.
if (ThreadId.x < uint(View.BufferSizeAndInvSize.x) && ThreadId.y < uint(View.BufferSizeAndInvSize.y))
{
bContainsSSR = bContainsSSR || DoesPixelContainSSR(ThreadId.xy + View.ViewRectMin.xy);
}
// Only the first thread of the group ever sets this to true, so each tile is written at most once.
bool bWriteTile = false;
#if COMPILER_SUPPORTS_WAVE_VOTE
// Wave vote path: reset the shared counter, let the first lane of every wave that saw an SSR pixel
// increment it, then have the first thread of the group read the result.
if (all(GroupThreadId == 0))
{
bAnySSRPixels = 0;
}
// Make the reset visible before any wave increments the counter.
GroupMemoryBarrierWithGroupSync();
const bool bAnySSRPixelsInWave = WaveActiveAnyTrue(bContainsSSR);
if (WaveIsFirstLane() && bAnySSRPixelsInWave)
{
InterlockedAdd(bAnySSRPixels, 1);
}
// Wait for all waves to have contributed before reading the counter.
GroupMemoryBarrierWithGroupSync();
if (all(GroupThreadId == 0))
{
bWriteTile = bAnySSRPixels > 0;
}
#else
// Shared memory path: every thread stores its flag, then the flags are OR-ed together by halving
// the active range at each step (32, 16, 8, 4, 2, 1) with a group sync between steps.
// NOTE(review): the hard-coded strides assume 64 threads per group, i.e. SSR_TILE_SIZE_XY == 8.
const int LinearIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;
ContainsSSR[LinearIndex] = bContainsSSR;
GroupMemoryBarrierWithGroupSync();
if (LinearIndex < 32) // 8*8 = 64 elements to merge
{
ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 32];
}
GroupMemoryBarrierWithGroupSync();
if (LinearIndex < 16)
{
ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 16];
}
GroupMemoryBarrierWithGroupSync();
// The smallest wave size is 16 on Intel hardware. So now we can do simple math operations without group sync.
// EDIT: for some reason group sync is needed until the end, otherwise some pixels are missing...
if (LinearIndex < 8)
{
ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 8];
}
GroupMemoryBarrierWithGroupSync();
if (LinearIndex < 4)
{
ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 4];
}
GroupMemoryBarrierWithGroupSync();
if (LinearIndex < 2)
{
ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 2];
}
GroupMemoryBarrierWithGroupSync();
// Final step: thread 0 merges the last two flags directly into its local result.
if (LinearIndex < 1)
{
bWriteTile = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 1];
}
#endif
if (bWriteTile)
{
// Set bit to indicate tile is occupied.
// The tile (one per group) is linearised row by row and packed 32 tiles per uint.
uint MaskLinearIndex = TiledViewRes.x * GroupId.y + GroupId.x;
InterlockedOr(TileMaskBufferOut[MaskLinearIndex / 32U], 1U << (MaskLinearIndex % 32U));
}
#endif
}
// Value written to DrawIndirectDataUAV[0] (VertexCountPerInstance of the indirect draw arguments).
uint VertexCountPerInstanceIndirect;
// Indirect draw arguments: [0] VertexCountPerInstance, [1] InstanceCount (incremented by the number of SSR tiles).
RWBuffer<uint> DrawIndirectDataUAV;
// Indirect dispatch arguments: [0] is incremented by the number of SSR tiles, [1] and [2] are set to 1.
RWBuffer<uint> DispatchIndirectDataUAV;
// Compacted list of SSR tiles, each entry packed with PackTileCoord12bits.
RWBuffer<uint> SSRTileListDataUAV;
// Read-only tile bit mask, one bit per tile and 32 tiles per uint (same layout as TileMaskBufferOut).
StructuredBuffer<uint> TileMaskBuffer;
// Number of SSR tiles found by the current group.
groupshared uint SharedNumTiles;
// Packed coordinates of the SSR tiles found by the current group.
groupshared uint SharedTileData[SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY];
// Offset into SSRTileListDataUAV at which the current group writes its tiles.
groupshared uint SharedGlobalTileOffset;
/**
 * Every group checks SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY tiles (64 for 8x8 groups) of TileMaskBuffer and
 * builds a spatially coherent compacted list of SSR tiles.
 *
 * Steps:
 *  1. The very first thread of the dispatch initialises the constant entries of the indirect arguments.
 *  2. The first thread of each group scans the group's tiles in Z order and gathers the used ones in shared memory.
 *  3. The first thread of each group reserves a range in the global tile list with a single atomic add,
 *     which also bumps the indirect draw instance count and the indirect dispatch group count.
 *  4. All threads of the group copy the gathered tiles into the reserved range.
 */
[numthreads(SSR_TILE_SIZE_XY, SSR_TILE_SIZE_XY, 1)]
void SSRTileClassificationBuildListsCS(
	uint2 GroupId : SV_GroupID,
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	const uint LinearThreadIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;

	if (all(DispatchThreadId == 0))
	{
		// TODO compute clear myself
		DrawIndirectDataUAV[0] = VertexCountPerInstanceIndirect; // VertexCountPerInstance
		//DrawIndirectDataUAV[1] // InstanceCount already cleared to 0
		//DrawIndirectDataUAV[2] = 0; // StartVertexLocation " "
		//DrawIndirectDataUAV[3] = 0; // StartInstanceLocation " "
		DispatchIndirectDataUAV[1] = 1;
		DispatchIndirectDataUAV[2] = 1;
	}

	//@todo - parallel version
	// Serial gather by the first thread of the group. No other thread reads SharedNumTiles or SharedTileData
	// before the barrier that follows, so the count is accumulated locally and published once at the end.
	if (LinearThreadIndex == 0)
	{
		uint NumTiles = 0;

		for (uint LocalTileIndex = 0; LocalTileIndex < SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY; ++LocalTileIndex)
		{
			// ZOrder tiles to maximize screen locality after converting to 1d for compaction
			// The tile locality ultimately affects trace coherency, since trace compaction pulls from neighboring tiles
			uint2 ThreadOffset = ZOrder2D(LocalTileIndex, log2(SSR_TILE_SIZE_XY));
			uint2 TileCoordinate = GroupId * SSR_TILE_SIZE_XY + ThreadOffset;

			// Groups on the border of the tile grid cover tiles that do not exist; skip those.
			if (all(TileCoordinate < TiledViewRes))
			{
				uint MaskLinearIndex = TiledViewRes.x * TileCoordinate.y + TileCoordinate.x;
				uint Mask = 1u << (MaskLinearIndex % 32u);
				bool bTileUsed = (TileMaskBuffer[MaskLinearIndex / 32u] & Mask) != 0;

				if (bTileUsed)
				{
					// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
					SharedTileData[NumTiles] = PackTileCoord12bits(TileCoordinate);
					++NumTiles;
				}
			}
		}

		SharedNumTiles = NumTiles;
	}

	GroupMemoryBarrierWithGroupSync();

	// Allocate space in the tile list
	if (LinearThreadIndex == 0 && SharedNumTiles > 0)
	{
		InterlockedAdd(DrawIndirectDataUAV[1], SharedNumTiles, SharedGlobalTileOffset);
		InterlockedAdd(DispatchIndirectDataUAV[0], SharedNumTiles);
	}

	GroupMemoryBarrierWithGroupSync();

	// Write out tiles
	// SharedGlobalTileOffset is only valid when SharedNumTiles > 0, which this condition guarantees.
	if (LinearThreadIndex < SharedNumTiles)
	{
		SSRTileListDataUAV[SharedGlobalTileOffset + LinearThreadIndex] = SharedTileData[LinearThreadIndex];
	}
}
#endif