// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "Common.ush"
#include "DeferredShadingCommon.ush"
#include "BRDF.ush"
#include "ScreenSpaceReflectionTileCommons.ush"
#include "Substrate/Substrate.ush"
#include "Substrate/SubstrateEvaluation.ush"

// Tiling for pixels that require SSR
#ifndef TILE_CATERGORISATION_SHADER
#define TILE_CATERGORISATION_SHADER 1
#endif

#if TILE_CATERGORISATION_SHADER

#if USE_SSR_PRE_PASS_STENCIL
// Stencil written by the SSR pre-pass; bit 0 is tested to decide whether a pixel needs SSR.
// NOTE(review): element type restored as uint2 (integer stencil read through STENCIL_COMPONENT_SWIZZLE) - confirm against the C++ binding.
Texture2D<uint2> SSRDepthStencilTexture;
#endif

// Pixels rougher than this are rejected.
float MaxRoughness;
// Pixels with less specular than this are rejected.
float MinSpecular;
// Non-zero keeps two sided foliage pixels, zero rejects them.
int bEnableTwoSidedFoliage;
// Size of the tile grid; x is also used as the row pitch of the tile bit mask.
int2 TiledViewRes;
// One bit per tile, 32 tiles per element.
RWStructuredBuffer<uint> TileMaskBufferOut;

#if COMPILER_SUPPORTS_WAVE_VOTE
// Number of waves in the group that found at least one SSR pixel.
groupshared uint bAnySSRPixels;
#else
// Per thread result, merged with a group shared reduction.
groupshared bool ContainsSSR[SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY];
#endif

/**
 * Returns true when the pixel at PixelPos should receive screen space reflections.
 *
 * With USE_SSR_PRE_PASS_STENCIL the decision is read from bit 0 of the pre-pass stencil.
 * Otherwise it is derived from the material data (Substrate or legacy GBuffer): roughness and specular
 * thresholds, excluded shading models, the optional two sided foliage exclusion, and a clear coat override.
 *
 * @param PixelPos	Pixel position used to address the stencil / material buffers.
 */
bool DoesPixelContainSSR(uint2 PixelPos)
{
#if USE_SSR_PRE_PASS_STENCIL

	uint Stencil = SSRDepthStencilTexture.Load(uint3(PixelPos, 0)) STENCIL_COMPONENT_SWIZZLE;
	return (Stencil & 1U) != 0U;

#else // !USE_SSR_PRE_PASS_STENCIL

#if SUBTRATE_GBUFFER_FORMAT==1

	FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelPos, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
	FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
	const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(SubstratePixelHeader.PackedTopLayerData);

	bool bRoughnessIsValid = TopLayerData.Roughness <= MaxRoughness;
	bool bContainValidPixels = SubstratePixelHeader.ClosureCount > 0 && bRoughnessIsValid;

	BRANCH
	if (SubstratePixelHeader.ClosureCount == 1) // Only non complex materials will be interpreted for exclusion for now when it comes to Specular values or legacy shading models.
	{
		const bool bSkipSSSMaterialOverride = true; // We do not want the material data to be affected by any override.
		FSubstrateBSDF BSDF = UnpackSubstrateBSDFIn(Substrate.MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader, bSkipSSSMaterialOverride);

		const float BSDFSpecular = F0ToDielectricSpecular(max3(SLAB_F0(BSDF).x, SLAB_F0(BSDF).y, SLAB_F0(BSDF).z));

		bContainValidPixels = bRoughnessIsValid && BSDFSpecular >= MinSpecular && !SubstratePixelHeader.IsHair() && !SubstratePixelHeader.IsSingleLayerWater();

		// Ignore two sided foliage unless it has been explicitly enabled
		bContainValidPixels = bContainValidPixels && (bEnableTwoSidedFoliage ? true : (BSDF_GETSSSTYPE(BSDF) != SSS_TYPE_TWO_SIDED_WRAP));

		const uint BSDFType = BSDF_GETTYPE(BSDF);
		switch (BSDFType)
		{
			case SUBSTRATE_BSDF_TYPE_SLAB:
			{
				// Cull special case where specular=0, and roughness=0
				if (SLAB_ROUGHNESS(BSDF) == 0 && BSDFSpecular == 0)
				{
					bContainValidPixels = false;
				}

				// Clearcoat: if the material is clear coat it contains valid pixel.
				const bool bHaziness = BSDF_GETHASHAZINESS(BSDF);
				if (bHaziness)
				{
					const FHaziness Haziness = UnpackHaziness(SLAB_HAZINESS(BSDF));
					const bool bHazeAsSimpleClearCoat = Haziness.bSimpleClearCoat;
					bContainValidPixels = bContainValidPixels || bHazeAsSimpleClearCoat;
				}
				break;
			}
		}
	}

	return bContainValidPixels;

#else

	float2 BufferUV = (PixelPos + 0.5f) * View.BufferSizeAndInvSize.zw;
	FGBufferData GBuffer = GetGBufferDataFromSceneTextures(BufferUV);

	bool bContainValidPixels =
		GBuffer.Roughness <= MaxRoughness &&
		GBuffer.Specular >= MinSpecular &&
		(GBuffer.ShadingModelID != SHADINGMODELID_UNLIT) &&
		(GBuffer.ShadingModelID != SHADINGMODELID_HAIR) &&
		(GBuffer.ShadingModelID != SHADINGMODELID_SINGLELAYERWATER);

	// Ignore two sided foliage
	bContainValidPixels = bContainValidPixels && (bEnableTwoSidedFoliage ? true : (GBuffer.ShadingModelID != SHADINGMODELID_TWOSIDED_FOLIAGE));

	// Cull special case where specular=0, and roughness=0
	if (GBuffer.Roughness == 0 && GBuffer.Specular == 0)
	{
		bContainValidPixels = false;
	}

	// Clearcoat: if the material is clear coat it contains valid pixel.
	bContainValidPixels = bContainValidPixels || (GBuffer.ShadingModelID == SHADINGMODELID_CLEAR_COAT);

	return bContainValidPixels;

#endif

#endif // USE_SSR_PRE_PASS_STENCIL
}

/**
 * Check all pixels and set 1 bit in TileMaskBufferOut for any tile which contains a pixel that requires SSR.
 * One thread group covers one tile, one thread covers one pixel.
 */
[numthreads(SSR_TILE_SIZE_XY, SSR_TILE_SIZE_XY, 1)]
void SSRTileCategorisationMarkCS(uint3 ThreadId : SV_DispatchThreadID, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID)
{
#if 0
	// Slow reference path
	const int LinearIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;
	if (LinearIndex < 1)
	{
		bool bContainsSSR = false;
		for (uint i = 0; i < SSR_TILE_SIZE_XY; ++i)
		{
			for (uint j = 0; j < SSR_TILE_SIZE_XY; ++j)
			{
				bContainsSSR = bContainsSSR || DoesPixelContainSSR((ThreadId.xy + uint2(i, j)) + View.ViewRectMin.xy);
			}
		}

		if (bContainsSSR)
		{
			uint WriteToIndex;
			InterlockedAdd(DrawIndirectDataUAV[1], 1, WriteToIndex);
			InterlockedAdd(DispatchIndirectDataUAV[0], 1);
			// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
			SSRTileListDataUAV[WriteToIndex] = PackTileCoord12bits(GroupId.xy);
		}
	}
#else
	// Threads outside of the tested bounds never contribute.
	// NOTE(review): the test uses the buffer size while the lookup is offset by View.ViewRectMin - confirm the view size was not intended.
	bool bContainsSSR = false;
	if (ThreadId.x < uint(View.BufferSizeAndInvSize.x) && ThreadId.y < uint(View.BufferSizeAndInvSize.y))
	{
		bContainsSSR = DoesPixelContainSSR(ThreadId.xy + View.ViewRectMin.xy);
	}

	// Only the first thread of the group ends up with bWriteTile set.
	bool bWriteTile = false;

#if COMPILER_SUPPORTS_WAVE_VOTE

	if (all(GroupThreadId == 0))
	{
		bAnySSRPixels = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// One atomic per wave instead of one per thread.
	const bool bAnySSRPixelsInWave = WaveActiveAnyTrue(bContainsSSR);
	if (WaveIsFirstLane() && bAnySSRPixelsInWave)
	{
		InterlockedAdd(bAnySSRPixels, 1);
	}
	GroupMemoryBarrierWithGroupSync();

	if (all(GroupThreadId == 0))
	{
		bWriteTile = bAnySSRPixels > 0;
	}

#else

	// OR-reduction in group shared memory. The hard coded steps assume SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY == 64.
	const int LinearIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;

	ContainsSSR[LinearIndex] = bContainsSSR;
	GroupMemoryBarrierWithGroupSync();

	if (LinearIndex < 32) // 8*8 = 64 elements to merge
	{
		ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 32];
	}
	GroupMemoryBarrierWithGroupSync();

	if (LinearIndex < 16)
	{
		ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 16];
	}
	GroupMemoryBarrierWithGroupSync();

	// The smallest wave size is 16 on Intel hardware. So now we can do simple math operations without group sync.
	// EDIT: for some reason group sync is needed until the end, otherwise some pixels are missing...

	if (LinearIndex < 8)
	{
		ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 8];
	}
	GroupMemoryBarrierWithGroupSync();

	if (LinearIndex < 4)
	{
		ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 4];
	}
	GroupMemoryBarrierWithGroupSync();

	if (LinearIndex < 2)
	{
		ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 2];
	}
	GroupMemoryBarrierWithGroupSync();

	if (LinearIndex < 1)
	{
		bWriteTile = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + 1];
	}

#endif

	if (bWriteTile)
	{
		// Set bit to indicate tile is occupied.
		uint MaskLinearIndex = TiledViewRes.x * GroupId.y + GroupId.x;
		InterlockedOr(TileMaskBufferOut[MaskLinearIndex / 32U], 1U << (MaskLinearIndex % 32U));
	}
#endif
}

// Value written to the first element of the indirect draw arguments.
uint VertexCountPerInstanceIndirect;
// Indirect draw arguments; element 1 (instance count) accumulates the number of SSR tiles.
RWBuffer<uint> DrawIndirectDataUAV;
// Indirect dispatch arguments; element 0 accumulates the number of SSR tiles, elements 1 and 2 are set to 1.
RWBuffer<uint> DispatchIndirectDataUAV;
// Compacted list of packed tile coordinates.
RWBuffer<uint> SSRTileListDataUAV;
// Tile bit mask, indexed the same way SSRTileCategorisationMarkCS writes TileMaskBufferOut.
StructuredBuffer<uint> TileMaskBuffer;

groupshared uint SharedNumTiles;
groupshared uint SharedTileData[SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY];
groupshared uint SharedGlobalTileOffset;

/**
 * Every group checks SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY tiles (64 for 8x8 groups) and builds a spatially coherent compacted list of SSR tiles.
 * The very first thread of the dispatch also initialises the constant parts of the indirect arguments.
 */
[numthreads(SSR_TILE_SIZE_XY, SSR_TILE_SIZE_XY, 1)]
void SSRTileClassificationBuildListsCS(
	uint2 GroupId : SV_GroupID,
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	const uint LinearThreadIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;

	if (all(DispatchThreadId == 0))
	{
		// TODO compute clear myself
		DrawIndirectDataUAV[0] = VertexCountPerInstanceIndirect;	// VertexCountPerInstance
		//DrawIndirectDataUAV[1]									// InstanceCount already cleared to 0
		//DrawIndirectDataUAV[2] = 0;								// StartVertexLocation " "
		//DrawIndirectDataUAV[3] = 0;								// StartInstanceLocation " "
		DispatchIndirectDataUAV[1] = 1;
		DispatchIndirectDataUAV[2] = 1;
	}

	if (LinearThreadIndex == 0)
	{
		SharedNumTiles = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	//@todo - parallel version
	if (LinearThreadIndex == 0)
	{
		// SharedNumTiles was zeroed above by this same thread, so the serial gather starts from an empty list.
		for (uint LocalTileIndex = 0; LocalTileIndex < SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY; ++LocalTileIndex)
		{
			// ZOrder tiles to maximize screen locality after converting to 1d for compaction
			// The tile locality ultimately affects trace coherency, since trace compaction pulls from neighboring tiles
			uint2 ThreadOffset = ZOrder2D(LocalTileIndex, log2(SSR_TILE_SIZE_XY));
			uint2 TileCoordinate = GroupId * SSR_TILE_SIZE_XY + ThreadOffset;

			if (all(TileCoordinate < TiledViewRes))
			{
				uint MaskLinearIndex = TiledViewRes.x * TileCoordinate.y + TileCoordinate.x;
				uint Mask = 1u << (MaskLinearIndex % 32u);
				bool bTileUsed = (TileMaskBuffer[MaskLinearIndex / 32u] & Mask) != 0;
				if (bTileUsed)
				{
					uint TileOffset = SharedNumTiles;
					// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
					SharedTileData[TileOffset] = PackTileCoord12bits(TileCoordinate);
					SharedNumTiles = TileOffset + 1;
				}
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Allocate space in the tile list
	if (LinearThreadIndex == 0 && SharedNumTiles > 0)
	{
		InterlockedAdd(DrawIndirectDataUAV[1], SharedNumTiles, SharedGlobalTileOffset);
		InterlockedAdd(DispatchIndirectDataUAV[0], SharedNumTiles);
	}

	GroupMemoryBarrierWithGroupSync();

	// Write out tiles
	if (LinearThreadIndex < SharedNumTiles)
	{
		SSRTileListDataUAV[SharedGlobalTileOffset + LinearThreadIndex] = SharedTileData[LinearThreadIndex];
	}
}

#endif