// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "HairStrandsVisibilityCommon.ush"
#include "HairStrandsTileCommon.ush"

// Number of threads in one tile-sized thread group (one thread per pixel of a tile).
#define GROUP_THREAD_COUNT (HAIR_TILE_SIZE*HAIR_TILE_SIZE)

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_TILE_GENERATION

int2 BufferResolution;
uint bUintTexture;
uint bForceOutputAllTiles;
float TransmittanceThreshold;
uint IntCoverageThreshold;

// NOTE(review): the resource declarations below carry no explicit element type in this listing.
// Their usage implies a scalar float/uint texture, a uint counter buffer (InterlockedAdd) and
// uint2 tile-coordinate buffers - confirm against the C++ parameter bindings.
#if PERMUTATION_INPUT_TYPE == 0
Texture2D InputFloatTexture;
#elif PERMUTATION_INPUT_TYPE == 1
Texture2D InputUintTexture;
#else
#error Input type not defined
#endif

RWBuffer TileCountBuffer;
RWBuffer TileHairAllBuffer;
RWBuffer TileHairFullBuffer;
RWBuffer TileHairPartialBuffer;
RWBuffer TileOtherBuffer;

#if PERMUTATION_WAVEOPS == 0
// 8x8 bit mask: one bit per thread of the group, OR-reduced through group shared memory.
groupshared uint2 s_fMask[GROUP_THREAD_COUNT];
groupshared uint2 s_pMask[GROUP_THREAD_COUNT];
#endif

// Appends TileCoord to the tile lists matching its classification and bumps the matching counters.
// * Full or partial -> HAIRTILE_HAIR_ALL list
// * Full            -> HAIRTILE_HAIR_FULL list
// * Partial only    -> HAIRTILE_HAIR_PARTIAL list
// * Neither         -> HAIRTILE_OTHER list
// Must be called by a single thread per group.
void AppendTile(uint2 TileCoord, bool bIsFull, bool bIsPartial)
{
	uint WriteToIndex;

	if (bIsFull || bIsPartial)
	{
		InterlockedAdd(TileCountBuffer[HAIRTILE_HAIR_ALL], 1, WriteToIndex);
		TileHairAllBuffer[WriteToIndex] = TileCoord;
	}

	if (bIsFull)
	{
		InterlockedAdd(TileCountBuffer[HAIRTILE_HAIR_FULL], 1, WriteToIndex);
		TileHairFullBuffer[WriteToIndex] = TileCoord;
	}
	else if (bIsPartial)
	{
		InterlockedAdd(TileCountBuffer[HAIRTILE_HAIR_PARTIAL], 1, WriteToIndex);
		TileHairPartialBuffer[WriteToIndex] = TileCoord;
	}
	else
	{
		InterlockedAdd(TileCountBuffer[HAIRTILE_OTHER], 1, WriteToIndex);
		TileOtherBuffer[WriteToIndex] = TileCoord;
	}
}

// Classifies each HAIR_TILE_SIZE x HAIR_TILE_SIZE screen tile (one thread group per tile, one thread per pixel)
// as full hair / partial hair / other, and appends the tile coordinate to the matching list(s).
//
// The wave-ops path reduces with WaveActiveAllTrue/WaveActiveAnyTrue and only thread 0 consumes the result,
// so the result covers the whole tile only if the whole group runs as a single wave. The guard below must
// therefore test the same macro as the code path it protects (PERMUTATION_WAVEOPS); it previously tested a
// differently spelled macro that is not defined in this file, so WAVESIZE(64) was never requested.
#if PERMUTATION_WAVEOPS && COMPILER_SUPPORTS_WAVE_SIZE
WAVESIZE(64) // PERMUTATION_WAVEOPS is true only when wave>=64 are available
#endif
[numthreads(HAIR_TILE_SIZE, HAIR_TILE_SIZE, 1)]
void TileMainCS(uint2 DispatchThreadId : SV_DispatchThreadID, uint LinearIndex : SV_GroupIndex, uint3 GroupId : SV_GroupID)
{
	const uint2 PixelCoord = DispatchThreadId + View.ViewRectMin.xy;
	// Threads outside of the view rect keep fHair/pHair == false, so border tiles are never 'full'.
	const bool bIsValid = all(DispatchThreadId < uint2(View.ViewRectMinAndSize.zw));

	bool fHair = false; // Pixel counts towards a 'full' hair tile
	bool pHair = false; // Pixel counts towards a 'partial' hair tile
	if (bIsValid)
	{
		if (bForceOutputAllTiles)
		{
			// When bForceOutputAllTiles, we generate tiles for all pixels.
			fHair = true;
			pHair = false;
		}
		else
	#if PERMUTATION_INPUT_TYPE == 0
		{
			const float Transmittance = InputFloatTexture.Load(uint3(PixelCoord, 0));
			fHair = Transmittance <= TransmittanceThreshold;
			pHair = Transmittance < 1.f;
		}
	#elif PERMUTATION_INPUT_TYPE == 1
		{
			const uint Coverage = InputUintTexture.Load(uint3(PixelCoord, 0));
			fHair = Coverage >= IntCoverageThreshold;
			pHair = Coverage > 0;//&& Coverage < IntCoverageThreshold; //this is now consistent with the non-int (float) branch
		}
	#endif
	}

#if PERMUTATION_WAVEOPS
	const bool bIsFull    = WaveActiveAllTrue(fHair);
	const bool bIsPartial = WaveActiveAnyTrue(pHair);

	if (LinearIndex == 0)
	{
		AppendTile(GroupId.xy, bIsFull, bIsPartial);
	}
#else
	// Each thread owns one bit of a 64-bit mask (x: threads 0..31, y: threads 32..63).
	const uint2 Mask = LinearIndex < 32 ? uint2(1u << LinearIndex, 0u) : uint2(0u, 1u << (LinearIndex - 32u));
	s_fMask[LinearIndex] = fHair ? Mask : 0u;
	s_pMask[LinearIndex] = pHair ? Mask : 0u;

	// OR-reduction of the per-thread masks (64 -> 2 entries); the last step is done by thread 0 below.
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_fMask[LinearIndex] = s_fMask[LinearIndex] | s_fMask[LinearIndex + 32];
		s_pMask[LinearIndex] = s_pMask[LinearIndex] | s_pMask[LinearIndex + 32];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_fMask[LinearIndex] = s_fMask[LinearIndex] | s_fMask[LinearIndex + 16];
		s_pMask[LinearIndex] = s_pMask[LinearIndex] | s_pMask[LinearIndex + 16];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_fMask[LinearIndex] = s_fMask[LinearIndex] | s_fMask[LinearIndex + 8];
		s_pMask[LinearIndex] = s_pMask[LinearIndex] | s_pMask[LinearIndex + 8];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_fMask[LinearIndex] = s_fMask[LinearIndex] | s_fMask[LinearIndex + 4];
		s_pMask[LinearIndex] = s_pMask[LinearIndex] | s_pMask[LinearIndex + 4];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_fMask[LinearIndex] = s_fMask[LinearIndex] | s_fMask[LinearIndex + 2];
		s_pMask[LinearIndex] = s_pMask[LinearIndex] | s_pMask[LinearIndex + 2];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex == 0)
	{
		const uint2 fMask = s_fMask[0] | s_fMask[1];
		const uint2 pMask = s_pMask[0] | s_pMask[1];

		// Full: every thread set its bit. Partial: at least one thread set its bit.
		const bool bIsFull    = all(fMask == 0xFFFFFFFF);
		const bool bIsPartial = any(pMask > 0);

		AppendTile(GroupId.xy, bIsFull, bIsPartial);
	}
#endif
}

#endif // SHADER_TILE_GENERATION
////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_TILE_COPY_ARGS

uint2 TileCountXY;
uint TilePerThread_GroupSize;
uint bRectPrimitive;
uint TileSize;

// NOTE(review): the buffer declarations below carry no explicit element type in this listing - confirm
// against the C++ parameter bindings.
Buffer TileCountBuffer;
RWBuffer TileIndirectDrawBuffer;
RWBuffer TileIndirectDispatchBuffer;
RWBuffer TilePerThreadIndirectDispatchBuffer;
RWBuffer TileIndirectRayDispatchBuffer;

// Converts the tile count of the given tile type into indirect draw / dispatch / ray-dispatch arguments.
void FillIndirectBuffers(uint TileType)
{
	const uint TileCount = TileCountBuffer[TileType];

	// Indirect draw
	TileIndirectDrawBuffer[TileType * 4 + 0] = bRectPrimitive > 0 ? 4 : 6; // VertexCountPerInstance
	TileIndirectDrawBuffer[TileType * 4 + 1] = TileCount; // InstanceCount
	TileIndirectDrawBuffer[TileType * 4 + 2] = 0; // StartVertexLocation
	TileIndirectDrawBuffer[TileType * 4 + 3] = 0; // StartInstanceLocation

	// Indirect dispatch: one group per tile, laid out as rows of at most TileCountXY.x groups
	WriteDispatchIndirectArgs(TileIndirectDispatchBuffer, TileType, min(TileCount, TileCountXY.x), DivideAndRoundUp(TileCount, TileCountXY.x), 1);

	// Indirect dispatch with one thread
	// At 4k, with 8x8 tiles: 4096 x 4096 / 8 x 8 = 512 x 512 tiles
	// With a group size of 64 (8x8), the total dispatch count should be 512 x 512 / 8 x 8 = 64 x 64 = 4096, which is below the 65k limit per dimension
	WriteDispatchIndirectArgs(TilePerThreadIndirectDispatchBuffer, TileType, DivideAndRoundUp(TileCount, TilePerThread_GroupSize), 1, 1);

	// Indirect ray dispatch
	// Ray tracing dispatch dimensions are defined simply in terms of threads/rays, not thread groups.
	WriteDispatchIndirectArgs(TileIndirectRayDispatchBuffer, TileType, TileCount * TileSize, TileSize, 1);
}

// One thread per tile type; each thread fills the indirect arguments of its tile type.
[numthreads(HAIRTILE_COUNT, 1, 1)]
void MainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	if (DispatchThreadId.y == 0 && DispatchThreadId.x < HAIRTILE_COUNT)
	{
		const uint HairTileType = DispatchThreadId.x; // HairAll, HairFull, HairPartial, Other
		FillIndirectBuffers(HairTileType);
	}
}

#endif // SHADER_TILE_COPY_ARGS

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_TILE_DEBUG

float3 DebugColor;

// Debug visualization: shades tiles with a checkerboard pattern of DebugColor at half / full intensity.
void MainPS(
	in FScreenVertexOutput Input,
	out float4 OutColor0 : SV_Target0)
{
	// Derive the tile coordinate from HAIR_TILE_SIZE rather than a hard-coded shift.
	const uint2 InTileCoord = uint2(Input.Position.xy) / uint(HAIR_TILE_SIZE);

	// Tiles whose X and Y coordinates have the same parity get the darker shade.
	const bool bTileX = (InTileCoord.x & 1) == 0;
	const bool bTileY = (InTileCoord.y & 1) == 0;
	const bool bChecker = bTileX == bTileY;

	OutColor0 = bChecker ? float4(DebugColor * 0.5f, 1.0f) : float4(DebugColor * 1.0f, 1.0f);
}

#endif //SHADER_TILE_DEBUG

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_TILE_DEBUG_PRINT

#include "../ShaderPrint.ush"

int2 MaxResolution;
uint TileGroupSize;
uint TileSize;
uint TileCount;
uint TileType;
int2 TileCountXY;
uint bRectPrimitive;

// Prints one line with the label of InTileType, its active tile count, and that count as a percentage of TileCount.
// The parameter is named InTileType so that it does not shadow the global TileType uniform.
void AddTileCountLine(inout FShaderPrintContext Context, uint InTileType)
{
	const uint ActiveTileCount = HairStrands.HairTileCount[InTileType];

	Print(Context, TEXT("Count "), FontWhite);
	if (InTileType == HAIRTILE_HAIR_ALL)
	{
		Print(Context, TEXT("Hair(All) "), FontEmerald);
	}
	else if (InTileType == HAIRTILE_HAIR_FULL)
	{
		Print(Context, TEXT("Hair(Full) "), FontEmerald);
	}
	else if (InTileType == HAIRTILE_HAIR_PARTIAL)
	{
		Print(Context, TEXT("Hair(Partial)"), FontEmerald);
	}
	else if (InTileType == HAIRTILE_OTHER)
	{
		Print(Context, TEXT("Clear "), FontEmerald);
	}
	else
	{
		Print(Context, TEXT("Unknown "), FontEmerald);
	}
	Print(Context, TEXT(": "), FontWhite);
	Print(Context, ActiveTileCount, FontYellow);

	// Guard against a zero total so the percentage does not print inf/NaN.
	const float ActivePercent = TileCount > 0 ? 100.f * float(ActiveTileCount) / float(TileCount) : 0.f;
	Print(Context, ActivePercent, FontOrange);
	Print(Context, TEXT("%"), FontOrange);
	Newline(Context);
}

// Prints the tile configuration (tile count, selected tile type, tile size, group size, primitive type)
// followed by the per-type tile counts. Only thread (0,0,0) prints.
[numthreads(1, 1, 1)]
void MainCS(uint GroupIndex : SV_GroupIndex, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (any(DispatchThreadId != 0))
	{
		return;
	}

	// Pixel coord
	FShaderPrintContext Context = InitShaderPrintContext(true, uint2(50, 50));

	Print(Context, TEXT("Tile Count XY : "), FontWhite);
	Print(Context, TileCountXY, FontEmerald);
	Newline(Context);

	Print(Context, TEXT("Tile type : "), FontWhite);
	if (TileType == HAIRTILE_HAIR_ALL)
	{
		Print(Context, TEXT("Hair(All)"), FontOrange);
	}
	else if (TileType == HAIRTILE_HAIR_FULL)
	{
		Print(Context, TEXT("Hair(Full)"), FontOrange);
	}
	else if (TileType == HAIRTILE_HAIR_PARTIAL)
	{
		Print(Context, TEXT("Hair(Partial)"), FontOrange);
	}
	else if (TileType == HAIRTILE_OTHER)
	{
		Print(Context, TEXT("Clear"), FontOrange);
	}
	else
	{
		Print(Context, TEXT("Unknown"), FontOrange);
	}
	Newline(Context);

	Print(Context, TEXT("Tile Size : "), FontWhite);
	Print(Context, TileSize, FontYellow);
	Newline(Context);

	Print(Context, TEXT("Tile Threads : "), FontWhite);
	Print(Context, TileGroupSize, FontYellow);
	Newline(Context);

	Print(Context, TEXT("Primitive : "), FontWhite);
	if (bRectPrimitive)
	{
		Print(Context, TEXT("Rect."), FontOrange);
	}
	else
	{
		Print(Context, TEXT("Triangle"), FontOrange);
	}
	Newline(Context);
	Newline(Context);

	AddTileCountLine(Context, HAIRTILE_HAIR_ALL);
	AddTileCountLine(Context, HAIRTILE_HAIR_FULL);
	AddTileCountLine(Context, HAIRTILE_HAIR_PARTIAL);
	AddTileCountLine(Context, HAIRTILE_OTHER);
}

#endif //SHADER_TILE_DEBUG_PRINT
////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_TILE_VS

int2 ViewMin;
float2 ViewInvSize;
uint TileType;

// NOTE(review): no explicit element type on this buffer in this listing; it is read as uint2 tile coordinates - confirm.
Buffer TileDataBuffer;

// Expands one tile (one instance) into its corner vertices.
// Vertex ids 1, 2 and 4 sit on the right edge of the tile; vertex ids 2, 4 and 5 sit on its bottom edge.
void MainVS(
	in uint InVertexId : SV_VertexID,
	in uint InInstanceId : SV_InstanceID,
	out FScreenVertexOutput Out)
{
	Out = (FScreenVertexOutput)0;

	const bool bRightEdge  = InVertexId == 1 || InVertexId == 2 || InVertexId == 4;
	const bool bBottomEdge = InVertexId == 2 || InVertexId == 4 || InVertexId == 5;

	// Tile origin in pixels, pushed by one tile along each axis for the far-edge corners.
	const uint2 TileCoord = TileDataBuffer[InInstanceId];
	uint2 TileVertex = TileCoord * HAIR_TILE_SIZE;
	if (bRightEdge)
	{
		TileVertex.x += HAIR_TILE_SIZE;
	}
	if (bBottomEdge)
	{
		TileVertex.y += HAIR_TILE_SIZE;
	}

	// No need to take ViewMin into account, as the viewport rect is centered on the actual view (i.e., xy:ViewMin)
	Out.UV = float2(TileVertex) * ViewInvSize;

	// UV in [0,1] (Y down) -> clip space in [-1,1] (Y up)
	const float2 ClipXY = Out.UV * float2(2.0f, -2.0f) + float2(-1.0, 1.0f);
	Out.Position = float4(ClipXY, 0.5f, 1.0f);
}

#endif //SHADER_TILE_VS

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_TILE_CLEAR

uint TileSize;
uint TileType;
int2 TileCountXY;
int2 ViewRectMin;
int2 Resolution;

// NOTE(review): no explicit element types on these resources in this listing - confirm against the C++ parameter bindings.
Buffer TileCountBuffer;
Buffer TileDataBuffer;
RWTexture2D OutTexture;

// Clears (writes 0 to) every pixel of the tiles listed for TileType.
// One thread group per listed tile, one thread per pixel; groups past the tile count do nothing.
[numthreads(HAIR_TILE_SIZE, HAIR_TILE_SIZE, 1)]
void TileMainCS(uint2 InGroupId : SV_GroupID, uint2 InGroupThreadId : SV_GroupThreadID)
{
	// The 2D group id is flattened back into an index into the tile list.
	const uint TileIndex1D = InGroupId.x + InGroupId.y * TileCountXY.x;
	const uint TileCount = TileCountBuffer[TileType];

	if (TileIndex1D < TileCount)
	{
		const uint2 GroupId = TileDataBuffer[TileIndex1D];
		const uint2 PixelCoord = uint2(ViewRectMin) + GroupId * TileSize + InGroupThreadId;

		// Skip pixels falling outside of the target resolution.
		const bool bInsideTarget = all(PixelCoord < uint2(Resolution));
		if (bInsideTarget)
		{
			OutTexture[PixelCoord] = 0;
		}
	}
}

#endif // SHADER_TILE_CLEAR