// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "HairStrandsVisibilityCommon.ush"
#include "HairStrandsTileCommon.ush"

// Thread-group footprint, selected by the PERMUTATION_GROUPSIZE permutation.
#if PERMUTATION_GROUPSIZE == 32
#define TILE_PIXEL_SIZE_X 8
#define TILE_PIXEL_SIZE_Y 4
#elif PERMUTATION_GROUPSIZE == 64
#define TILE_PIXEL_SIZE_X 8
#define TILE_PIXEL_SIZE_Y 8
#else
#error Unknown group size
#endif

int2 OutputResolution;
int2 ResolutionOffset;
uint MaxNodeCount;
uint bSortSampleByDepth;
float DepthTheshold;
float CosTangentThreshold;
float CoverageThreshold; // Allow to scale the transmittance to compensate its asymptotic behavior
uint VelocityType;

// NOTE(review): the resource declarations in this file carry no element type (e.g. RWStructuredBuffer<...>,
// Texture2DMS<..., N>). The template arguments look like they were stripped from this copy of the file;
// confirm against the original before compiling.
RWStructuredBuffer OutCompactNodeCounter;
RWTexture2D OutCompactNodeIndex;
RWStructuredBuffer OutCompactNodeVis;
RWBuffer OutCompactNodeCoord;
RWTexture2D OutCoverageTexture;

// Per-group scratch used to allocate the nodes of a whole group with a single global atomic:
// - AllocationNodeCount : total number of nodes requested by the threads of the group
// - AllocationNodeOffset: offset of the group's range, as returned by the add on OutCompactNodeCounter[0]
groupshared uint AllocationNodeCount;
groupshared uint AllocationNodeOffset;

// Counters filled by ComputeUniqueSamplesWithoutDepthTest() for one pixel.
struct FSampleSetDesc
{
	uint UniqueSampleCount;	// Number of entries written into the unique sample array
	uint ValidSampleCount;	// Number of samples which passed the ID and depth tests
	uint HairSampleCount;	// Number of samples inspected (set to PERMUTATION_MSAACOUNT)
};

///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// PPLL
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if PERMUTATION_PPLL
#define PPLL_MAX_RENDER_NODE_COUNT PERMUTATION_PPLL

Buffer TileCountBuffer;
Buffer TileDataBuffer;
int2 TileCountXY;

Texture2D PPLLCounter;
Texture2D PPLLNodeIndex;
StructuredBuffer PPLLNodeData;

// Compacts the per-pixel linked list of hair nodes: for each pixel of a hair tile, keeps up to
// PPLL_MAX_RENDER_NODE_COUNT nodes ordered by decreasing depth value, allocates a contiguous range
// for them in OutCompactNodeVis/OutCompactNodeCoord, and writes the per-pixel node descriptor and coverage.
[numthreads(TILE_PIXEL_SIZE_X, TILE_PIXEL_SIZE_Y, 1)]
void MainCS(uint GroupIndex : SV_GroupIndex, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint TileSize = HAIR_TILE_SIZE;
	const uint TileCount = TileCountBuffer[HAIRTILE_HAIR_ALL];
	const uint LinearIndex = GroupId.x + GroupId.y * TileCountXY.x;
	// LinearIndex only depends on GroupId, so the whole group exits together here
	if (LinearIndex >= TileCount)
	{
		return;
	}
	const uint2 TileCoord = TileDataBuffer[LinearIndex];
	const uint2 PixelCoord = TileCoord * TileSize + GroupThreadId.xy;

	const uint FirstNodeIndex = PPLLNodeIndex[PixelCoord];

	float PixelCoverage = 0;

	// 0xFFFFFFFF marks an empty list / the end of a list
	const bool bExecute = FirstNodeIndex != 0xFFFFFFFF && all(PixelCoord < uint2(OutputResolution));

	uint SortedIndex[PPLL_MAX_RENDER_NODE_COUNT];
	float SortedDepth[PPLL_MAX_RENDER_NODE_COUNT];
	for (int NodeIndex = 0; NodeIndex < PPLL_MAX_RENDER_NODE_COUNT; ++NodeIndex)
	{
		SortedDepth[NodeIndex] = 0.0f;
	}

	float TotalTransmittance = 1.0f;
	uint PixelNodeCount = 0;
	if (bExecute)
	{
		//////////
		// First pass: sort PPLL element into nodes we are going to render
		uint NodeIndex = FirstNodeIndex;
		while (NodeIndex != 0xFFFFFFFF)
		{
			const FPackedHairVisPPLL PackedHairVis = PPLLNodeData[NodeIndex];
			const FHairVis HairVis = UnpackHairVisPPLL(PackedHairVis);

			// The total transmittance accounts for every node of the list, including the ones culled below
			TotalTransmittance *= 1.0f - HairVis.Coverage;

			int InsertPos;
			for (InsertPos = 0; InsertPos < PPLL_MAX_RENDER_NODE_COUNT; ++InsertPos)
			{
				if (HairVis.Depth > SortedDepth[InsertPos])
				{
					// The new node is closer
					break;
				}
			}

			if (InsertPos == PPLL_MAX_RENDER_NODE_COUNT)
			{
				// TODOAGREGATE Need to merge the node into last node
				// ==> cull it out for now
			}
			else
			{
				// Shift existing nodes from the end
				for (int Shift = PPLL_MAX_RENDER_NODE_COUNT - 1; Shift > InsertPos; --Shift) // TODO use PixelNodeCount in place of PPLL_MAX_RENDER_NODE_COUNT
				{
					SortedIndex[Shift] = SortedIndex[Shift-1];
					SortedDepth[Shift] = SortedDepth[Shift-1];
				}

				// TODOAGREGATE merge last node into aggregated material

				// Insert new node
				SortedIndex[InsertPos] = NodeIndex;
				SortedDepth[InsertPos] = HairVis.Depth;

				// Counts insertions, so it can exceed PPLL_MAX_RENDER_NODE_COUNT; it is clamped before use
				PixelNodeCount++;
			}

			NodeIndex = PackedHairVis.NextNodeIndex;
		}
	}

	// Initialise group allocation node count
	if (GroupIndex == 0)
	{
		AllocationNodeCount = 0;
		AllocationNodeOffset= 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// Now notify how many nodes this thread wants to allocate
	uint PixelDataSubOffsetInStorage = 0;
	uint PixelRenderNodecount = min(PixelNodeCount, uint(PPLL_MAX_RENDER_NODE_COUNT));
	InterlockedAdd(AllocationNodeCount, PixelRenderNodecount, PixelDataSubOffsetInStorage);
	GroupMemoryBarrierWithGroupSync();

	// Now allocate all the nodes for this group contiguously in memory
	if (GroupIndex == 0 && AllocationNodeCount > 0)
	{
		InterlockedAdd(OutCompactNodeCounter[0], AllocationNodeCount, AllocationNodeOffset);
	}
	GroupMemoryBarrierWithGroupSync();
	uint OffsetInStorage = AllocationNodeOffset + PixelDataSubOffsetInStorage;

	if (bExecute)
	{
		//////////
		// Second pass: compute total coverage for validated nodes we are going to render
		float ValidPixelSampleTotalCoverage = 0.0f;
		float SortedCoverage[PPLL_MAX_RENDER_NODE_COUNT];
		float TotalSortedTransmittance = 1.0f;
		for (uint i = 0; i < PixelRenderNodecount; ++i)
		{
			const uint NodePPLLIndex = SortedIndex[i];
			const FPackedHairVisPPLL PackedHairVis = PPLLNodeData[NodePPLLIndex];
			const FHairVis HairVis = UnpackHairVisPPLL(PackedHairVis);

			const float Coverage = HairVis.Coverage; // This should be 16bits, instead of 8bits for better precision

			// Update current node coverage as a function of previous nodes coverage
			SortedCoverage[i] = TotalSortedTransmittance * Coverage;

			// Update transmittance for the next strands
			TotalSortedTransmittance *= 1.0f - Coverage;

			// Accumulate total coverage.
			ValidPixelSampleTotalCoverage += SortedCoverage[i];
		}

		//////////
		// Third pass: write out compact nodes for rendering
		for (uint j = 0; j < PixelRenderNodecount; ++j)
		{
			const uint NodePPLLIndex = SortedIndex[j];
			FPackedHairVis PackedHairVis = ConvertToPackedHairVis(PPLLNodeData[NodePPLLIndex]);

			// Coverage8bit is a weight normalising to 1 the contribution of all the compacted samples. Because later it is weighted by PixelCoverage.
			// Guard the normalisation: if every kept node has zero coverage the total is zero, and 0/0 would produce a NaN weight.
			const float NormalizedCoverage = ValidPixelSampleTotalCoverage > 0.0f ? SortedCoverage[j] / ValidPixelSampleTotalCoverage : 0.0f;

			// Patch the coverage on the out node
			const float PatchedCoverage8bit = To8bitCoverage(NormalizedCoverage);
			PatchPackedHairVisCoverage(PackedHairVis, PatchedCoverage8bit);

			// TODOAGREGATE if last, Create FPackedHairSample from aggregated data
			OutCompactNodeVis[OffsetInStorage + j] = PackedHairVis;
			OutCompactNodeCoord[OffsetInStorage + j] = PixelCoord;
		}

		PixelCoverage = TransmittanceToCoverage(TotalTransmittance, CoverageThreshold);
	}

	FNodeDesc NodeDesc;
	NodeDesc.Count = PixelRenderNodecount;
	NodeDesc.Offset = OffsetInStorage;
	OutCompactNodeIndex[PixelCoord] = EncodeNodeDesc(NodeDesc);

	OutCoverageTexture[PixelCoord] = PixelCoverage;
}

#else // PERMUTATION_PPLL or PERMUTATION_VISIBILITY

///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MSAA visibility buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////////

#ifndef PERMUTATION_MSAACOUNT
#error PERMUTATION_MSAACOUNT is not defined
#endif

#define MERGE_SAMPLE 0

Texture2D SceneDepthTexture;

#if PERMUTATION_MSAACOUNT > 1
// Gathers the unique hair samples of a pixel from the MSAA ID/depth textures.
//
// OutSamples entries are laid out as: x:ControlPointId|MaterialId, y:number of MSAA samples merged into
// the entry, z:index of the MSAA sample with the largest depth value, w:that depth (float bits as uint).
// Samples with an invalid ID, or whose depth value is smaller than OpaqueDepth (reverse-Z), are skipped.
// When bSortSampleByDepth is set, entries are ordered by decreasing depth value.
void ComputeUniqueSamplesWithoutDepthTest(
	inout uint4 OutSamples[PERMUTATION_MSAACOUNT],
	inout FSampleSetDesc OutSet,
	const uint2 PixelCoord,
	const float OpaqueDepth,
	Texture2DMS InMSAA_IDTexture,
	Texture2DMS InMSAA_DepthTexture)
{
	OutSet.UniqueSampleCount = 0;
	OutSet.ValidSampleCount = 0;

	OutSet.HairSampleCount = PERMUTATION_MSAACOUNT;

	for (uint SampleIt = 0; SampleIt < OutSet.HairSampleCount; ++SampleIt)
	{
		// Note: InMSAA_IDTexture contains both the primitive ID and the material ID. However
		// the material ID is constant along the primitive, so it is correct to use this as a
		// sorting/deduplication key
		const uint HairControlPointId = InMSAA_IDTexture.Load(PixelCoord, SampleIt);
		const bool bIsValid = HairControlPointId != GetInvalidHairControlPointId();
		if (!bIsValid)
			continue;

		const float SampleDepth = InMSAA_DepthTexture.Load(PixelCoord, SampleIt);
		if (OpaqueDepth > SampleDepth) // Reverse-Z
			continue;

		++OutSet.ValidSampleCount;

	#if MERGE_SAMPLE
		const float SceneDepth = ConvertFromDeviceZ(SampleDepth);
	#endif
		bool bAlreadyExist = false;
		for (uint UniqueIt = 0; UniqueIt < OutSet.UniqueSampleCount; ++UniqueIt)
		{
		#if MERGE_SAMPLE
			const float UniqueDepth = asfloat(OutSamples[UniqueIt].w);
			const float UniqueSceneDepth = ConvertFromDeviceZ(UniqueDepth);
			const bool bIsSimilar =
				HairControlPointId == OutSamples[UniqueIt].x ||
				abs(UniqueSceneDepth - SceneDepth) < DepthTheshold;
		#else
			const bool bIsSimilar = HairControlPointId == OutSamples[UniqueIt].x;
		#endif
			if (bIsSimilar)
			{
				OutSamples[UniqueIt].y += 1;

				// Update the unique sample with the closest depth
				const uint IntDepth = asuint(SampleDepth);
				if (IntDepth > OutSamples[UniqueIt].w)
				{
				#if MERGE_SAMPLE
					OutSamples[UniqueIt].x = HairControlPointId;
				#endif
					OutSamples[UniqueIt].z = SampleIt;
					OutSamples[UniqueIt].w = asuint(SampleDepth);
				}

				bAlreadyExist = true;
				break;
			}
		}

		if (!bAlreadyExist)
		{
			OutSamples[OutSet.UniqueSampleCount].x = HairControlPointId;
			OutSamples[OutSet.UniqueSampleCount].y = 1;
			OutSamples[OutSet.UniqueSampleCount].z = SampleIt;
			OutSamples[OutSet.UniqueSampleCount].w = asuint(SampleDepth);
			++OutSet.UniqueSampleCount;
		}
	}

	// Sort sample from closer to further. This is used later for updating sample coverage
	// based on ordered transmittance. See HairStrandsVisibilityComputeSampleCoverage.usf for more details.
	if (bSortSampleByDepth > 0)
	{
		for (uint i = 0; i < OutSet.UniqueSampleCount; ++i)
		{
			for (uint j = i+1; j < OutSet.UniqueSampleCount; ++j)
			{
				// OutSamples[i] changes every time a swap happens, so its depth has to be re-read for
				// each comparison. Comparing against a value cached before the inner loop leaves the
				// array unsorted (e.g. depths {1,3,2} would end up as {2,3,1}).
				if (OutSamples[j].w > OutSamples[i].w)
				{
					uint4 Temp = OutSamples[i];
					OutSamples[i] = OutSamples[j];
					OutSamples[j] = Temp;
				}
			}
		}
	}
}

Texture2DMS MSAA_IDTexture;
Texture2DMS MSAA_DepthTexture;

#else

// Single-sample variant: produces at most one unique sample, read from the ID/depth textures at PixelCoord.
// The sample is skipped when its ID is invalid or when its depth value is smaller than OpaqueDepth (reverse-Z).
void ComputeUniqueSamplesWithoutDepthTest(
	inout uint4 OutSamples[PERMUTATION_MSAACOUNT],
	inout FSampleSetDesc OutSet,
	const uint2 PixelCoord,
	const float OpaqueDepth,
	Texture2D InMSAA_IDTexture,
	Texture2D InMSAA_DepthTexture)
{
	OutSet.UniqueSampleCount = 0;
	OutSet.ValidSampleCount = 0;
	OutSet.HairSampleCount = PERMUTATION_MSAACOUNT;

	const uint HairControlPointId = InMSAA_IDTexture.Load(uint3(PixelCoord, 0));
	const bool bIsValid = HairControlPointId != GetInvalidHairControlPointId();
	if (!bIsValid)
		return;

	const float SampleDepth = InMSAA_DepthTexture.Load(uint3(PixelCoord, 0));
	if (OpaqueDepth > SampleDepth) // Reverse-Z
		return;

	++OutSet.ValidSampleCount;

	OutSamples[0].x = HairControlPointId;
	OutSamples[0].y = 1;
	OutSamples[0].z = 0;
	OutSamples[0].w = asuint(SampleDepth);
	++OutSet.UniqueSampleCount;
}

Texture2D MSAA_IDTexture;
Texture2D MSAA_DepthTexture;

// Loads the ID texture at PixelCoord. SampleIt is ignored in this single-sample variant.
uint2 GetIDTexture(uint2 PixelCoord, uint SampleIt)
{
	return MSAA_IDTexture.Load(uint3(PixelCoord, 0));
}

// Loads the depth texture at PixelCoord. SampleIt is ignored in this single-sample variant.
float GetDepthTexture(uint2 PixelCoord, uint SampleIt)
{
	return MSAA_DepthTexture.Load(uint3(PixelCoord, 0));
}

#endif // PERMUTATION_MSAACOUNT>1

int2 TileCountXY;
uint TileSize;
Buffer TileCountBuffer;
Buffer TileDataBuffer;
Texture2D ViewTransmittanceTexture;

// Compacts the MSAA visibility buffer: for each pixel of a hair tile, gathers the unique hair samples,
// allocates a contiguous range for them in OutCompactNodeVis/OutCompactNodeCoord (one global atomic per
// group), and writes the per-pixel node descriptor and coverage.
[numthreads(TILE_PIXEL_SIZE_X, TILE_PIXEL_SIZE_Y, 1)]
void MainCS(uint GroupIndex : SV_GroupIndex, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (GroupIndex == 0)
	{
		AllocationNodeCount = 0;
		AllocationNodeOffset = 0;
	}
	// The counters above must be visible to the whole group before any thread adds to them;
	// without this barrier a thread could run its InterlockedAdd before thread 0 has reset the counter.
	GroupMemoryBarrierWithGroupSync();

	const uint TileCount = TileCountBuffer[HAIRTILE_HAIR_ALL];
	const uint LinearIndex = GroupId.x + GroupId.y * TileCountXY.x;
	// LinearIndex only depends on GroupId, so the whole group exits together here
	if (LinearIndex >= TileCount)
	{
		return;
	}
	const uint2 TileCoord = TileDataBuffer[LinearIndex];
	uint2 PixelCoord = TileCoord * TileSize + GroupThreadId.xy;

	// Threads falling outside of the view rect are redirected onto the view rect origin rather than
	// exiting, so that they still reach the group barriers below.
	// NOTE(review): such threads also allocate nodes and write outputs for the origin pixel; confirm this is intended.
	const bool bIsValid = all(PixelCoord < View.ViewRectMinAndSize.zw);
	if (!bIsValid)
	{
		PixelCoord = uint2(0, 0);
	}
	PixelCoord += View.ViewRectMinAndSize.xy;

	FSampleSetDesc SampleDesc;

	const float OpaqueDepth = SceneDepthTexture.Load(uint3(PixelCoord, 0)).r;
	uint4 Samples[PERMUTATION_MSAACOUNT];		// x:ControlPointId|MaterialId, y:Weight, z:SampleIt, w:Depth (as uint)
	ComputeUniqueSamplesWithoutDepthTest(
		Samples,
		SampleDesc,
		PixelCoord,
		OpaqueDepth,
		MSAA_IDTexture,
		MSAA_DepthTexture);

	FNodeDesc NodeDesc;
	NodeDesc.Count = SampleDesc.UniqueSampleCount;
	NodeDesc.Offset = 0;

	// Reserve this pixel's nodes within the group range; Offset receives the offset local to the group
	if (NodeDesc.Count > 0)
	{
		InterlockedAdd(AllocationNodeCount, NodeDesc.Count, NodeDesc.Offset);
	}
	GroupMemoryBarrierWithGroupSync();

	// A single thread reserves the range of the whole group in the global counter
	if (GroupIndex == 0 && AllocationNodeCount > 0)
	{
		InterlockedAdd(OutCompactNodeCounter[0], AllocationNodeCount, AllocationNodeOffset);
	}
	GroupMemoryBarrierWithGroupSync();

	// Allocate node space
	float PixelCoverage = 0;
	if (NodeDesc.Count > 0)
	{
		NodeDesc.Offset += AllocationNodeOffset;

		// Store final sort node data
		if (NodeDesc.Offset + NodeDesc.Count < MaxNodeCount)
		{
			for (uint OutIndex = 0; OutIndex < NodeDesc.Count; ++OutIndex)
			{
				// VisibilityData.Coverage8bit is a weight normalising to 1 the contribution of all the compacted samples. Because later it is weighted by PixelCoverage.
				FHairVis OutNodeVis;
				OutNodeVis.ControlPointId= UnpackHairVisControlPointId(Samples[OutIndex].x);
				OutNodeVis.Depth		 = asfloat(Samples[OutIndex].w); // MSAA_DepthTexture.Load(PixelCoord, Samples[OutIndex].z);
				OutNodeVis.Coverage8bit	 = To8bitCoverage(Samples[OutIndex].y / float(SampleDesc.ValidSampleCount));
				OutNodeVis.MaterialId	 = UnpackHairVisMaterialId(Samples[OutIndex].x);

				const uint StoreIndex = NodeDesc.Offset + OutIndex;
				OutCompactNodeVis[StoreIndex] = PackHairVis(OutNodeVis);
				OutCompactNodeCoord[StoreIndex] = PixelCoord;
			}

			PixelCoverage = TransmittanceToCoverage(ViewTransmittanceTexture.Load(uint3(PixelCoord, 0)), CoverageThreshold);
		}
		else
		{
			// The node buffer is full and nothing was stored for this pixel: publish an empty descriptor
			// (matching the zero coverage) instead of one referencing nodes which were never written.
			NodeDesc.Count = 0;
			NodeDesc.Offset = 0;
		}
	}

	OutCompactNodeIndex[PixelCoord] = EncodeNodeDesc(NodeDesc);
	OutCoverageTexture[PixelCoord] = PixelCoverage;
}

#endif // PERMUTATION_PPLL or PERMUTATION_VISIBILITY