// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "NaniteDataDecode.ush"
#include "NaniteHierarchyTraversalCommon.ush"

#define CULLING_PASS_NO_OCCLUSION		0
#define CULLING_PASS_OCCLUSION_MAIN		1
#define CULLING_PASS_OCCLUSION_POST		2
#define CULLING_PASS_EXPLICIT_LIST		3

// Unpacked form of a candidate node entry.
// See PackCandidateNode / UnpackCandidateNode for how the fields map onto the packed dwords.
struct FCandidateNode
{
	uint	Flags;
	uint	ViewId;
	uint	InstanceId;
	uint	NodeIndex;
	uint	EnabledBitmask;
};

// Size in bytes of one packed candidate node.
// Post pass nodes are stored as 3 dwords (x, y and the EnabledBitmask in z), main pass nodes as 2 dwords (x, y).
// This is the single source of truth for the node stride: both the region layout and the element addressing use it.
uint GetCandidateNodeSize(bool bPostPass)
{
	return bPostPass ? 12u : 8u;
}

// Size in bytes of one packed candidate cluster.
uint GetCandidateClusterSize()
{
	return NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS * 4;
}

// NodesAndClusterBatches layout: Main Cluster Batches, Main Candidate Nodes, Post Cluster Batches, Post Candidate Nodes

// Byte offset of the cluster batches relative to the start of a pass region.
uint GetClusterBatchesOffset()
{
	return 0u;
}

// Byte offset of the candidate nodes relative to the start of a pass region (one dword per cluster batch precedes them).
uint GetCandidateNodesOffset()
{
	return GetMaxClusterBatches() * 4u;
}

// Byte offset of the region belonging to the given pass.
// The main pass region starts at 0. The post pass region starts after the main cluster batches and MaxNodes main pass candidate nodes.
uint GetNodesAndBatchesOffset(bool bPostPass)
{
	return bPostPass ? (GetCandidateNodesOffset() + MaxNodes * GetCandidateNodeSize(false)) : 0u;
}

// Byte offset of the first candidate cluster inside the candidate cluster buffer.
uint GetCandidateClusterOffset()
{
	return 0u;
}

// Packs VisibleCluster and writes it to slot ClusterIdx of VisibleClusters.
// The slot stride is NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS dwords when bHasPageData is set and one dword less otherwise.
void StoreVisibleCluster(RWByteAddressBuffer VisibleClusters, uint ClusterIdx, FVisibleCluster VisibleCluster, bool bHasPageData = false)
{
	uint4 RawData = PackVisibleCluster(VisibleCluster, bHasPageData);
#if NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 4
	if (bHasPageData)
	{
		VisibleClusters.Store4(ClusterIdx * 16, RawData);
	}
	else
	{
		VisibleClusters.Store3(ClusterIdx * 12, RawData.xyz);
	}
#elif NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 3
	if (bHasPageData)
	{
		VisibleClusters.Store3(ClusterIdx * 12, RawData.xyz);
	}
	else
	{
		VisibleClusters.Store2(ClusterIdx * 8, RawData.xy);
	}
#else
#error Unexpected visible cluster size!
#endif
}

// Packs a candidate node into raw dwords:
//   x = InstanceId (upper bits) | Flags (lowest NANITE_NUM_CULLING_FLAG_BITS bits)
//   y = ViewId (upper bits)     | NodeIndex (lowest NANITE_MAX_NODES_PER_PRIMITIVE_BITS bits)
//   z = EnabledBitmask
//   w = 0
uint4 PackCandidateNode(FCandidateNode Node)
{
	// Leave at least one bit unused in each of the fields, so 0xFFFFFFFFu is never a valid value.
	// (ClearCandidateNode writes 0xFFFFFFFFu into every stored dword; presumably consumers use that to detect empty slots.)
	uint4 RawData;
	RawData.x = (Node.InstanceId << NANITE_NUM_CULLING_FLAG_BITS) | Node.Flags;
	RawData.y = (Node.ViewId << NANITE_MAX_NODES_PER_PRIMITIVE_BITS) | Node.NodeIndex;
	RawData.z = Node.EnabledBitmask;
	RawData.w = 0;
	checkSlow(RawData.x != 0xFFFFFFFFu && RawData.y != 0xFFFFFFFFu && RawData.z != 0xFFFFFFFFu);
	return RawData;
}

// Inverse of PackCandidateNode.
// When bHasEnabledMask is false, RawData.z is ignored and EnabledBitmask is set to 0xFFFFFFFFu.
FCandidateNode UnpackCandidateNode(uint4 RawData, bool bHasEnabledMask)
{
	FCandidateNode Node;
	Node.Flags			= BitFieldExtractU32(RawData.x, NANITE_NUM_CULLING_FLAG_BITS, 0);
	Node.InstanceId		= BitFieldExtractU32(RawData.x, NANITE_MAX_INSTANCES_BITS, NANITE_NUM_CULLING_FLAG_BITS);
	Node.NodeIndex		= BitFieldExtractU32(RawData.y, NANITE_MAX_NODES_PER_PRIMITIVE_BITS, 0);
	Node.ViewId			= BitFieldExtractU32(RawData.y, NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS, NANITE_MAX_NODES_PER_PRIMITIVE_BITS);
	Node.EnabledBitmask	= bHasEnabledMask ? RawData.z : 0xFFFFFFFFu;
	return Node;
}

// TODO: Load/Store/Clear each exist twice, once for RWByteAddressBuffer and once for the globally coherent buffer type.
// The duplicated bodies are otherwise identical; collapse them with templates.

// Packs VisibleCluster (without page data) and writes it to slot ClusterIndex. Performs no bounds check on ClusterIndex.
void StoreCandidateClusterNoCheckCoherent(RWCoherentByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	uint4 RawData = PackVisibleCluster(VisibleCluster, false);
#if NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 3
	CandidateClusters.Store3(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xyz);
#elif NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 2
	CandidateClusters.Store2(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xy);
#else
#error Unexpected candidate cluster size!
#endif
}

// Packs VisibleCluster (without page data) and writes it to slot ClusterIndex. Performs no bounds check on ClusterIndex.
void StoreCandidateClusterNoCheck(RWByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	uint4 RawData = PackVisibleCluster(VisibleCluster, false);
#if NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 3
	CandidateClusters.Store3(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xyz);
#elif NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 2
	CandidateClusters.Store2(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xy);
#else
#error Unexpected candidate cluster size!
#endif
}

// Same as StoreCandidateClusterNoCheckCoherent, but first checks ClusterIndex against MaxCandidateClusters.
void StoreCandidateClusterCoherent(RWCoherentByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	checkSlow(ClusterIndex < MaxCandidateClusters);
	StoreCandidateClusterNoCheckCoherent(CandidateClusters, ClusterIndex, VisibleCluster);
}

// Same as StoreCandidateClusterNoCheck, but first checks ClusterIndex against MaxCandidateClusters.
void StoreCandidateCluster(RWByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	checkSlow(ClusterIndex < MaxCandidateClusters);
	StoreCandidateClusterNoCheck(CandidateClusters, ClusterIndex, VisibleCluster);
}

// helper function to work around a shader macro expansion issue (incorrect expansion of self-referential macros) that causes,
// e.g., #define MaxNodes NaniteRaster.MaxNodes, to expand to things like NaniteRaster.NaniteRaster.NaniteRaster.MaxNodes when used as parameter to a macro.
void CheckNodeIndexHelper(uint NodeIndex)
{
	// Copy into a local first so that MaxNodes itself never appears inside the checkSlow macro argument.
	uint MaxNodesTmp = MaxNodes;
	checkSlow(NodeIndex < MaxNodesTmp);
}

// Loads the raw dwords of candidate node NodeIndex for the given pass.
// Post pass nodes yield (x, y, z, 0); main pass nodes yield (x, y, 0, 0).
uint4 LoadCandidateNodeDataCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	// Stride comes from GetCandidateNodeSize so it always agrees with the layout computed by GetNodesAndBatchesOffset.
	const uint Address = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset() + NodeIndex * GetCandidateNodeSize(bPostPass);
	return bPostPass ?	uint4(NodesAndClusterBatches.Load3(Address), 0) :
						uint4(NodesAndClusterBatches.Load2(Address), 0, 0);
}

// Loads the raw dwords of candidate node NodeIndex for the given pass.
// Post pass nodes yield (x, y, z, 0); main pass nodes yield (x, y, 0, 0).
uint4 LoadCandidateNodeData(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	// Stride comes from GetCandidateNodeSize so it always agrees with the layout computed by GetNodesAndBatchesOffset.
	const uint Address = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset() + NodeIndex * GetCandidateNodeSize(bPostPass);
	return bPostPass ?	uint4(NodesAndClusterBatches.Load3(Address), 0) :
						uint4(NodesAndClusterBatches.Load2(Address), 0, 0);
}

// Stores the raw dwords of candidate node NodeIndex for the given pass.
// Post pass nodes store RawData.xyz; main pass nodes store RawData.xy. RawData.w is never written.
void StoreCandidateNodeDataCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, uint4 RawData, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	// Stride comes from GetCandidateNodeSize so it always agrees with the layout computed by GetNodesAndBatchesOffset.
	const uint Address = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset() + NodeIndex * GetCandidateNodeSize(bPostPass);
	if (bPostPass)
	{
		NodesAndClusterBatches.Store3(Address, RawData.xyz);
	}
	else
	{
		NodesAndClusterBatches.Store2(Address, RawData.xy);
	}
}

// Stores the raw dwords of candidate node NodeIndex for the given pass.
// Post pass nodes store RawData.xyz; main pass nodes store RawData.xy. RawData.w is never written.
void StoreCandidateNodeData(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, uint4 RawData, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	// Stride comes from GetCandidateNodeSize so it always agrees with the layout computed by GetNodesAndBatchesOffset.
	const uint Address = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset() + NodeIndex * GetCandidateNodeSize(bPostPass);
	if (bPostPass)
	{
		NodesAndClusterBatches.Store3(Address, RawData.xyz);
	}
	else
	{
		NodesAndClusterBatches.Store2(Address, RawData.xy);
	}
}

// Packs Node and stores it in slot NodeIndex of the given pass.
void StoreCandidateNodeCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, FCandidateNode Node, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	StoreCandidateNodeDataCoherent(NodesAndClusterBatches, NodeIndex, PackCandidateNode(Node), bPostPass);
}

// Packs Node and stores it in slot NodeIndex of the given pass.
void StoreCandidateNode(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, FCandidateNode Node, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	StoreCandidateNodeData(NodesAndClusterBatches, NodeIndex, PackCandidateNode(Node), bPostPass);
}

// Overwrites every stored dword of slot NodeIndex with 0xFFFFFFFFu, a value PackCandidateNode never produces.
void ClearCandidateNodeCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	// The scalar is implicitly replicated into all components of the uint4 parameter.
	StoreCandidateNodeDataCoherent(NodesAndClusterBatches, NodeIndex, 0xFFFFFFFFu, bPostPass);
}

// Overwrites every stored dword of slot NodeIndex with 0xFFFFFFFFu, a value PackCandidateNode never produces.
void ClearCandidateNode(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	// The scalar is implicitly replicated into all components of the uint4 parameter.
	StoreCandidateNodeData(NodesAndClusterBatches, NodeIndex, 0xFFFFFFFFu, bPostPass);
}

// Returns the dword stored for cluster batch BatchIndex of the given pass.
uint LoadClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	return NodesAndClusterBatches.Load(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4);
}

// Returns the dword stored for cluster batch BatchIndex of the given pass.
uint LoadClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	return NodesAndClusterBatches.Load(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4);
}

// Atomically adds Add to the dword stored for cluster batch BatchIndex of the given pass.
void AddToClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, uint Add, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.InterlockedAdd(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, Add);
}

// Atomically adds Add to the dword stored for cluster batch BatchIndex of the given pass.
void AddToClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, uint Add, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.InterlockedAdd(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, Add);
}

// Resets the dword stored for cluster batch BatchIndex of the given pass to 0.
void ClearClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.Store(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, 0);
}

// Resets the dword stored for cluster batch BatchIndex of the given pass to 0.
void ClearClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.Store(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, 0);
}