// File: UnrealEngine/Engine/Shaders/Private/Nanite/NaniteCulling.ush
// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "../Common.ush"
#include "NaniteDataDecode.ush"
#include "NaniteHierarchyTraversalCommon.ush"
#define CULLING_PASS_NO_OCCLUSION 0
#define CULLING_PASS_OCCLUSION_MAIN 1
#define CULLING_PASS_OCCLUSION_POST 2
#define CULLING_PASS_EXPLICIT_LIST 3
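
// Descriptive note (assumed from the pass names above, not stated in this file):
// in Nanite's two-pass occlusion culling, the main pass tests candidates against
// the previous frame's HZB, and the post pass re-tests the candidates that failed
// against the current frame's HZB built from the main pass results.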
struct FCandidateNode
{
    uint Flags;
    uint ViewId;
    uint InstanceId;
    uint NodeIndex;
    uint EnabledBitmask;
};
uint GetCandidateNodeSize(bool bPostPass) { return bPostPass ? 12u : 8u; }
uint GetCandidateClusterSize() { return NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS * 4; }
// NodesAndClusterBatches layout: Main Cluster Batches, Main Candidate Nodes, Post Cluster Batches, Post Candidate Nodes
uint GetClusterBatchesOffset() { return 0u; }
uint GetCandidateNodesOffset() { return GetMaxClusterBatches() * 4u; }
uint GetNodesAndBatchesOffset(bool bPostPass) { return bPostPass ? (GetCandidateNodesOffset() + MaxNodes * GetCandidateNodeSize(false)) : 0u; }
uint GetCandidateClusterOffset() { return 0u; }
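
// Worked example of the layout above (illustrative arithmetic only, with
// MaxBatches = GetMaxClusterBatches()): the byte address of post-pass candidate
// node N resolves to
//   GetNodesAndBatchesOffset(true) + GetCandidateNodesOffset() + N * 12
// = (MaxBatches * 4 + MaxNodes * 8) + MaxBatches * 4 + N * 12
// i.e. the node sits after the main batches, the main nodes (8 bytes each) and
// the post batches, using the larger 12-byte post-pass node stride.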
void StoreVisibleCluster(RWByteAddressBuffer VisibleClusters, uint ClusterIdx, FVisibleCluster VisibleCluster, bool bHasPageData = false)
{
    uint4 RawData = PackVisibleCluster(VisibleCluster, bHasPageData);
#if NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 4
    if (bHasPageData)
    {
        VisibleClusters.Store4(ClusterIdx * 16, RawData);
    }
    else
    {
        VisibleClusters.Store3(ClusterIdx * 12, RawData.xyz);
    }
#elif NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 3
    if (bHasPageData)
    {
        VisibleClusters.Store3(ClusterIdx * 12, RawData.xyz);
    }
    else
    {
        VisibleClusters.Store2(ClusterIdx * 8, RawData.xy);
    }
#else
#error Unexpected visible cluster size!
#endif
}
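
// Hedged usage sketch (not part of the original file): appending one visible
// cluster at a write index, typically obtained from an atomic counter elsewhere
// in the culling pipeline. The function name is hypothetical.
void StoreVisibleClusterExample(RWByteAddressBuffer VisibleClusters, uint WriteIndex, FVisibleCluster Cluster)
{
    // With page data the packed payload is one dword larger, so the effective
    // stride grows accordingly (e.g. 16 vs 12 bytes when
    // NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 4).
    StoreVisibleCluster(VisibleClusters, WriteIndex, Cluster, /* bHasPageData = */ true);
}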
uint4 PackCandidateNode(FCandidateNode Node)
{
    // Leave at least one bit unused in each of the fields, so 0xFFFFFFFFu is never a valid value.
    uint4 RawData;
    RawData.x = (Node.InstanceId << NANITE_NUM_CULLING_FLAG_BITS) | Node.Flags;
    RawData.y = (Node.ViewId << NANITE_MAX_NODES_PER_PRIMITIVE_BITS) | Node.NodeIndex;
    RawData.z = Node.EnabledBitmask;
    RawData.w = 0;
    checkSlow(RawData.x != 0xFFFFFFFFu && RawData.y != 0xFFFFFFFFu && RawData.z != 0xFFFFFFFFu);
    return RawData;
}
FCandidateNode UnpackCandidateNode(uint4 RawData, bool bHasEnabledMask)
{
    FCandidateNode Node;
    Node.Flags = BitFieldExtractU32(RawData.x, NANITE_NUM_CULLING_FLAG_BITS, 0);
    Node.InstanceId = BitFieldExtractU32(RawData.x, NANITE_MAX_INSTANCES_BITS, NANITE_NUM_CULLING_FLAG_BITS);
    Node.NodeIndex = BitFieldExtractU32(RawData.y, NANITE_MAX_NODES_PER_PRIMITIVE_BITS, 0);
    Node.ViewId = BitFieldExtractU32(RawData.y, NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS, NANITE_MAX_NODES_PER_PRIMITIVE_BITS);
    Node.EnabledBitmask = bHasEnabledMask ? RawData.z : 0xFFFFFFFFu;
    return Node;
}
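
// Hedged sketch (not part of the original file): a pack/unpack round trip should
// be lossless for any node whose fields fit within the bit budgets above. The
// function name is hypothetical.
void CheckCandidateNodeRoundTripExample(FCandidateNode Node)
{
    const uint4 Packed = PackCandidateNode(Node);
    const FCandidateNode Unpacked = UnpackCandidateNode(Packed, /* bHasEnabledMask = */ true);
    checkSlow(Unpacked.Flags == Node.Flags && Unpacked.InstanceId == Node.InstanceId);
    checkSlow(Unpacked.ViewId == Node.ViewId && Unpacked.NodeIndex == Node.NodeIndex);
    checkSlow(Unpacked.EnabledBitmask == Node.EnabledBitmask);
}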
// Load/Store/Clear each have globally coherent and non-coherent buffer versions. TODO: remove the duplication once HLSL templates are available.
void StoreCandidateClusterNoCheckCoherent(RWCoherentByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
    uint4 RawData = PackVisibleCluster(VisibleCluster, false);
#if NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 3
    CandidateClusters.Store3(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xyz);
#elif NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 2
    CandidateClusters.Store2(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xy);
#else
#error Unexpected candidate cluster size!
#endif
}
void StoreCandidateClusterNoCheck(RWByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
    uint4 RawData = PackVisibleCluster(VisibleCluster, false);
#if NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 3
    CandidateClusters.Store3(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xyz);
#elif NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 2
    CandidateClusters.Store2(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xy);
#else
#error Unexpected candidate cluster size!
#endif
}
void StoreCandidateClusterCoherent(RWCoherentByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
    checkSlow(ClusterIndex < MaxCandidateClusters);
    StoreCandidateClusterNoCheckCoherent(CandidateClusters, ClusterIndex, VisibleCluster);
}
void StoreCandidateCluster(RWByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
    checkSlow(ClusterIndex < MaxCandidateClusters);
    StoreCandidateClusterNoCheck(CandidateClusters, ClusterIndex, VisibleCluster);
}
// Helper function to work around a shader macro expansion issue (incorrect expansion of self-referential macros) that causes,
// e.g., #define MaxNodes NaniteRaster.MaxNodes to expand to things like NaniteRaster.NaniteRaster.NaniteRaster.MaxNodes when used as a parameter to a macro.
void CheckNodeIndexHelper(uint NodeIndex)
{
    uint MaxNodesTmp = MaxNodes;
    checkSlow(NodeIndex < MaxNodesTmp);
}
uint4 LoadCandidateNodeDataCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
    return bPostPass ? uint4(NodesAndClusterBatches.Load3(Offset + NodeIndex * 12), 0) :
                       uint4(NodesAndClusterBatches.Load2(Offset + NodeIndex * 8), 0, 0);
}
uint4 LoadCandidateNodeData(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
    return bPostPass ? uint4(NodesAndClusterBatches.Load3(Offset + NodeIndex * 12), 0) :
                       uint4(NodesAndClusterBatches.Load2(Offset + NodeIndex * 8), 0, 0);
}
void StoreCandidateNodeDataCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, uint4 RawData, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
    if (bPostPass)
        NodesAndClusterBatches.Store3(Offset + NodeIndex * 12, RawData.xyz);
    else
        NodesAndClusterBatches.Store2(Offset + NodeIndex * 8, RawData.xy);
}
void StoreCandidateNodeData(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, uint4 RawData, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
    if (bPostPass)
        NodesAndClusterBatches.Store3(Offset + NodeIndex * 12, RawData.xyz);
    else
        NodesAndClusterBatches.Store2(Offset + NodeIndex * 8, RawData.xy);
}
void StoreCandidateNodeCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, FCandidateNode Node, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    StoreCandidateNodeDataCoherent(NodesAndClusterBatches, NodeIndex, PackCandidateNode(Node), bPostPass);
}
void StoreCandidateNode(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, FCandidateNode Node, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    StoreCandidateNodeData(NodesAndClusterBatches, NodeIndex, PackCandidateNode(Node), bPostPass);
}
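
// Clearing a node slot writes 0xFFFFFFFFu, which PackCandidateNode guarantees is
// never produced for a valid node (each packed field leaves at least one bit
// unused), so consumers can treat it as an "empty slot" sentinel.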
void ClearCandidateNodeCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    StoreCandidateNodeDataCoherent(NodesAndClusterBatches, NodeIndex, 0xFFFFFFFFu, bPostPass);
}
void ClearCandidateNode(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
    CheckNodeIndexHelper(NodeIndex);
    StoreCandidateNodeData(NodesAndClusterBatches, NodeIndex, 0xFFFFFFFFu, bPostPass);
}
uint LoadClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
    checkSlow(BatchIndex < GetMaxClusterBatches());
    return NodesAndClusterBatches.Load(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4);
}
uint LoadClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
    checkSlow(BatchIndex < GetMaxClusterBatches());
    return NodesAndClusterBatches.Load(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4);
}
void AddToClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, uint Add, bool bPostPass)
{
    checkSlow(BatchIndex < GetMaxClusterBatches());
    NodesAndClusterBatches.InterlockedAdd(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, Add);
}
void AddToClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, uint Add, bool bPostPass)
{
    checkSlow(BatchIndex < GetMaxClusterBatches());
    NodesAndClusterBatches.InterlockedAdd(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, Add);
}
void ClearClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
    checkSlow(BatchIndex < GetMaxClusterBatches());
    NodesAndClusterBatches.Store(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, 0);
}
void ClearClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
    checkSlow(BatchIndex < GetMaxClusterBatches());
    NodesAndClusterBatches.Store(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, 0);
}
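
// Hedged usage sketch (not part of the original file): a producer bumping the
// cluster count of the batch that owns a newly appended candidate cluster.
// CLUSTERS_PER_BATCH_EXAMPLE is an assumed constant; the real batch size is
// defined by the surrounding persistent-culling pipeline.
void AddCandidateClusterToBatchExample(RWByteAddressBuffer NodesAndClusterBatches, uint ClusterIndex, bool bPostPass)
{
    const uint CLUSTERS_PER_BATCH_EXAMPLE = 32u; // assumption for illustration
    AddToClusterBatch(NodesAndClusterBatches, ClusterIndex / CLUSTERS_PER_BATCH_EXAMPLE, 1u, bPostPass);
}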