// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "NaniteDataDecode.ush"
#include "NaniteHierarchyTraversalCommon.ush"

#define CULLING_PASS_NO_OCCLUSION 0
#define CULLING_PASS_OCCLUSION_MAIN 1
#define CULLING_PASS_OCCLUSION_POST 2
#define CULLING_PASS_EXPLICIT_LIST 3
struct FCandidateNode
{
	uint Flags;
	uint ViewId;
	uint InstanceId;
	uint NodeIndex;
	uint EnabledBitmask;
};

uint GetCandidateNodeSize(bool bPostPass) { return bPostPass ? 12u : 8u; }
uint GetCandidateClusterSize() { return NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS * 4; }

// NodesAndClusterBatches layout: Main Cluster Batches, Main Candidate Nodes, Post Cluster Batches, Post Candidate Nodes
uint GetClusterBatchesOffset() { return 0u; }
uint GetCandidateNodesOffset() { return GetMaxClusterBatches() * 4u; }
uint GetNodesAndBatchesOffset(bool bPostPass) { return bPostPass ? (GetCandidateNodesOffset() + MaxNodes * GetCandidateNodeSize(false)) : 0u; }
uint GetCandidateClusterOffset() { return 0u; }
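
// Illustrative byte map of NodesAndClusterBatches, derived from the offset helpers above
// (B = GetMaxClusterBatches() * 4, N = MaxNodes * GetCandidateNodeSize(false)):
//   [0,       B)        Main cluster batches
//   [B,       B + N)    Main candidate nodes  (8 bytes each)
//   [B + N,  2B + N)    Post cluster batches
//   [2B + N, ...)       Post candidate nodes  (12 bytes each: post-pass nodes also persist EnabledBitmask)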

void StoreVisibleCluster(RWByteAddressBuffer VisibleClusters, uint ClusterIdx, FVisibleCluster VisibleCluster, bool bHasPageData = false)
{
	uint4 RawData = PackVisibleCluster(VisibleCluster, bHasPageData);
#if NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 4
	if (bHasPageData)
	{
		VisibleClusters.Store4(ClusterIdx * 16, RawData);
	}
	else
	{
		VisibleClusters.Store3(ClusterIdx * 12, RawData.xyz);
	}
#elif NANITE_MAX_VISIBLE_CLUSTER_SIZE_DWORDS == 3
	if (bHasPageData)
	{
		VisibleClusters.Store3(ClusterIdx * 12, RawData.xyz);
	}
	else
	{
		VisibleClusters.Store2(ClusterIdx * 8, RawData.xy);
	}
#else
#error Unexpected visible cluster size!
#endif
}
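
// Note: in both size configurations the optional page data adds exactly one dword, and the
// visible cluster array is tightly packed at the matching stride (16/12 or 12/8 bytes), so
// readers must agree on bHasPageData to compute the correct address.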

uint4 PackCandidateNode(FCandidateNode Node)
{
	// Leave at least one bit unused in each of the fields, so 0xFFFFFFFFu is never a valid value.
	uint4 RawData;
	RawData.x = (Node.InstanceId << NANITE_NUM_CULLING_FLAG_BITS) | Node.Flags;
	RawData.y = (Node.ViewId << NANITE_MAX_NODES_PER_PRIMITIVE_BITS) | Node.NodeIndex;
	RawData.z = Node.EnabledBitmask;
	RawData.w = 0;

	checkSlow(RawData.x != 0xFFFFFFFFu && RawData.y != 0xFFFFFFFFu && RawData.z != 0xFFFFFFFFu);

	return RawData;
}
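
// Packed layout, as derived from the shifts above:
//   RawData.x = [ InstanceId | Flags     ]  (Flags in the low NANITE_NUM_CULLING_FLAG_BITS bits)
//   RawData.y = [ ViewId     | NodeIndex ]  (NodeIndex in the low NANITE_MAX_NODES_PER_PRIMITIVE_BITS bits)
//   RawData.z =   EnabledBitmask            (only persisted for post-pass nodes)
// The spare bit per field guarantees that 0xFFFFFFFFu can serve as the "cleared slot" sentinel
// written by ClearCandidateNode() below.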

FCandidateNode UnpackCandidateNode(uint4 RawData, bool bHasEnabledMask)
{
	FCandidateNode Node;
	Node.Flags          = BitFieldExtractU32(RawData.x, NANITE_NUM_CULLING_FLAG_BITS, 0);
	Node.InstanceId     = BitFieldExtractU32(RawData.x, NANITE_MAX_INSTANCES_BITS, NANITE_NUM_CULLING_FLAG_BITS);
	Node.NodeIndex      = BitFieldExtractU32(RawData.y, NANITE_MAX_NODES_PER_PRIMITIVE_BITS, 0);
	Node.ViewId         = BitFieldExtractU32(RawData.y, NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS, NANITE_MAX_NODES_PER_PRIMITIVE_BITS);
	Node.EnabledBitmask = bHasEnabledMask ? RawData.z : 0xFFFFFFFFu;
	return Node;
}
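
// Round-trip sketch (hypothetical usage): main-pass nodes drop the mask dword, so unpacking
// them substitutes a fully enabled mask.
//   uint4 Packed = PackCandidateNode(Node);
//   FCandidateNode Reloaded = UnpackCandidateNode(Packed, /*bHasEnabledMask*/ bPostPass);
//   // Reloaded.EnabledBitmask matches Node.EnabledBitmask only when bPostPass is true.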

// Load/Store/Clear each come in a plain and a globally coherent buffer variant. TODO: Fix the duplication with templates.
void StoreCandidateClusterNoCheckCoherent(RWCoherentByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	uint4 RawData = PackVisibleCluster(VisibleCluster, false);
#if NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 3
	CandidateClusters.Store3(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xyz);
#elif NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 2
	CandidateClusters.Store2(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xy);
#else
#error Unexpected candidate cluster size!
#endif
}

void StoreCandidateClusterNoCheck(RWByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	uint4 RawData = PackVisibleCluster(VisibleCluster, false);
#if NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 3
	CandidateClusters.Store3(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xyz);
#elif NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS == 2
	CandidateClusters.Store2(GetCandidateClusterOffset() + ClusterIndex * GetCandidateClusterSize(), RawData.xy);
#else
#error Unexpected candidate cluster size!
#endif
}

void StoreCandidateClusterCoherent(RWCoherentByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	checkSlow(ClusterIndex < MaxCandidateClusters);
	StoreCandidateClusterNoCheckCoherent(CandidateClusters, ClusterIndex, VisibleCluster);
}

void StoreCandidateCluster(RWByteAddressBuffer CandidateClusters, uint ClusterIndex, FVisibleCluster VisibleCluster)
{
	checkSlow(ClusterIndex < MaxCandidateClusters);
	StoreCandidateClusterNoCheck(CandidateClusters, ClusterIndex, VisibleCluster);
}
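
// The checked wrappers above only add a checkSlow() bounds assert on top of the NoCheck
// variants; callers that have already validated ClusterIndex can use the NoCheck path directly.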

// Helper function to work around a shader macro expansion issue (incorrect expansion of self-referential macros).
// E.g. with #define MaxNodes NaniteRaster.MaxNodes, passing MaxNodes directly as a macro argument can expand to
// NaniteRaster.NaniteRaster.NaniteRaster.MaxNodes. Copying it into a local first sidesteps the re-expansion.
void CheckNodeIndexHelper(uint NodeIndex)
{
	uint MaxNodesTmp = MaxNodes;
	checkSlow(NodeIndex < MaxNodesTmp);
}

uint4 LoadCandidateNodeDataCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
	return bPostPass ? uint4(NodesAndClusterBatches.Load3(Offset + NodeIndex * 12), 0) :
	                   uint4(NodesAndClusterBatches.Load2(Offset + NodeIndex * 8), 0, 0);
}

uint4 LoadCandidateNodeData(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
	return bPostPass ? uint4(NodesAndClusterBatches.Load3(Offset + NodeIndex * 12), 0) :
	                   uint4(NodesAndClusterBatches.Load2(Offset + NodeIndex * 8), 0, 0);
}
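
// The 12/8-byte strides above match GetCandidateNodeSize(): post-pass nodes carry the extra
// EnabledBitmask dword (RawData.z), while main-pass nodes only persist the x and y words.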

void StoreCandidateNodeDataCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, uint4 RawData, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
	if (bPostPass)
		NodesAndClusterBatches.Store3(Offset + NodeIndex * 12, RawData.xyz);
	else
		NodesAndClusterBatches.Store2(Offset + NodeIndex * 8, RawData.xy);
}

void StoreCandidateNodeData(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, uint4 RawData, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	const uint Offset = GetNodesAndBatchesOffset(bPostPass) + GetCandidateNodesOffset();
	if (bPostPass)
		NodesAndClusterBatches.Store3(Offset + NodeIndex * 12, RawData.xyz);
	else
		NodesAndClusterBatches.Store2(Offset + NodeIndex * 8, RawData.xy);
}

void StoreCandidateNodeCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, FCandidateNode Node, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	StoreCandidateNodeDataCoherent(NodesAndClusterBatches, NodeIndex, PackCandidateNode(Node), bPostPass);
}

void StoreCandidateNode(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, FCandidateNode Node, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	StoreCandidateNodeData(NodesAndClusterBatches, NodeIndex, PackCandidateNode(Node), bPostPass);
}

void ClearCandidateNodeCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	StoreCandidateNodeDataCoherent(NodesAndClusterBatches, NodeIndex, 0xFFFFFFFFu, bPostPass);
}

void ClearCandidateNode(RWByteAddressBuffer NodesAndClusterBatches, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);
	StoreCandidateNodeData(NodesAndClusterBatches, NodeIndex, 0xFFFFFFFFu, bPostPass);
}
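
// The scalar 0xFFFFFFFFu splats to a full uint4, marking the slot as empty. PackCandidateNode()
// asserts that no valid node packs to all ones, so readers can treat the sentinel as "no node".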

uint LoadClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	return NodesAndClusterBatches.Load(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4);
}

uint LoadClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	return NodesAndClusterBatches.Load(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4);
}

void AddToClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, uint Add, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.InterlockedAdd(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, Add);
}

void AddToClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, uint Add, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.InterlockedAdd(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, Add);
}

void ClearClusterBatchCoherent(RWCoherentByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.Store(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, 0);
}

void ClearClusterBatch(RWByteAddressBuffer NodesAndClusterBatches, uint BatchIndex, bool bPostPass)
{
	checkSlow(BatchIndex < GetMaxClusterBatches());
	NodesAndClusterBatches.Store(GetNodesAndBatchesOffset(bPostPass) + GetClusterBatchesOffset() + BatchIndex * 4, 0);
}
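
// A minimal clearing sketch (hypothetical compute entry point and buffer binding, not part of
// this header), one thread per cluster batch:
//
//   RWCoherentByteAddressBuffer OutNodesAndClusterBatches; // assumed binding
//
//   [numthreads(64, 1, 1)]
//   void ClearClusterBatchesCS(uint DispatchThreadId : SV_DispatchThreadID)
//   {
//       if (DispatchThreadId < GetMaxClusterBatches())
//       {
//           ClearClusterBatchCoherent(OutNodesAndClusterBatches, DispatchThreadId, false); // main pass
//           ClearClusterBatchCoherent(OutNodesAndClusterBatches, DispatchThreadId, true);  // post pass
//       }
//   }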