UnrealEngine/Engine/Shaders/Private/Nanite/NaniteRasterBinning.usf
// Copyright Epic Games, Inc. All Rights Reserved.
#include "../Common.ush"
#include "../SceneData.ush"
#include "../WaveOpUtil.ush"
#include "../ComputeShaderUtils.ush"
#define SPLIT_WORK_QUEUE (PATCHES) // TODO: Remove once shader rewriter has been fixed (UE-202409)
#ifndef NANITE_TESSELLATION
#define NANITE_TESSELLATION PATCHES
#endif
#include "NaniteAttributeDecode.ush"
#include "NaniteTessellation.ush"
#define RASTER_BIN_INIT (RASTER_BIN_PASS == NANITE_RASTER_BIN_INIT)
#define RASTER_BIN_COUNT (RASTER_BIN_PASS == NANITE_RASTER_BIN_COUNT)
#define RASTER_BIN_SCATTER (RASTER_BIN_PASS == NANITE_RASTER_BIN_SCATTER)
#define RASTER_BIN_RESERVE (RASTER_BIN_PASS == NANITE_RASTER_BIN_RESERVE)
#define RASTER_BIN_DEPTHBLOCK (RASTER_BIN_PASS == NANITE_RASTER_BIN_DEPTHBLOCK)
#define RASTER_BIN_FINALIZE (RASTER_BIN_PASS == NANITE_RASTER_BIN_FINALIZE)
#ifndef VIRTUAL_TEXTURE_TARGET
#define VIRTUAL_TEXTURE_TARGET 0
#endif
#ifndef FORCE_BATCHING
#define FORCE_BATCHING 0
#endif
#ifndef DEPTH_BUCKETING
#define DEPTH_BUCKETING 1
#endif
uint MeshPassIndex;
uint MinSupportedWaveSize;
uint MaxVisiblePatches;
uint MaxClusterIndirections;
RWStructuredBuffer<FNaniteRasterBinMeta> OutRasterBinMeta;
RWStructuredBuffer<uint> OutDepthBuckets;
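// Packing of MaterialFlags_DepthBlock (as implied by the two accessors below):
//   bits  0..15: packed FNaniteMaterialFlags
//   bits 16..31: depth block index for depth bucketing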
FNaniteMaterialFlags GetRasterBinMaterialFlags(uint BinIndex)
{
return UnpackNaniteMaterialFlags(OutRasterBinMeta[BinIndex].MaterialFlags_DepthBlock & 0xFFFFu);
}
uint GetRasterBinDepthBlock(uint BinIndex)
{
return OutRasterBinMeta[BinIndex].MaterialFlags_DepthBlock >> 16;
}
void InitRasterBin(uint BinIndex)
{
OutRasterBinMeta[BinIndex].ClusterOffset = 0;
OutRasterBinMeta[BinIndex].BinSWCount = 0;
OutRasterBinMeta[BinIndex].BinHWCount = 0;
}
uint2 GetRasterBinCount(uint BinIndex)
{
return uint2(OutRasterBinMeta[BinIndex].BinSWCount, OutRasterBinMeta[BinIndex].BinHWCount);
}
uint GetRasterBinCapacity(uint BinIndex)
{
const uint2 BinCount = GetRasterBinCount(BinIndex);
return (BinCount.x + BinCount.y);
}
void SetRasterBinOffset(uint BinIndex, uint BinOffset)
{
OutRasterBinMeta[BinIndex].ClusterOffset = BinOffset;
}
uint GetRasterBinOffset(uint BinIndex)
{
return OutRasterBinMeta[BinIndex].ClusterOffset;
}
uint GetRemappedRasterBinFromIndex(
uint InRelativeMaterialIndex,
uint InPrimitiveIndex,
uint InRegularRasterBinCount,
uint InRenderFlags,
bool bFallbackRasterBin,
bool bVoxel)
{
uint RasterBin = GetMaterialRasterBinFromIndex(
InRelativeMaterialIndex,
InPrimitiveIndex,
MeshPassIndex,
InRegularRasterBinCount,
bFallbackRasterBin
);
const FNaniteMaterialFlags MaterialFlags = GetRasterBinMaterialFlags(RasterBin);
return RemapRasterBin(RasterBin, InRenderFlags, MaterialFlags, bVoxel);
}
#if NANITE_TESSELLATION
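// Converts the visible patch counter (args slot [3], clamped to MaxVisiblePatches)
// into dispatch args for the 64-thread-per-group patch binning pass.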
[numthreads(1, 1, 1)]
void InitVisiblePatchesArgs()
{
const uint NumVisiblePatches = min( RWVisiblePatchesArgs[3], MaxVisiblePatches );
RWVisiblePatchesArgs[0] = ( NumVisiblePatches + 63u ) / 64u;
RWVisiblePatchesArgs[1] = 1;
RWVisiblePatchesArgs[2] = 1;
}
#endif
#if RASTER_BIN_INIT
uint RasterBinCount;
[numthreads(64, 1, 1)]
void RasterBinInit(uint RasterBinIndex : SV_DispatchThreadID)
{
if (RasterBinIndex < RasterBinCount)
{
InitRasterBin(RasterBinIndex);
}
}
#elif RASTER_BIN_RESERVE
uint RasterBinCount;
RWStructuredBuffer<uint> OutRangeAllocator;
RWBuffer<uint> OutRasterBinArgsSWHW;
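// Atomically reserves ClusterCount contiguous slots from the shared range allocator
// and returns the start of the reserved range.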
uint AllocateRasterBinRange(uint ClusterCount)
{
uint RangeStart;
WaveInterlockedAdd_(OutRangeAllocator[0], ClusterCount, RangeStart);
return RangeStart;
}
[numthreads(64, 1, 1)]
void RasterBinReserve(uint RasterBinIndex : SV_DispatchThreadID)
{
if (RasterBinIndex < RasterBinCount)
{
const uint RasterBinCapacity = GetRasterBinCapacity(RasterBinIndex);
const uint RasterBinOffset = AllocateRasterBinRange(RasterBinCapacity);
SetRasterBinOffset(RasterBinIndex, RasterBinOffset);
const uint ArgsOffset = (RasterBinIndex * NANITE_RASTERIZER_ARG_COUNT);
WriteRasterizerArgsSWHW(OutRasterBinArgsSWHW, ArgsOffset, 0u, 0u);
}
}
#elif RASTER_BIN_FINALIZE
uint RasterBinCount;
uint FinalizeMode;
RWBuffer<uint> OutRasterBinArgsSWHW;
[numthreads(64, 1, 1)]
void RasterBinFinalize(uint RasterBinIndex : SV_DispatchThreadID)
{
if (RasterBinIndex < RasterBinCount)
{
const uint Offset = (RasterBinIndex * NANITE_RASTERIZER_ARG_COUNT);
const uint HWCounterIndex = GetHWClusterCounterIndex(RenderFlags);
const uint BinOffset = GetRasterBinOffset(RasterBinIndex);
const uint SWClusterCount = OutRasterBinArgsSWHW[Offset + 0u];
const uint HWClusterCount = OutRasterBinArgsSWHW[Offset + HWCounterIndex];
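// Clamp both ranges against the shared indirection budget without unsigned underflow:
// min(End, Max) - min(Start, Max) is the portion of [Start, End) that fits below MaxClusterIndirections.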
const uint ClampedSWClusterCount = min(BinOffset + SWClusterCount, MaxClusterIndirections) - min(BinOffset, MaxClusterIndirections);
const uint ClampedHWClusterCount = min(BinOffset + SWClusterCount + HWClusterCount, MaxClusterIndirections) - min(BinOffset + SWClusterCount, MaxClusterIndirections);
OutRasterBinMeta[RasterBinIndex].BinSWCount = ClampedSWClusterCount;
OutRasterBinMeta[RasterBinIndex].BinHWCount = ClampedHWClusterCount;
OutRasterBinArgsSWHW[Offset + 0u] = ClampedSWClusterCount;
const bool bMeshShader = (RenderFlags & NANITE_RENDER_FLAG_MESH_SHADER) != 0;
BRANCH
if (FinalizeMode == 0 && bMeshShader) // Wrapping
{
#if !PLATFORM_REQUIRES_UNWRAPPED_MESH_SHADER_ARGS
// Need to span the launch size across X,Y,Z so any single dimension does not exceed the 64k
// limit as per https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html#dispatchmesh-api
// The product of X*Y*Z can be 2^22, so we convert 1D->3D->1D to work around this limitation.
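// The consuming rasterizer is expected to reconstruct the linear cluster index from the wrapped 3D group ID.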
const uint DimensionLimit = (1u << 16u) - 1u;
const uint3 GroupCount = GetGroupCountWrapped(ClampedHWClusterCount, DimensionLimit.xx);
// HW: ThreadGroupCountXYZ
OutRasterBinArgsSWHW[Offset + 4u] = GroupCount.x;
OutRasterBinArgsSWHW[Offset + 5u] = GroupCount.y;
OutRasterBinArgsSWHW[Offset + 6u] = GroupCount.z;
#else
OutRasterBinArgsSWHW[Offset + 4u] = ClampedHWClusterCount;
#endif
}
else if (FinalizeMode == 1 && bMeshShader) // VK_NV_mesh_shader
{
// VK_NV_mesh_shader draw indirect command (count, first, <unused>)
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDrawMeshTasksIndirectCommandNV.html
OutRasterBinArgsSWHW[Offset + 4u] = ClampedHWClusterCount;
OutRasterBinArgsSWHW[Offset + 5u] = 0u;
}
else
{
OutRasterBinArgsSWHW[Offset + HWCounterIndex] = ClampedHWClusterCount;
}
}
}
#elif RASTER_BIN_DEPTHBLOCK
// TODO: Permute based on wave width for slightly better perf?
[numthreads(64, 1, 1)]
void RasterBinDepthBlock(uint DepthBlockIndex : SV_GroupID, uint ThreadIndex : SV_GroupIndex)
{
const uint LaneIndex = WaveGetLaneIndex();
const uint LaneCount = WaveGetLaneCount();
// Make sure this is just a single wave
if (ThreadIndex >= LaneCount)
{
return;
}
uint PrevSum = 0;
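// Exclusive prefix sum over the block's depth buckets (converting counts into offsets),
// LaneCount entries at a time; PrevSum carries the running total across iterations.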
for (uint BaseIndex = 0; BaseIndex < NANITE_NUM_DEPTH_BUCKETS_PER_BLOCK; BaseIndex += LaneCount)
{
uint Index = DepthBlockIndex * NANITE_NUM_DEPTH_BUCKETS_PER_BLOCK + BaseIndex + LaneIndex;
const uint Count = OutDepthBuckets[Index];
const uint PrefixSum = PrevSum + WavePrefixSum(Count);
PrevSum = WaveReadLaneLast(PrefixSum + Count);
OutDepthBuckets[Index] = PrefixSum;
}
}
#else
Buffer<uint> InClusterOffsetSWHW;
StructuredBuffer<uint2> InClusterCountSWHW;
StructuredBuffer<uint2> InTotalPrevDrawClusters;
uint bUsePrimOrMeshShader;
uint RegularMaterialRasterBinCount;
#if RASTER_BIN_SCATTER
RWBuffer<uint> OutRasterBinArgsSWHW;
RWStructuredBuffer<uint2> OutRasterBinData;
#endif
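// Extracts one 5-bit batch entry that may straddle a dword boundary. Entries are stored
// biased by -1, so the +1 restores the actual batch triangle count (1..32).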
uint ExtractVertReuseBatchInfo(uint2 PackedData, uint BitOffset)
{
uint BitConsumed = min(5, 32 - BitOffset);
uint BatchTriCount = BitFieldExtractU32(PackedData.x, BitConsumed, BitOffset);
if (5 - BitConsumed > 0)
{
BatchTriCount |= (BitFieldExtractU32(PackedData.y, 5 - BitConsumed, 0) << BitConsumed);
}
return BatchTriCount + 1;
}
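// Reads the BatchIndex-th batch entry, either from the dwords inlined in the cluster header
// (batch entries start at bit 12, past the three 4-bit batch counts) or from the page-resident
// table that follows the batch count table.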
uint DecodeVertReuseBatchInfo(FCluster Cluster, uint BatchInfoOffset, uint BatchIndex, bool bInlined)
{
uint BitOffset = CondMask(bInlined, 12u, Cluster.VertReuseBatchCountTableSize * 4u) + (BatchInfoOffset + BatchIndex) * 5;
uint DwordOffset = BitOffset >> 5;
BitOffset -= DwordOffset * 32;
uint2 PackedData;
if (bInlined)
{
PackedData = uint2(Cluster.VertReuseBatchInfo[DwordOffset], Cluster.VertReuseBatchInfo[DwordOffset + 1]);
}
else
{
PackedData = ClusterPageData.Load2(Cluster.PageBaseAddress + (Cluster.VertReuseBatchCountTableOffset + DwordOffset) * 4);
}
return ExtractVertReuseBatchInfo(PackedData, BitOffset);
}
#if RASTER_BIN_COUNT
void IncrementRasterBinCount(uint BinIndex, bool bSoftware, uint BatchCount, uint FlatDepthBucket)
{
// One atomic per lane causes severe contention that ends up dominating execution time.
// This path uses wave ops to reduce it to one atomic per unique bin in the wave.
// As clusters are allocated in ranges at group granularity (currently ~32 clusters),
// the expectation is that most waves will only have one or at most a few unique bins.
bool bDone = false;
while (WaveActiveAnyTrue(!bDone))
{
if (!bDone)
{
const uint UniformBinIndex = WaveReadLaneFirst(BinIndex);
const bool bUniformSoftware = (bool)WaveReadLaneFirst((uint)bSoftware);
if (BinIndex == UniformBinIndex && bSoftware == bUniformSoftware)
{
if (bUniformSoftware)
{
WaveInterlockedAdd(OutRasterBinMeta[UniformBinIndex].BinSWCount, BatchCount);
}
else
{
WaveInterlockedAdd(OutRasterBinMeta[UniformBinIndex].BinHWCount, BatchCount);
}
bDone = true;
}
}
}
#if DEPTH_BUCKETING
BRANCH
if (FlatDepthBucket != 0xFFFFFFFFu)
{
// TODO: We could merge this with the raster counters above if all the counters were in the same buffer. Worthwhile?
// TODO: Does scalarization make sense here?
InterlockedAdd(OutDepthBuckets[FlatDepthBucket], BatchCount);
}
#endif
}
#elif RASTER_BIN_SCATTER
uint AllocateRasterBinCluster(uint BinIndex, bool bSoftware, uint BatchCount, uint FlatDepthBucket)
{
uint ClusterOffset = 0u;
uint Offset = (BinIndex * NANITE_RASTERIZER_ARG_COUNT);
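// SW clusters accumulate into arg 0; HW clusters into arg 4 (group count) on the
// mesh/primitive shader paths, or arg 5 (the indirect draw's instance count) otherwise.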
if (!bSoftware)
{
Offset += CondMask((RenderFlags & NANITE_RENDER_FLAG_MESH_SHADER) || (RenderFlags & NANITE_RENDER_FLAG_PRIMITIVE_SHADER), 4u, 5u);
}
bool bDone = false;
while(WaveActiveAnyTrue(!bDone))
{
if(!bDone)
{
const uint UniformOffset = WaveReadLaneFirst(Offset);
if (Offset == UniformOffset)
{
WaveInterlockedAdd_(OutRasterBinArgsSWHW[UniformOffset], BatchCount, ClusterOffset);
bDone = true;
}
}
}
const uint2 BinCount = GetRasterBinCount(BinIndex);
#if DEPTH_BUCKETING
BRANCH
if (FlatDepthBucket != 0xFFFFFFFFu)
{
// TODO: We could merge this with the raster counters above if all the counters were in the same buffer. Worthwhile?
// TODO: Does scalarization make sense here? Initial test says no.
// TODO: Consider still using the reverse order inside the buckets for slightly better order?
InterlockedAdd(OutDepthBuckets[FlatDepthBucket], BatchCount, ClusterOffset);
ClusterOffset = (!bSoftware ? BinCount.x : 0u) + ClusterOffset;
}
else
#endif
{
if (bSoftware)
{
ClusterOffset = BinCount.x - ClusterOffset - BatchCount; // Write clusters in reverse order for better z-test rejection for voxels and masked materials
}
else
{
ClusterOffset = BinCount.x + ClusterOffset; // HW cluster list already reversed, so write from beginning
}
}
return GetRasterBinOffset(BinIndex) + ClusterOffset;
}
#endif
void ExportRasterBin(uint RasterBin, FNaniteMaterialFlags BinMaterialFlags, uint ClusterIndex, uint RangeStart, uint RangeEnd, uint BatchCount, uint BatchInfoOffset, FCluster Cluster, bool bBatchInfoInlined, bool bSoftware, uint DepthBucket)
{
if (MinSupportedWaveSize < 32 && BinMaterialFlags.bPixelProgrammable)
{
// SW pixel programmable path requires wave size >= 32. Fall back to HW raster when that is not guaranteed.
bSoftware = false;
}
if (!BinMaterialFlags.bNoDerivativeOps)
{
// Materials requiring finite differences must run the HW path (SW lanes are processing random triangles)
bSoftware = false;
}
#if FORCE_BATCHING
const bool bForceBatching = true;
#else
const bool bForceBatching = false;
#endif
bool bUseBatch = !bSoftware && bUsePrimOrMeshShader && (bForceBatching || BinMaterialFlags.bVertexProgrammable);
if (BinMaterialFlags.bDisplacement)
{
// Displacement always takes the SW path and always uses vert reuse batching
bSoftware = true;
bUseBatch = true;
}
if (Cluster.bVoxel)
{
bSoftware = true;
bUseBatch = false;
}
BatchCount = CondMask(bUseBatch, BatchCount, 1u);
uint FlatDepthBucket = 0xFFFFFFFFu;
#if DEPTH_BUCKETING
uint DepthBlock = GetRasterBinDepthBlock(RasterBin);
if (DepthBlock != 0xFFFFu)
{
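// SW and HW occupy adjacent depth blocks: the bin's base block for SW, base + 1 for HW.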
DepthBlock += (bSoftware ? 0u : 1u);
FlatDepthBucket = DepthBlock * NANITE_NUM_DEPTH_BUCKETS_PER_BLOCK + DepthBucket;
}
#endif
#if RASTER_BIN_COUNT
IncrementRasterBinCount(RasterBin, bSoftware, BatchCount, FlatDepthBucket);
#elif RASTER_BIN_SCATTER
const uint BinClusterMapping = AllocateRasterBinCluster(RasterBin, bSoftware, BatchCount, FlatDepthBucket);
// Trim any overflow exports
BatchCount = min(BinClusterMapping + BatchCount, MaxClusterIndirections) - min(BinClusterMapping, MaxClusterIndirections);
for (uint BatchIndex = 0; BatchIndex < BatchCount; ++BatchIndex)
{
if (bUseBatch)
{
uint BatchTriCount = DecodeVertReuseBatchInfo(Cluster, BatchInfoOffset, BatchIndex, bBatchInfoInlined);
RangeEnd = RangeStart + BatchTriCount;
}
OutRasterBinData[BinClusterMapping + BatchIndex].x = ClusterIndex;
OutRasterBinData[BinClusterMapping + BatchIndex].y = (RangeStart << 16) | RangeEnd;
RangeStart = RangeEnd;
}
#endif
}
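// Invokes Function once per unique Value across the wave; all lanes sharing that value
// participate in the same iteration, with UniformValue scalarized (wave-uniform).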
template< typename T, typename FFunction >
void ScalarizeForEachMatching( T Value, FFunction Function )
{
bool bDone = false;
while( WaveActiveAnyTrue( !bDone ) )
{
if( !bDone )
{
T UniformValue = WaveReadLaneFirst( Value );
if( UniformValue == Value )
{
Function( UniformValue );
bDone = true;
}
}
}
}
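// Usage sketch (hypothetical FCountBin functor; assumes HLSL 2021 operator() support):
//   struct FCountBin { void operator()(uint UniformBin) { WaveInterlockedAddScalar(OutRasterBinMeta[UniformBin].BinSWCount, 1); } };
//   FCountBin Op; ScalarizeForEachMatching(RasterBin, Op);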
bool IsNaniteMaterialMergingAllowed(FNaniteMaterialFlags MaterialFlags)
{
return !MaterialFlags.bDisplacement; // Merging ranges is incompatible with tessellation. Cluster rasterizer assumes UVDensities is uniform.
}
[numthreads(64, 1, 1)]
void RasterBinBuild(uint RelativeClusterIndex : SV_DispatchThreadID, uint GroupThreadIndex : SV_GroupIndex)
{
#if PATCHES
const uint VisibleIndex = RelativeClusterIndex;
const uint NumVisiblePatches = min( VisiblePatchesArgs[3], MaxVisiblePatches );
if( VisibleIndex < NumVisiblePatches )
{
#if NANITE_TESSELLATION_PATCH_REFS
const uint PatchIndex = VisiblePatches.Load(VisibleIndex * 8);
const uint PatchEncoded = SplitWorkQueue.DataBuffer_Load(PatchIndex * 16);
#else
const uint PatchEncoded = VisiblePatches.Load(VisibleIndex * 16);
#endif
uint VisibleClusterIndex = PatchEncoded.x >> 7;
uint TriIndex = PatchEncoded.x & 0x7F;
FVisibleCluster VisibleCluster = GetVisibleCluster( VisibleClusterIndex, VIRTUAL_TEXTURE_TARGET );
FInstanceSceneData InstanceData = GetInstanceSceneDataUnchecked( VisibleCluster.InstanceId );
FCluster Cluster = GetCluster( VisibleCluster.PageIndex, VisibleCluster.ClusterIndex );
const uint RasterBin = GetRemappedRasterBinFromIndex(
GetRelativeMaterialIndex(Cluster, TriIndex),
InstanceData.PrimitiveId,
RegularMaterialRasterBinCount,
RenderFlags,
/* bFallbackRasterBin */ false,
Cluster.bVoxel
);
uint IndexInBin = 0;
// Scalarize atomics
bool bDone = false;
while( WaveActiveAnyTrue( !bDone ) )
{
if( !bDone )
{
const uint UniformBinIndex = WaveReadLaneFirst( RasterBin );
if( UniformBinIndex == RasterBin )
{
#if RASTER_BIN_COUNT
WaveInterlockedAddScalar(OutRasterBinMeta[UniformBinIndex].BinSWCount, 1);
#elif RASTER_BIN_SCATTER
WaveInterlockedAddScalar_( OutRasterBinArgsSWHW[ UniformBinIndex * NANITE_RASTERIZER_ARG_COUNT + 3 ], 1, IndexInBin );
const uint Num = IndexInBin + WaveActiveCountBits(true);
if(WaveIsFirstLane())
{
const uint NumGroups = (Num + MaxPatchesPerGroup - 1) / MaxPatchesPerGroup; // Slow integer divide. Fix me?
InterlockedMax(OutRasterBinArgsSWHW[ UniformBinIndex * NANITE_RASTERIZER_ARG_COUNT + 0 ], NumGroups);
}
#endif
bDone = true;
}
}
}
#if RASTER_BIN_SCATTER
const uint BinMapping = GetRasterBinOffset( RasterBin ) + IndexInBin;
OutRasterBinData[ BinMapping ].x = VisibleIndex;
OutRasterBinData[ BinMapping ].y = 0;
#endif
}
#else
const uint SWClusterCount = InClusterCountSWHW[0].x;
const uint HWClusterCount = InClusterCountSWHW[0].y;
const bool bSoftware = RelativeClusterIndex < SWClusterCount;
const uint ClusterCount = SWClusterCount + HWClusterCount;
if (RelativeClusterIndex < ClusterCount)
{
RelativeClusterIndex = CondMask(bSoftware, RelativeClusterIndex, RelativeClusterIndex - SWClusterCount);
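// RelativeClusterIndex is now local to its own SW or HW cluster list.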
uint ClusterOffset = 0;
const bool bHasPrevDrawData = (RenderFlags & NANITE_RENDER_FLAG_HAS_PREV_DRAW_DATA) != 0u;
BRANCH
if (bHasPrevDrawData)
{
ClusterOffset += CondMask(bSoftware, InTotalPrevDrawClusters[0].x, InTotalPrevDrawClusters[0].y);
}
#if IS_POST_PASS
ClusterOffset += CondMask(bSoftware, InClusterOffsetSWHW[0], InClusterOffsetSWHW[GetHWClusterCounterIndex(RenderFlags)]);
#endif
uint VisibleClusterIndex = RelativeClusterIndex + ClusterOffset;
// HW clusters are written from the top
VisibleClusterIndex = CondMask(bSoftware, VisibleClusterIndex, (MaxVisibleClusters - 1) - VisibleClusterIndex);
FVisibleCluster VisibleCluster = GetVisibleCluster(VisibleClusterIndex, VIRTUAL_TEXTURE_TARGET);
FInstanceSceneData InstanceData = GetInstanceSceneDataUnchecked(VisibleCluster);
FCluster Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);
const bool bFallbackBin = (VisibleCluster.Flags & NANITE_CULLING_FLAG_FALLBACK_RASTER) != 0;
const uint DepthBucket = VisibleCluster.DepthBucket;
BRANCH
if (IsMaterialFastPath(Cluster))
{
uint RasterBin0 = 0;
uint RasterBin1 = 0;
uint RasterBin2 = 0;
uint RasterLen0 = 0;
uint RasterLen1 = 0;
uint RasterLen2 = 0;
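// Fast path: the first dword of VertReuseBatchInfo packs the three per-material batch counts
// as 4-bit fields in bits 0..11; the 5-bit batch entries follow from bit 12 (see DecodeVertReuseBatchInfo).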
uint BatchCount0 = BitFieldExtractU32(Cluster.VertReuseBatchInfo.x, 4, 0);
uint BatchCount1 = BitFieldExtractU32(Cluster.VertReuseBatchInfo.x, 4, 4);
uint BatchCount2 = BitFieldExtractU32(Cluster.VertReuseBatchInfo.x, 4, 8);
uint BatchInfoOffset0 = 0;
uint BatchInfoOffset1 = BatchCount0;
uint BatchInfoOffset2 = BatchCount0 + BatchCount1;
FNaniteMaterialFlags BinMaterialFlags0 = (FNaniteMaterialFlags)0;
FNaniteMaterialFlags BinMaterialFlags1 = (FNaniteMaterialFlags)0;
FNaniteMaterialFlags BinMaterialFlags2 = (FNaniteMaterialFlags)0;
// Length is remaining triangles after Material0 and Material1
const uint Material2Length = Cluster.MaterialTotalLength - (Cluster.Material0Length + Cluster.Material1Length);
// The 0th material range is always non-zero length
{
RasterBin0 = GetRemappedRasterBinFromIndex(Cluster.Material0Index, InstanceData.PrimitiveId, RegularMaterialRasterBinCount, RenderFlags, bFallbackBin, Cluster.bVoxel);
BinMaterialFlags0 = GetRasterBinMaterialFlags(RasterBin0);
}
BRANCH
if (Cluster.Material1Length > 0u)
{
RasterBin1 = GetRemappedRasterBinFromIndex(Cluster.Material1Index, InstanceData.PrimitiveId, RegularMaterialRasterBinCount, RenderFlags, bFallbackBin, Cluster.bVoxel);
BinMaterialFlags1 = GetRasterBinMaterialFlags(RasterBin1);
}
BRANCH
if (Material2Length > 0)
{
RasterBin2 = GetRemappedRasterBinFromIndex(Cluster.Material2Index, InstanceData.PrimitiveId, RegularMaterialRasterBinCount, RenderFlags, bFallbackBin, Cluster.bVoxel);
BinMaterialFlags2 = GetRasterBinMaterialFlags(RasterBin2);
}
const bool bAllowMerging0 = IsNaniteMaterialMergingAllowed(BinMaterialFlags0);
const bool bAllowMerging1 = IsNaniteMaterialMergingAllowed(BinMaterialFlags1);
const bool bAllowMerging2 = IsNaniteMaterialMergingAllowed(BinMaterialFlags2);
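// Merge adjacent material ranges that resolve to the same raster bin, so the cluster is
// exported once per unique bin rather than once per material range.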
BRANCH
if (RasterBin0 == RasterBin1 && bAllowMerging0 && bAllowMerging1)
{
if (RasterBin0 == RasterBin2 && bAllowMerging2)
{
RasterLen0 = Cluster.MaterialTotalLength;
BatchCount0 += BatchCount1 + BatchCount2;
}
else
{
RasterLen0 = (Cluster.Material0Length + Cluster.Material1Length);
RasterLen2 = Material2Length;
BatchCount0 += BatchCount1;
}
}
else if (RasterBin1 == RasterBin2 && bAllowMerging1 && bAllowMerging2)
{
RasterLen0 = Cluster.Material0Length;
RasterLen1 = Cluster.MaterialTotalLength - Cluster.Material0Length;
BatchCount1 += BatchCount2;
}
else
{
RasterLen0 = Cluster.Material0Length;
RasterLen1 = Cluster.Material1Length;
RasterLen2 = Material2Length;
}
// The 0th material range is always non-zero length
{
ExportRasterBin(RasterBin0, BinMaterialFlags0, VisibleClusterIndex, 0u, RasterLen0, BatchCount0, BatchInfoOffset0, Cluster, true, bSoftware, DepthBucket);
}
BRANCH
if (RasterLen1 > 0)
{
const uint Range1Start = RasterLen0;
const uint Range1End = Range1Start + RasterLen1;
ExportRasterBin(RasterBin1, BinMaterialFlags1, VisibleClusterIndex, Range1Start, Range1End, BatchCount1, BatchInfoOffset1, Cluster, true, bSoftware, DepthBucket);
}
BRANCH
if (RasterLen2 > 0)
{
const uint Range2Start = (RasterLen0 + RasterLen1);
const uint Range2End = Range2Start + RasterLen2;
ExportRasterBin(RasterBin2, BinMaterialFlags2, VisibleClusterIndex, Range2Start, Range2End, BatchCount2, BatchInfoOffset2, Cluster, true, bSoftware, DepthBucket);
}
}
else
{
uint CurrentRangeBin = 0xFFFFFFFFu;
uint CurrentRangeStart = 0;
uint CurrentRangeEnd = 0;
uint CurrentBatchCount = 0;
uint CurrentBatchInfoOffset = 0;
FNaniteMaterialFlags CurrentBinMaterialFlags = (FNaniteMaterialFlags)0;
bool bCurrentAllowMerging = false;
FBitStreamReaderState BatchCountStreamState = BitStreamReader_Create_Aligned(Cluster.PageBaseAddress + Cluster.VertReuseBatchCountTableOffset * 4, 0, NANITE_MAX_CLUSTER_MATERIALS * 4);
uint TableOffset = Cluster.PageBaseAddress + Cluster.MaterialTableOffset * 4u;
LOOP for (uint TableEntry = 0; TableEntry < Cluster.MaterialTableLength; ++TableEntry)
{
const uint EncodedRange = ClusterPageData.Load(TableOffset);
TableOffset += 4;
uint TriStart;
uint TriLength;
uint MaterialIndex;
DecodeMaterialRange(EncodedRange, TriStart, TriLength, MaterialIndex);
const uint RasterBinN = GetRemappedRasterBinFromIndex(MaterialIndex, InstanceData.PrimitiveId, RegularMaterialRasterBinCount, RenderFlags, bFallbackBin, Cluster.bVoxel);
const FNaniteMaterialFlags BinMaterialFlags = GetRasterBinMaterialFlags(RasterBinN);
const bool bAllowMerging = IsNaniteMaterialMergingAllowed(BinMaterialFlags);
const uint BatchCount = BitStreamReader_Read_RO(ClusterPageData, BatchCountStreamState, 4, 4);
// Merge into the current run when the raster bin matches and both the current and previous
// ranges allow merging (material table ranges are contiguous by construction).
if ((RasterBinN == CurrentRangeBin) && bAllowMerging && bCurrentAllowMerging)
{
// Update current range
CurrentRangeEnd = TriStart + TriLength;
CurrentBatchCount += BatchCount;
}
else
{
// Not merging previous range, and previous range has valid triangles that need to be flushed
BRANCH
if (CurrentRangeEnd > 0)
{
ExportRasterBin(CurrentRangeBin, CurrentBinMaterialFlags, VisibleClusterIndex, CurrentRangeStart, CurrentRangeEnd, CurrentBatchCount, CurrentBatchInfoOffset, Cluster, false, bSoftware, DepthBucket);
}
CurrentRangeBin = RasterBinN;
CurrentRangeStart = TriStart;
CurrentRangeEnd = CurrentRangeStart + TriLength;
CurrentBatchInfoOffset += CurrentBatchCount;
CurrentBatchCount = BatchCount;
CurrentBinMaterialFlags = BinMaterialFlags;
bCurrentAllowMerging = bAllowMerging;
}
}
// Need to flush current range
ExportRasterBin(CurrentRangeBin, CurrentBinMaterialFlags, VisibleClusterIndex, CurrentRangeStart, CurrentRangeEnd, CurrentBatchCount, CurrentBatchInfoOffset, Cluster, false, bSoftware, DepthBucket);
}
}
#endif
}
#endif