// Copyright Epic Games, Inc. All Rights Reserved.

#include "ClusterDAG.h"

#include "Async/Async.h"
#include "Async/ParallelFor.h"
#include "GraphPartitioner.h"
#include "BVHCluster.h"
#include "MeshSimplify.h"

namespace Nanite
{

void FClusterDAG::AddMesh(
	const FConstMeshBuildVertexView& Verts,
	TArrayView< const uint32 > Indexes,
	TArrayView< const int32 > MaterialIndexes,
	const FBounds3f& VertexBounds,
	const FVertexFormat& VertexFormat )
{
	TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::ClusterTriangles);

	uint32 Time0 = FPlatformTime::Cycles();

	LOG_CRC( Verts );
	LOG_CRC( Indexes );

	bHasSkinning |= VertexFormat.NumBoneInfluences > 0;
	bHasTangents |= VertexFormat.bHasTangents;
	bHasColors |= VertexFormat.bHasColors;

	uint32 NumTriangles = Indexes.Num() / 3;

	FAdjacency Adjacency( Indexes.Num() );
	FEdgeHash EdgeHash( Indexes.Num() );

	auto GetPosition = [ &Verts, &Indexes ]( uint32 EdgeIndex )
	{
		return Verts.Position[ Indexes[ EdgeIndex ] ];
	};
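
	// Two-pass edge matching: every directed edge is first inserted into a hash
	// keyed on its endpoint positions, then each edge looks up its reverse twin.
	// An edge matched by more than one twin is flagged with -2 and resolved
	// deterministically after the parallel passes.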
	ParallelFor( TEXT("Nanite.ClusterTriangles.PF"), Indexes.Num(), 4096,
		[&]( int32 EdgeIndex )
		{
			EdgeHash.Add_Concurrent( EdgeIndex, GetPosition );
		} );

	ParallelFor( TEXT("Nanite.ClusterTriangles.PF"), Indexes.Num(), 1024,
		[&]( int32 EdgeIndex )
		{
			int32 AdjIndex = -1;
			int32 AdjCount = 0;
			EdgeHash.ForAllMatching( EdgeIndex, false, GetPosition,
				[&]( int32 EdgeIndex, int32 OtherEdgeIndex )
				{
					AdjIndex = OtherEdgeIndex;
					AdjCount++;
				} );

			if( AdjCount > 1 )
				AdjIndex = -2;

			Adjacency.Direct[ EdgeIndex ] = AdjIndex;
		} );

	FDisjointSet DisjointSet( NumTriangles );

	for( uint32 EdgeIndex = 0, Num = Indexes.Num(); EdgeIndex < Num; EdgeIndex++ )
	{
		if( Adjacency.Direct[ EdgeIndex ] == -2 )
		{
			// EdgeHash is built in parallel, so we need to sort before use to ensure determinism.
			// This path is only executed in the rare event that an edge is shared by more than two triangles,
			// so the performance impact should be negligible in practice.
			TArray< TPair< int32, int32 >, TInlineAllocator< 16 > > Edges;
			EdgeHash.ForAllMatching( EdgeIndex, false, GetPosition,
				[&]( int32 EdgeIndex0, int32 EdgeIndex1 )
				{
					Edges.Emplace( EdgeIndex0, EdgeIndex1 );
				} );
			Edges.Sort();

			for( const TPair< int32, int32 >& Edge : Edges )
			{
				Adjacency.Link( Edge.Key, Edge.Value );
			}
		}

		Adjacency.ForAll( EdgeIndex,
			[&]( int32 EdgeIndex0, int32 EdgeIndex1 )
			{
				if( EdgeIndex0 > EdgeIndex1 )
					DisjointSet.UnionSequential( EdgeIndex0 / 3, EdgeIndex1 / 3 );
			} );
	}
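
	// Triangles now form disjoint sets of edge-connected islands; these components
	// feed BuildLocalityLinks so spatially close islands can still be grouped.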

	uint32 BoundaryTime = FPlatformTime::Cycles();
	UE_LOG( LogStaticMesh, Log,
		TEXT("Adjacency [%.2fs], tris: %i, UVs %i%s%s"),
		FPlatformTime::ToMilliseconds( BoundaryTime - Time0 ) / 1000.0f,
		Indexes.Num() / 3,
		VertexFormat.NumTexCoords,
		VertexFormat.bHasTangents ? TEXT(", Tangents") : TEXT(""),
		VertexFormat.bHasColors ? TEXT(", Color") : TEXT("") );

#if 0//NANITE_VOXEL_DATA
	FBVHCluster Partitioner( NumTriangles, FCluster::ClusterSize - 4, FCluster::ClusterSize );
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::PartitionGraph);

		Partitioner.Build(
			[ &Verts, &Indexes ]( uint32 TriIndex )
			{
				FBounds3f Bounds;
				Bounds = Verts.Position[ Indexes[ TriIndex * 3 + 0 ] ];
				Bounds += Verts.Position[ Indexes[ TriIndex * 3 + 1 ] ];
				Bounds += Verts.Position[ Indexes[ TriIndex * 3 + 2 ] ];
				return Bounds;
			} );

		check( Partitioner.Ranges.Num() );

		LOG_CRC( Partitioner.Ranges );
	}
#else
	FGraphPartitioner Partitioner( NumTriangles, FCluster::ClusterSize - 4, FCluster::ClusterSize );

	{
		TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::PartitionGraph);

		auto GetCenter = [ &Verts, &Indexes ]( uint32 TriIndex )
		{
			FVector3f Center;
			Center = Verts.Position[ Indexes[ TriIndex * 3 + 0 ] ];
			Center += Verts.Position[ Indexes[ TriIndex * 3 + 1 ] ];
			Center += Verts.Position[ Indexes[ TriIndex * 3 + 2 ] ];
			return Center * (1.0f / 3.0f);
		};
		Partitioner.BuildLocalityLinks( DisjointSet, VertexBounds, MaterialIndexes, GetCenter );

		auto* RESTRICT Graph = Partitioner.NewGraph( NumTriangles * 3 );

		for( uint32 i = 0; i < NumTriangles; i++ )
		{
			Graph->AdjacencyOffset[i] = Graph->Adjacency.Num();

			uint32 TriIndex = Partitioner.Indexes[i];

			for( int k = 0; k < 3; k++ )
			{
				Adjacency.ForAll( 3 * TriIndex + k,
					[ &Partitioner, Graph ]( int32 EdgeIndex, int32 AdjIndex )
					{
						Partitioner.AddAdjacency( Graph, AdjIndex / 3, 4 * 65 );
					} );
			}

			Partitioner.AddLocalityLinks( Graph, TriIndex, 1 );
		}
		Graph->AdjacencyOffset[ NumTriangles ] = Graph->Adjacency.Num();
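
		// The graph now weights genuine shared edges heavily (4 * 65) and locality
		// links lightly (1), so PartitionStrict prefers cutting loose spatial links
		// rather than real mesh adjacency.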

		bool bSingleThreaded = NumTriangles < 5000;

		Partitioner.PartitionStrict( Graph, !bSingleThreaded );
		check( Partitioner.Ranges.Num() );

		LOG_CRC( Partitioner.Ranges );
	}
#endif

	const uint32 OptimalNumClusters = FMath::DivideAndRoundUp< int32 >( Indexes.Num(), FCluster::ClusterSize * 3 );

	uint32 ClusterTime = FPlatformTime::Cycles();
	UE_LOG( LogStaticMesh, Log, TEXT("Clustering [%.2fs]. Ratio: %f"), FPlatformTime::ToMilliseconds( ClusterTime - BoundaryTime ) / 1000.0f, (float)Partitioner.Ranges.Num() / (float)OptimalNumClusters );

	const uint32 BaseCluster = Clusters.Num();
	Clusters.AddDefaulted( Partitioner.Ranges.Num() );

	{
		TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::BuildClusters);
		ParallelFor( TEXT("Nanite.BuildClusters.PF"), Partitioner.Ranges.Num(), 1024,
			[&]( int32 Index )
			{
				auto& Range = Partitioner.Ranges[ Index ];

				Clusters[ BaseCluster + Index ] = FCluster(
					Verts,
					Indexes,
					MaterialIndexes,
					VertexFormat,
					Range.Begin, Range.End,
					Partitioner.Indexes, Partitioner.SortedTo, Adjacency );

				// A negative edge length denotes a leaf cluster.
				Clusters[ BaseCluster + Index ].EdgeLength *= -1.0f;
			});
	}

	uint32 LeavesTime = FPlatformTime::Cycles();
	UE_LOG( LogStaticMesh, Log, TEXT("Leaves [%.2fs]"), FPlatformTime::ToMilliseconds( LeavesTime - ClusterTime ) / 1000.0f );
}

static const uint32 MinGroupSize = 8;
static const uint32 MaxGroupSize = 32;
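
// Builds the cluster DAG for one mesh: each pass groups the current level's
// clusters, merges and simplifies every group into parent clusters, and stops
// once the level has been reduced to a single root cluster.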
void FClusterDAG::ReduceMesh( uint32 ClusterRangeStart, uint32 ClusterRangeNum, uint32 MeshIndex )
{
	TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::DAG.ReduceMesh);

	if( ClusterRangeNum == 0 )
	{
		return;
	}

	TUniquePtr<FRayTracingScene> RayTracingScene;

#if NANITE_VOXEL_DATA
	if( Settings.bPreserveArea )
	{
		RayTracingScene = MakeUnique< FRayTracingScene >( Clusters, ClusterRangeStart, ClusterRangeNum );
	}
#endif

	uint32 LevelOffset = ClusterRangeStart;

	TAtomic< uint32 > NumClusters( Clusters.Num() );

	bool bFirstLevel = true;

	UE::Tasks::FCancellationToken* CancellationToken = UE::Tasks::FCancellationTokenScope::GetCurrentCancellationToken();
	while( true )
	{
		if (CancellationToken && CancellationToken->IsCanceled())
		{
			return;
		}

		TArrayView< FCluster > LevelClusters( &Clusters[ LevelOffset ], bFirstLevel ? ClusterRangeNum : (Clusters.Num() - LevelOffset) );
		bFirstLevel = false;

		uint32 NumExternalEdges = 0;

		float MinError = +MAX_flt;
		float MaxError = -MAX_flt;
		float AvgError = 0.0f;

		for( FCluster& Cluster : LevelClusters )
		{
			NumExternalEdges += Cluster.NumExternalEdges;
			TotalBounds += Cluster.Bounds;

			MinError = FMath::Min( MinError, Cluster.LODError );
			MaxError = FMath::Max( MaxError, Cluster.LODError );
			AvgError += Cluster.LODError;
		}
		AvgError /= (float)LevelClusters.Num();

		UE_LOG( LogStaticMesh, Verbose, TEXT("Num clusters %i. Error %.4f, %.4f, %.4f"), LevelClusters.Num(), MinError, AvgError, MaxError );

		uint32 MaxClusterSize = FCluster::ClusterSize;
		if( LevelClusters.Num() < 2 )
		{
			if( LevelClusters[0].NumTris )
			{
				break;
			}
			else if( LevelClusters[0].MaterialIndexes.Num() > 64 )
			{
				MaxClusterSize = 64;
			}
			else if( LevelClusters[0].MaterialIndexes.Num() > 32 )
			{
				MaxClusterSize = 32;
			}
			else
			{
				break;
			}
		}

		if( LevelClusters.Num() <= MaxGroupSize )
		{
			TArray< uint32, TInlineAllocator< MaxGroupSize > > Children;

			uint32 NumGroupElements = 0;
			for( FCluster& Cluster : LevelClusters )
			{
				NumGroupElements += Cluster.MaterialIndexes.Num();
				Children.Add( LevelOffset++ );
			}
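
			// Each reduction targets roughly 2:1 decimation, so the parents are
			// conservatively allocated half as many elements as the children.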
			uint32 MaxParents = FMath::DivideAndRoundUp( NumGroupElements, MaxClusterSize * 2 );

			LevelOffset = Clusters.Num();
			Clusters.AddDefaulted( MaxParents );
			Groups.AddDefaulted( 1 );

			ReduceGroup( RayTracingScene.Get(), NumClusters, Children, MaxClusterSize, MaxParents, Groups.Num() - 1, MeshIndex );

			check( LevelOffset < NumClusters );

			// Correct num to atomic count
			Clusters.SetNum( NumClusters, EAllowShrinking::No );

			continue;
		}

		struct FExternalEdge
		{
			uint32 ClusterIndex;
			int32 EdgeIndex;
		};
		TArray< FExternalEdge > ExternalEdges;
		FHashTable ExternalEdgeHash;
		TAtomic< uint32 > ExternalEdgeOffset(0);

		// We have a total count of NumExternalEdges so we can allocate a hash table without growing.
		ExternalEdges.AddUninitialized( NumExternalEdges );
		ExternalEdgeHash.Clear( 1 << FMath::FloorLog2( NumExternalEdges ), NumExternalEdges );

		// Add edges to hash table
		ParallelFor( TEXT("Nanite.BuildDAG.PF"), LevelClusters.Num(), 32,
			[&]( uint32 ClusterIndex )
			{
				FCluster& Cluster = LevelClusters[ ClusterIndex ];

				if (CancellationToken && CancellationToken->IsCanceled())
				{
					return;
				}

				for( int32 EdgeIndex = 0; EdgeIndex < Cluster.ExternalEdges.Num(); EdgeIndex++ )
				{
					if( Cluster.ExternalEdges[ EdgeIndex ] )
					{
						uint32 VertIndex0 = Cluster.Indexes[ EdgeIndex ];
						uint32 VertIndex1 = Cluster.Indexes[ Cycle3( EdgeIndex ) ];

						const FVector3f& Position0 = Cluster.GetPosition( VertIndex0 );
						const FVector3f& Position1 = Cluster.GetPosition( VertIndex1 );

						uint32 Hash0 = HashPosition( Position0 );
						uint32 Hash1 = HashPosition( Position1 );
						uint32 Hash = Murmur32( { Hash0, Hash1 } );

						uint32 ExternalEdgeIndex = ExternalEdgeOffset++;
						ExternalEdges[ ExternalEdgeIndex ] = { ClusterIndex, EdgeIndex };
						ExternalEdgeHash.Add_Concurrent( Hash, ExternalEdgeIndex );
					}
				}
			});

		if (CancellationToken && CancellationToken->IsCanceled())
		{
			return;
		}

		check( ExternalEdgeOffset == ExternalEdges.Num() );
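
		// The matching pass hashes each edge with its endpoints swapped
		// ( Hash1, Hash0 ), so a lookup finds the opposing half-edges that other
		// clusters inserted in forward order above.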

		TAtomic< uint32 > NumAdjacency(0);

		// Find matching edge in other clusters
		ParallelFor( TEXT("Nanite.BuildDAG.PF"), LevelClusters.Num(), 32,
			[&]( uint32 ClusterIndex )
			{
				FCluster& Cluster = LevelClusters[ ClusterIndex ];

				if (CancellationToken && CancellationToken->IsCanceled())
				{
					return;
				}

				for( int32 EdgeIndex = 0; EdgeIndex < Cluster.ExternalEdges.Num(); EdgeIndex++ )
				{
					if( Cluster.ExternalEdges[ EdgeIndex ] )
					{
						uint32 VertIndex0 = Cluster.Indexes[ EdgeIndex ];
						uint32 VertIndex1 = Cluster.Indexes[ Cycle3( EdgeIndex ) ];

						const FVector3f& Position0 = Cluster.GetPosition( VertIndex0 );
						const FVector3f& Position1 = Cluster.GetPosition( VertIndex1 );

						uint32 Hash0 = HashPosition( Position0 );
						uint32 Hash1 = HashPosition( Position1 );
						uint32 Hash = Murmur32( { Hash1, Hash0 } );

						for( uint32 ExternalEdgeIndex = ExternalEdgeHash.First( Hash ); ExternalEdgeHash.IsValid( ExternalEdgeIndex ); ExternalEdgeIndex = ExternalEdgeHash.Next( ExternalEdgeIndex ) )
						{
							FExternalEdge ExternalEdge = ExternalEdges[ ExternalEdgeIndex ];

							FCluster& OtherCluster = LevelClusters[ ExternalEdge.ClusterIndex ];

							if( OtherCluster.ExternalEdges[ ExternalEdge.EdgeIndex ] )
							{
								uint32 OtherVertIndex0 = OtherCluster.Indexes[ ExternalEdge.EdgeIndex ];
								uint32 OtherVertIndex1 = OtherCluster.Indexes[ Cycle3( ExternalEdge.EdgeIndex ) ];

								if( Position0 == OtherCluster.GetPosition( OtherVertIndex1 ) &&
									Position1 == OtherCluster.GetPosition( OtherVertIndex0 ) )
								{
									if( ClusterIndex != ExternalEdge.ClusterIndex )
									{
										// Increase its count
										Cluster.AdjacentClusters.FindOrAdd( ExternalEdge.ClusterIndex, 0 )++;

										// Can't break, or a triple edge might be connected non-deterministically.
										// Need to find all matches, not just the first.
									}
								}
							}
						}
					}
				}
				NumAdjacency += Cluster.AdjacentClusters.Num();

				// Force deterministic order of adjacency.
				Cluster.AdjacentClusters.KeySort(
					[ &LevelClusters ]( uint32 A, uint32 B )
					{
						return LevelClusters[A].GUID < LevelClusters[B].GUID;
					} );
			});

		if (CancellationToken && CancellationToken->IsCanceled())
		{
			return;
		}

		FDisjointSet DisjointSet( LevelClusters.Num() );

		for( uint32 ClusterIndex = 0; ClusterIndex < (uint32)LevelClusters.Num(); ClusterIndex++ )
		{
			for( auto& Pair : LevelClusters[ ClusterIndex ].AdjacentClusters )
			{
				uint32 OtherClusterIndex = Pair.Key;

				uint32 Count = LevelClusters[ OtherClusterIndex ].AdjacentClusters.FindChecked( ClusterIndex );
				check( Count == Pair.Value );

				if( ClusterIndex > OtherClusterIndex )
				{
					DisjointSet.UnionSequential( ClusterIndex, OtherClusterIndex );
				}
			}
		}

		FGraphPartitioner Partitioner( LevelClusters.Num(), MinGroupSize, MaxGroupSize );

		auto GetCenter = [&]( uint32 Index )
		{
			FBounds3f& Bounds = LevelClusters[ Index ].Bounds;
			return 0.5f * ( Bounds.Min + Bounds.Max );
		};
		Partitioner.BuildLocalityLinks( DisjointSet, TotalBounds, TArrayView< const int32 >(), GetCenter );

		if (CancellationToken && CancellationToken->IsCanceled())
		{
			return;
		}

		auto* RESTRICT Graph = Partitioner.NewGraph( NumAdjacency );

		for( int32 i = 0; i < LevelClusters.Num(); i++ )
		{
			Graph->AdjacencyOffset[i] = Graph->Adjacency.Num();

			uint32 ClusterIndex = Partitioner.Indexes[i];

			for( auto& Pair : LevelClusters[ ClusterIndex ].AdjacentClusters )
			{
				uint32 OtherClusterIndex = Pair.Key;
				uint32 NumSharedEdges = Pair.Value;

				const auto& Cluster0 = Clusters[ LevelOffset + ClusterIndex ];
				const auto& Cluster1 = Clusters[ LevelOffset + OtherClusterIndex ];

				bool bSiblings = Cluster0.GroupIndex != MAX_uint32 && Cluster0.GroupIndex == Cluster1.GroupIndex;
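
				// Shared edges between former siblings get a low weight (1 vs 16),
				// nudging the partitioner to group clusters that came from different
				// groups last level, so group boundaries don't persist across levels.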
				Partitioner.AddAdjacency( Graph, OtherClusterIndex, NumSharedEdges * ( bSiblings ? 1 : 16 ) + 4 );
			}

			Partitioner.AddLocalityLinks( Graph, ClusterIndex, 1 );
		}
		Graph->AdjacencyOffset[ Graph->Num ] = Graph->Adjacency.Num();

		LOG_CRC( Graph->Adjacency );
		LOG_CRC( Graph->AdjacencyCost );
		LOG_CRC( Graph->AdjacencyOffset );

		bool bSingleThreaded = LevelClusters.Num() <= 32;

		Partitioner.PartitionStrict( Graph, !bSingleThreaded );

		LOG_CRC( Partitioner.Ranges );

		uint32 MaxParents = 0;
		for( auto& Range : Partitioner.Ranges )
		{
			uint32 NumGroupElements = 0;
			for( uint32 i = Range.Begin; i < Range.End; i++ )
			{
				// Global indexing is needed in Reduce()
				Partitioner.Indexes[i] += LevelOffset;
				NumGroupElements += Clusters[ Partitioner.Indexes[i] ].MaterialIndexes.Num();
			}
			MaxParents += FMath::DivideAndRoundUp( NumGroupElements, MaxClusterSize * 2 );
		}

		LevelOffset = Clusters.Num();

		Clusters.AddDefaulted( MaxParents );
		Groups.AddDefaulted( Partitioner.Ranges.Num() );

		ParallelFor( TEXT("Nanite.BuildDAG.PF"), Partitioner.Ranges.Num(), 1,
			[&]( int32 PartitionIndex )
			{
				if (CancellationToken && CancellationToken->IsCanceled())
				{
					return;
				}

				auto& Range = Partitioner.Ranges[ PartitionIndex ];

				TArrayView< uint32 > Children( &Partitioner.Indexes[ Range.Begin ], Range.End - Range.Begin );

				uint32 NumGroupElements = 0;
				for( uint32 i = Range.Begin; i < Range.End; i++ )
				{
					NumGroupElements += Clusters[ Partitioner.Indexes[i] ].MaterialIndexes.Num();
				}
				uint32 MaxParents = FMath::DivideAndRoundUp( NumGroupElements, MaxClusterSize * 2 );
				uint32 ClusterGroupIndex = PartitionIndex + Groups.Num() - Partitioner.Ranges.Num();

				ReduceGroup( RayTracingScene.Get(), NumClusters, Children, MaxClusterSize, MaxParents, ClusterGroupIndex, MeshIndex );
			} );

		if (CancellationToken && CancellationToken->IsCanceled())
		{
			return;
		}

		check( LevelOffset < NumClusters );

		// Correct num to atomic count
		Clusters.SetNum( NumClusters, EAllowShrinking::No );

		// Force a deterministic order of the generated parent clusters
		{
			// TODO: Optimize me.
			// Just sorting the array directly seems like the safest option at this stage (right before UE5 final build).
			// On AOD_Shield this seems to be on the order of 0.01s in practice.
			// As the Clusters array is already conservatively allocated, it seems storing the parent clusters in their designated
			// conservative ranges and then doing a compaction pass at the end would be a more efficient solution that doesn't involve sorting.

			//uint32 StartTime = FPlatformTime::Cycles();
			TArrayView< FCluster > Parents( &Clusters[ LevelOffset ], Clusters.Num() - LevelOffset );
			Parents.Sort(
				[&]( const FCluster& A, const FCluster& B )
				{
					return A.GUID < B.GUID;
				} );
			//UE_LOG(LogStaticMesh, Log, TEXT("SortTime Adjacency [%.2fs]"), FPlatformTime::ToMilliseconds(FPlatformTime::Cycles() - StartTime) / 1000.0f);
		}
	}

#if RAY_TRACE_VOXELS
	for( FCluster& Cluster : Clusters )
	{
		Cluster.ExtraVoxels.Empty(); // VOXELTODO: Free this earlier
	}
#endif

	// Max out root node
	uint32 RootIndex = LevelOffset;
	FClusterGroup RootClusterGroup;
	RootClusterGroup.Children.Add( RootIndex );
	RootClusterGroup.Bounds = Clusters[ RootIndex ].SphereBounds;
	RootClusterGroup.LODBounds = FSphere3f( 0 );
	RootClusterGroup.MaxParentLODError = 1e10f;
	RootClusterGroup.MinLODError = -1.0f;
	RootClusterGroup.MipLevel = Clusters[ RootIndex ].MipLevel + 1;
	RootClusterGroup.MeshIndex = MeshIndex;
	RootClusterGroup.AssemblyPartIndex = MAX_uint32;
	RootClusterGroup.bTrimmed = false;
	Clusters[ RootIndex ].GroupIndex = Groups.Num();
	Groups.Add( RootClusterGroup );
}
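
// Given samples (x0, y0) and (x1, y1) of an approximately linear function,
// solve for the x at which the function reaches y.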
float InverseLerp(
	float y,
	float x0, float y0,
	float x1, float y1 )
{
	return ( x0 * (y1 - y) - x1 * (y0 - y) ) / ( y1 - y0 );
}

float InverseLerp(
	float y,
	float x0, float y0,
	float x1, float y1,
	float x2, float y2 )
{
	// Inverse quadratic interpolation
#if 0
	float a = (y0 - y) * (x1 - x0) * (y1 - y2);
	float b = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0);
	float c = (y2 - y) * (x1 - x2) * (y0 - y1);

	return x1 + b / (a + c);
#else
	return
		(y - y1) * (y - y2) * x0 / ( (y0 - y1) * (y0 - y2) ) +
		(y - y2) * (y - y0) * x1 / ( (y1 - y2) * (y1 - y0) ) +
		(y - y0) * (y - y1) * x2 / ( (y2 - y0) * (y2 - y1) );
#endif
}

// Brent's method: find x in [xA, xB] with Func(x) ~= y, combining inverse
// quadratic interpolation, the secant step, and bisection. Assumes the bracket
// straddles the target, i.e. (yA - y) and (yB - y) have opposite signs.
template< typename FuncType >
float BrentRootFind(
	float y, float Tolerance,
	float xA, float yA,
	float xB, float yB,
	float xGuess, bool bInitialGuess,
	int32 MaxIter,
	FuncType&& Func )
{
	if( FMath::Abs( yA - y ) < FMath::Abs( yB - y ) )
	{
		Swap( xA, xB );
		Swap( yA, yB );
	}

	float xC = xA;
	float yC = yA;
	float xD = xA;

	bool bBisection = true;

	for( int32 i = 0; i < MaxIter; i++ )
	{
		if( FMath::Abs( xB - xA ) < SMALL_NUMBER ||
			FMath::Abs( yB - y ) <= Tolerance )
			break;

		if( yC != yA && yC != yB )
		{
			xGuess = InverseLerp(
				y,
				xA, yA,
				xB, yB,
				xC, yC );
		}
		else if( !bInitialGuess )
		{
			xGuess = InverseLerp(
				y,
				xA, yA,
				xB, yB );
		}
		bInitialGuess = false;

		if( bBisection )
		{
			bBisection =
				FMath::Abs( xGuess - xB ) >= 0.5f * FMath::Abs( xB - xC ) ||
				FMath::Abs( xB - xC ) < SMALL_NUMBER;
		}
		else
		{
			bBisection =
				FMath::Abs( xGuess - xB ) >= 0.5f * FMath::Abs( xC - xD ) ||
				FMath::Abs( xC - xD ) < SMALL_NUMBER;
		}

		// Outside of interval
		if( ( xGuess - ( 0.75f * xA + 0.25f * xB ) ) * ( xGuess - xB ) >= 0.0f )
			bBisection = true;

		if( bBisection )
			xGuess = 0.5f * ( xA + xB );

		float yGuess = Func( xGuess );

		xD = xC;
		xC = xB;
		yC = yB;

		if( ( yA - y ) * ( yGuess - y ) < 0.0f )
		{
			xB = xGuess;
			yB = yGuess;
		}
		else
		{
			xA = xGuess;
			yA = yGuess;
		}

		if( FMath::Abs( yA - y ) < FMath::Abs( yB - y ) )
		{
			Swap( xA, xB );
			Swap( yA, yB );
		}
	}

	return xB;
}
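
// Illustrative usage (hypothetical values): solve Func(x) = 5 for x in [0, 10],
// sampling both ends of the bracket first:
//   float x = BrentRootFind( 5.0f, 0.01f, 0.0f, Func( 0.0f ), 10.0f, Func( 10.0f ),
//                            0.0f, false, 32, Func );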

// Emit the parent clusters for a merged (or voxelized) group: if the merged
// cluster already fits within MaxClusterSize elements it becomes the single
// parent; otherwise it is re-partitioned into at most NumParents pieces.
// Returns false when the partition doesn't fit the conservatively reserved
// parent count, in which case the caller retries with a smaller target.
template< typename FPartitioner, typename FPartitionFunc >
bool SplitCluster( FCluster& Merged, TArray< FCluster >& Clusters, TAtomic< uint32 >& NumClusters, uint32 MaxClusterSize, uint32& NumParents, uint32& ParentStart, uint32& ParentEnd, FPartitionFunc&& PartitionFunc )
{
	if( Merged.MaterialIndexes.Num() <= (int32)MaxClusterSize )
	{
		ParentEnd = ( NumClusters += 1 );
		ParentStart = ParentEnd - 1;

		Clusters[ ParentStart ] = Merged;
		Clusters[ ParentStart ].Bound();
		return true;
	}
	else if( NumParents > 1 )
	{
		check( MaxClusterSize == FCluster::ClusterSize );

		FAdjacency Adjacency = Merged.BuildAdjacency();
		FPartitioner Partitioner( Merged.MaterialIndexes.Num(), MaxClusterSize - 4, MaxClusterSize );
		PartitionFunc( Partitioner, Adjacency );

		if( Partitioner.Ranges.Num() <= (int32)NumParents )
		{
			NumParents = Partitioner.Ranges.Num();
			ParentEnd = ( NumClusters += NumParents );
			ParentStart = ParentEnd - NumParents;

			int32 Parent = ParentStart;
			for( auto& Range : Partitioner.Ranges )
			{
				Clusters[ Parent ] = FCluster( Merged, Range.Begin, Range.End, Partitioner.Indexes, Partitioner.SortedTo, Adjacency );
				Parent++;
			}

			return true;
		}
	}

	return false;
}
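
// Merge one group of child clusters, simplify (or voxelize) the merged geometry
// to the next detail level, and split the result into the group's parent
// clusters. All parents receive identical LOD data since they swap as a unit.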
void FClusterDAG::ReduceGroup( FRayTracingScene* RayTracingScene, TAtomic< uint32 >& NumClusters, TArrayView< uint32 > Children, uint32 MaxClusterSize, uint32 NumParents, int32 GroupIndex, uint32 MeshIndex )
{
	check( GroupIndex >= 0 );

	bool bAnyTriangles = false;
	bool bAllTriangles = true;

	TArray< FSphere3f, TInlineAllocator< MaxGroupSize > > Children_LODBounds;
	TArray< FSphere3f, TInlineAllocator< MaxGroupSize > > Children_SphereBounds;

	float ChildMinLODError = MAX_flt;
	float ChildMaxLODError = 0.0f;
	for( uint32 Child : Children )
	{
		FCluster& Cluster = Clusters[ Child ];

		bAnyTriangles = bAnyTriangles || Cluster.NumTris > 0;
		bAllTriangles = bAllTriangles && Cluster.NumTris > 0;

		bool bLeaf = Cluster.EdgeLength < 0.0f;
		float LODError = Cluster.LODError;

		// Force monotonic nesting.
		Children_LODBounds.Add( Cluster.LODBounds );
		Children_SphereBounds.Add( Cluster.SphereBounds );
		ChildMinLODError = FMath::Min( ChildMinLODError, bLeaf ? -1.0f : LODError );
		ChildMaxLODError = FMath::Max( ChildMaxLODError, LODError );

		Cluster.GroupIndex = GroupIndex;
		Groups[ GroupIndex ].Children.Add( Child );
		check( Groups[ GroupIndex ].Children.Num() <= NANITE_MAX_CLUSTERS_PER_GROUP_TARGET );
	}

	FSphere3f ParentLODBounds( Children_LODBounds.GetData(), Children_LODBounds.Num() );
	FSphere3f ParentBounds( Children_SphereBounds.GetData(), Children_SphereBounds.Num() );

	uint32 ParentStart = 0;
	uint32 ParentEnd = 0;

	FCluster Merged;
	float SimplifyError = MAX_flt;

	bool bVoxels = false;
#if NANITE_VOXEL_DATA
	FCluster& FirstCluster = Clusters[ Children[0] ];
	bVoxels = !bAllTriangles || Settings.bPreserveArea;
#endif

	uint32 TargetClusterSize = MaxClusterSize - 2;
	if( bAllTriangles )
	{
		uint32 TargetNumTris = NumParents * TargetClusterSize;

#if NANITE_VOXEL_DATA
		if( !bVoxels ||
			Settings.VoxelLevel == 0 ||
			Settings.VoxelLevel > FirstCluster.MipLevel + 1 )
#endif
		{
			Merged = FCluster( *this, Children );
			SimplifyError = Merged.Simplify( *this, TargetNumTris );
		}
	}

#if NANITE_VOXEL_DATA
	if( bVoxels )
	{
		uint32 TotalVerts = 0;
		float SurfaceArea = 0.0f;
		for( uint32 Child : Children )
		{
			TotalVerts += Clusters[ Child ].NumVerts;
			SurfaceArea += Clusters[ Child ].SurfaceArea;
		}

		int32 TargetNumBricks = NumParents * MaxClusterSize;
		//uint32 TargetNumVoxels = TargetNumBricks * 16;
		uint32 TargetNumVoxels = ( TotalVerts * 3 ) / 4;
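
		// A surface voxelized at size s yields roughly SurfaceArea / s^2 voxels,
		// so s = sqrt( SurfaceArea / TargetNumVoxels ) lands near the target count.
		// The 0.75 factor biases the first attempt smaller; the loop below then
		// grows the voxel size by 10% per attempt until the result fits the budgets.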
		float VoxelSize = FMath::Sqrt( SurfaceArea / TargetNumVoxels );
		VoxelSize *= 0.75f;

		VoxelSize = FMath::Max( VoxelSize, ChildMaxLODError );

#if 0
		// Round to pow2
		// = exp2( floor( log2(x) + 0.5 ) )
		FFloat32 VoxelSizeF( VoxelSize * UE_SQRT_2 );
		VoxelSizeF.Components.Mantissa = 0;
		VoxelSize = VoxelSizeF.FloatValue;
#endif

		float EstimatedVoxelSize = VoxelSize;

		while( VoxelSize < SimplifyError )
		{
			FCluster Voxelized;
			Voxelized.Voxelize( *this, *RayTracingScene, Children, VoxelSize );

			if( Voxelized.NumVerts < TargetNumVoxels &&
				Voxelized.Bricks.Num() < TargetNumBricks )
			{
				bool bSplitSuccess = SplitCluster< FBVHCluster >( Voxelized, Clusters, NumClusters, MaxClusterSize, NumParents, ParentStart, ParentEnd,
					[ &Voxelized ]( FBVHCluster& Partitioner, FAdjacency& Adjacency )
					{
						Partitioner.Build(
							[ &Voxelized ]( uint32 VertIndex )
							{
								FBounds3f Bounds;
								Bounds = FVector3f( Voxelized.Bricks[ VertIndex ].Position );
								return Bounds;
							} );
					} );

#if RAY_TRACE_VOXELS
				if( Voxelized.NumTris == 0 )
				{
					// Distribute extra voxels to closest clusters
					for( const FVector3f& Position : Voxelized.ExtraVoxels )
					{
						float BestDistance = MAX_flt;
						uint32 BestParentIndex = 0xFFFFFFFFu;
						for( uint32 ParentIndex = ParentStart; ParentIndex < ParentEnd; ParentIndex++ )
						{
							FVector3f BoundsCenter = Clusters[ ParentIndex ].Bounds.GetCenter();
							float Distance = ( Position - BoundsCenter ).GetAbsMax();
							if( Distance < BestDistance )
							{
								BestDistance = Distance;
								BestParentIndex = ParentIndex;
							}
						}

						Clusters[ BestParentIndex ].ExtraVoxels.Add( Position );
					}
				}
#endif

				check( bSplitSuccess );
				break;
			}

			VoxelSize *= 1.1f;
		}

		if( VoxelSize < SimplifyError )
			SimplifyError = VoxelSize;
		else
			bVoxels = false;
	}
#endif

	if( !bVoxels )
	{
		check( bAllTriangles );

		while(1)
		{
			bool bSplitSuccess = SplitCluster< FGraphPartitioner >( Merged, Clusters, NumClusters, MaxClusterSize, NumParents, ParentStart, ParentEnd,
				[ &Merged ]( FGraphPartitioner& Partitioner, FAdjacency& Adjacency )
				{
					Merged.Split( Partitioner, Adjacency );
				} );

			if( bSplitSuccess )
				break;

			TargetClusterSize -= 2;
			if( TargetClusterSize <= MaxClusterSize / 2 )
				break;

			uint32 TargetNumTris = NumParents * TargetClusterSize;

			// Start over from scratch. Continuing from the simplified cluster screws up ExternalEdges and LODError.
			Merged = FCluster( *this, Children );
			SimplifyError = Merged.Simplify( *this, TargetNumTris );
		}
	}

	float ParentMaxLODError = FMath::Max( ChildMaxLODError, SimplifyError );

	// Force parents to have the same LOD data. They are all dependent.
	for( uint32 Parent = ParentStart; Parent < ParentEnd; Parent++ )
	{
		Clusters[ Parent ].LODBounds = ParentLODBounds;
		Clusters[ Parent ].LODError = ParentMaxLODError;
		Clusters[ Parent ].GeneratingGroupIndex = GroupIndex;
	}

	Groups[ GroupIndex ].Bounds = ParentBounds;
	Groups[ GroupIndex ].LODBounds = ParentLODBounds;
	Groups[ GroupIndex ].MinLODError = ChildMinLODError;
	Groups[ GroupIndex ].MaxParentLODError = ParentMaxLODError;
	Groups[ GroupIndex ].MipLevel = Clusters[ Children[0] ].MipLevel;
	Groups[ GroupIndex ].MeshIndex = MeshIndex;
	Groups[ GroupIndex ].AssemblyPartIndex = MAX_uint32;
	Groups[ GroupIndex ].bTrimmed = false;
}
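
// Walk the DAG from the root, repeatedly replacing the highest-error cluster on
// the cut with the children of its generating group, until the triangle count or
// error target is reached. Returns the heap describing the selected cut.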
FBinaryHeap< float > FClusterDAG::FindCut(
	uint32 TargetNumTris,
	float TargetError,
	uint32 TargetOvershoot,
	TBitArray<>* SelectedGroupsMask ) const
{
	const FClusterGroup& RootGroup = Groups.Last();
	const FCluster& RootCluster = Clusters[ RootGroup.Children[0] ];

	bool bHitTargetBefore = false;

	float MinError = RootCluster.LODError;

	TBitArray<> VisitedGroups;
	VisitedGroups.Init(false, Groups.Num());
	VisitedGroups[Groups.Num() - 1] = true;

	FBinaryHeap< float > Heap;
	Heap.Add( -RootCluster.LODError, RootGroup.Children[0] );

	uint32 CurNumTris = RootCluster.NumTris;

	while( true )
	{
		// Grab the highest error cluster to replace, to reduce the cut error
		const uint32 ClusterIndex = Heap.Top();
		const FCluster& Cluster = Clusters[ ClusterIndex ];
		const FClusterGroup& Group = Groups[ Cluster.GroupIndex ];
		const uint32 NumInstances = Group.AssemblyPartIndex == MAX_uint32 ? 1u : AssemblyPartData[ Group.AssemblyPartIndex ].NumTransforms;

		if( Cluster.MipLevel == 0 )
			break;
		if( Cluster.GeneratingGroupIndex == MAX_uint32 )
			break;

		bool bHitTarget = CurNumTris > TargetNumTris || MinError < TargetError;

		// Overshoot the target by TargetOvershoot triangles. This allows granular edge collapses to better minimize error to the target.
		if( TargetOvershoot > 0 && bHitTarget && !bHitTargetBefore )
		{
			TargetNumTris = CurNumTris + TargetOvershoot;
			bHitTarget = false;
			bHitTargetBefore = true;
		}

		if( bHitTarget && Cluster.LODError < MinError )
			break;

		Heap.Pop();
		CurNumTris -= Cluster.NumTris * NumInstances;

		check( Cluster.LODError <= MinError );
		MinError = Cluster.LODError;

		if (VisitedGroups[Cluster.GeneratingGroupIndex])
		{
			continue;
		}
		VisitedGroups[Cluster.GeneratingGroupIndex] = true;

		const FClusterGroup& NextGroup = Groups[ Cluster.GeneratingGroupIndex ];
		const uint32 NextNumInstances = NextGroup.AssemblyPartIndex == MAX_uint32 ? 1u : AssemblyPartData[ NextGroup.AssemblyPartIndex ].NumTransforms;

		for( uint32 Child : NextGroup.Children )
		{
			if( !Heap.IsPresent( Child ) )
			{
				const FCluster& ChildCluster = Clusters[ Child ];

				check( ChildCluster.MipLevel < Cluster.MipLevel );
				check( ChildCluster.LODError <= MinError );
				Heap.Add( -ChildCluster.LODError, Child );
				CurNumTris += ChildCluster.NumTris * NextNumInstances;
			}
		}

		// TODO: Nanite-Assemblies: Double-check this. I think we have to handle the case where we cross the threshold from the mip tail
		// into the lower mips of assembly parts. I believe it's possible otherwise to get into a situation where some mip tail clusters
		// that were generated by assembly parts are still present on the heap and now overlap with an instanced, higher LOD of the part.
		// Maybe this can be solved simply by detecting when we're crossing that threshold here and removing all clusters from the heap
		// whose generating group == NextGroup like this? Not sure if it covers all cases though.
		if (Group.AssemblyPartIndex == MAX_uint32 && NextGroup.AssemblyPartIndex != MAX_uint32)
		{
			for (int32 OtherGroupIndex = 0; OtherGroupIndex < Groups.Num(); ++OtherGroupIndex)
			{
				const FClusterGroup& OtherGroup = Groups[OtherGroupIndex];
				if (OtherGroup.MipLevel < Group.MipLevel)
				{
					// Skip over higher mip groups
					continue;
				}

				for (uint32 OtherClusterIndex : OtherGroup.Children)
				{
					const FCluster& OtherCluster = Clusters[OtherClusterIndex];
					if (Heap.IsPresent(OtherClusterIndex) &&
						OtherCluster.GeneratingGroupIndex == Cluster.GeneratingGroupIndex)
					{
						Heap.Remove(OtherClusterIndex);
						CurNumTris -= OtherCluster.NumTris;
					}
				}
			}
		}
	}

	if (SelectedGroupsMask)
	{
		*SelectedGroupsMask = MoveTemp(VisitedGroups);
	}

	return Heap;
}

} // namespace Nanite