Files
UnrealEngine/Engine/Source/Developer/NaniteBuilder/Private/GraphPartitioner.cpp
2025-05-18 13:04:45 +08:00

352 lines
9.8 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "GraphPartitioner.h"
#include "Async/Async.h"
#include "Async/LocalWorkQueue.h"
#include "EngineLogs.h"
#include "HAL/PlatformMemory.h"
/**
 * Creates a partitioner over InNumElements elements.
 *
 * @param InNumElements		Number of elements to be partitioned.
 * @param InMinPartitionSize	Stored as MinPartitionSize.
 * @param InMaxPartitionSize	Stored as MaxPartitionSize.
 *
 * Indexes starts out as the identity ordering (Indexes[i] == i).
 */
FGraphPartitioner::FGraphPartitioner( uint32 InNumElements, int32 InMinPartitionSize, int32 InMaxPartitionSize )
	: NumElements( InNumElements )
	, MinPartitionSize( InMinPartitionSize )
	, MaxPartitionSize( InMaxPartitionSize )
{
	Indexes.AddUninitialized( NumElements );

	// Fill every freshly added slot with its own position.
	uint32 NextIndex = 0;
	for( uint32& Slot : Indexes )
	{
		Slot = NextIndex++;
	}
}
/**
 * Allocates a graph that spans all NumElements elements.
 *
 * @param NumAdjacency	Number of adjacency entries the caller expects to add.
 * @return				Heap-allocated graph (created with new) with Offset 0 and Num == NumElements.
 *
 * The adjacency arrays are only reserved, not filled. Capacity is NumAdjacency plus one entry per
 * element of LocalityLinks. AdjacencyOffset is sized to NumElements + 1 and left uninitialized.
 */
FGraphPartitioner::FGraphData* FGraphPartitioner::NewGraph( uint32 NumAdjacency ) const
{
	const uint32 NumReservedEdges = NumAdjacency + LocalityLinks.Num();

	FGraphData* RESTRICT NewData = new FGraphPartitioner::FGraphData;

	NewData->Offset	= 0;
	NewData->Num	= NumElements;

	NewData->AdjacencyOffset.AddUninitialized( NumElements + 1 );
	NewData->Adjacency.Reserve( NumReservedEdges );
	NewData->AdjacencyCost.Reserve( NumReservedEdges );

	return NewData;
}
/**
 * Partitions the whole graph with a single k-way METIS call.
 *
 * When the graph holds more than MaxPartitionSize elements it is split into
 * ceil( Graph->Num / TargetPartitionSize ) parts, where TargetPartitionSize is the midpoint of
 * MinPartitionSize and MaxPartitionSize. Indexes is then regrouped so each partition occupies a
 * contiguous span, and one entry per partition is appended to Ranges.
 * A graph that already fits in one partition gets a single range covering every element.
 *
 * In both cases SortedTo is rewritten as the inverse permutation of Indexes.
 *
 * @param Graph	Graph to partition. It is not freed here.
 *
 * NOTE(review): the per-element loops run over NumElements while METIS writes Graph->Num ids,
 * so this assumes Graph->Num == NumElements (as produced by NewGraph) - confirm for other callers.
 * NOTE(review): SortedTo is indexed but never sized in this function; presumably it is sized
 * elsewhere before this call - confirm.
 */
void FGraphPartitioner::Partition( FGraphData* Graph )
{
	// Only graphs that do not fit in a single partition need METIS. This matches the
	// "Graph->Num <= MaxPartitionSize means done" rule used by BisectGraph.
	if( Graph->Num > MaxPartitionSize )
	{
		PartitionIDs.AddUninitialized( NumElements );

		const int32 TargetPartitionSize = ( MinPartitionSize + MaxPartitionSize ) / 2;
		const int32 TargetNumPartitions = FMath::DivideAndRoundUp( Graph->Num, TargetPartitionSize );

		idx_t NumConstraints = 1;
		idx_t NumParts = TargetNumPartitions;
		idx_t EdgesCut = 0;

		idx_t Options[ METIS_NOPTIONS ];
		METIS_SetDefaultOptions( Options );

		// UFACTOR is METIS' load imbalance tolerance, see the METIS manual for the exact scale.
		Options[ METIS_OPTION_UFACTOR ] = 200;//( 1000 * MaxPartitionSize * TargetNumPartitions ) / NumElements - 1000;
		//Options[ METIS_OPTION_NCUTS ] = 8;
		//Options[ METIS_OPTION_IPTYPE ] = METIS_IPTYPE_RANDOM;
		//Options[ METIS_OPTION_SEED ] = 17;

		//int r = METIS_PartGraphRecursive(
		int r = METIS_PartGraphKway(
			&Graph->Num,
			&NumConstraints,			// number of balancing constraints
			Graph->AdjacencyOffset.GetData(),
			Graph->Adjacency.GetData(),
			nullptr,						// Vert weights
			nullptr,						// Vert sizes for computing the total communication volume
			Graph->AdjacencyCost.GetData(),	// Edge weights
			&NumParts,
			nullptr,						// Target partition weight
			nullptr,						// Allowed load imbalance tolerance
			Options,
			&EdgesCut,
			PartitionIDs.GetData()
		);

		if (r == METIS_ERROR_MEMORY)
		{
			UE_LOG(LogStaticMesh, Error, TEXT("Call to METIS_PartGraphKway() failed - error code: %d, Graph->Num: %d"), r, Graph->Num);
			// We can't get the precise allocation size, but Metis logs an error that contains the actual error.
			FPlatformMemory::OnOutOfMemory(0, 0);
		}

		if( ensure( r == METIS_OK ) )
		{
			// Counting sort of the elements by partition id.

			// Pass 1: how many elements landed in each partition.
			TArray< uint32 > ElementCount;
			ElementCount.AddZeroed( TargetNumPartitions );

			for( uint32 i = 0; i < NumElements; i++ )
			{
				ElementCount[ PartitionIDs[i] ]++;
			}

			// Pass 2: running sum of the counts gives each partition's [Begin, End) range.
			// The counts are reset so they can be reused as per-partition write cursors below.
			uint32 Begin = 0;
			Ranges.AddUninitialized( TargetNumPartitions );
			for( int32 PartitionIndex = 0; PartitionIndex < TargetNumPartitions; PartitionIndex++ )
			{
				Ranges[ PartitionIndex ] = { Begin, Begin + ElementCount[ PartitionIndex ] };
				Begin += ElementCount[ PartitionIndex ];
				ElementCount[ PartitionIndex ] = 0;
			}

			// Pass 3: scatter the old ordering into its partition's span, keeping relative order.
			TArray< uint32 > OldIndexes;
			Swap( Indexes, OldIndexes );

			Indexes.AddUninitialized( NumElements );
			for( uint32 i = 0; i < NumElements; i++ )
			{
				uint32 PartitionIndex = PartitionIDs[i];
				uint32 Offset = Ranges[ PartitionIndex ].Begin;
				uint32 Num = ElementCount[ PartitionIndex ]++;

				Indexes[ Offset + Num ] = OldIndexes[i];
			}

			PartitionIDs.Empty();
		}
	}
	else
	{
		// Single
		Ranges.Add( { 0, NumElements } );
	}

	// Inverse permutation: position of each original element in the sorted order.
	for( uint32 i = 0; i < NumElements; i++ )
	{
		SortedTo[ Indexes[i] ] = i;
	}
}
// Splits Graph's element span [Offset, Offset + Num) into two parts with METIS_PartGraphRecursive.
//
// Outcomes:
//  - Graph->Num <= MaxPartitionSize: the span is recorded as one finished partition, no METIS call.
//  - Both halves fit in MaxPartitionSize: both are recorded as finished partitions.
//  - Otherwise: two child graphs are allocated with new and returned through ChildGraphs.
// ChildGraphs[0] and ChildGraphs[1] are null unless the last case applies.
//
// Graph itself is not freed here. The callers visible in this file delete each graph right after
// bisecting it, which is also how the child graphs end up freed.
// Indexes is reordered in place so part 0 elements come first in the span.
// PartitionIDs and SwappedWith are used as scratch, indexed by absolute element position.
void FGraphPartitioner::BisectGraph( FGraphData* Graph, FGraphData* ChildGraphs[2] )
{
ChildGraphs[0] = nullptr;
ChildGraphs[1] = nullptr;
// Claims the next slot in Ranges and stores [Offset, Offset + Num) in it.
// NOTE(review): this runs from worker threads in PartitionStrict, so NumPartitions is presumably
// an atomic counter - confirm in the header. Ranges must already be large enough; it is not grown here.
auto AddPartition = [ this ]( int32 Offset, int32 Num )
{
FRange& Range = Ranges[ NumPartitions++ ];
Range.Begin = Offset;
Range.End = Offset + Num;
};
// Small enough already: emit as a single partition.
if( Graph->Num <= MaxPartitionSize )
{
AddPartition( Graph->Offset, Graph->Num );
return;
}
const int32 TargetPartitionSize = ( MinPartitionSize + MaxPartitionSize ) / 2;
const int32 TargetNumPartitions = FMath::Max( 2, FMath::DivideAndRoundNearest( Graph->Num, TargetPartitionSize ) );
check( Graph->AdjacencyOffset.Num() == Graph->Num + 1 );
idx_t NumConstraints = 1;
idx_t NumParts = 2;
idx_t EdgesCut = 0;
// Target weight of each side: floor( TargetNumPartitions / 2 ) of the final partitions go to
// part 0 and the remainder to part 1, so an odd count gives an uneven split.
real_t PartitionWeights[] = {
float( TargetNumPartitions / 2 ) / TargetNumPartitions,
1.0f - float( TargetNumPartitions / 2 ) / TargetNumPartitions
};
idx_t Options[ METIS_NOPTIONS ];
METIS_SetDefaultOptions( Options );
// Allow looser tolerance when at the higher levels. Strict balance isn't that important until it gets closer to partition sized.
bool bLoose = TargetNumPartitions >= 128 || MaxPartitionSize / MinPartitionSize > 1;
// bSlow is only referenced by the commented-out options below.
bool bSlow = Graph->Num < 4096;
Options[ METIS_OPTION_UFACTOR ] = bLoose ? 200 : 1;
//Options[ METIS_OPTION_NCUTS ] = Graph->Num < 1024 ? 8 : ( Graph->Num < 4096 ? 4 : 1 );
//Options[ METIS_OPTION_NCUTS ] = bSlow ? 4 : 1;
//Options[ METIS_OPTION_NITER ] = bSlow ? 20 : 10;
//Options[ METIS_OPTION_IPTYPE ] = METIS_IPTYPE_RANDOM;
//Options[ METIS_OPTION_MINCONN ] = 1;
// The part ids (0 or 1) are written into this graph's slice of PartitionIDs.
int r = METIS_PartGraphRecursive(
&Graph->Num,
&NumConstraints, // number of balancing constraints
Graph->AdjacencyOffset.GetData(),
Graph->Adjacency.GetData(),
NULL, // Vert weights
NULL, // Vert sizes for computing the total communication volume
Graph->AdjacencyCost.GetData(), // Edge weights
&NumParts,
PartitionWeights, // Target partition weight
NULL, // Allowed load imbalance tolerance
Options,
&EdgesCut,
PartitionIDs.GetData() + Graph->Offset
);
if (r == METIS_ERROR_MEMORY)
{
UE_LOG(LogStaticMesh, Error, TEXT("Call to METIS_PartGraphRecursive() failed - error code: %d, Graph->Num: %d"), r, Graph->Num);
// We can't get the precise allocation size, but Metis logs an error that contains the actual error.
FPlatformMemory::OnOutOfMemory(0, 0);
}
checkf(r == METIS_OK, TEXT("Call to METIS_PartGraphRecursive() failed - error code: %d, Graph->Num: %d"), r, Graph->Num);
{
// In place divide the array
// Both sides remain sorted but back is reversed.
// Front scans forward over part 0 entries and Back scans backward over part 1 entries.
// A misplaced pair is swapped. SwappedWith[ Position ] records where the element now at
// Position came from (itself if it did not move). Every position is written exactly once
// and moves are pairwise swaps, so the same table also maps old position -> new position.
// PartitionIDs is not permuted; it is only read at positions not yet visited.
int32 Front = Graph->Offset;
int32 Back = Graph->Offset + Graph->Num - 1;
while( Front <= Back )
{
while( Front <= Back && PartitionIDs[ Front ] == 0 )
{
SwappedWith[ Front ] = Front;
Front++;
}
while( Front <= Back && PartitionIDs[ Back ] == 1 )
{
SwappedWith[ Back ] = Back;
Back--;
}
if( Front < Back )
{
Swap( Indexes[ Front ], Indexes[ Back ] );
SwappedWith[ Front ] = Back;
SwappedWith[ Back ] = Front;
Front++;
Back--;
}
}
// Front now points at the first part 1 element.
int32 Split = Front;
int32 Num[2];
Num[0] = Split - Graph->Offset;
Num[1] = Graph->Offset + Graph->Num - Split;
// METIS must have placed at least one element on each side.
check( Num[0] > 0 );
check( Num[1] > 0 );
if( Num[0] <= MaxPartitionSize &&
Num[1] <= MaxPartitionSize )
{
// Both halves are final partitions.
AddPartition( Graph->Offset, Num[0] );
AddPartition( Split, Num[1] );
}
else
{
// At least one half is still too large: build both child graphs for further bisection.
// Each child reserves half of the parent's adjacency count as an initial capacity guess.
for( int32 i = 0; i < 2; i++ )
{
ChildGraphs[i] = new FGraphData;
ChildGraphs[i]->Adjacency.Reserve( Graph->Adjacency.Num() >> 1 );
ChildGraphs[i]->AdjacencyCost.Reserve( Graph->Adjacency.Num() >> 1 );
ChildGraphs[i]->AdjacencyOffset.Reserve( Num[i] + 1 );
ChildGraphs[i]->Num = Num[i];
}
ChildGraphs[0]->Offset = Graph->Offset;
ChildGraphs[1]->Offset = Split;
// Walk the nodes in their new order. The first Num[0] belong to child 0, the rest to child 1.
for( int32 i = 0; i < Graph->Num; i++ )
{
FGraphData* ChildGraph = ChildGraphs[ i >= ChildGraphs[0]->Num ];
// Start of this node's adjacency list in the child.
ChildGraph->AdjacencyOffset.Add( ChildGraph->Adjacency.Num() );
// Local index this node had in the parent graph before the in-place divide.
int32 OrgIndex = SwappedWith[ Graph->Offset + i ] - Graph->Offset;
for( idx_t AdjIndex = Graph->AdjacencyOffset[ OrgIndex ]; AdjIndex < Graph->AdjacencyOffset[ OrgIndex + 1 ]; AdjIndex++ )
{
idx_t Adj = Graph->Adjacency[ AdjIndex ];
idx_t AdjCost = Graph->AdjacencyCost[ AdjIndex ];
// Remap to child
Adj = SwappedWith[ Graph->Offset + Adj ] - ChildGraph->Offset;
// Edge connects to node in this graph
// Edges that cross into the other child fall outside [0, Num) and are dropped.
if( 0 <= Adj && Adj < ChildGraph->Num )
{
ChildGraph->Adjacency.Add( Adj );
ChildGraph->AdjacencyCost.Add( AdjCost );
}
}
}
// Closing entry so that AdjacencyOffset holds Num + 1 values.
ChildGraphs[0]->AdjacencyOffset.Add( ChildGraphs[0]->Adjacency.Num() );
ChildGraphs[1]->AdjacencyOffset.Add( ChildGraphs[1]->Adjacency.Num() );
}
}
}
/**
 * Bisects Graph and keeps recursing into both halves for as long as BisectGraph returns child graphs.
 *
 * @param Graph	Graph to split. It is deleted here once it has been bisected.
 */
void FGraphPartitioner::RecursiveBisectGraph( FGraphData* Graph )
{
	FGraphData* Children[2];
	BisectGraph( Graph, Children );

	// The parent is no longer needed once its children (if any) have been built.
	delete Graph;

	// No children means the span was emitted as final partitions.
	if( !Children[0] || !Children[1] )
	{
		return;
	}

	for( FGraphData* Child : Children )
	{
		RecursiveBisectGraph( Child );
	}
}
/**
 * Partitions the graph by repeated bisection.
 *
 * @param Graph		Root graph. It is deleted during the bisection.
 * @param bThreaded	Allow the bisection to run on a work queue. Ranges is sorted afterwards
 *					whenever this is set, so that the result order is deterministic.
 *
 * On return Ranges holds NumPartitions entries and SortedTo is the inverse permutation of Indexes.
 * NOTE(review): SortedTo is indexed but never sized in this function; presumably it is sized
 * elsewhere before this call - confirm.
 */
void FGraphPartitioner::PartitionStrict( FGraphData* Graph, bool bThreaded )
{
	// Scratch buffers used by BisectGraph.
	PartitionIDs.AddUninitialized( NumElements );
	SwappedWith.AddUninitialized( NumElements );

	// Ranges is appended to atomically, so it is sized up front to never need to grow.
	const int32 ExpectedPartitionCount = FMath::DivideAndRoundUp( Graph->Num, MinPartitionSize );
	Ranges.AddUninitialized( ExpectedPartitionCount * 2 );
	NumPartitions = 0;

	const bool bUseWorkQueue = bThreaded && ExpectedPartitionCount > 4;
	if( !bUseWorkQueue )
	{
		RecursiveBisectGraph( Graph );
	}
	else
	{
		TLocalWorkQueue<FGraphData> WorkQueue(Graph);
		WorkQueue.Run(MakeYCombinator([this, &WorkQueue](auto Recurse, FGraphData* Current) -> void
		{
			FGraphData* Children[2];
			BisectGraph( Current, Children );
			delete Current;

			if( !Children[0] || !Children[1] )
			{
				return;
			}

			// Small first halves are handled inline. Only hand a half to another worker
			// when the remaining work is expected to be large enough.
			if (Children[0]->Num <= 256)
			{
				Recurse(Children[0]);
			}
			else
			{
				WorkQueue.AddTask(Children[0]);
				WorkQueue.AddWorkers(1);
			}

			Recurse(Children[1]);
		}));
	}

	// Trim the over-allocated tail.
	Ranges.SetNum( NumPartitions );

	if( bThreaded )
	{
		// Force a deterministic order
		Ranges.Sort();
	}

	PartitionIDs.Empty();
	SwappedWith.Empty();

	// Inverse permutation: position of each original element in the sorted order.
	for( uint32 SortedIndex = 0; SortedIndex < NumElements; SortedIndex++ )
	{
		SortedTo[ Indexes[ SortedIndex ] ] = SortedIndex;
	}
}