// Copyright Epic Games, Inc. All Rights Reserved. #include "GraphPartitioner.h" #include "Async/Async.h" #include "Async/LocalWorkQueue.h" #include "EngineLogs.h" #include "HAL/PlatformMemory.h" FGraphPartitioner::FGraphPartitioner( uint32 InNumElements, int32 InMinPartitionSize, int32 InMaxPartitionSize ) : NumElements( InNumElements ) , MinPartitionSize( InMinPartitionSize ) , MaxPartitionSize( InMaxPartitionSize ) { Indexes.AddUninitialized( NumElements ); for( uint32 i = 0; i < NumElements; i++ ) { Indexes[i] = i; } } FGraphPartitioner::FGraphData* FGraphPartitioner::NewGraph( uint32 NumAdjacency ) const { NumAdjacency += LocalityLinks.Num(); FGraphData* RESTRICT Graph = new FGraphPartitioner::FGraphData; Graph->Offset = 0; Graph->Num = NumElements; Graph->Adjacency.Reserve( NumAdjacency ); Graph->AdjacencyCost.Reserve( NumAdjacency ); Graph->AdjacencyOffset.AddUninitialized( NumElements + 1 ); return Graph; } void FGraphPartitioner::Partition( FGraphData* Graph ) { if( Graph->Num <= MaxPartitionSize ) { PartitionIDs.AddUninitialized( NumElements ); const int32 TargetPartitionSize = ( MinPartitionSize + MaxPartitionSize ) / 2; const int32 TargetNumPartitions = FMath::DivideAndRoundUp( Graph->Num, TargetPartitionSize ); idx_t NumConstraints = 1; idx_t NumParts = TargetNumPartitions; idx_t EdgesCut = 0; idx_t Options[ METIS_NOPTIONS ]; METIS_SetDefaultOptions( Options ); Options[ METIS_OPTION_UFACTOR ] = 200;//( 1000 * MaxPartitionSize * TargetNumPartitions ) / NumElements - 1000; //Options[ METIS_OPTION_NCUTS ] = 8; //Options[ METIS_OPTION_IPTYPE ] = METIS_IPTYPE_RANDOM; //Options[ METIS_OPTION_SEED ] = 17; //int r = METIS_PartGraphRecursive( int r = METIS_PartGraphKway( &Graph->Num, &NumConstraints, // number of balancing constraints Graph->AdjacencyOffset.GetData(), Graph->Adjacency.GetData(), NULL, // Vert weights NULL, // Vert sizes for computing the total communication volume Graph->AdjacencyCost.GetData(), // Edge weights &NumParts, NULL, // Target partition weight NULL, // Allowed load imbalance tolerance Options, &EdgesCut, PartitionIDs.GetData() ); if (r == METIS_ERROR_MEMORY) { UE_LOG(LogStaticMesh, Error, TEXT("Call to METIS_PartGraphKway() failed - error code: %d, Graph->Num: %d"), r, Graph->Num); // We can't get the precise allocation size, but Metis logs an error that contains the actual error. FPlatformMemory::OnOutOfMemory(0, 0); } if( ensure( r == METIS_OK ) ) { TArray< uint32 > ElementCount; ElementCount.AddZeroed( TargetNumPartitions ); for( uint32 i = 0; i < NumElements; i++ ) { ElementCount[ PartitionIDs[i] ]++; } uint32 Begin = 0; Ranges.AddUninitialized( TargetNumPartitions ); for( int32 PartitionIndex = 0; PartitionIndex < TargetNumPartitions; PartitionIndex++ ) { Ranges[ PartitionIndex ] = { Begin, Begin + ElementCount[ PartitionIndex ] }; Begin += ElementCount[ PartitionIndex ]; ElementCount[ PartitionIndex ] = 0; } TArray< uint32 > OldIndexes; Swap( Indexes, OldIndexes ); Indexes.AddUninitialized( NumElements ); for( uint32 i = 0; i < NumElements; i++ ) { uint32 PartitionIndex = PartitionIDs[i]; uint32 Offset = Ranges[ PartitionIndex ].Begin; uint32 Num = ElementCount[ PartitionIndex ]++; Indexes[ Offset + Num ] = OldIndexes[i]; } PartitionIDs.Empty(); } } else { // Single Ranges.Add( { 0, NumElements } ); } for( uint32 i = 0; i < NumElements; i++ ) { SortedTo[ Indexes[i] ] = i; } } void FGraphPartitioner::BisectGraph( FGraphData* Graph, FGraphData* ChildGraphs[2] ) { ChildGraphs[0] = nullptr; ChildGraphs[1] = nullptr; auto AddPartition = [ this ]( int32 Offset, int32 Num ) { FRange& Range = Ranges[ NumPartitions++ ]; Range.Begin = Offset; Range.End = Offset + Num; }; if( Graph->Num <= MaxPartitionSize ) { AddPartition( Graph->Offset, Graph->Num ); return; } const int32 TargetPartitionSize = ( MinPartitionSize + MaxPartitionSize ) / 2; const int32 TargetNumPartitions = FMath::Max( 2, FMath::DivideAndRoundNearest( Graph->Num, TargetPartitionSize ) ); check( Graph->AdjacencyOffset.Num() == Graph->Num + 1 ); idx_t NumConstraints = 1; idx_t NumParts = 2; idx_t EdgesCut = 0; real_t PartitionWeights[] = { float( TargetNumPartitions / 2 ) / TargetNumPartitions, 1.0f - float( TargetNumPartitions / 2 ) / TargetNumPartitions }; idx_t Options[ METIS_NOPTIONS ]; METIS_SetDefaultOptions( Options ); // Allow looser tolerance when at the higher levels. Strict balance isn't that important until it gets closer to partition sized. bool bLoose = TargetNumPartitions >= 128 || MaxPartitionSize / MinPartitionSize > 1; bool bSlow = Graph->Num < 4096; Options[ METIS_OPTION_UFACTOR ] = bLoose ? 200 : 1; //Options[ METIS_OPTION_NCUTS ] = Graph->Num < 1024 ? 8 : ( Graph->Num < 4096 ? 4 : 1 ); //Options[ METIS_OPTION_NCUTS ] = bSlow ? 4 : 1; //Options[ METIS_OPTION_NITER ] = bSlow ? 20 : 10; //Options[ METIS_OPTION_IPTYPE ] = METIS_IPTYPE_RANDOM; //Options[ METIS_OPTION_MINCONN ] = 1; int r = METIS_PartGraphRecursive( &Graph->Num, &NumConstraints, // number of balancing constraints Graph->AdjacencyOffset.GetData(), Graph->Adjacency.GetData(), NULL, // Vert weights NULL, // Vert sizes for computing the total communication volume Graph->AdjacencyCost.GetData(), // Edge weights &NumParts, PartitionWeights, // Target partition weight NULL, // Allowed load imbalance tolerance Options, &EdgesCut, PartitionIDs.GetData() + Graph->Offset ); if (r == METIS_ERROR_MEMORY) { UE_LOG(LogStaticMesh, Error, TEXT("Call to METIS_PartGraphRecursive() failed - error code: %d, Graph->Num: %d"), r, Graph->Num); // We can't get the precise allocation size, but Metis logs an error that contains the actual error. FPlatformMemory::OnOutOfMemory(0, 0); } checkf(r == METIS_OK, TEXT("Call to METIS_PartGraphRecursive() failed - error code: %d, Graph->Num: %d"), r, Graph->Num); { // In place divide the array // Both sides remain sorted but back is reversed. int32 Front = Graph->Offset; int32 Back = Graph->Offset + Graph->Num - 1; while( Front <= Back ) { while( Front <= Back && PartitionIDs[ Front ] == 0 ) { SwappedWith[ Front ] = Front; Front++; } while( Front <= Back && PartitionIDs[ Back ] == 1 ) { SwappedWith[ Back ] = Back; Back--; } if( Front < Back ) { Swap( Indexes[ Front ], Indexes[ Back ] ); SwappedWith[ Front ] = Back; SwappedWith[ Back ] = Front; Front++; Back--; } } int32 Split = Front; int32 Num[2]; Num[0] = Split - Graph->Offset; Num[1] = Graph->Offset + Graph->Num - Split; check( Num[0] > 0 ); check( Num[1] > 0 ); if( Num[0] <= MaxPartitionSize && Num[1] <= MaxPartitionSize ) { AddPartition( Graph->Offset, Num[0] ); AddPartition( Split, Num[1] ); } else { for( int32 i = 0; i < 2; i++ ) { ChildGraphs[i] = new FGraphData; ChildGraphs[i]->Adjacency.Reserve( Graph->Adjacency.Num() >> 1 ); ChildGraphs[i]->AdjacencyCost.Reserve( Graph->Adjacency.Num() >> 1 ); ChildGraphs[i]->AdjacencyOffset.Reserve( Num[i] + 1 ); ChildGraphs[i]->Num = Num[i]; } ChildGraphs[0]->Offset = Graph->Offset; ChildGraphs[1]->Offset = Split; for( int32 i = 0; i < Graph->Num; i++ ) { FGraphData* ChildGraph = ChildGraphs[ i >= ChildGraphs[0]->Num ]; ChildGraph->AdjacencyOffset.Add( ChildGraph->Adjacency.Num() ); int32 OrgIndex = SwappedWith[ Graph->Offset + i ] - Graph->Offset; for( idx_t AdjIndex = Graph->AdjacencyOffset[ OrgIndex ]; AdjIndex < Graph->AdjacencyOffset[ OrgIndex + 1 ]; AdjIndex++ ) { idx_t Adj = Graph->Adjacency[ AdjIndex ]; idx_t AdjCost = Graph->AdjacencyCost[ AdjIndex ]; // Remap to child Adj = SwappedWith[ Graph->Offset + Adj ] - ChildGraph->Offset; // Edge connects to node in this graph if( 0 <= Adj && Adj < ChildGraph->Num ) { ChildGraph->Adjacency.Add( Adj ); ChildGraph->AdjacencyCost.Add( AdjCost ); } } } ChildGraphs[0]->AdjacencyOffset.Add( ChildGraphs[0]->Adjacency.Num() ); ChildGraphs[1]->AdjacencyOffset.Add( ChildGraphs[1]->Adjacency.Num() ); } } } void FGraphPartitioner::RecursiveBisectGraph( FGraphData* Graph ) { FGraphData* ChildGraphs[2]; BisectGraph( Graph, ChildGraphs ); delete Graph; if( ChildGraphs[0] && ChildGraphs[1] ) { RecursiveBisectGraph( ChildGraphs[0] ); RecursiveBisectGraph( ChildGraphs[1] ); } } void FGraphPartitioner::PartitionStrict( FGraphData* Graph, bool bThreaded ) { PartitionIDs.AddUninitialized( NumElements ); SwappedWith.AddUninitialized( NumElements ); // Adding to atomically so size big enough to not need to grow. int32 NumPartitionsExpected = FMath::DivideAndRoundUp( Graph->Num, MinPartitionSize ); Ranges.AddUninitialized( NumPartitionsExpected * 2 ); NumPartitions = 0; if( bThreaded && NumPartitionsExpected > 4 ) { TLocalWorkQueue LocalWork(Graph); LocalWork.Run(MakeYCombinator([this, &LocalWork](auto Self, FGraphData* Graph) -> void { FGraphData* ChildGraphs[2]; BisectGraph( Graph, ChildGraphs ); delete Graph; if( ChildGraphs[0] && ChildGraphs[1] ) { // Only spawn add a worker thread if remaining work is expected to be large enough if (ChildGraphs[0]->Num > 256) { LocalWork.AddTask(ChildGraphs[0]); LocalWork.AddWorkers(1); } else { Self(ChildGraphs[0]); } Self(ChildGraphs[1]); } })); } else { RecursiveBisectGraph( Graph ); } Ranges.SetNum( NumPartitions ); if( bThreaded ) { // Force a deterministic order Ranges.Sort(); } PartitionIDs.Empty(); SwappedWith.Empty(); for( uint32 i = 0; i < NumElements; i++ ) { SortedTo[ Indexes[i] ] = i; } }