// Copyright Epic Games, Inc. All Rights Reserved.

#include "GraphLoadBalance.h"

#include "Algo/MinElement.h"
#include "Algo/Sort.h"
#include "Algo/Transform.h"
#include "GraphReachability.h"
#include "Math/UnrealMathUtility.h"
#include "Misc/ScopeExit.h"
#include "Packing.h"
#include "SortSpecialCases.h"

namespace Algo::Graph
{

/** Implements ConstructLoadBalance. */
class FLoadBalanceBuilder
{
public:
	FLoadBalanceBuilder(TConstArrayView<TConstArrayView<FVertex>> InGraph, int32 InNumBuckets,
		TConstArrayView<TConstArrayView<FVertex>> InTransposeGraph,
		TConstArrayView<TConstArrayView<FVertex>> InReachabilityGraph,
		TArray<TArray<FVertex>>& OutAssignments, TArray<TArray<FVertex>>& OutRootAssignments);

	void LoadBalance();

private:
	/**
	 * Roots and vertices in a cluster of vertices, which is a transitively-closed set of vertices that should be
	 * assigned to a bucket as a group.
	 *
	 * Initially every cluster is one of the disjoint subgraphs of the input graph, but we might need to split some
	 * of those clusters to balance the buckets.
	 *
	 * Also holds intermediate data used by PackClusters.
	 */
	struct FCluster
	{
		TArray<Algo::Graph::FVertex> Roots;
		TArray<Algo::Graph::FVertex> Vertices;
		int32 SplitGroup = INDEX_NONE;
	};

	/** An output bucket of vertices, represented as a collection of clusters. */
	struct FBucket
	{
		TArray<int32> ClusterIndices;
		int32 Size = 0;

		void CalculateSize(TConstArrayView<FCluster> Clusters);
	};

	/** Helper enum for SplitCluster. Types of estimations for the results of merging a root into a cluster. */
	enum class EEstimationType
	{
		Exact,
		TightUpperBound,
		LooseUpperBoundUniformIncrease,
	};

	/**
	 * Helper struct for SplitCluster. Records metrics about the results of a merge between a root's reachable
	 * vertices and a cluster's accumulated vertices. Calculating the merge is too expensive, and even updating
	 * estimates for the merge results for every root after every decision is too expensive, so this struct
	 * supports multiple levels of estimation.
	 */
	struct FRootAndClusterMergeData
	{
		/** The root being merged. Note the cluster being merged is implicit - the owner of this mergedata. */
		FVertex Root = Algo::Graph::InvalidVertex;
		/**
		 * Upper bound for how many vertices will reduce when merging this root into the cluster.
		 * The definition of a vertex reducing is that it exists in both the cluster and the root and will
		 * therefore not cause an increase in cluster size when the two are merged.
		 * This field is an exact value rather than an estimate if bExact is true. It can be modified upwards based
		 * on changes to the size of the cluster.
		 */
		int32 ReductionUpperBound = 0;
		/** Number of vertices in the root being merged. Copied here for spatial locality of the data when sorting. */
		int32 RootSize = 0;
		/** The size the cluster had the last time we estimated or modified ReductionUpperBound. */
		int32 ClusterSizeWhenEstimated = 0;
		/**
		 * The value of ReductionUpperBound the last time we calculated reduction exactly. Used in combination with
		 * the cluster's RootReductions list to recalculate the exact reduction without needing to compare vertices.
		 */
		int32 PreviousExactEstimate = 0;
		/** Whether ReductionUpperBound is known to be exact or is an upper-bound estimate. */
		bool bExact = false;

		/**
		 * Return whether A is a worse merge (e.g. lower reduction value) than B. We sort merges from worst to best
		 * so we can pop the best merge off the back of a TArray without any shifts.
		 */
		static bool IsWorse(FRootAndClusterMergeData& A, FRootAndClusterMergeData& B);
	};

	/** Helper struct for SplitCluster. Data about one of the clusters we are building up by merging root vertices. */
	struct FMergeCluster
	{
		/** Roots that have been merged into the cluster. */
		TArray<Algo::Graph::FVertex> Roots;
		/** NumVertices-length bitarray specifying which vertices are in the cluster. */
		TBitArray<> VertexInCluster;
		/**
		 * NumVertices-length integer array specifying for each root vertex how many vertices reachable from that
		 * root have been added onto VertexInCluster since the last time RootReductions were consumed. This allows
		 * us to recalculate the number of reductions that will occur when merging the root into the cluster.
		 */
		TArray<int32> RootReductions;
		/**
		 * An array of the mergedatas for merging every remaining root into the cluster.
		 * The array is sorted from worst to best estimated merge results. After each new root is committed we pop
		 * the root's mergedata off the end, and do some work to update the estimates for the remaining roots and
		 * restore the sortedness of the array.
		 */
		TArray<FRootAndClusterMergeData> MergeDatas;

		/** Number of true bits in VertexInCluster, aka the number of vertices in the cluster. */
		int32 VertexInClusterCount = 0;
	};

	/**
	 * Helper struct for SplitCluster. Metrics about the best merge for each cluster. We sort a list of these
	 * structs to find the best merge across all clusters.
	 */
	struct FClusterMergeSelectionData
	{
		/** The root being merged. */
		FVertex Root;
		/** The cluster being merged. */
		FMergeCluster* Cluster;
		/** The exact reduction value - @see FRootAndClusterMergeData for definition of reduction. */
		int32 Reduction;
		/** The post-merge size of the cluster. */
		int32 NewSize;
		/** The spread between the new max-sized cluster and the new min-sized cluster if this merge is selected. */
		int32 Spread;
		/** True iff the new spread is over the maximum allowed and the importance of the overage is not suppressed. */
		bool bOverSpread;

		/** Return whether A is a better merge to select than B. */
		static bool IsBetter(const FClusterMergeSelectionData& A, const FClusterMergeSelectionData& B);
	};

	/** Find the disjoint subgraphs of the input graph; each becomes an initial cluster. */
	void FindDisjointSubgraphs(TArray<FCluster>& OutDisjointSubgraphs);
	/** Find the roots of each subgraph: the minimal set of vertices from which every vertex is reachable. */
	void FindRoots(TArrayView<FCluster> InOutDisjointSubgraphs);
	/** Pack the clusters into NumBuckets buckets, splitting clusters where necessary to balance bucket sizes. */
	void PackClusters(TArray<FCluster>&& Clusters, TArray<FCluster>& OutBuckets);
	/** Merge the clusters listed in each bucket into a single merged cluster for the bucket. */
	void CreateBucketClusters(TConstArrayView<FBucket> Buckets, TConstArrayView<FCluster> Clusters,
		TArray<FCluster>& OutBuckets);
	/** Split the given cluster into SplitSize clusters, trying to keep their size spread within DesiredSpread. */
	TArray<FCluster> SplitCluster(FCluster&& InCluster, int32 SplitSize, int32 DesiredSpread);
	/** After growing the cluster, update estimates in the cluster's sorted list of MergeDatas and resort the list. */
	void UpdateClusterMergeDatas(FMergeCluster& Cluster, TBitArray<>& RootAssigned);
	/**
	 * Merges the vertices of the given cluster and root, optionally stores the merged results in cluster, and
	 * returns the number of vertices from root that reduced (were already present in cluster).
	 */
	int32 CalculateMergeResults(FMergeCluster& Cluster, FVertex Root, bool bWriteResultsToCluster);
	/**
	 * Update the estimate for the given mergedata in the given cluster, using the given estimation type, after
	 * growing the cluster. Report whether the update changed the ReductionUpperBound.
	 */
	void UpdateMergeDataEstimate(FMergeCluster& Cluster, FRootAndClusterMergeData& MergeData,
		EEstimationType EstimationType, bool& bOutEstimateWasModified);

	TConstArrayView<TConstArrayView<FVertex>> Graph;
	TConstArrayView<TConstArrayView<FVertex>> TransposeGraph;
	TConstArrayView<TConstArrayView<FVertex>> ReachabilityGraph;
	/**
	 * Only valid during SplitCluster. List for each vertex of the cluster's roots that have the vertex in their
	 * reachability graph.
	 */
	TArray<TConstArrayView<FVertex>> ReachableByRootGraph;
	TArray64<FVertex> ReachableByRootGraphBuffer;
	/**
	 * Only valid during SplitCluster. Due to memory constraints not all roots can be reported in
	 * ReachableByRootGraph. This NumVertices-length array records whether the given vertex is a root and is in
	 * that graph.
	 */
	TBitArray<> RootInReachableByRootSet;
	TBitArray<> VisitedScratch;
	TArray<FVertex> StackScratch;
	TArray<FRootAndClusterMergeData> UpdateMergeScratch;
	TArray<TArray<FVertex>>& Assignments;
	TArray<TArray<FVertex>>& RootAssignments;
	int32 NumBuckets;
	int32 NumVertices;
};

void ConstructLoadBalance(TConstArrayView<TConstArrayView<FVertex>> Graph, int32 NumBuckets,
	TArray<TArray<FVertex>>& OutAssignments)
{
	FLoadBalanceContext Context;
	Context.Graph = Graph;
	Context.NumBuckets = NumBuckets;
	Context.OutAssignments = &OutAssignments;
	ConstructLoadBalance(Context);
}
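
// Illustrative usage sketch (editorial addition, not part of the engine API): balancing a small
// six-vertex graph across two buckets with the convenience overload above. The function name and
// the adjacency data below are assumptions chosen for the example.
static void ConstructLoadBalance_ExampleUsage()
{
	// Two disjoint chains: 0 -> 1 -> 2 and 3 -> 4 -> 5.
	const FVertex EdgesFrom0[] = { 1 };
	const FVertex EdgesFrom1[] = { 2 };
	const FVertex EdgesFrom3[] = { 4 };
	const FVertex EdgesFrom4[] = { 5 };
	TArray<TConstArrayView<FVertex>> ExampleGraph;
	ExampleGraph.Add(EdgesFrom0);
	ExampleGraph.Add(EdgesFrom1);
	ExampleGraph.Add(TConstArrayView<FVertex>()); // 2 is a leaf
	ExampleGraph.Add(EdgesFrom3);
	ExampleGraph.Add(EdgesFrom4);
	ExampleGraph.Add(TConstArrayView<FVertex>()); // 5 is a leaf

	TArray<TArray<FVertex>> Assignments;
	ConstructLoadBalance(ExampleGraph, 2 /* NumBuckets */, Assignments);
	// The two disjoint subgraphs {0,1,2} and {3,4,5} are kept whole, so with two buckets each
	// bucket should receive one three-vertex subgraph.
}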

void ConstructLoadBalance(FLoadBalanceContext& Context)
{
	check(Context.OutAssignments != nullptr);
	check(Context.NumBuckets > 0);

	TArray64<FVertex> ReachabilityGraphEdgesBuffer;
	TArray<TConstArrayView<FVertex>> ReachabilityGraphBuffer;
	TConstArrayView<TConstArrayView<FVertex>> ReachabilityGraph = Context.ReachabilityGraph;
	TArray64<FVertex> TransposeGraphEdgesBuffer;
	TArray<TConstArrayView<FVertex>> TransposeGraphBuffer;
	TConstArrayView<TConstArrayView<FVertex>> TransposeGraph = Context.TransposeGraph;
	TArray<TArray<FVertex>> RootAssignmentsBuffer;
	TArray<TArray<FVertex>>* RootAssignments = Context.OutRootAssignments;
	int32 NumVertices = Context.Graph.Num();

	if (ReachabilityGraph.Num() != NumVertices)
	{
		ConstructReachabilityGraph(Context.Graph, ReachabilityGraphEdgesBuffer, ReachabilityGraphBuffer);
		ReachabilityGraph = ReachabilityGraphBuffer;
	}
	if (TransposeGraph.Num() != NumVertices)
	{
		ConstructTransposeGraph(Context.Graph, TransposeGraphEdgesBuffer, TransposeGraphBuffer);
		TransposeGraph = TransposeGraphBuffer;
	}
	if (RootAssignments == nullptr)
	{
		RootAssignments = &RootAssignmentsBuffer;
	}

	FLoadBalanceBuilder Builder(Context.Graph, Context.NumBuckets, TransposeGraph, ReachabilityGraph,
		*Context.OutAssignments, *RootAssignments);
	Builder.LoadBalance();
}
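
// Illustrative sketch (editorial addition, not part of the engine API): a caller that has already
// computed the transpose and reachability graphs can hand them to the context to avoid recomputing
// them; as the code above shows, a graph is only rebuilt when its Num() does not match Graph.Num().
// The function name is an assumption chosen for the example.
static void ConstructLoadBalance_ExampleWithPrecomputedGraphs(
	TConstArrayView<TConstArrayView<FVertex>> Graph,
	TConstArrayView<TConstArrayView<FVertex>> PrecomputedTransposeGraph,
	TConstArrayView<TConstArrayView<FVertex>> PrecomputedReachabilityGraph,
	int32 NumBuckets, TArray<TArray<FVertex>>& OutAssignments)
{
	FLoadBalanceContext Context;
	Context.Graph = Graph;
	Context.TransposeGraph = PrecomputedTransposeGraph;
	Context.ReachabilityGraph = PrecomputedReachabilityGraph;
	Context.NumBuckets = NumBuckets;
	Context.OutAssignments = &OutAssignments;
	// OutRootAssignments may be left null; a local buffer is used internally in that case.
	ConstructLoadBalance(Context);
}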

FLoadBalanceBuilder::FLoadBalanceBuilder(TConstArrayView<TConstArrayView<FVertex>> InGraph, int32 InNumBuckets,
	TConstArrayView<TConstArrayView<FVertex>> InTransposeGraph,
	TConstArrayView<TConstArrayView<FVertex>> InReachabilityGraph,
	TArray<TArray<FVertex>>& OutAssignments, TArray<TArray<FVertex>>& OutRootAssignments)
	: Graph(InGraph)
	, TransposeGraph(InTransposeGraph)
	, ReachabilityGraph(InReachabilityGraph)
	, Assignments(OutAssignments)
	, RootAssignments(OutRootAssignments)
	, NumBuckets(InNumBuckets)
	, NumVertices(InGraph.Num())
{
}

void FLoadBalanceBuilder::LoadBalance()
{
	TArray<FCluster> DisjointSubgraphs;
	FindDisjointSubgraphs(DisjointSubgraphs);

	FindRoots(DisjointSubgraphs);

	TArray<FCluster> Buckets;
	PackClusters(MoveTemp(DisjointSubgraphs), Buckets);

	Assignments.SetNum(NumBuckets, EAllowShrinking::No);
	RootAssignments.SetNum(NumBuckets, EAllowShrinking::No);
	for (int32 BucketIndex = 0; BucketIndex < NumBuckets; ++BucketIndex)
	{
		Assignments[BucketIndex] = MoveTemp(Buckets[BucketIndex].Vertices);
		RootAssignments[BucketIndex] = MoveTemp(Buckets[BucketIndex].Roots);
	}
}

void FLoadBalanceBuilder::FindDisjointSubgraphs(TArray<FCluster>& OutDisjointSubgraphs)
{
	TArray<FCluster>& Subgraphs = OutDisjointSubgraphs;
	Subgraphs.Reset();
	TBitArray<>& Visited = VisitedScratch;
	TArray<FVertex>& Stack = StackScratch;

	Visited.Init(false, NumVertices);
	Stack.Reset(NumVertices);

	// While there are unvisited vertices, graph-search from an arbitrary vertex over the union of the graph and
	// its transpose. All vertices found in that search are a maximal subgraph and one of our disjoint subgraphs.
	for (FVertex RootVertex = 0; RootVertex < NumVertices; ++RootVertex)
	{
		if (Visited[RootVertex])
		{
			continue;
		}

		check(Stack.IsEmpty());
		FCluster& Subgraph = Subgraphs.Emplace_GetRef();
		Visited[RootVertex] = true;
		Subgraph.Vertices.Add(RootVertex);
		Stack.Add(RootVertex);

		while (!Stack.IsEmpty())
		{
			FVertex Vertex = Stack.Pop(EAllowShrinking::No);
			for (TConstArrayView<FVertex> Edges : { Graph[Vertex], TransposeGraph[Vertex] })
			{
				for (FVertex Edge : Edges)
				{
					if (!Visited[Edge])
					{
						Visited[Edge] = true;
						Subgraph.Vertices.Add(Edge);
						Stack.Add(Edge);
					}
				}
			}
		}
	}

	// Sort the vertices in each Subgraph back into Root to Leaf order
	for (FCluster& Subgraph : Subgraphs)
	{
		Algo::Sort(Subgraph.Vertices);
	}
}
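
// Worked example (editorial addition, assumed data): for a graph with edges 0->1 and 2->3, the
// union of Graph and TransposeGraph connects {0,1} and {2,3} but nothing across the pair, so
// FindDisjointSubgraphs emits two subgraphs, each sorted ascending:
//	Subgraphs[0].Vertices == {0, 1}
//	Subgraphs[1].Vertices == {2, 3}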

void FLoadBalanceBuilder::FindRoots(TArrayView<FCluster> InOutDisjointSubgraphs)
{
	TBitArray<>& Visited = VisitedScratch;

	// Since the subgraphs are disjoint, we can use the same Visited set for all of them without clearing it
	// between subgraphs, and we can read reachability from the reachability graph for the entire graph.
	Visited.Init(false, NumVertices);
	int32 NumMarkedVertices = 0;

	for (FCluster& Subgraph : InOutDisjointSubgraphs)
	{
		// Roots include all the vertices of the subgraph that do not have any referencers, but they can also
		// include vertices in a cycle, so to find them we have to iteratively subtract reachable vertices from
		// the remaining vertices.
		int32 NumSubgraphVertices = Subgraph.Vertices.Num();
		Subgraph.Roots.Reset(NumSubgraphVertices);

		// The vertices are sorted in RootToLeaf order. Iterating from 0 to N-1 and ignoring any reachables from
		// previous roots will mean that each new vertex is a root.
		for (FVertex Root : Subgraph.Vertices)
		{
			if (Visited[Root])
			{
				continue;
			}
			Subgraph.Roots.Add(Root);
			for (FVertex Reachable : ReachabilityGraph[Root])
			{
				if (!Visited[Reachable])
				{
					Visited[Reachable] = true;
					++NumMarkedVertices;
				}
			}
		}
	}
	check(NumMarkedVertices == NumVertices); // The subgraphs should span the graph
}
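
// Worked example (editorial addition, assumed data): in a subgraph with edges 0->2 and 1->2,
// vertex 0 is visited first and becomes a root, marking its reachable set {0, 2}. Vertex 1 is
// still unmarked, so it becomes the second root and marks {1}; vertex 2 is already marked and is
// skipped, giving Roots == {0, 1}. For a two-vertex cycle 0->1, 1->0, vertex 0 alone becomes the
// root, because its reachable set {0, 1} covers the whole cycle.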

void FLoadBalanceBuilder::FBucket::CalculateSize(TConstArrayView<FCluster> Clusters)
{
	Size = 0;
	for (int32 ClusterIndex : ClusterIndices)
	{
		Size += Clusters[ClusterIndex].Vertices.Num();
	}
}

void FLoadBalanceBuilder::PackClusters(TArray<FCluster>&& InDisjointSubgraphs, TArray<FCluster>& OutBuckets)
{
	// Our clusters initially are the disjoint subgraphs, but we might split some of the subgraphs into
	// strongly-related but not disjoint Clusters. So for the rest of the algorithm we assume they are generalized
	// clusters and might overlap.
	TArray<FCluster>& Clusters = InDisjointSubgraphs;
	TArray<FBucket> Buckets; // Buckets that we populate by calling ScheduleValues
	Buckets.SetNum(NumBuckets);

	if (NumBuckets < 2)
	{
		Buckets[0].ClusterIndices = Algo::RangeArray<TArray<int32>>(0, Clusters.Num());
		Buckets[0].CalculateSize(Clusters);
		CreateBucketClusters(Buckets, Clusters, OutBuckets);
		return;
	}

	// MaxSpreadToNumVerticesRatio is a tuning variable used to specify how hard we want to look for an optimal
	// solution. When the spread is less than this fraction of NumVertices, we stop looking.
	constexpr double MaxSpreadToNumVerticesRatio = .1;
	int32 MaxSpread = static_cast<int32>(MaxSpreadToNumVerticesRatio * NumVertices);
	TArray<TArray<int32>> BucketsClusterIndices; // Output from Algo::ScheduleValues
	TArray<TArray<int32>> PackExclusionGroups; // Input to Algo::ScheduleValues
	TArray<int32> ClusterCosts; // Input to Algo::ScheduleValues
	Algo::Transform(Clusters, ClusterCosts, [](const FCluster& Cluster) { return Cluster.Vertices.Num(); });
	int32 SplitAttempts = 0;

	for (;;)
	{
		Algo::ScheduleValues(ClusterCosts, NumBuckets, PackExclusionGroups, BucketsClusterIndices);
		for (int32 BucketIndex = 0; BucketIndex < NumBuckets; ++BucketIndex)
		{
			FBucket& Bucket = Buckets[BucketIndex];
			Bucket.ClusterIndices = MoveTemp(BucketsClusterIndices[BucketIndex]);
			Bucket.CalculateSize(Clusters);
		}
		Algo::Sort(Buckets, [](const FBucket& A, const FBucket& B) { return A.Size > B.Size; });
		FBucket& BiggestBucket = Buckets[0];
		int32 Spread = BiggestBucket.Size - Buckets.Last().Size;
		if (Spread <= MaxSpread)
		{
			break;
		}
		if (SplitAttempts >= NumBuckets)
		{
			// Tried too many times; we should only need to split at most one cluster per bucket
			break;
		}

		int32 SplitSize = NumBuckets;
		// We can only split clusters apart at their roots. Splitting within a root does not help minimize the
		// results, because adding a non-root vertex to a bucket makes that bucket overlap the bucket containing
		// the vertex's root without adding any new vertices to the set spanned by the pair of buckets.
		// From the clusters assigned to the biggest bucket, pick the smallest one that has >= SplitSize roots.
		// If none have >= SplitSize roots, pick the smallest one with the largest number of roots and clamp the
		// SplitSize to number of roots.
		int32 IndexToSplit = *Algo::MinElement(BiggestBucket.ClusterIndices,
			[&Clusters, SplitSize](int32 A, int32 B)
			{
				FCluster& ClusterA = Clusters[A];
				FCluster& ClusterB = Clusters[B];
				if ((ClusterA.SplitGroup == INDEX_NONE) != (ClusterB.SplitGroup == INDEX_NONE))
				{
					// Clusters resulting from a split are not allowed to be split a second time, because their
					// fragments would reduce when merged into a single cluster in a bucket and the bucket size
					// would not match the size we expected when packing. Push all clusters with a SplitGroup to
					// the back of the sort.
					return ClusterA.SplitGroup == INDEX_NONE;
				}
				bool bAHasEnoughRoots = ClusterA.Roots.Num() >= SplitSize;
				if (bAHasEnoughRoots != (ClusterB.Roots.Num() >= SplitSize))
				{
					return bAHasEnoughRoots;
				}
				if (!bAHasEnoughRoots && ClusterA.Roots.Num() != ClusterB.Roots.Num())
				{
					return ClusterA.Roots.Num() < ClusterB.Roots.Num();
				}
				return ClusterA.Vertices.Num() < ClusterB.Vertices.Num();
			});
		FCluster& ClusterToSplit = Clusters[IndexToSplit];
		if (ClusterToSplit.SplitGroup != INDEX_NONE)
		{
			// All elements were invalid; there is nothing we can split in the biggest bucket, which means there
			// is nothing further we can split to reduce the spread
			break;
		}
		SplitSize = FMath::Min(SplitSize, ClusterToSplit.Roots.Num());
		if (SplitSize < 2)
		{
			// All elements in the biggest bucket had only a single root and are unsplittable; there is nothing
			// further we can split to reduce the spread
			break;
		}

		int32 NewBiggestBucketSize = BiggestBucket.Size - ClusterToSplit.Vertices.Num();
		int32 NewMaximum = FMath::Max(NewBiggestBucketSize, Buckets[1].Size);
		int32 NewMinimum = FMath::Min(NewBiggestBucketSize, Buckets.Last().Size);
		int32 DesiredSpread = NewMaximum - NewMinimum + MaxSpread;

		TArray<FCluster> NewClusters = SplitCluster(MoveTemp(ClusterToSplit), SplitSize, DesiredSpread);

		// Remove the SplitCluster from wherever it is in the middle of Clusters, and shift down by one all of our
		// indices in data that persists between loops that were pointing to clusters at a higher index.
		Clusters.RemoveAt(IndexToSplit);
		ClusterCosts.RemoveAt(IndexToSplit);
		for (TArray<int32>& ExclusionGroup : PackExclusionGroups)
		{
			for (int32& ClusterIndex : ExclusionGroup)
			{
				// We should not have tried to split any cluster in an exclusiongroup
				check(ClusterIndex != IndexToSplit);
				if (ClusterIndex > IndexToSplit)
				{
					--ClusterIndex;
				}
			}
		}

		// SplitClusters from the same original Cluster are not allowed to be assigned to the same
		// bucket, because they would merge and reduce the total size of that bucket and make our
		// packing not balanced, so we create an exclusiongroup for each split.
		TArray<int32>& ExclusionGroup = PackExclusionGroups.Emplace_GetRef();
		check(NewClusters.Num() == SplitSize);
		for (FCluster& Cluster : NewClusters)
		{
			ClusterCosts.Add(Cluster.Vertices.Num());
			int32 ClusterIndex = Clusters.Num();
			Cluster.SplitGroup = SplitAttempts;
			ExclusionGroup.Add(ClusterIndex);
			Clusters.Add(MoveTemp(Cluster));
		}
		++SplitAttempts;
	}

	CreateBucketClusters(Buckets, Clusters, OutBuckets);
}
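
// Worked example for the split step above (editorial addition, illustrative numbers): with
// MaxSpread == 10 and buckets sorted to sizes {100, 60, 50}, the spread is 50 and a split is
// attempted. If the chosen cluster in the biggest bucket has 40 vertices, then
// NewBiggestBucketSize == 60, NewMaximum == max(60, 60) == 60, NewMinimum == min(60, 50) == 50,
// and DesiredSpread == 60 - 50 + 10 == 20: the split pieces may differ in size by up to 20 before
// SplitCluster starts treating further growth of the largest piece as over-spread.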

void FLoadBalanceBuilder::CreateBucketClusters(TConstArrayView<FBucket> Buckets, TConstArrayView<FCluster> Clusters,
	TArray<FCluster>& OutBuckets)
{
	OutBuckets.SetNum(NumBuckets, EAllowShrinking::No);
	for (int32 BucketIndex = 0; BucketIndex < NumBuckets; ++BucketIndex)
	{
		const FBucket& Bucket = Buckets[BucketIndex];
		FCluster& OutBucket = OutBuckets[BucketIndex];
		int32 NumRoots = 0;
		for (int32 ClusterIndex : Bucket.ClusterIndices)
		{
			NumRoots += Clusters[ClusterIndex].Roots.Num();
		}
		OutBucket.Vertices.Reset(Bucket.Size);
		OutBucket.Roots.Reset(NumRoots);
		for (int32 ClusterIndex : Bucket.ClusterIndices)
		{
			const FCluster& Cluster = Clusters[ClusterIndex];
			OutBucket.Vertices.Append(Cluster.Vertices);
			OutBucket.Roots.Append(Cluster.Roots);
		}
		Algo::Sort(OutBucket.Vertices);
		OutBucket.Vertices.SetNum(Algo::Unique(OutBucket.Vertices), EAllowShrinking::No);
		Algo::Sort(OutBucket.Roots);
		OutBucket.Roots.SetNum(Algo::Unique(OutBucket.Roots), EAllowShrinking::No);
	}
}

/**
 * Find the maximum projected value of a range, along with its NumMinimums smallest projected values, in a single
 * pass. InOutMinimums must have its desired (non-zero) size on input, and on output holds the smallest values
 * sorted ascending.
 */
template <typename RangeType, typename MinimumsRangeType, typename ProjectedElementType, typename ProjectionType>
void GetMaxAndMins(RangeType&& Range, ProjectedElementType& OutMaximum, MinimumsRangeType&& InOutMinimums,
	ProjectionType Proj)
{
	int32 NumMinimums = GetNum(InOutMinimums);
	int32 NumRange = GetNum(Range);
	check(NumRange >= NumMinimums && NumMinimums > 0);
	OutMaximum = Proj(Range[0]);
	InOutMinimums[0] = OutMaximum;
	int32 Index;
	for (Index = 1; Index < NumMinimums; ++Index)
	{
		ProjectedElementType Element = Proj(Range[Index]);
		InOutMinimums[Index] = Element;
		OutMaximum = FMath::Max(OutMaximum, Element);
	}
	Algo::Sort(InOutMinimums);
	for (; Index < NumRange; ++Index)
	{
		ProjectedElementType Element = Proj(Range[Index]);
		if (Element < InOutMinimums.Last())
		{
			InOutMinimums.Last() = Element;
			Algo::RestoreSort(InOutMinimums, NumMinimums - 1, TLess<ProjectedElementType>());
		}
		OutMaximum = FMath::Max(OutMaximum, Element);
	}
}
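
// Illustrative usage of GetMaxAndMins (editorial addition, not part of the original file): one
// pass computes the maximum projected value and keeps the NumMinimums smallest values sorted
// ascending. The function name and values below are assumptions chosen for the example.
static void GetMaxAndMins_ExampleUsage()
{
	const int32 Values[] = { 5, 1, 9, 3, 7 };
	int32 Maximum = 0;
	int32 Minimums[2] = { 0, 0 };
	GetMaxAndMins(MakeArrayView(Values), Maximum, MakeArrayView(Minimums),
		[](int32 Value) { return Value; });
	check(Maximum == 9);
	check(Minimums[0] == 1 && Minimums[1] == 3);
}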

TArray<FLoadBalanceBuilder::FCluster> FLoadBalanceBuilder::SplitCluster(FCluster&& InCluster, int32 SplitSize,
	int32 DesiredSpread)
{
	check(SplitSize >= 2);

	// Create the ReachedByRoot graph edges for all vertices, so we know which other roots to update when the
	// vertices of a root are merged into one of the output clusters. There might be a large number of roots
	// and so the ReachedByRoot graph might be too large to fit in memory; restrict it in size to 1000*NumVertices.
	constexpr int32 ReachableByRootEdgesPerVertexLimit = 1000;
	ReachableByRootGraph.Reset();
	ReachableByRootGraphBuffer.Reset();
	TArray<FVertex> RootsInGraph;
	ConstructPartialTransposeGraph(ReachabilityGraph, InCluster.Roots,
		ReachableByRootEdgesPerVertexLimit * NumVertices, ReachableByRootGraphBuffer,
		ReachableByRootGraph, RootsInGraph);
	RootInReachableByRootSet.Init(false, NumVertices);
	for (FVertex Root : RootsInGraph)
	{
		RootInReachableByRootSet[Root] = true;
	}
	ON_SCOPE_EXIT
	{
		ReachableByRootGraph.Empty();
		ReachableByRootGraphBuffer.Empty();
		RootInReachableByRootSet.Empty();
		UpdateMergeScratch.Empty();
	};

	// Create RootAssigned arraymap to specify whether each root is assigned
	TBitArray<> RootAssigned;
	RootAssigned.SetNumUninitialized(NumVertices);
	RootAssigned.SetRange(0, NumVertices, false);
	int32 NumRemainingRoots = InCluster.Roots.Num();

	// Create one MergeCluster per SplitSize
	int32 NumClusters = SplitSize;
	TArray<FMergeCluster> Clusters;
	Clusters.Reserve(NumClusters);
	for (int32 ClusterIndex = 0; ClusterIndex < NumClusters; ++ClusterIndex)
	{
		FMergeCluster& Cluster = Clusters.Emplace_GetRef();
		Cluster.VertexInCluster.Init(false, NumVertices);
		Cluster.RootReductions.SetNumZeroed(NumVertices);
	}

	// Assign the biggest root to output cluster 0. It's better for the algorithm to know where it's headed so it
	// can prefer to merge in roots that reduce well with that biggest root. We cannot seed the other buckets,
	// however, because we don't know which other roots will end up NOT being assigned to the bucket with the
	// biggest root.
	{
		FVertex BestRoot = *Algo::MinElement(InCluster.Roots, [this](FVertex A, FVertex B)
			{ return ReachabilityGraph[A].Num() > ReachabilityGraph[B].Num(); });

		check(!RootAssigned[BestRoot]);
		RootAssigned[BestRoot] = true;
		--NumRemainingRoots;
		Clusters[0].Roots.Add(BestRoot);

		CalculateMergeResults(Clusters[0], BestRoot, true /* bWriteResultsToCluster */);
	}

	// Initialize the MergeDatas in each cluster; for better performance do this after assigning the initial seed.
	for (FMergeCluster& Cluster : Clusters)
	{
		TArray<FRootAndClusterMergeData>& MergeDatas = Cluster.MergeDatas;
		MergeDatas.Reserve(NumRemainingRoots);
		for (FVertex Root : InCluster.Roots)
		{
			if (!RootAssigned[Root])
			{
				FRootAndClusterMergeData& MergeData = MergeDatas.Emplace_GetRef();
				MergeData.Root = Root;
				MergeData.RootSize = ReachabilityGraph[Root].Num();
				bool bModified;
				UpdateMergeDataEstimate(Cluster, MergeData, EEstimationType::TightUpperBound, bModified);
			}
		}
		Algo::Sort(MergeDatas, FRootAndClusterMergeData::IsWorse);
	}

	// The main loop: on each iteration of the loop find the best root to move into the best cluster, where best
	// is defined by maximizing reduction and minimizing spread. Assign that best root to the best cluster.
	int32 BalancedAmountPerCluster = InCluster.Vertices.Num() / NumClusters;
	TArray<FClusterMergeSelectionData> SelectionDatas;
	while (NumRemainingRoots > 0)
	{
		for (FMergeCluster& Cluster : Clusters)
		{
			// We need to update every Cluster's MergeDatas on every loop, rather than just the one that won the
			// last loop, in case the best root in a non-winning cluster was the same as the winner's.
			UpdateClusterMergeDatas(Cluster, RootAssigned);
		}
		int32 MaxClusterSize;
		int32 MinClusterSizes[2];
		GetMaxAndMins(Clusters, MaxClusterSize, TArrayView<int32>(MinClusterSizes),
			[](const FMergeCluster& Cluster) { return Cluster.VertexInClusterCount; });
		SelectionDatas.Reset();

		for (FMergeCluster& Cluster : Clusters)
		{
			FRootAndClusterMergeData& MergeData = Cluster.MergeDatas.Last();
			FClusterMergeSelectionData& SelectionData = SelectionDatas.Emplace_GetRef();
			SelectionData.Root = MergeData.Root;
			SelectionData.Cluster = &Cluster;

			int32 OldSize = Cluster.VertexInClusterCount;
			check(MergeData.bExact && MergeData.ClusterSizeWhenEstimated == OldSize);
			SelectionData.Reduction = MergeData.ReductionUpperBound;
			SelectionData.NewSize = OldSize + MergeData.RootSize - SelectionData.Reduction;

			int32 NewMaximum = FMath::Max(MaxClusterSize, SelectionData.NewSize);
			int32 NewMinimum;
			if (OldSize == MinClusterSizes[0])
			{
				NewMinimum = FMath::Min(SelectionData.NewSize, MinClusterSizes[1]);
			}
			else
			{
				NewMinimum = MinClusterSizes[0];
			}
			SelectionData.Spread = NewMaximum - NewMinimum;
			bool bCausedSpread = NewMaximum > MaxClusterSize;
			bool bSpreadIsAProblem = NewMaximum > BalancedAmountPerCluster;
			bool bSpreadIsOver = SelectionData.Spread > DesiredSpread;
			SelectionData.bOverSpread = bCausedSpread && bSpreadIsAProblem && bSpreadIsOver;
		}

		FClusterMergeSelectionData* BestData = Algo::MinElement(SelectionDatas, FClusterMergeSelectionData::IsBetter);
		FMergeCluster& BestCluster = *BestData->Cluster;
		int32 Reduction = CalculateMergeResults(BestCluster, BestData->Root, true /* bWriteResultsToCluster */);
		check(Reduction == BestData->Reduction);
		BestCluster.Roots.Add(BestData->Root);
		check(!RootAssigned[BestData->Root]);
		RootAssigned[BestData->Root] = true;
		--NumRemainingRoots;
	}

	TArray<FCluster> OutClusters;
	OutClusters.Reserve(Clusters.Num());
	for (FMergeCluster& Cluster : Clusters)
	{
		FCluster& OutCluster = OutClusters.Emplace_GetRef();
		OutCluster.Vertices.Empty(Cluster.VertexInClusterCount);
		for (FVertex Vertex = 0; Vertex < NumVertices; ++Vertex)
		{
			if (Cluster.VertexInCluster[Vertex])
			{
				OutCluster.Vertices.Add(Vertex);
			}
		}
		OutCluster.Roots = MoveTemp(Cluster.Roots);
	}
	return OutClusters;
}

void FLoadBalanceBuilder::UpdateClusterMergeDatas(FMergeCluster& Cluster, TBitArray<>& RootAssigned)
{
	TArray<FRootAndClusterMergeData>& MergeDatas = Cluster.MergeDatas;
	check(!MergeDatas.IsEmpty());

	int32 OriginalMergeDataNum = MergeDatas.Num();
	int32 RemovedMergedDataNum = 0;

	// Pop mergedatas for completed roots off the back of the list as we encounter them.
	// Set BestMergeData to the first mergedata found for a remaining root.
	FRootAndClusterMergeData BestMergeData = MergeDatas.Pop(EAllowShrinking::No);
	while (RootAssigned[BestMergeData.Root])
	{
		++RemovedMergedDataNum;
		check(!MergeDatas.IsEmpty()); // We only update when there are remaining roots, so it should never be empty
		BestMergeData = MergeDatas.Pop(EAllowShrinking::No);
	}

	// Update the mergedata to get the exact value; usually this will be less than the upper bound, and we need to
	// compare exact values when choosing the best. The mergedata at the back of the list is the estimated best
	// and is likely to be the actual best, so it's a good first mergedata to make exact.
	bool bModified;
	UpdateMergeDataEstimate(Cluster, BestMergeData, EEstimationType::Exact, bModified);

	// Collect all the mergedatas from the back of the list until we reach a point in the list where the
	// (uniform-upperbound) estimates for all remaining mergedatas are worse than the (tighter-bound) estimate for
	// the worst merge we've found so far.
	UpdateMergeScratch.Reset(MergeDatas.Num());
	TOptional<FRootAndClusterMergeData> WorstMergeData;
	for (;;)
	{
		// Continue to pop off mergedatas for completed roots as we find them.
		while (!MergeDatas.IsEmpty() && RootAssigned[MergeDatas.Last().Root])
		{
			++RemovedMergedDataNum;
			MergeDatas.Pop(EAllowShrinking::No);
		}
		if (MergeDatas.IsEmpty())
		{
			// This happens e.g. when only one root remains
			break;
		}

		// Compare the new end of the list. Update its estimate using LooseUpperBoundUniformIncrease.
		// (1) This is the most conservative estimate, so if it's worse than any of the estimates or exact values
		// we've found so far, we know its tighter estimate will also be worse.
		// (2) This is a uniform estimate: it raises all up-to-date estimate types of all roots by the same amount.
		// So since the roots from this point and earlier in the list were previously sorted by their estimate,
		// the sort order will be unchanged even if we bring them all up-to-date with LooseUpperBoundUniformIncrease
		// estimation, and further, their non-up-to-date value will be <= their up-to-date value.
		//
		// These two conditions are sufficient to guarantee that we don't have to inspect mergedatas earlier in
		// the list to correctly find the best merge or to resort the list once the end of the list has a worse
		// estimate than our current worst.
		{
			FRootAndClusterMergeData& CompareMergeData = MergeDatas.Last();
			FRootAndClusterMergeData* WorstMergeDataPtr = WorstMergeData.IsSet() ?
				&WorstMergeData.GetValue() : &BestMergeData;
			UpdateMergeDataEstimate(Cluster, CompareMergeData, EEstimationType::LooseUpperBoundUniformIncrease,
				bModified);
			if (!FRootAndClusterMergeData::IsWorse(*WorstMergeDataPtr, CompareMergeData))
			{
				break;
			}
		}

		// NewMergeData's most-conservative estimate is as good or better than the worst estimate we've found
		// so far. Sort it into the (partially sorted) list of mergedatas we're collecting: either it's the best,
		// or the worst, or it's in the otherwise-unsorted middle.
		// Sorting the middle at the end is faster than sorting it as we go because it avoids shifts.
		// NewMergeData doesn't get to count itself as the best unless its estimate has been made exact, but don't
		// spend time making the estimate exact unless we have to.
		FRootAndClusterMergeData NewMergeData = MergeDatas.Pop(EAllowShrinking::No);

		bool bIsNewBest = false;
		bool bIsNewWorst = false;
		// If WorstMergeData is not set then we already compared NewMergeData to BestMergeData above and do not
		// need to redo the comparison
		if (!WorstMergeData.IsSet() || FRootAndClusterMergeData::IsWorse(BestMergeData, NewMergeData))
		{
			// Change to the tighter estimate and recompare
			UpdateMergeDataEstimate(Cluster, NewMergeData, EEstimationType::TightUpperBound, bModified);
			// If the NewMergeData's value was not modified when we changed to a tighter estimate, then we
			// know it is still better than the BestMergeData and we do not need to compare again
			if (!bModified || FRootAndClusterMergeData::IsWorse(BestMergeData, NewMergeData))
			{
				// Change to the tightest estimate (the exact value) and recompare
				UpdateMergeDataEstimate(Cluster, NewMergeData, EEstimationType::Exact, bModified);
				bIsNewBest = !bModified || FRootAndClusterMergeData::IsWorse(BestMergeData, NewMergeData);
			}
			// NewMergeData was previously better than the worst mergedata, but we changed it to a TightUpperBound
			// or Exact value so it might be worse now. It can't be worse than the worst if it's better than the
			// best.
			bIsNewWorst = !bIsNewBest &&
				(!WorstMergeData.IsSet() || FRootAndClusterMergeData::IsWorse(NewMergeData, *WorstMergeData));
		}
		if (bIsNewBest)
		{
			if (!WorstMergeData.IsSet())
			{
				WorstMergeData.Emplace(MoveTemp(BestMergeData));
			}
			else
			{
				UpdateMergeScratch.Add(MoveTemp(BestMergeData));
			}
			BestMergeData = MoveTemp(NewMergeData);
		}
		else if (bIsNewWorst)
		{
			if (WorstMergeData.IsSet())
			{
				UpdateMergeScratch.Add(MoveTemp(*WorstMergeData));
			}
			WorstMergeData.Emplace(MoveTemp(NewMergeData));
		}
		else
		{
			UpdateMergeScratch.Add(MoveTemp(NewMergeData));
		}
	}

	// Push all the mergedatas we pulled off the list back onto the list, sorted from worst to best
	if (WorstMergeData.IsSet())
	{
		MergeDatas.Add(*WorstMergeData);
	}
	if (!UpdateMergeScratch.IsEmpty())
	{
		Algo::Sort(UpdateMergeScratch, FRootAndClusterMergeData::IsWorse);
		for (FRootAndClusterMergeData& Updated : UpdateMergeScratch)
		{
			MergeDatas.Add(MoveTemp(Updated));
		}
		UpdateMergeScratch.Reset();
	}
	MergeDatas.Add(MoveTemp(BestMergeData));
	check(MergeDatas.Num() == OriginalMergeDataNum - RemovedMergedDataNum);
}

int32 FLoadBalanceBuilder::CalculateMergeResults(FMergeCluster& Cluster, FVertex Root, bool bWriteResultsToCluster)
{
	int32 Reduction = 0;
	if (bWriteResultsToCluster)
	{
		TArray<int32>& RootReductions = Cluster.RootReductions;
		for (FVertex Reachable : ReachabilityGraph[Root])
		{
			if (!Cluster.VertexInCluster[Reachable])
			{
				Cluster.VertexInCluster[Reachable] = true;
				// When we add a vertex to the cluster, we need to inform every remaining root in the cluster that
				// has that vertex in its reachability set that the number of reductions it will have when merged
				// into the cluster has increased by one.
				for (FVertex ReachedByRoot : ReachableByRootGraph[Reachable])
				{
					++RootReductions[ReachedByRoot];
				}
			}
			else
			{
				++Reduction;
			}
		}
		Cluster.VertexInClusterCount += ReachabilityGraph[Root].Num() - Reduction;
	}
	else
	{
		for (FVertex Reachable : ReachabilityGraph[Root])
		{
			Reduction += Cluster.VertexInCluster[Reachable] ? 1 : 0;
		}
	}
	return Reduction;
}
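
// Worked example (editorial addition, assumed data): if the cluster already contains {0, 1, 2}
// and ReachabilityGraph[Root] == {1, 2, 3}, the merge reduces two vertices (1 and 2 are already
// present) and returns 2. With bWriteResultsToCluster, vertex 3 is added, VertexInClusterCount
// grows by 3 - 2 == 1, and RootReductions is incremented for every remaining root that can reach
// vertex 3, which keeps later exact estimate updates cheap.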

void FLoadBalanceBuilder::UpdateMergeDataEstimate(FMergeCluster& Cluster, FRootAndClusterMergeData& MergeData,
	EEstimationType EstimationType, bool& bOutEstimateWasModified)
{
	int32 ClusterSize = Cluster.VertexInClusterCount;
	MergeData.bExact = MergeData.bExact && (ClusterSize == MergeData.ClusterSizeWhenEstimated);
	if (MergeData.bExact)
	{
		bOutEstimateWasModified = false;
		return;
	}

	int32 PreviousReductionUpperBound = MergeData.ReductionUpperBound;
	if (EstimationType == EEstimationType::Exact ||
		(EstimationType != EEstimationType::LooseUpperBoundUniformIncrease &&
			RootInReachableByRootSet[MergeData.Root]))
	{
		if (RootInReachableByRootSet[MergeData.Root])
		{
			// We have data for every vertex about whether it is reachable by the root, and we use that data
			// every time we merge a vertex into a cluster, to increment the reduction count of every root
			// that includes the vertex.
			// This allows this function to do a cheap update of the exact value of the root's reduction:
			// we just consume the recorded delta reduction value of this root and add it to the reduction it
			// had the last time we updated it.
			TArray<int32>& RootReductions = Cluster.RootReductions;
			MergeData.ReductionUpperBound = MergeData.PreviousExactEstimate + RootReductions[MergeData.Root];
			RootReductions[MergeData.Root] = 0;
		}
		else
		{
			// We don't have the reachedby data for this root, so we have to do the slow calculation of the
			// merge results
			MergeData.ReductionUpperBound = CalculateMergeResults(Cluster, MergeData.Root,
				false /* bWriteResultsToCluster */);
		}
		MergeData.PreviousExactEstimate = MergeData.ReductionUpperBound;
		MergeData.bExact = true;
	}
	else
	{
		int32 LooseEstimate = MergeData.ReductionUpperBound + ClusterSize - MergeData.ClusterSizeWhenEstimated;
		if (EstimationType == EEstimationType::TightUpperBound)
		{
			// TightUpperBound assumes all new vertices in the cluster will reduce with this one,
			// but notes that the number of reductions is <= size of this root
			MergeData.ReductionUpperBound = FMath::Min(LooseEstimate, MergeData.RootSize);
		}
		else
		{
			check(EstimationType == EEstimationType::LooseUpperBoundUniformIncrease);
			// LooseUpperBoundUniformIncrease is the same as TightUpperBound, but without the clamping.
			// It has two properties useful for our sorted array:
			// 1) UpperBoundUniformIncrease(CurrentCluster) >= UpperBoundUniformIncrease(PreviousSmallerCluster)
			//    for all MergeDatas and all growth from PreviousSmallerCluster to CurrentCluster
			// 2) UpperBoundUniformIncrease(CurrentCluster, MergeDataA)
			//      - UpperBoundUniformIncrease(PreviousSmallerCluster, MergeDataA) ==
			//    UpperBoundUniformIncrease(CurrentCluster, MergeDataB)
			//      - UpperBoundUniformIncrease(PreviousSmallerCluster, MergeDataB)
			//    for all MergeDataA, MergeDataB
			// These two properties allow us to know we do not need to investigate MergeDatas with an
			// UpperBoundUniformIncrease estimate worse than our current worst estimate.
			MergeData.ReductionUpperBound = LooseEstimate;
		}
	}
	MergeData.ClusterSizeWhenEstimated = ClusterSize;
	bOutEstimateWasModified = MergeData.ReductionUpperBound != PreviousReductionUpperBound;
}
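
// Worked example (editorial addition, illustrative numbers): suppose a mergedata was last updated
// when the cluster had ClusterSizeWhenEstimated == 10 and ReductionUpperBound == 3, and the
// cluster has since grown to 14 vertices.
// - LooseUpperBoundUniformIncrease raises the bound by the growth: 3 + (14 - 10) == 7.
// - TightUpperBound clamps that to the root's size: with RootSize == 5, min(7, 5) == 5.
// - Exact consumes the pending RootReductions[Root] delta (say 2) and yields
//   PreviousExactEstimate + 2.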

bool FLoadBalanceBuilder::FRootAndClusterMergeData::IsWorse(FRootAndClusterMergeData& A, FRootAndClusterMergeData& B)
{
	if (A.ReductionUpperBound != B.ReductionUpperBound)
	{
		// First maximize reduction, aka maximize sharing
		return A.ReductionUpperBound < B.ReductionUpperBound;
	}
	if (A.RootSize != B.RootSize)
	{
		// If the reduction is the same, prefer to merge the smaller root; if we can reduce
		// a smaller root by the same amount as a larger root, we should take the smaller root because it
		// adds fewer unreduced vertices
		return A.RootSize > B.RootSize;
	}
	// Prefer earlier root values if all else is equal.
	// TODO: This improves the splitting; I'm not sure why.
	return A.Root > B.Root;
}

bool FLoadBalanceBuilder::FClusterMergeSelectionData::IsBetter(const FClusterMergeSelectionData& A,
	const FClusterMergeSelectionData& B)
{
	if (A.bOverSpread != B.bOverSpread)
	{
		// If a merge goes over the spread, don't use it unless all merges go over the spread
		return !A.bOverSpread;
	}
	if (A.bOverSpread && A.Spread != B.Spread)
	{
		// If all merges go over the spread, pick the one that minimizes the resultant spread
		return A.Spread < B.Spread;
	}
	if (A.Reduction != B.Reduction)
	{
		// When spread is not a factor, we want to maximize reduction, aka maximize sharing
		return A.Reduction > B.Reduction;
	}
	// If the reduction is the same, prefer the merge with the smaller final size; if we can reduce
	// a smaller root by the same amount as a larger root, we should take the smaller root because it
	// adds fewer unreduced vertices
	return A.NewSize < B.NewSize;
}

} // namespace Algo::Graph