// Copyright Epic Games, Inc. All Rights Reserved.
#include "MassProcessorDependencySolver.h"
#include "MassProcessingTypes.h"
#include "MassProcessor.h"
#include "MassTypeManager.h"
#include "Logging/MessageLog.h"
#include "HAL/FileManager.h"
#include "Engine/World.h"
#if WITH_MASSENTITY_DEBUG
#include "Algo/Count.h"
#endif // WITH_MASSENTITY_DEBUG
#define LOCTEXT_NAMESPACE "Mass"
DEFINE_LOG_CATEGORY_STATIC(LogMassDependencies, Warning, All);
namespace UE::Mass::Private
{
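// Example: a View containing ["Movement", "Avoidance"] produces "[Movement, Avoidance]"; an empty View produces "[]".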
FString NameViewToString(TConstArrayView<FName> View)
{
if (View.Num() == 0)
{
return TEXT("[]");
}
FString ReturnVal = FString::Printf(TEXT("[%s"), *View[0].ToString());
for (int i = 1; i < View.Num(); ++i)
{
ReturnVal += FString::Printf(TEXT(", %s"), *View[i].ToString());
}
return ReturnVal + TEXT("]");
}
bool DoArchetypeContainersOverlap(TConstArrayView<FMassArchetypeHandle> A, const TArray<FMassArchetypeHandle>& B)
{
for (const FMassArchetypeHandle& HandleA : A)
{
if (B.Contains(HandleA))
{
return true;
}
}
return false;
}
#if WITH_MASSENTITY_DEBUG
void LogCycle(const TArray<FMassProcessorDependencySolver::FNode>& AllNodes, TConstArrayView<int32> CycleIndices, TArray<uint32>& InOutReportedCycleHashes)
{
check(CycleIndices.Num());
// We extract the indices that form the cycle below.
// Note that we want to preserve the order since it provides more meaningful debugging context,
// but we do find the "lowest node index" so that we can generate a deterministic
// hash representing the cycle, regardless of which node was being processed when the cycle was found.
// We use this information to avoid reporting the same cycle multiple times.
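// Worked example: for CycleIndices == [4, 2, 9, 4, 1] the first repeated node is 4, so the cycle proper is
// CycleView == [4, 2, 9]. Hashing starts at the lowest node index (2), so an equivalent traversal recorded
// as [9, 4, 2] hashes the same sequence (2, 9, 4) and produces an identical CycleHash.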
// Finding the cycle start (the first node that has been encountered more than once)
int32 CycleStartElementIndex = INDEX_NONE;
for (const int32 CycleNodeIndex : CycleIndices)
{
if (Algo::Count(CycleIndices, CycleNodeIndex) > 1)
{
CycleStartElementIndex = CycleIndices.Find(CycleNodeIndex);
break;
}
}
// Finding the cycle length by finding the other occurrence of the cycle-starting node
const int32 CycleLength = MakeArrayView(&CycleIndices[CycleStartElementIndex + 1], CycleIndices.Num() - CycleStartElementIndex - 1).Find(CycleIndices[CycleStartElementIndex]) + 1;
check(CycleLength > 0);
TConstArrayView<int32> CycleView = MakeArrayView(&CycleIndices[CycleStartElementIndex], CycleLength);
// Find the deterministic cycle start, only used for hash generation.
const int32* MinElementIndex = Algo::MinElement(CycleView);
check(MinElementIndex);
const int32 LowestCycleElementIndex = CycleView.Find(*MinElementIndex);
// Calculate cycle's hash
int32 ElementIndex = LowestCycleElementIndex;
uint32 CycleHash = static_cast<uint32>(CycleView[ElementIndex++]);
for (int32 CycleCounter = 1; CycleCounter < CycleView.Num(); ++CycleCounter)
{
ElementIndex %= CycleView.Num();
CycleHash = HashCombine(CycleHash, CycleView[ElementIndex++]);
}
if (InOutReportedCycleHashes.Find(CycleHash) == INDEX_NONE)
{
InOutReportedCycleHashes.Add(CycleHash);
UE_LOG(LogMassDependencies, Error, TEXT("Detected processing dependency cycle:"));
for (const int32 CycleNodeIndex : CycleView)
{
if (const UMassProcessor* Processor = AllNodes[CycleNodeIndex].Processor)
{
UE_LOG(LogMassDependencies, Warning, TEXT("\t%s, group: %s, before: %s, after %s")
, *Processor->GetName()
, *Processor->GetExecutionOrder().ExecuteInGroup.ToString()
, *NameViewToString(Processor->GetExecutionOrder().ExecuteBefore)
, *NameViewToString(Processor->GetExecutionOrder().ExecuteAfter));
}
else
{
// group
UE_LOG(LogMassDependencies, Warning, TEXT("\tGroup %s"), *AllNodes[CycleNodeIndex].Name.ToString());
}
}
}
}
#endif // WITH_MASSENTITY_DEBUG
bool bProcessorExecutionPriorityEnabled = true;
bool bPickHigherPriorityNodesRegardlessOfRequirements = true;
namespace
{
FAutoConsoleVariableRef ConsoleVariables[] =
{
FAutoConsoleVariableRef(
TEXT("mass.dependencies.ProcessorExecutionPriorityEnabled"),
bProcessorExecutionPriorityEnabled,
TEXT("Controls whether UMassProcessor.ExecutionPriority value is being used during dependency calculations"),
ECVF_Default)
, FAutoConsoleVariableRef(
TEXT("mass.dependencies.PickHigherPriorityNodesRegardlessOfRequirements"),
bPickHigherPriorityNodesRegardlessOfRequirements,
TEXT("If enabled, will result in lower priority nodes not being picked, even if they could run without obstructing anything else"),
ECVF_Default)
};
}
}
//----------------------------------------------------------------------//
// FMassExecutionRequirements
//----------------------------------------------------------------------//
void FMassExecutionRequirements::Append(const FMassExecutionRequirements& Other)
{
for (int i = 0; i < EMassAccessOperation::MAX; ++i)
{
Fragments[i] += Other.Fragments[i];
ChunkFragments[i] += Other.ChunkFragments[i];
SharedFragments[i] += Other.SharedFragments[i];
RequiredSubsystems[i] += Other.RequiredSubsystems[i];
}
ConstSharedFragments.Read += Other.ConstSharedFragments.Read;
RequiredAllTags += Other.RequiredAllTags;
RequiredAnyTags += Other.RequiredAnyTags;
RequiredNoneTags += Other.RequiredNoneTags;
// note that we're deliberately ignoring optional tags, they play no role here.
// signal that it requires recalculation
ResourcesUsedCount = INDEX_NONE;
}
void FMassExecutionRequirements::CountResourcesUsed()
{
ResourcesUsedCount = ConstSharedFragments.Read.CountStoredTypes();
for (int i = 0; i < EMassAccessOperation::MAX; ++i)
{
ResourcesUsedCount += Fragments[i].CountStoredTypes();
ResourcesUsedCount += ChunkFragments[i].CountStoredTypes();
ResourcesUsedCount += SharedFragments[i].CountStoredTypes();
ResourcesUsedCount += RequiredSubsystems[i].CountStoredTypes();
}
}
int32 FMassExecutionRequirements::GetTotalBitsUsedCount()
{
CountResourcesUsed();
return ResourcesUsedCount + RequiredAllTags.CountStoredTypes()
+ RequiredAnyTags.CountStoredTypes() + RequiredNoneTags.CountStoredTypes();
}
bool FMassExecutionRequirements::IsEmpty() const
{
return Fragments.IsEmpty() && ChunkFragments.IsEmpty()
&& SharedFragments.IsEmpty() && ConstSharedFragments.IsEmpty() && RequiredSubsystems.IsEmpty()
&& RequiredAllTags.IsEmpty() && RequiredAnyTags.IsEmpty() && RequiredNoneTags.IsEmpty();
}
FMassArchetypeCompositionDescriptor FMassExecutionRequirements::AsCompositionDescriptor() const
{
return FMassArchetypeCompositionDescriptor(Fragments.Read + Fragments.Write
, RequiredAllTags + RequiredAnyTags
, ChunkFragments.Read + ChunkFragments.Write
, SharedFragments.Read + SharedFragments.Write
, ConstSharedFragments.Read);
}
//----------------------------------------------------------------------//
// FProcessorDependencySolver::FResourceUsage
//----------------------------------------------------------------------//
FMassProcessorDependencySolver::FResourceUsage::FResourceUsage(const TArray<FNode>& InAllNodes)
: AllNodesView(InAllNodes)
{
for (int i = 0; i < EMassAccessOperation::MAX; ++i)
{
FragmentsAccess[i].Access.AddZeroed(FMassFragmentBitSet::GetMaxNum());
ChunkFragmentsAccess[i].Access.AddZeroed(FMassChunkFragmentBitSet::GetMaxNum());
SharedFragmentsAccess[i].Access.AddZeroed(FMassSharedFragmentBitSet::GetMaxNum());
RequiredSubsystemsAccess[i].Access.AddZeroed(FMassExternalSubsystemBitSet::GetMaxNum());
}
}
template<typename TBitSet>
void FMassProcessorDependencySolver::FResourceUsage::HandleElementType(TMassExecutionAccess<FResourceAccess>& ElementAccess
, const TMassExecutionAccess<TBitSet>& TestedRequirements, FMassProcessorDependencySolver::FNode& InOutNode, const int32 NodeIndex)
{
using UE::Mass::Private::DoArchetypeContainersOverlap;
// when considering subsystem access we don't care about archetypes, so we cache whether
// we're dealing with subsystems and use that to potentially short-circuit the checks below.
constexpr bool bSubsystems = std::is_same_v<TBitSet, FMassExternalSubsystemBitSet>;
// for every bit set in TestedRequirements we do the following:
// 1. For every read-only requirement we make InOutNode depend on the currently stored Writer of this resource
// - note that this operation is not destructive, meaning we don't consume the stored data, since all
// subsequent read access to the given resource will also depend on the Writer
// - note 2: we also fine-tune what we store as a dependency for InOutNode by checking whether InOutNode's
// archetypes overlap with the current Writer's
// - this will make InOutNode wait for the current Writer to finish before starting its own work, which is
// exactly what we need to avoid accessing data while it's potentially being written
// 2. For every read-write requirement we make InOutNode depend on all the readers and writers currently stored.
// - once that's done we clear the currently stored Readers and Writers, since every subsequent operation on this
// resource will be blocked by the currently considered InOutNode (as the new Writer)
// - again, we check whether the corresponding archetype collections overlap
// - similarly to reads waiting on writes in pt 1, we want to hold off InOutNode's write operations
// until all currently registered (and conflicting) writers and readers are done
// 3. For all accessed resources we store information that InOutNode is accessing them
// - we do this so that the following nodes know they'll have to wait for InOutNode if an access
// conflict arises.
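// Concrete example: if node A wrote FragmentX earlier and node B now declares a read of FragmentX (and their
// archetypes overlap), step 1 records A as B's dependency but A stays registered as the writer, so any later
// reader of FragmentX will depend on A as well. If B instead declared a write, step 2 would also clear A from
// the stored users, since from then on everyone conflicts with B, the new writer.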
// 1. For every read-only requirement we make InOutNode depend on the currently stored Writer of this resource
for (auto It = TestedRequirements.Read.GetIndexIterator(); It; ++It)
{
for (int32 UserIndex : ElementAccess.Write.Access[*It].Users)
{
if (bSubsystems || DoArchetypeContainersOverlap(AllNodesView[UserIndex].ValidArchetypes, InOutNode.ValidArchetypes))
{
InOutNode.OriginalDependencies.Add(UserIndex);
}
}
}
// 2. For every read-write requirement we make InOutNode depend on all the readers and writers currently stored.
for (auto It = TestedRequirements.Write.GetIndexIterator(); It; ++It)
{
for (int32 i = ElementAccess.Read.Access[*It].Users.Num() - 1; i >= 0; --i)
{
const int32 UserIndex = ElementAccess.Read.Access[*It].Users[i];
if (bSubsystems || DoArchetypeContainersOverlap(AllNodesView[UserIndex].ValidArchetypes, InOutNode.ValidArchetypes))
{
InOutNode.OriginalDependencies.Add(UserIndex);
ElementAccess.Read.Access[*It].Users.RemoveAtSwap(i);
}
}
for (int32 i = ElementAccess.Write.Access[*It].Users.Num() - 1; i >= 0; --i)
{
const int32 UserIndex = ElementAccess.Write.Access[*It].Users[i];
if (bSubsystems || DoArchetypeContainersOverlap(AllNodesView[UserIndex].ValidArchetypes, InOutNode.ValidArchetypes))
{
InOutNode.OriginalDependencies.Add(UserIndex);
ElementAccess.Write.Access[*It].Users.RemoveAtSwap(i);
}
}
}
// 3. For all accessed resources we store information that InOutNode is accessing it
for (auto It = TestedRequirements.Read.GetIndexIterator(); It; ++It)
{
// mark the element at the index indicated by It as being read by NodeIndex
ElementAccess.Read.Access[*It].Users.Add(NodeIndex);
}
for (auto It = TestedRequirements.Write.GetIndexIterator(); It; ++It)
{
// mark the element at the index indicated by It as being written by NodeIndex
ElementAccess.Write.Access[*It].Users.Add(NodeIndex);
}
}
template<typename TBitSet>
bool FMassProcessorDependencySolver::FResourceUsage::CanAccess(const TMassExecutionAccess<TBitSet>& StoredElements, const TMassExecutionAccess<TBitSet>& TestedElements)
{
// see if there's an overlap of tested write operations with existing read & write operations, as well as
// tested read operations with existing write operations
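// (read/read overlap is deliberately not tested - concurrent reads never conflict)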
return !(
// if someone's already writing to what I want to write
TestedElements.Write.HasAny(StoredElements.Write)
// or if someone's already reading what I want to write
|| TestedElements.Write.HasAny(StoredElements.Read)
// or if someone's already writing what I want to read
|| TestedElements.Read.HasAny(StoredElements.Write)
);
}
bool FMassProcessorDependencySolver::FResourceUsage::HasArchetypeConflict(TMassExecutionAccess<FResourceAccess> ElementAccess, const TArray<FMassArchetypeHandle>& InArchetypes) const
{
using UE::Mass::Private::DoArchetypeContainersOverlap;
// this function is being run when we've already determined there's an access conflict on the given ElementAccess,
// meaning whoever's asking is trying to access elements that are already being used. We can still grant access
// though, provided that none of the current users of the elements access the same archetypes the querier does
// (as provided by InArchetypes).
// @todo this operation could be even more efficient and precise if we tracked which operation (read/write) and which
// specific element were conflicting, and then we could limit the check to that. That would however significantly
// complicate the code and would require a major refactor to keep things clean.
for (int i = 0; i < EMassAccessOperation::MAX; ++i)
{
for (const FResourceUsers& Resource : ElementAccess[i].Access)
{
for (const int32 UserIndex : Resource.Users)
{
if (DoArchetypeContainersOverlap(AllNodesView[UserIndex].ValidArchetypes, InArchetypes))
{
return true;
}
}
}
}
return false;
}
bool FMassProcessorDependencySolver::FResourceUsage::CanAccessRequirements(const FMassExecutionRequirements& TestedRequirements, const TArray<FMassArchetypeHandle>& InArchetypes) const
{
// note that we're deliberately not checking ConstSharedFragments - those are only ever read, so there's no danger of conflicting access
bool bCanAccess = (CanAccess<FMassFragmentBitSet>(Requirements.Fragments, TestedRequirements.Fragments) || !HasArchetypeConflict(FragmentsAccess, InArchetypes))
&& (CanAccess<FMassChunkFragmentBitSet>(Requirements.ChunkFragments, TestedRequirements.ChunkFragments) || !HasArchetypeConflict(ChunkFragmentsAccess, InArchetypes))
&& (CanAccess<FMassSharedFragmentBitSet>(Requirements.SharedFragments, TestedRequirements.SharedFragments) || !HasArchetypeConflict(SharedFragmentsAccess, InArchetypes))
&& CanAccess<FMassExternalSubsystemBitSet>(Requirements.RequiredSubsystems, TestedRequirements.RequiredSubsystems);
return bCanAccess;
}
void FMassProcessorDependencySolver::FResourceUsage::SubmitNode(const int32 NodeIndex, FNode& InOutNode)
{
HandleElementType<FMassFragmentBitSet>(FragmentsAccess, InOutNode.Requirements.Fragments, InOutNode, NodeIndex);
HandleElementType<FMassChunkFragmentBitSet>(ChunkFragmentsAccess, InOutNode.Requirements.ChunkFragments, InOutNode, NodeIndex);
HandleElementType<FMassSharedFragmentBitSet>(SharedFragmentsAccess, InOutNode.Requirements.SharedFragments, InOutNode, NodeIndex);
HandleElementType<FMassExternalSubsystemBitSet>(RequiredSubsystemsAccess, InOutNode.Requirements.RequiredSubsystems, InOutNode, NodeIndex);
// note that we're deliberately not pushing ConstSharedFragments - those are only ever read, so there's no danger
// of conflicting access and thus no point in tracking them
Requirements.Append(InOutNode.Requirements);
}
//----------------------------------------------------------------------//
// FProcessorDependencySolver::FNode
//----------------------------------------------------------------------//
bool FMassProcessorDependencySolver::FNode::IncreaseWaitingNodesCount(TArrayView<FMassProcessorDependencySolver::FNode> InAllNodes
, const int32 IterationsLimit, TArray<int32>& OutCycleIndices)
{
// cycle-protection check: hitting this limit means we have a cycle and the whole algorithm's result will be unreliable
if (IterationsLimit < 0)
{
OutCycleIndices.Add(NodeIndex);
return false;
}
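// every visit from a (transitively) depending node bumps this counter; Solve() later sorts nodes in descending
// order of it so that the most waited-on nodes get scheduled first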
++TotalWaitingNodes;
for (const int32 DependencyIndex : OriginalDependencies)
{
check(&InAllNodes[DependencyIndex] != this);
if (InAllNodes[DependencyIndex].IncreaseWaitingNodesCount(InAllNodes, IterationsLimit - 1, OutCycleIndices) == false)
{
OutCycleIndices.Add(NodeIndex);
return false;
}
}
return true;
}
bool FMassProcessorDependencySolver::FNode::IncreaseWaitingNodesCountAndPriority(TArrayView<FNode> InAllNodes, const int32 IterationsLimit
, TArray<int32>& OutCycleIndices, const int32 InChildPriority)
{
// cycle-protection check: hitting this limit means we have a cycle and the whole algorithm's result will be unreliable
if (IterationsLimit < 0)
{
OutCycleIndices.Add(NodeIndex);
return false;
}
++TotalWaitingNodes;
UpdateExecutionPriority(InChildPriority);
for (const int32 DependencyIndex : OriginalDependencies)
{
check(&InAllNodes[DependencyIndex] != this);
if (InAllNodes[DependencyIndex].IncreaseWaitingNodesCountAndPriority(InAllNodes, IterationsLimit - 1, OutCycleIndices, MaxExecutionPriority) == false)
{
OutCycleIndices.Add(NodeIndex);
return false;
}
}
return true;
}
//----------------------------------------------------------------------//
// FProcessorDependencySolver
//----------------------------------------------------------------------//
FMassProcessorDependencySolver::FMassProcessorDependencySolver(TArrayView<UMassProcessor* const> InProcessors, const bool bIsGameRuntime)
: Processors(InProcessors)
, bGameRuntime(bIsGameRuntime)
{}
bool FMassProcessorDependencySolver::PerformSolverStep(FResourceUsage& ResourceUsage, TArray<int32>& InOutIndicesRemaining, TArray<int32>& OutNodeIndices)
{
int32 AcceptedNodeIndex = INDEX_NONE;
int32 FallbackAcceptedNodeIndex = INDEX_NONE;
const int32 HighestPriority = AllNodes[InOutIndicesRemaining[0]].MaxExecutionPriority;
for (int32 i = 0; i < InOutIndicesRemaining.Num(); ++i)
{
const int32 NodeIndex = InOutIndicesRemaining[i];
if (AllNodes[NodeIndex].TransientDependencies.Num() == 0)
{
// if we're solving dependencies for single-threaded use we don't need to fine-tune the order based on resources nor archetypes
if (bSingleThreadTarget || ResourceUsage.CanAccessRequirements(AllNodes[NodeIndex].Requirements, AllNodes[NodeIndex].ValidArchetypes))
{
AcceptedNodeIndex = NodeIndex;
break;
}
else if (FallbackAcceptedNodeIndex == INDEX_NONE)
{
// if none of the nodes left can "cleanly" execute (i.e. without conflicting with already stored nodes)
// we'll just pick this one up and go with it.
FallbackAcceptedNodeIndex = NodeIndex;
}
else if (UE::Mass::Private::bPickHigherPriorityNodesRegardlessOfRequirements
&& AllNodes[NodeIndex].MaxExecutionPriority < HighestPriority)
{
// subsequent nodes are of lower execution priority, we break now and will use FallbackAcceptedNodeIndex
check(FallbackAcceptedNodeIndex != INDEX_NONE);
checkf(UE::Mass::Private::bProcessorExecutionPriorityEnabled == true
, TEXT("We never expect to hit this case when execution priorities are disabled - all nodes should have the same priority."));
break;
}
}
}
if (AcceptedNodeIndex != INDEX_NONE || FallbackAcceptedNodeIndex != INDEX_NONE)
{
const int32 NodeIndex = AcceptedNodeIndex != INDEX_NONE ? AcceptedNodeIndex : FallbackAcceptedNodeIndex;
FNode& Node = AllNodes[NodeIndex];
// Note that this is not an unexpected event and will happen during every dependency solving. It's a part
// of the algorithm. We initially look for all the things we can run without conflicting with anything else.
// But that can't last forever, at some point we'll end up in a situation where every node left waits for
// something that has been submitted already. Then we just pick one of the waiting ones (the one indicated by
// FallbackAcceptedNodeIndex), "run it" and proceed.
UE_CLOG(AcceptedNodeIndex == INDEX_NONE, LogMassDependencies, Verbose, TEXT("No dependency-free node can be picked, due to resource requirements. Picking %s as the next node.")
, *Node.Name.ToString());
ResourceUsage.SubmitNode(NodeIndex, Node);
InOutIndicesRemaining.RemoveSingle(NodeIndex);
OutNodeIndices.Add(NodeIndex);
for (const int32 DependencyIndex : Node.OriginalDependencies)
{
Node.SequencePositionIndex = FMath::Max(Node.SequencePositionIndex, AllNodes[DependencyIndex].SequencePositionIndex);
}
++Node.SequencePositionIndex;
for (const int32 RemainingNodeIndex : InOutIndicesRemaining)
{
AllNodes[RemainingNodeIndex].TransientDependencies.RemoveSingleSwap(NodeIndex, EAllowShrinking::No);
}
return true;
}
return false;
}
void FMassProcessorDependencySolver::CreateSubGroupNames(FName InGroupName, TArray<FString>& SubGroupNames)
{
// the function converts a composite group name into a series of progressively more precise group names,
// so "A.B.C" will result in ["A", "A.B", "A.B.C"]
SubGroupNames.Reset();
FString GroupNameAsString = InGroupName.ToString();
FString TopGroupName;
while (GroupNameAsString.Split(TEXT("."), &TopGroupName, &GroupNameAsString))
{
SubGroupNames.Add(TopGroupName);
}
SubGroupNames.Add(GroupNameAsString);
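// at this point SubGroupNames holds the bare segments, e.g. ["A", "B", "C"]; the loop below rebuilds the compound prefixes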
for (int i = 1; i < SubGroupNames.Num(); ++i)
{
SubGroupNames[i] = FString::Printf(TEXT("%s.%s"), *SubGroupNames[i - 1], *SubGroupNames[i]);
}
}
int32 FMassProcessorDependencySolver::CreateNodes(UMassProcessor& Processor)
{
check(Processor.GetClass());
// for processors supporting multiple instances we use the processor's name rather than its class name for
// dependency calculations. This makes the user responsible for fine-tuning per-processor dependencies.
const FName ProcName = Processor.ShouldAllowMultipleInstances()
? Processor.GetFName()
: Processor.GetClass()->GetFName();
if (const int32* NodeIndexPtr = NodeIndexMap.Find(ProcName))
{
if (Processor.ShouldAllowMultipleInstances())
{
UE_LOG(LogMassDependencies, Warning, TEXT("%hs Processor %s, name %s, already registered. This processor class does suport duplicates, but individual instances need to have a unique name.")
, __FUNCTION__, *Processor.GetFullName(), *ProcName.ToString());
}
else
{
UE_LOG(LogMassDependencies, Warning, TEXT("%hs Processor %s already registered. Duplicates are not supported by this processor class.")
, __FUNCTION__, *ProcName.ToString());
}
return *NodeIndexPtr;
}
const FMassProcessorExecutionOrder& ExecutionOrder = Processor.GetExecutionOrder();
// first figure out the groups so that the group nodes come before the processor nodes; this is required for child
// nodes to inherit their group's dependencies, e.g. in scenarios where some processor is required to ExecuteBefore a given group
int32 ParentGroupNodeIndex = INDEX_NONE;
if (ExecutionOrder.ExecuteInGroup.IsNone() == false)
{
TArray<FString> AllGroupNames;
CreateSubGroupNames(ExecutionOrder.ExecuteInGroup, AllGroupNames);
check(AllGroupNames.Num() > 0);
for (const FString& GroupName : AllGroupNames)
{
const FName GroupFName(GroupName);
int32* LocalGroupIndex = NodeIndexMap.Find(GroupFName);
// group name hasn't been encountered yet - create it
if (LocalGroupIndex == nullptr)
{
int32 NewGroupNodeIndex = AllNodes.Num();
NodeIndexMap.Add(GroupFName, NewGroupNodeIndex);
FNode& GroupNode = AllNodes.Add_GetRef({ GroupFName, nullptr, NewGroupNodeIndex });
// deliberately skip depending on the dummy "root" node
if (ParentGroupNodeIndex != INDEX_NONE)
{
GroupNode.OriginalDependencies.Add(ParentGroupNodeIndex);
AllNodes[ParentGroupNodeIndex].SubNodeIndices.Add(NewGroupNodeIndex);
}
ParentGroupNodeIndex = NewGroupNodeIndex;
}
else
{
ParentGroupNodeIndex = *LocalGroupIndex;
}
}
}
const int32 NodeIndex = AllNodes.Num();
NodeIndexMap.Add(ProcName, NodeIndex);
FNode& ProcessorNode = AllNodes.Add_GetRef({ ProcName, &Processor, NodeIndex });
ProcessorNode.ExecuteAfter.Append(ExecutionOrder.ExecuteAfter);
ProcessorNode.ExecuteBefore.Append(ExecutionOrder.ExecuteBefore);
Processor.ExportRequirements(ProcessorNode.Requirements);
// we're clearing out information about the thread-safe subsystems since
// we don't need to consider them while tracking subsystem access for thread-safety purposes
ProcessorNode.Requirements.RequiredSubsystems.Write -= MultiThreadedSystemsBitSet;
ProcessorNode.Requirements.RequiredSubsystems.Read -= MultiThreadedSystemsBitSet;
ProcessorNode.Requirements.CountResourcesUsed();
ProcessorNode.MaxExecutionPriority = UE::Mass::Private::bProcessorExecutionPriorityEnabled
? Processor.GetExecutionPriority()
: 0;
if (ParentGroupNodeIndex > 0)
{
AllNodes[ParentGroupNodeIndex].SubNodeIndices.Add(NodeIndex);
}
return NodeIndex;
}
void FMassProcessorDependencySolver::BuildDependencies()
{
// at this point we have collected all the known processors and groups in AllNodes so we can transpose
// A.ExecuteBefore(B) type of dependencies into B.ExecuteAfter(A)
for (int32 NodeIndex = 0; NodeIndex < AllNodes.Num(); ++NodeIndex)
{
for (int i = 0; i < AllNodes[NodeIndex].ExecuteBefore.Num(); ++i)
{
const FName BeforeDependencyName = AllNodes[NodeIndex].ExecuteBefore[i];
int32 DependentNodeIndex = INDEX_NONE;
int32* DependentNodeIndexPtr = NodeIndexMap.Find(BeforeDependencyName);
if (DependentNodeIndexPtr == nullptr)
{
// missing dependency. Adding a "dummy" node representing it to still support ordering based on missing groups or processors.
// For example, if processors A and B declare a dependency, respectively, "Before C" and "After C", we still
// expect A to come before B regardless of whether C exists or not.
DependentNodeIndex = AllNodes.Num();
NodeIndexMap.Add(BeforeDependencyName, DependentNodeIndex);
AllNodes.Add({ BeforeDependencyName, nullptr, DependentNodeIndex });
UE_LOG(LogMassDependencies, Log, TEXT("Unable to find dependency \"%s\" declared by %s. Creating a dummy dependency node.")
, *BeforeDependencyName.ToString(), *AllNodes[NodeIndex].Name.ToString());
}
else
{
DependentNodeIndex = *DependentNodeIndexPtr;
}
check(AllNodes.IsValidIndex(DependentNodeIndex));
AllNodes[DependentNodeIndex].ExecuteAfter.Add(AllNodes[NodeIndex].Name);
}
AllNodes[NodeIndex].ExecuteBefore.Reset();
}
// at this point every node contains:
// - a single "original dependency" pointing at its parent group (if any)
// - ExecuteAfter populated with node names
// Now, for every Name in ExecuteAfter we do the following:
// if Name represents a processor, add it as an "original dependency"
// else, if Name represents a group:
// - append all the group's child node names to ExecuteAfter
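// Example: if node P declares ExecuteAfter = ["GroupG"] and GroupG contains processors P1 and P2, the loop
// below appends P1 and P2 to P's ExecuteAfter; since the loop iterates over the growing array, those names
// get picked up in later iterations and turned into direct "original dependencies".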
for (int32 NodeIndex = 0; NodeIndex < AllNodes.Num(); ++NodeIndex)
{
for (int i = 0; i < AllNodes[NodeIndex].ExecuteAfter.Num(); ++i)
{
const FName AfterDependencyName = AllNodes[NodeIndex].ExecuteAfter[i];
int32* PrerequisiteNodeIndexPtr = NodeIndexMap.Find(AfterDependencyName);
int32 PrerequisiteNodeIndex = INDEX_NONE;
if (PrerequisiteNodeIndexPtr == nullptr)
{
// missing dependency. Adding a "dummy" node representing it to still support ordering based on missing groups or processors.
// For example, if processors A and B declare a dependency, respectively, "Before C" and "After C", we still
// expect A to come before B regardless of whether C exists or not.
PrerequisiteNodeIndex = AllNodes.Num();
NodeIndexMap.Add(AfterDependencyName, PrerequisiteNodeIndex);
AllNodes.Add({ AfterDependencyName, nullptr, PrerequisiteNodeIndex });
UE_LOG(LogMassDependencies, Log, TEXT("Unable to find dependency \"%s\" declared by %s. Creating a dummy dependency node.")
, *AfterDependencyName.ToString(), *AllNodes[NodeIndex].Name.ToString());
}
else
{
PrerequisiteNodeIndex = *PrerequisiteNodeIndexPtr;
}
const FNode& PrerequisiteNode = AllNodes[PrerequisiteNodeIndex];
if (PrerequisiteNode.IsGroup())
{
for (int32 SubNodeIndex : PrerequisiteNode.SubNodeIndices)
{
AllNodes[NodeIndex].ExecuteAfter.AddUnique(AllNodes[SubNodeIndex].Name);
}
}
else
{
AllNodes[NodeIndex].OriginalDependencies.AddUnique(PrerequisiteNodeIndex);
}
}
// if this node is a group, push all its dependencies down to all its children;
// by design all child nodes come after group nodes, so the child nodes' dependencies have not been processed yet
if (AllNodes[NodeIndex].IsGroup() && AllNodes[NodeIndex].SubNodeIndices.Num())
{
for (int32 PrerequisiteNodeIndex : AllNodes[NodeIndex].OriginalDependencies)
{
checkSlow(PrerequisiteNodeIndex != NodeIndex);
// in the case of processor nodes we can store the dependency directly
if (AllNodes[PrerequisiteNodeIndex].IsGroup() == false)
{
for (int32 ChildNodeIndex : AllNodes[NodeIndex].SubNodeIndices)
{
AllNodes[ChildNodeIndex].OriginalDependencies.AddUnique(PrerequisiteNodeIndex);
}
}
// special case - if dependency is a group and we haven't processed that group yet, we need to add it by name
else if (PrerequisiteNodeIndex > NodeIndex)
{
const FName& PrerequisiteName = AllNodes[PrerequisiteNodeIndex].Name;
for (int32 ChildNodeIndex : AllNodes[NodeIndex].SubNodeIndices)
{
AllNodes[ChildNodeIndex].ExecuteAfter.AddUnique(PrerequisiteName);
}
}
}
}
}
}
void FMassProcessorDependencySolver::LogNode(const FNode& Node, int Indent)
{
using UE::Mass::Private::NameViewToString;
if (Node.IsGroup())
{
UE_LOG(LogMassDependencies, Log, TEXT("%*s%s before:%s after:%s"), Indent, TEXT(""), *Node.Name.ToString()
, *NameViewToString(Node.ExecuteBefore)
, *NameViewToString(Node.ExecuteAfter));
for (const int32 NodeIndex : Node.SubNodeIndices)
{
LogNode(AllNodes[NodeIndex], Indent + 4);
}
}
else
{
CA_ASSUME(Node.Processor); // as implied by Node.IsGroup() == false
UE_LOG(LogMassDependencies, Log, TEXT("%*s%s before:%s after:%s"), Indent, TEXT(""), *Node.Name.ToString()
, *NameViewToString(Node.Processor->GetExecutionOrder().ExecuteBefore)
, *NameViewToString(Node.Processor->GetExecutionOrder().ExecuteAfter));
}
}
void FMassProcessorDependencySolver::Solve(TArray<FMassProcessorOrderInfo>& OutResult)
{
using UE::Mass::Private::NameViewToString;
if (AllNodes.Num() == 0)
{
return;
}
// for more efficient cycle detection and breaking it will be useful to know how many
// nodes depend on something - we can use this number as a limit for the longest dependency chain
int32 TotalDependingNodes = 0;
for (FNode& Node : AllNodes)
{
Node.TransientDependencies = Node.OriginalDependencies;
Node.TotalWaitingNodes = 0;
TotalDependingNodes += (Node.OriginalDependencies.Num() > 0) ? 1 : 0;
}
TArray<int32> CycleIndices;
#if WITH_MASSENTITY_DEBUG
TArray<uint32> ReportedCycleHashes;
#endif // WITH_MASSENTITY_DEBUG
TArray<int32> IndicesRemaining;
// @todo code duplication in this if-else block is temporary, will be reduced to one or the other
// once the bProcessorExecutionPriorityEnabled feature is accepted or removed.
if (UE::Mass::Private::bProcessorExecutionPriorityEnabled)
{
IndicesRemaining.Reserve(AllNodes.Num());
for (int32 NodeIndex = 0; NodeIndex < AllNodes.Num(); ++NodeIndex)
{
// skip all the group nodes, all group dependencies have already been converted to individual processor dependencies
if (AllNodes[NodeIndex].IsGroup() == false)
{
IndicesRemaining.Add(NodeIndex);
if (AllNodes[NodeIndex].IncreaseWaitingNodesCountAndPriority(AllNodes, TotalDependingNodes, CycleIndices) == false)
{
#if WITH_MASSENTITY_DEBUG
// we have a cycle. Report it here
UE::Mass::Private::LogCycle(AllNodes, CycleIndices, ReportedCycleHashes);
#endif // WITH_MASSENTITY_DEBUG
CycleIndices.Reset();
}
}
}
IndicesRemaining.Sort([this](const int32 IndexA, const int32 IndexB){
return AllNodes[IndexA].MaxExecutionPriority > AllNodes[IndexB].MaxExecutionPriority
|| (AllNodes[IndexA].MaxExecutionPriority == AllNodes[IndexB].MaxExecutionPriority
&& AllNodes[IndexA].TotalWaitingNodes > AllNodes[IndexB].TotalWaitingNodes);
});
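// highest-priority nodes go first; ties are broken by TotalWaitingNodes so the nodes unblocking the most
// downstream work get considered earliest by PerformSolverStep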
}
else
{
IndicesRemaining.Reserve(AllNodes.Num());
for (int32 NodeIndex = 0; NodeIndex < AllNodes.Num(); ++NodeIndex)
{
// skip all the group nodes, all group dependencies have already been converted to individual processor dependencies
if (AllNodes[NodeIndex].IsGroup() == false)
{
IndicesRemaining.Add(NodeIndex);
if (AllNodes[NodeIndex].IncreaseWaitingNodesCount(AllNodes, TotalDependingNodes, CycleIndices) == false)
{
#if WITH_MASSENTITY_DEBUG
// we have a cycle. Report it here
UE::Mass::Private::LogCycle(AllNodes, CycleIndices, ReportedCycleHashes);
#endif // WITH_MASSENTITY_DEBUG
CycleIndices.Reset();
}
}
}
IndicesRemaining.Sort([this](const int32 IndexA, const int32 IndexB){
return AllNodes[IndexA].TotalWaitingNodes > AllNodes[IndexB].TotalWaitingNodes;
});
}
// this is where we'll be tracking what's being accessed by whom
FResourceUsage ResourceUsage(AllNodes);
TArray<int32> SortedNodeIndices;
SortedNodeIndices.Reserve(AllNodes.Num());
while (IndicesRemaining.Num())
{
const bool bStepSuccessful = PerformSolverStep(ResourceUsage, IndicesRemaining, SortedNodeIndices);
if (bStepSuccessful == false)
{
UE_LOG(LogMassDependencies, Error, TEXT("Encountered processing dependency cycle - cutting the chain at an arbitrary location."));
// remove one of the dependencies (the last one stored)
// note that if we're in a cycle-handling scenario every node does have some dependencies left
const int32 DependencyNodeIndex = AllNodes[IndicesRemaining[0]].TransientDependencies.Pop(EAllowShrinking::No);
// we need to remove this dependency from the original dependencies as well, otherwise we'll still have the cycle
// in the data being produced as a result of the whole algorithm
AllNodes[IndicesRemaining[0]].OriginalDependencies.Remove(DependencyNodeIndex);
}
}
// now we have the desired order in SortedNodeIndices. We have to traverse it to add to OutResult
for (int i = 0; i < SortedNodeIndices.Num(); ++i)
{
const int32 NodeIndex = SortedNodeIndices[i];
TArray<FName> DependencyNames;
for (const int32 DependencyIndex : AllNodes[NodeIndex].OriginalDependencies)
{
DependencyNames.AddUnique(AllNodes[DependencyIndex].Name);
}
// at this point we expect SortedNodeIndices to only point to regular processors (i.e. no groups)
if (ensure(AllNodes[NodeIndex].Processor != nullptr))
{
OutResult.Add({ AllNodes[NodeIndex].Name, AllNodes[NodeIndex].Processor, FMassProcessorOrderInfo::EDependencyNodeType::Processor, DependencyNames, AllNodes[NodeIndex].SequencePositionIndex });
}
}
}
void FMassProcessorDependencySolver::ResolveDependencies(TArray<FMassProcessorOrderInfo>& OutResult, TSharedPtr<FMassEntityManager> EntityManager, FMassProcessorDependencySolver::FResult* InOutOptionalResult)
{
TRACE_CPUPROFILER_EVENT_SCOPE_STR("Mass ResolveDependencies");
if (Processors.Num() == 0)
{
return;
}
FScopedCategoryAndVerbosityOverride LogOverride(TEXT("LogMass"), ELogVerbosity::Log);
if (InOutOptionalResult)
{
DependencyGraphFileName = InOutOptionalResult->DependencyGraphFileName;
}
UE_LOG(LogMassDependencies, Log, TEXT("Gathering dependencies data:"));
AllNodes.Reset();
NodeIndexMap.Reset();
// as the very first node we add a "root" node that represents the "top level group" and also simplifies the rest
// of the lookup code - if a processor declares it's in group None, or depends on it, we don't need to handle
// that case explicitly.
AllNodes.Add(FNode(FName(), nullptr, 0));
NodeIndexMap.Add(FName(), 0);
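// note: index 0 stays reserved for this root node; CreateNodes deliberately never registers it as a parent
// dependency (see the ParentGroupNodeIndex > 0 check there)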
const bool bCreateVirtualArchetypes = (!EntityManager);
if (bCreateVirtualArchetypes)
{
// create an FMassEntityManager instance that we'll use to sort out the processors' overlaps.
// The idea is that for every processor we have, we create an archetype matching the given processor's requirements.
// Once that's done we have a collection of "virtual" archetypes our processors expect. Then we ask every processor
// to cache the archetypes it would accept, using the processor's owned queries. Some of the nodes will
// end up with more than just the virtual archetype created for that specific node. Practice proved the idea correct.
EntityManager = MakeShareable(new FMassEntityManager());
}
GatherSubsystemInformation(EntityManager->GetTypeManager());
// gather the processors information first
for (UMassProcessor* Processor : Processors)
{
if (Processor == nullptr)
{
UE_LOG(LogMassDependencies, Warning, TEXT("%s nullptr found in Processors collection being processed"), ANSI_TO_TCHAR(__FUNCTION__));
continue;
}
const int32 ProcessorNodeIndex = CreateNodes(*Processor);
if (bCreateVirtualArchetypes)
{
// this line is a part of a nice trick we're doing here utilizing EntityManager's archetype creation based on
// what each processor expects, and EntityQuery's capability to cache archetypes matching its requirements (used below)
EntityManager->CreateArchetype(AllNodes[ProcessorNodeIndex].Requirements.AsCompositionDescriptor());
}
}
UE_LOG(LogMassDependencies, Verbose, TEXT("Pruning processors..."));
int32 PrunedProcessorsCount = 0;
for (FNode& Node : AllNodes)
{
if (Node.IsGroup() == false)
{
CA_ASSUME(Node.Processor); // as implied by Node.IsGroup() == false
const bool bDoQueryBasedPruning = Node.Processor->ShouldAllowQueryBasedPruning(bGameRuntime);
// we gather archetypes for processors that have queries OR allow query-based pruning.
// The main point of this condition is to allow calling GetArchetypesMatchingOwnedQueries
// on pruning-supporting processors even when they have no queries - that will emit a warning
// letting the user know their processor is misconfigured.
// We also collect archetype information for the processors that never get pruned, because we use
// this information for the dependency calculations regardless of ShouldAllowQueryBasedPruning
if (bDoQueryBasedPruning || Node.Processor->GetOwnedQueriesNum())
{
// for each processor-representing node we cache information on which archetypes, among the ones we've created
// above (see the EntityManager->CreateArchetype call in the previous loop), match this processor.
Node.Processor->GetArchetypesMatchingOwnedQueries(*EntityManager.Get(), Node.ValidArchetypes);
}
// prune the archetype-less processors
if (Node.ValidArchetypes.Num() == 0 && bDoQueryBasedPruning)
{
UE_LOG(LogMassDependencies, Verbose, TEXT("\t%s"), *Node.Processor->GetName());
if (InOutOptionalResult)
{
InOutOptionalResult->PrunedProcessors.Add(Node.Processor);
}
// clearing out the processor will result in the rest of the algorithm treating it as a group - we still
// want to preserve the configured ExecuteBefore and ExecuteAfter dependencies
Node.Processor = nullptr;
++PrunedProcessorsCount;
}
}
}
UE_LOG(LogMassDependencies, Verbose, TEXT("Number of processors pruned: %d"), PrunedProcessorsCount);
check(AllNodes.Num());
LogNode(AllNodes[0]);
BuildDependencies();
// now none of the processor nodes depend on groups - we replaced those dependencies with direct dependencies
// on individual processors. However, we keep the group nodes around since we store the dependencies via index, so
// removing nodes would mess that up. Solve below ignores group nodes and OutResult will not have any groups once it's done.
Solve(OutResult);
UE_LOG(LogMassDependencies, Verbose, TEXT("Dependency order:"));
for (const FMassProcessorOrderInfo& Info : OutResult)
{
UE_LOG(LogMassDependencies, Verbose, TEXT("\t%s"), *Info.Name.ToString());
}
int32 MaxSequenceLength = 0;
for (FNode& Node : AllNodes)
{
MaxSequenceLength = FMath::Max(MaxSequenceLength, Node.SequencePositionIndex);
}
UE_LOG(LogMassDependencies, Verbose, TEXT("Max sequence length: %d"), MaxSequenceLength);
if (InOutOptionalResult)
{
InOutOptionalResult->MaxSequenceLength = MaxSequenceLength;
InOutOptionalResult->ArchetypeDataVersion = EntityManager->GetArchetypeDataVersion();
}
}
bool FMassProcessorDependencySolver::IsResultUpToDate(const FMassProcessorDependencySolver::FResult& InResult, TSharedPtr<FMassEntityManager> EntityManager)
{
if (InResult.PrunedProcessors.Num() == 0
|| !EntityManager
|| InResult.ArchetypeDataVersion == EntityManager->GetArchetypeDataVersion())
{
return true;
}
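// the archetype composition has changed since this result was computed - a processor that was pruned for
// matching no archetypes may match one now, which would invalidate the cached order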
// This would be more efficient if we had a common place where all processors live, both active and inactive, that we could utilize here.
for (UMassProcessor* PrunedProcessor : InResult.PrunedProcessors)
{
if (PrunedProcessor && PrunedProcessor->DoesAnyArchetypeMatchOwnedQueries(*EntityManager.Get()))
{
return false;
}
}
return true;
}
void FMassProcessorDependencySolver::GatherSubsystemInformation(const UE::Mass::FTypeManager& TypeManager)
{
using namespace UE::Mass;
if (TypeManager.IsEmpty())
{
return;
}
for (FTypeManager::FSubsystemTypeConstIterator SubsystemTypeIterator = TypeManager.MakeSubsystemIterator(); SubsystemTypeIterator; ++SubsystemTypeIterator)
{
if (const FTypeInfo* TypeInfo = TypeManager.GetTypeInfo(*SubsystemTypeIterator))
{
const FSubsystemTypeTraits* SubsystemTraits = TypeInfo->GetAsSystemTraits();
check(SubsystemTraits);
if (SubsystemTraits->bThreadSafeWrite)
{
const UClass* SubsystemClass = SubsystemTypeIterator->GetClass();
check(SubsystemClass);
MultiThreadedSystemsBitSet.Add(*SubsystemClass);
}
}
}
}
#undef LOCTEXT_NAMESPACE