// Copyright Epic Games, Inc. All Rights Reserved.

#include "MuT/CodeOptimiser.h"

#include "MuT/ErrorLog.h"
#include "MuT/AST.h"
#include "MuT/ASTOpInstanceAdd.h"
#include "MuT/ASTOpConditional.h"
#include "MuT/ASTOpSwitch.h"
#include "MuT/ASTOpConstantResource.h"
#include "MuT/ASTOpConstantBool.h"
#include "MuT/ASTOpConstantColor.h"
#include "MuT/ASTOpMeshApplyShape.h"
#include "MuT/ASTOpMeshBindShape.h"
#include "MuT/ASTOpMeshClipMorphPlane.h"
#include "MuT/ASTOpMeshApplyPose.h"
#include "MuT/ASTOpMeshPrepareLayout.h"
#include "MuT/ASTOpMeshAddMetadata.h"
#include "MuT/ASTOpImageRasterMesh.h"
#include "MuT/ASTOpReferenceResource.h"
#include "MuR/ModelPrivate.h"
#include "MuR/SystemPrivate.h"
#include "MuR/Operations.h"
#include "MuR/OpMeshMerge.h"
#include "MuR/MutableRuntimeModule.h"
#include "Tasks/Task.h"
#include "Math/NumericLimits.h"


namespace mu
{

namespace
{
	struct FMeshEntry
	{
		TSharedPtr<const FMesh> Mesh;
		Ptr<ASTOpConstantResource> Op;

		bool operator==(const FMeshEntry& o) const
		{
			return Mesh == o.Mesh || *Mesh == *o.Mesh;
		}
	};

	struct FImageEntry
	{
		TSharedPtr<const FImage> Image;
		Ptr<ASTOpConstantResource> Op;

		bool operator==(const FImageEntry& o) const
		{
			return Image == o.Image || *Image == *o.Image;
		}
	};

	struct FLayoutEntry
	{
		TSharedPtr<const FLayout> Layout;
		Ptr<ASTOpConstantResource> Op;

		bool operator==(const FLayoutEntry& o) const
		{
			return Layout == o.Layout || *Layout == *o.Layout;
		}
	};

	struct custom_mesh_equal
	{
		bool operator()(const TSharedPtr<const FMesh>& a, const TSharedPtr<const FMesh>& b) const
		{
			return a == b || *a == *b;
		}
	};

	struct custom_image_equal
	{
		bool operator()(const TSharedPtr<const FImage>& a, const TSharedPtr<const FImage>& b) const
		{
			return a == b || *a == *b;
		}
	};

	struct custom_layout_equal
	{
		bool operator()(const TSharedPtr<const FLayout>& a, const TSharedPtr<const FLayout>& b) const
		{
			return a == b || *a == *b;
		}
	};
}


bool DuplicatedDataRemoverAST( ASTOpList& roots )
{
	MUTABLE_CPUPROFILER_SCOPE(DuplicatedDataRemoverAST);

	TArray<Ptr<ASTOpConstantResource>> AllMeshOps;
	TArray<Ptr<ASTOpConstantResource>> AllImageOps;
	TArray<Ptr<ASTOpConstantResource>> AllLayoutOps;

	bool bModified = false;

	// Gather constants
	{
		MUTABLE_CPUPROFILER_SCOPE(Gather);

		ASTOp::Traverse_TopRandom_Unique_NonReentrant( roots, [&](Ptr<ASTOp> n)
		{
			switch ( n->GetOpType() )
			{
			case EOpType::ME_CONSTANT:
			{
				ASTOpConstantResource* typedNode = static_cast<ASTOpConstantResource*>(n.get());
				AllMeshOps.Add(typedNode);
				break;
			}

			case EOpType::IM_CONSTANT:
			{
				ASTOpConstantResource* typedNode = static_cast<ASTOpConstantResource*>(n.get());
				AllImageOps.Add(typedNode);
				break;
			}

			case EOpType::LA_CONSTANT:
			{
				ASTOpConstantResource* typedNode = static_cast<ASTOpConstantResource*>(n.get());
				AllLayoutOps.Add(typedNode);
				break;
			}

			// These should be part of the duplicated code removal, in AST.
			//// Names
			//case EOpType::IN_ADDMESH:
			//case EOpType::IN_ADDIMAGE:
			//case EOpType::IN_ADDVECTOR:
			//case EOpType::IN_ADDSCALAR:
			//case EOpType::IN_ADDCOMPONENT:
			//case EOpType::IN_ADDSURFACE:

			default:
				break;
			}

			return true;
		});
	}

	// Compare meshes
	{
		MUTABLE_CPUPROFILER_SCOPE(CompareMeshes);

		TMultiMap< SIZE_T, FMeshEntry > Meshes;
		for (Ptr<ASTOpConstantResource>& typedNode : AllMeshOps)
		{
			SIZE_T Key = typedNode->GetValueHash();

			Ptr<ASTOp> Found;
			TArray<FMeshEntry*> Candidates;
			Meshes.MultiFindPointer(Key, Candidates, false);

			if (!Candidates.IsEmpty())
			{
				TSharedPtr<const FMesh> mesh = StaticCastSharedPtr<const FMesh>(typedNode->GetValue());

				for (FMeshEntry* It : Candidates)
				{
					if (!It->Mesh)
					{
						It->Mesh = StaticCastSharedPtr<const FMesh>(It->Op->GetValue());
					}

					if (custom_mesh_equal()(mesh, It->Mesh))
					{
						Found = It->Op;
						break;
					}
				}
			}

			if (Found)
			{
				ASTOp::Replace(typedNode, Found);
				bModified = true;
			}
			else
			{
				// The mesh will be loaded only if it needs to be compared
				FMeshEntry e;
				e.Op = typedNode;
				Meshes.Add(Key, e);
			}
		}
	}

	// Compare images
	{
		MUTABLE_CPUPROFILER_SCOPE(CompareImages);

		TMultiMap< SIZE_T, FImageEntry > Images;
		for (Ptr<ASTOpConstantResource>& typedNode : AllImageOps)
		{
			SIZE_T Key = typedNode->GetValueHash();

			Ptr<ASTOp> Found;
			TArray<FImageEntry*> Candidates;
			Images.MultiFindPointer(Key, Candidates, false);

			if (!Candidates.IsEmpty())
			{
				TSharedPtr<const FImage> image = StaticCastSharedPtr<const FImage>(typedNode->GetValue());

				for (FImageEntry* It : Candidates)
				{
					if (!It->Image)
					{
						It->Image = StaticCastSharedPtr<const FImage>(It->Op->GetValue());
					}

					if (custom_image_equal()(image, It->Image))
					{
						Found = It->Op;
						break;
					}
				}
			}

			if (Found)
			{
				ASTOp::Replace(typedNode, Found);
				bModified = true;
			}
			else
			{
				// The image will be loaded only if it needs to be compared
				FImageEntry e;
				e.Op = typedNode;
				Images.Add(Key, e);
			}
		}
	}

	// Compare layouts
	{
		MUTABLE_CPUPROFILER_SCOPE(CompareLayouts);

		TMultiMap< SIZE_T, FLayoutEntry > Layouts;
		for (Ptr<ASTOpConstantResource>& typedNode : AllLayoutOps)
		{
			SIZE_T Key = typedNode->GetValueHash();

			Ptr<ASTOp> Found;
			TArray<FLayoutEntry*> Candidates;
			Layouts.MultiFindPointer(Key, Candidates, false);

			if (!Candidates.IsEmpty())
			{
				TSharedPtr<const FLayout> layout = StaticCastSharedPtr<const FLayout>(typedNode->GetValue());

				for (FLayoutEntry* It : Candidates)
				{
					if (!It->Layout)
					{
						It->Layout = StaticCastSharedPtr<const FLayout>(It->Op->GetValue());
					}

					if (custom_layout_equal()(layout, It->Layout))
					{
						Found = It->Op;
						break;
					}
				}
			}

			if (Found)
			{
				ASTOp::Replace(typedNode, Found);
				bModified = true;
			}
			else
			{
				FLayoutEntry e;
				e.Op = typedNode;
				Layouts.Add(Key, e);
			}
		}
	}

	return bModified;
}


bool DuplicatedCodeRemoverAST( ASTOpList& roots )
{
	MUTABLE_CPUPROFILER_SCOPE(DuplicatedCodeRemoverAST);

	bool bModified = false;

	struct FKeyFuncs : BaseKeyFuncs<Ptr<ASTOp>, Ptr<ASTOp>, false>
	{
		static KeyInitType GetSetKey(ElementInitType Element)
		{
			return Element;
		}

		static bool Matches(const Ptr<ASTOp>& lhs, const Ptr<ASTOp>& rhs)
		{
			return lhs == rhs || *lhs == *rhs;
		}

		static uint32 GetKeyHash(const Ptr<ASTOp>& Key)
		{
			return Key->Hash();
		}
	};

	// Visited nodes, per type
	TSet<Ptr<ASTOp>, FKeyFuncs, TInlineSetAllocator<32>> Visited[int32(EOpType::COUNT)];

	ASTOp::Traverse_BottomUp_Unique_NonReentrant( roots, [&bModified, &Visited, &roots](Ptr<ASTOp>& n)
	{
		TSet<Ptr<ASTOp>, FKeyFuncs, TInlineSetAllocator<32>>& Container = Visited[(int32)n->GetOpType()];

		// Insert will tell us if it was already there
		bool bIsAlreadyInSet = false;
		Ptr<ASTOp>& Found = Container.FindOrAdd(n, &bIsAlreadyInSet);
		if (bIsAlreadyInSet)
		{
			// It wasn't inserted, so it was already there
			ASTOp::Replace(n, Found);

			// Is it one of the roots? Then we also need to update it.
			for (Ptr<ASTOp>& Root : roots)
			{
				if (Root == n)
				{
					Root = Found;
				}
			}

			bModified = true;
		}
	});

	return bModified;
}


class FConstantTask
{
public:
	// Input
	Ptr<ASTOp> Source;
	FProxyFileContext* DiskCacheContext = nullptr;
	int32 ImageCompressionQuality = 0;
	int32 OptimizationPass = 0;
	FReferencedMeshResourceFunc ReferencedMeshResourceProvider;
	FReferencedImageResourceFunc ReferencedImageResourceProvider;

	// Intermediate
	Ptr<ASTOp> SourceCloned;

	// Result
	Ptr<ASTOp> Result;

public:

	FConstantTask( const Ptr<ASTOp>& InSource, const CompilerOptions::Private* InOptions, int32 InOptimizationPass )
	{
		OptimizationPass = InOptimizationPass;
		Source = InSource;
		DiskCacheContext = InOptions->OptimisationOptions.DiskCacheContext;
		ImageCompressionQuality = InOptions->ImageCompressionQuality;
		ReferencedMeshResourceProvider = InOptions->OptimisationOptions.ReferencedMeshResourceProvider;
		ReferencedImageResourceProvider = InOptions->OptimisationOptions.ReferencedImageResourceProvider;
	}

	void Run(FImageOperator ImOp)
	{
		MUTABLE_CPUPROFILER_SCOPE(ConstantTask_Run);

		// This runs in a worker thread
		EOpType type = SourceCloned->GetOpType();
		EDataType DataType = GetOpDataType(type);

		FSettings Settings;
		Settings.SetProfile( false );
		Settings.SetImageCompressionQuality( ImageCompressionQuality );
		TSharedPtr<FSystem> System = MakeShared<FSystem>(Settings);
		System->GetPrivate()->ImagePixelFormatOverride = ImOp.FormatImageOverride;

		FSourceDataDescriptor SourceDataDescriptor;
		if (DataType == EDataType::Image || DataType == EDataType::Mesh)
		{
			SourceDataDescriptor = SourceCloned->GetSourceDataDescriptor();
			check(!SourceDataDescriptor.IsInvalid());
		}

		// Don't generate mips during linking here.
		FLinkerOptions LinkerOptions(ImOp);
		LinkerOptions.MinTextureResidentMipCount = 255;
		LinkerOptions.bSeparateImageMips = false;

		TSharedPtr<FModel> model = MakeShared<FModel>();
		OP::ADDRESS at = ASTOp::FullLink(SourceCloned, model->GetPrivate()->Program, &LinkerOptions);

		FProgram::FState state;
		state.Root = at;
		model->GetPrivate()->Program.States.Add(state);

		TSharedPtr<FParameters> LocalParams = FModel::NewParameters(model);

		System->GetPrivate()->BeginBuild( model );

		// Calculate the value and replace this op by a constant
		switch( DataType )
		{
		case EDataType::Mesh:
		{
			MUTABLE_CPUPROFILER_SCOPE(ConstantMesh);
			TSharedPtr<const FMesh> MeshBuild = System->GetPrivate()->BuildMesh(model, LocalParams.Get(), at, EMeshContentFlags::AllFlags);
			if (MeshBuild)
			{
				mu::Ptr<ASTOpConstantResource> ConstantOp = new ASTOpConstantResource();
				ConstantOp->SourceDataDescriptor = SourceDataDescriptor;
				ConstantOp->Type = EOpType::ME_CONSTANT;
				ConstantOp->SetValue(MeshBuild, DiskCacheContext);
				Result = ConstantOp;
			}
			break;
		}

		case EDataType::Image:
		{
			MUTABLE_CPUPROFILER_SCOPE(ConstantImage);
			TSharedPtr<const FImage> pImage = System->GetPrivate()->BuildImage( model, LocalParams.Get(), at, 0, 0 );
			if (pImage)
			{
				mu::Ptr<ASTOpConstantResource> ConstantOp = new ASTOpConstantResource();
				ConstantOp->SourceDataDescriptor = SourceDataDescriptor;
				ConstantOp->Type = EOpType::IM_CONSTANT;
				ConstantOp->SetValue( pImage, DiskCacheContext );
				Result = ConstantOp;
			}
			break;
		}

		case EDataType::Layout:
		{
			MUTABLE_CPUPROFILER_SCOPE(ConstantLayout);
			TSharedPtr<const FLayout> pLayout = System->GetPrivate()->BuildLayout( model, LocalParams.Get(), at );
			if (pLayout)
			{
				mu::Ptr<ASTOpConstantResource> constantOp = new ASTOpConstantResource();
				constantOp->Type = EOpType::LA_CONSTANT;
				constantOp->SetValue( pLayout, DiskCacheContext );
				Result = constantOp;
			}
			break;
		}

		case EDataType::Bool:
		{
			MUTABLE_CPUPROFILER_SCOPE(ConstantBool);
			bool value = System->GetPrivate()->BuildBool( model, LocalParams.Get(), at );
			Result = new ASTOpConstantBool(value);
			break;
		}

		case EDataType::Color:
		{
			MUTABLE_CPUPROFILER_SCOPE(ConstantColor);
			FVector4f ResultColor(0, 0, 0, 0);
			ResultColor = System->GetPrivate()->BuildColour( model, LocalParams.Get(), at );
			{
				mu::Ptr<ASTOpConstantColor> ConstantOp = new ASTOpConstantColor();
				ConstantOp->Value = ResultColor;
				Result = ConstantOp;
			}
			break;
		}

		case EDataType::Int:
		case EDataType::Scalar:
		case EDataType::String:
		case EDataType::Projector:
			// TODO
			break;

		default:
			break;
		}

		System->GetPrivate()->EndBuild();
	}

};


bool ConstantGenerator( const CompilerOptions::Private* InOptions, Ptr<ASTOp>& Root, int32 Pass )
{
	MUTABLE_CPUPROFILER_SCOPE(ConstantGenerator);

	// Don't do this if constant optimisation has been disabled, usually for debugging.
	if (!InOptions->OptimisationOptions.bConstReduction)
	{
		return false;
	}

	// Gather the roots of all constant operations
	struct FConstantSubgraph
	{
		Ptr<ASTOp> Root;
		UE::Tasks::FTaskEvent CompletedEvent;
	};
	TArray< FConstantSubgraph > ConstantSubgraphs;
	ConstantSubgraphs.Reserve(256);

	{
		MUTABLE_CPUPROFILER_SCOPE(ConstantGenerator_GenerateTasks);

		ASTOp::Traverse_BottomUp_Unique(Root, [&ConstantSubgraphs, Pass](Ptr<ASTOp>& SubgraphRoot)
		{
			EOpType SubgraphType = SubgraphRoot->GetOpType();

			bool bGetFromChildren = false;
			bool bIsConstantSubgraph = true;
			switch (SubgraphType)
			{
			case EOpType::BO_PARAMETER:
			case EOpType::NU_PARAMETER:
			case EOpType::SC_PARAMETER:
			case EOpType::CO_PARAMETER:
			case EOpType::PR_PARAMETER:
			case EOpType::IM_PARAMETER:
			case EOpType::ME_PARAMETER:
			case EOpType::MA_PARAMETER:
				bIsConstantSubgraph = false;
				break;
			default:
				// Propagate from children
				SubgraphRoot->ForEachChild([&bIsConstantSubgraph](ASTChild& c)
				{
					if (c)
					{
						bIsConstantSubgraph = bIsConstantSubgraph && c->bIsConstantSubgraph;
					}
				});
				break;
			}

			SubgraphRoot->bIsConstantSubgraph = bIsConstantSubgraph;

			// We avoid generating constants for these operations, to avoid the memory explosion.
			// TODO: Make compiler options for some of them.
			// TODO: Some of them are worth generating if the code below them is unique.
			bool bHasSpecialOpInSubgraph = false;
			switch (SubgraphType)
			{
			case EOpType::IM_BLANKLAYOUT:
			case EOpType::IM_COMPOSE:
			case EOpType::ME_MERGE:
			case EOpType::ME_CLIPWITHMESH:
			case EOpType::ME_CLIPMORPHPLANE:
			case EOpType::ME_APPLYPOSE:
			case EOpType::ME_REMOVEMASK:
			case EOpType::ME_PREPARELAYOUT:
			case EOpType::IM_PLAINCOLOUR:
				bHasSpecialOpInSubgraph = true;
				break;

			case EOpType::ME_ADDMETADATA:
			{
				const ASTOpMeshAddMetadata* AddMetadata = static_cast<const ASTOpMeshAddMetadata*>(SubgraphRoot.get());
				if (AddMetadata->Source && AddMetadata->Source->GetParentCount() == 1)
				{
					// We could resolve the constant if there are no other constraints, since this op's mesh op is only referenced once.
					bGetFromChildren = true;
				}
				else
				{
					// Don't reduce the constant, to avoid duplicating mesh data.
					bHasSpecialOpInSubgraph = true;
				}
				break;
			}

			case EOpType::IM_RASTERMESH:
			{
				const ASTOpImageRasterMesh* Raster = static_cast<const ASTOpImageRasterMesh*>(SubgraphRoot.get());
				// If this operation is only rastering the mesh UVs, reduce it to a constant. Otherwise avoid reducing it,
				// for the case of a constant projector with a large set of possible images: we don't want to generate all the
				// projected versions of the images beforehand. TODO: Make it a compile-time option?
				bHasSpecialOpInSubgraph = Raster->image.child().get() != nullptr;
				break;
			}

			case EOpType::LA_FROMMESH:
			case EOpType::ME_EXTRACTLAYOUTBLOCK:
			case EOpType::ME_APPLYLAYOUT:
			{
				// We want to reduce this type of operation regardless of it having special ops below.
				bHasSpecialOpInSubgraph = false;
				break;
			}

			case EOpType::ME_REFERENCE:
			case EOpType::IM_REFERENCE:
				// If we are in a reference-resolution optimisation phase, then these ops are not special.
				if (Pass < 2)
				{
					bHasSpecialOpInSubgraph = true;
				}
				else
				{
					const ASTOpReferenceResource* Typed = static_cast<const ASTOpReferenceResource*>(SubgraphRoot.get());
					bHasSpecialOpInSubgraph = !Typed->bForceLoad;
				}
				break;

			default:
				bGetFromChildren = true;
				break;
			}

			if (bGetFromChildren)
			{
				// Propagate from children
				SubgraphRoot->ForEachChild([&](ASTChild& c)
				{
					if (c)
					{
						bHasSpecialOpInSubgraph = bHasSpecialOpInSubgraph || c->bHasSpecialOpInSubgraph;
					}
				});
			}

			SubgraphRoot->bHasSpecialOpInSubgraph = bHasSpecialOpInSubgraph;

			bool bIsDataTypeThanCanTurnIntoConst = false;
			EDataType DataType = GetOpDataType(SubgraphType);
			switch (DataType)
			{
			case EDataType::Mesh:
			case EDataType::Image:
			case EDataType::Layout:
			case EDataType::Bool:
			case EDataType::Color:
				bIsDataTypeThanCanTurnIntoConst = true;
				break;
			default:
				break;
			}

			// See if it is worth generating this as a constant
			//---------------------------------------------------
			bool bWorthGenerating = SubgraphRoot->bIsConstantSubgraph
				&& !SubgraphRoot->bHasSpecialOpInSubgraph
				&& !SubgraphRoot->IsConstantOp()
				&& bIsDataTypeThanCanTurnIntoConst;

			if (bWorthGenerating)
			{
				bool bCanBeGenerated = true;

				// Check source data incompatibilities: when generating constants, don't mix data that has different source descriptors (tags and other properties).
				if (DataType == EDataType::Image || DataType == EDataType::Mesh)
				{
					FSourceDataDescriptor SourceDescriptor = SubgraphRoot->GetSourceDataDescriptor();
					if (SourceDescriptor.IsInvalid())
					{
						bCanBeGenerated = false;
					}
				}

				if (bCanBeGenerated)
				{
					ConstantSubgraphs.Add({ SubgraphRoot, UE::Tasks::FTaskEvent(TEXT("MutableConstantSubgraph")) });
				}
			}
		});
	}

	auto GetRequisites = [&ConstantSubgraphs](const Ptr<ASTOp>& SubgraphRoot, TArray< UE::Tasks::FTask, TInlineAllocator<8> >& OutRequisites)
	{
		MUTABLE_CPUPROFILER_SCOPE(ConstantGenerator_GetRequisites);

		TArray< Ptr<ASTOp> > ScanRoots;
		ScanRoots.Add(SubgraphRoot);
		ASTOp::Traverse_TopDown_Unique_Imprecise(ScanRoots, [&SubgraphRoot, &OutRequisites, &ConstantSubgraphs](Ptr<ASTOp>& ChildNode)
		{
			bool bRecurse = true;

			// Subgraph root?
			if (SubgraphRoot == ChildNode)
			{
				return bRecurse;
			}

			FConstantSubgraph* DependencyFound = ConstantSubgraphs.FindByPredicate(
				[&ChildNode](const FConstantSubgraph& Candidate) { return Candidate.Root == ChildNode; });
			if (DependencyFound)
			{
				bRecurse = false;
				OutRequisites.Add(DependencyFound->CompletedEvent);
			}

			return bRecurse;
		});
	};

	// Launch the tasks.
	UE::Tasks::FTask LaunchTask = UE::Tasks::Launch(TEXT("ConstantGeneratorLaunchTasks"),
		[&ConstantSubgraphs, &GetRequisites, Pass, InOptions]()
		{
			MUTABLE_CPUPROFILER_SCOPE(ConstantGenerator_LaunchTasks);

			FImageOperator ImOp = FImageOperator::GetDefault(InOptions->ImageFormatFunc);

			// Traverse the list of constants to generate. It is ordered in a bottom-up way.
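			// Each subgraph in the list has its own completion event. When a parent constant subgraph is
			// processed, GetRequisites gathers the events of the constant subgraphs nested inside it, so a
			// parent is only evaluated once its nested constants have been generated or resolved.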
			int32 SubgraphCount = ConstantSubgraphs.Num();
			for (int32 OrderIndex = 0; OrderIndex < SubgraphCount; ++OrderIndex)
			{
				int32 Index = SubgraphCount - 1 - OrderIndex;
				Ptr<ASTOp> SubgraphRoot = ConstantSubgraphs[Index].Root;
				UE::Tasks::FTaskEvent& SubgraphCompletionEvent = ConstantSubgraphs[Index].CompletedEvent;

				bool bIsReference = false;
				EOpType SubgraphType = SubgraphRoot->GetOpType();

				if (SubgraphType == EOpType::IM_REFERENCE || SubgraphType == EOpType::IM_CONSTANT)
				{
					uint32 ImageID = 0;
					if (SubgraphType == EOpType::IM_REFERENCE)
					{
						bIsReference = true;
						const ASTOpReferenceResource* Typed = static_cast<const ASTOpReferenceResource*>(SubgraphRoot.get());
						ImageID = Typed->ID;
					}
					else if (SubgraphType == EOpType::IM_CONSTANT)
					{
						const ASTOpConstantResource* Typed = static_cast<const ASTOpConstantResource*>(SubgraphRoot.get());
						const FImage* Value = static_cast<const FImage*>(Typed->GetValue().Get());
						bIsReference = Value->IsReference();
						if (bIsReference)
						{
							ImageID = Value->GetReferencedTexture();
						}
					}

					// Instead of generating the constant we resolve the reference, which also replaces the ASTOp.
					if (bIsReference)
					{
						TSharedPtr< TSharedPtr<FImage> > ResolveImage = MakeShared<TSharedPtr<FImage>>();
						constexpr bool bRunImmediatlyIfPossible = false;
						UE::Tasks::FTask ReferenceCompletion = InOptions->OptimisationOptions.ReferencedImageResourceProvider(ImageID, ResolveImage, bRunImmediatlyIfPossible);

						UE::Tasks::FTask CompleteTask = UE::Tasks::Launch(TEXT("MutableResolveComplete"),
							[SubgraphRoot, InOptions, ResolveImage]()
							{
								Ptr<ASTOpConstantResource> ConstantOp;
								{
									MUTABLE_CPUPROFILER_SCOPE(MutableResolveComplete_CreateConstant);
									ConstantOp = new ASTOpConstantResource;
									ConstantOp->Type = EOpType::IM_CONSTANT;
									{
										MUTABLE_CPUPROFILER_SCOPE(GetSourceDataDescriptor);
										ConstantOp->SourceDataDescriptor = SubgraphRoot->GetSourceDataDescriptor();
									}
									ConstantOp->SetValue(*ResolveImage, InOptions->OptimisationOptions.DiskCacheContext);
								}
								{
									MUTABLE_CPUPROFILER_SCOPE(MutableResolveComplete_Replace);
									ASTOp::Replace(SubgraphRoot, ConstantOp);
								}
							},
							ReferenceCompletion,
							LowLevelTasks::ETaskPriority::BackgroundNormal);

						SubgraphCompletionEvent.AddPrerequisites(CompleteTask);
					}
				}

				if (SubgraphType == EOpType::ME_REFERENCE || SubgraphType == EOpType::ME_CONSTANT)
				{
					uint32 MeshID = 0;
					FString MeshMorph;
					if (SubgraphType == EOpType::ME_REFERENCE)
					{
						bIsReference = true;
						const ASTOpReferenceResource* Typed = static_cast<const ASTOpReferenceResource*>(SubgraphRoot.get());
						MeshID = Typed->ID;
					}
					else if (SubgraphType == EOpType::ME_CONSTANT)
					{
						const ASTOpConstantResource* Typed = static_cast<const ASTOpConstantResource*>(SubgraphRoot.get());
						const FMesh* Value = static_cast<const FMesh*>(Typed->GetValue().Get());
						bIsReference = Value->IsReference();
						if (bIsReference)
						{
							MeshID = Value->GetReferencedMesh();
							MeshMorph = Value->GetReferencedMorph();
						}
					}

					// Instead of generating the constant we resolve the reference, which also replaces the ASTOp.
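					// Same approach as the image branch above; the mesh resource provider additionally
					// receives the referenced morph name gathered from the constant mesh.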
					if (bIsReference)
					{
						TSharedPtr< TSharedPtr<FMesh> > ResolveMesh = MakeShared<TSharedPtr<FMesh>>();
						constexpr bool bRunImmediatlyIfPossible = false;
						UE::Tasks::FTask ReferenceCompletion = InOptions->OptimisationOptions.ReferencedMeshResourceProvider(MeshID, MeshMorph, ResolveMesh, bRunImmediatlyIfPossible);

						UE::Tasks::FTask CompleteTask = UE::Tasks::Launch(TEXT("MutableResolveComplete"),
							[SubgraphRoot, InOptions, ResolveMesh]()
							{
								Ptr<ASTOpConstantResource> ConstantOp;
								{
									MUTABLE_CPUPROFILER_SCOPE(MutableResolveComplete_CreateConstant);
									ConstantOp = new ASTOpConstantResource;
									ConstantOp->Type = EOpType::ME_CONSTANT;
									{
										MUTABLE_CPUPROFILER_SCOPE(GetSourceDataDescriptor);
										ConstantOp->SourceDataDescriptor = SubgraphRoot->GetSourceDataDescriptor();
									}
									ConstantOp->SetValue(*ResolveMesh, InOptions->OptimisationOptions.DiskCacheContext);
								}
								{
									MUTABLE_CPUPROFILER_SCOPE(MutableResolveComplete_Replace);
									ASTOp::Replace(SubgraphRoot, ConstantOp);
								}
							},
							ReferenceCompletion,
							LowLevelTasks::ETaskPriority::BackgroundNormal);

						SubgraphCompletionEvent.AddPrerequisites(CompleteTask);
					}
				}

				if (!bIsReference)
				{
					// Scan for requisites
					TArray< UE::Tasks::FTask, TInlineAllocator<8> > Requisites;
					GetRequisites(SubgraphRoot, Requisites);

					TUniquePtr<FConstantTask> Task(new FConstantTask(SubgraphRoot, InOptions, Pass));

					// Launch the preparation on the AST-modification pipe
					UE::Tasks::FTask CompleteTask = UE::Tasks::Launch(TEXT("MutableConstant"),
						[TaskPtr = MoveTemp(Task), ImOp]()
						{
							MUTABLE_CPUPROFILER_SCOPE(MutableConstantPrepare);

							// We need the clone because linking modifies the ASTOp state, and also to be safe for concurrency.
							TaskPtr->SourceCloned = ASTOp::DeepClone(TaskPtr->Source);
							TaskPtr->Run(ImOp);

							ASTOp::Replace(TaskPtr->Source, TaskPtr->Result);
							TaskPtr->Result = nullptr;
							TaskPtr->Source = nullptr;
						},
						Requisites,
						LowLevelTasks::ETaskPriority::BackgroundHigh);

					SubgraphCompletionEvent.AddPrerequisites(CompleteTask);
				}

				ConstantSubgraphs[Index].Root = nullptr;
				SubgraphCompletionEvent.Trigger();
				UE::Tasks::AddNested(SubgraphCompletionEvent);
			}
		});

	// Wait for pending tasks
	{
		MUTABLE_CPUPROFILER_SCOPE(Waiting);
		LaunchTask.Wait();
	}

	bool bSomethingModified = ConstantSubgraphs.Num() > 0;
	return bSomethingModified;
}


CodeOptimiser::CodeOptimiser( Ptr<CompilerOptions> InOptions, TArray<FStateCompilationData>& InStates )
	: States( InStates )
{
	Options = InOptions;
}


void CodeOptimiser::FullOptimiseAST( ASTOpList& roots, int32 Pass )
{
	bool bModified = true;
	int32 NumIterations = 0;
	while (bModified && (OptimizeIterationsLeft > 0 || !NumIterations))
	{
		bool bModifiedInInnerLoop = true;
		while (bModifiedInInnerLoop && (OptimizeIterationsLeft > 0 || !NumIterations))
		{
			--OptimizeIterationsLeft;
			++NumIterations;
			UE_LOG(LogMutableCore, Verbose, TEXT("Main optimise iteration %d, left %d"), NumIterations, OptimizeIterationsLeft);

			bModifiedInInnerLoop = false;

			// All kinds of optimisations that depend on the meaning of each operation
			// \TODO: We are doing it for all states.
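			// The inner loop alternates the semantic, sink and size optimisers until they stop making
			// changes; the heavier deduplication, constant-generation and logic passes run once per
			// outer iteration.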
			UE_LOG(LogMutableCore, Verbose, TEXT(" - semantic optimiser"));
			bModifiedInInnerLoop |= SemanticOptimiserAST(roots, Options->GetPrivate()->OptimisationOptions, Pass);
			ASTOp::LogHistogram(roots);

			UE_LOG(LogMutableCore, Verbose, TEXT(" - sink optimiser"));
			bModifiedInInnerLoop |= SinkOptimiserAST(roots, Options->GetPrivate()->OptimisationOptions);
			ASTOp::LogHistogram(roots);

			// Image size operations are treated separately
			UE_LOG(LogMutableCore, Verbose, TEXT(" - size optimiser"));
			bModifiedInInnerLoop |= SizeOptimiserAST(roots);
		}
		bModified = bModifiedInInnerLoop;

		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated code remover"));
		bModified |= DuplicatedCodeRemoverAST(roots);
		//UE_LOG(LogMutableCore, Verbose, TEXT("(int) %s : %ld"), TEXT("ast size"), int64(ASTOp::CountNodes(roots)));
		ASTOp::LogHistogram(roots);

		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated data remover"));
		bModified |= DuplicatedDataRemoverAST(roots);
		//UE_LOG(LogMutableCore, Verbose, TEXT("(int) %s : %ld"), TEXT("ast size"), int64(ASTOp::CountNodes(roots)));
		ASTOp::LogHistogram(roots);

		// Generate constants
		bool bModifiedInConstants = false;
		for (Ptr<ASTOp>& Root : roots)
		{
			//UE_LOG(LogMutableCore, Verbose, TEXT("(int) %s : %ld"), TEXT("ast size"), int64(ASTOp::CountNodes(roots)));
			UE_LOG(LogMutableCore, Verbose, TEXT(" - constant generator"));

			// Constant subtree generation
			bModifiedInConstants |= ConstantGenerator(Options->GetPrivate(), Root, Pass);
		}
		ASTOp::LogHistogram(roots);

		if (bModifiedInConstants)
		{
			bModified = true;

			UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated data remover"));
			DuplicatedDataRemoverAST(roots);
		}

		//if (!bModified)
		{
			UE_LOG(LogMutableCore, Verbose, TEXT(" - logic optimiser"));
			bModified |= LocalLogicOptimiserAST(roots);
		}

		ASTOp::LogHistogram(roots);
	}
}


// The traversal state indicates whether a parent operation requires a skeleton for the current mesh subtree.
class CollectAllMeshesForSkeletonVisitorAST : public Visitor_TopDown_Unique_Const
{
public:

	CollectAllMeshesForSkeletonVisitorAST( const ASTOpList& roots )
	{
		Traverse( roots, false );
	}

	// List of meshes that require a skeleton
	TArray<Ptr<ASTOpConstantResource>> MeshesRequiringSkeleton;

private:

	// Visitor_TopDown_Unique_Const interface
	bool Visit( const mu::Ptr<ASTOp>& node ) override
	{
		// \todo: refine to avoid instruction branches with irrelevant skeletons.
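		// The state is set when an ancestor operation needs the skeleton of this subtree, so any
		// constant mesh visited while it is set is recorded as requiring its skeleton.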
		uint8_t currentProtected = GetCurrentState();

		switch (node->GetOpType())
		{
		case EOpType::ME_CONSTANT:
		{
			mu::Ptr<ASTOpConstantResource> typedOp = static_cast<ASTOpConstantResource*>(node.get());
			if (currentProtected)
			{
				MeshesRequiringSkeleton.AddUnique(typedOp);
			}
			return false;
		}

		case EOpType::ME_CLIPMORPHPLANE:
		{
			ASTOpMeshClipMorphPlane* typedOp = static_cast<ASTOpMeshClipMorphPlane*>(node.get());
			if (typedOp->VertexSelectionType == EClipVertexSelectionType::BoneHierarchy)
			{
				// We need the skeleton for the source mesh
				RecurseWithState( typedOp->Source.child(), true );
				return false;
			}
			return true;
		}

		case EOpType::ME_APPLYPOSE:
		{
			ASTOpMeshApplyPose* typedOp = static_cast<ASTOpMeshApplyPose*>(node.get());
			// We need the skeleton for both meshes
			RecurseWithState(typedOp->Base.child(), true);
			RecurseWithState(typedOp->Pose.child(), true);
			return false;
		}

		case EOpType::ME_BINDSHAPE:
		{
			ASTOpMeshBindShape* typedOp = static_cast<ASTOpMeshBindShape*>(node.get());
			if (typedOp->bReshapeSkeleton)
			{
				RecurseWithState(typedOp->Mesh.child(), true);
				return false;
			}
			break;
		}

		case EOpType::ME_APPLYSHAPE:
		{
			ASTOpMeshApplyShape* typedOp = static_cast<ASTOpMeshApplyShape*>(node.get());
			if (typedOp->bReshapeSkeleton)
			{
				RecurseWithState(typedOp->Mesh.child(), true);
				return false;
			}
			break;
		}

		default:
			break;
		}

		return true;
	}

};


// This stores an ADD_MESH op with the child meshes collected and the final skeleton to use
// for this op.
struct FAddMeshSkeleton
{
	mu::Ptr<ASTOp> AddMeshOp;
	TArray<Ptr<ASTOpConstantResource>> ContributingMeshes;
	TSharedPtr<FSkeleton> FinalSkeleton;

	FAddMeshSkeleton( const mu::Ptr<ASTOp>& InAddMeshOp, TArray<Ptr<ASTOpConstantResource>>& InContributingMeshes, const TSharedPtr<FSkeleton>& InFinalSkeleton )
	{
		AddMeshOp = InAddMeshOp;
		ContributingMeshes = MoveTemp(InContributingMeshes);
		FinalSkeleton = InFinalSkeleton;
	}
};


void SkeletonCleanerAST( TArray<Ptr<ASTOp>>& roots, const FModelOptimizationOptions& options )
{
	// This collects all the meshes that require a skeleton because they are used in operations
	// that require it.
	CollectAllMeshesForSkeletonVisitorAST requireSkeletonCollector( roots );

	TArray<FAddMeshSkeleton> replacementsFound;

	ASTOp::Traverse_TopDown_Unique_Imprecise( roots, [&](mu::Ptr<ASTOp>& at)
	{
		// Only recurse instance construction ops.
		bool processChildren = GetOpDataType(at->GetOpType()) == EDataType::Instance;

		if ( at->GetOpType() == EOpType::IN_ADDMESH )
		{
			ASTOpInstanceAdd* typedNode = static_cast<ASTOpInstanceAdd*>(at.get());
			mu::Ptr<ASTOp> meshRoot = typedNode->value.child();
			if (meshRoot)
			{
				// Gather the constant meshes contributing to the final mesh
				TArray<Ptr<ASTOpConstantResource>> subtreeMeshes;
				TArray<Ptr<ASTOp>> tempRoots;
				tempRoots.Add(meshRoot);
				ASTOp::Traverse_TopDown_Unique_Imprecise( tempRoots, [&](mu::Ptr<ASTOp>& lat)
				{
					// \todo: refine to avoid instruction branches with irrelevant skeletons.
					if ( lat->GetOpType() == EOpType::ME_CONSTANT )
					{
						mu::Ptr<ASTOpConstantResource> typedOp = static_cast<ASTOpConstantResource*>(lat.get());
						if ( subtreeMeshes.Find(typedOp) == INDEX_NONE )
						{
							subtreeMeshes.Add(typedOp);
						}
					}
					return true;
				});

				// Build the unified skeleton from all the contributing constant meshes
				TSharedPtr<FSkeleton> FinalSkeleton = MakeShared<FSkeleton>();
				for (const auto& meshAt : subtreeMeshes)
				{
					TSharedPtr<const FMesh> pMesh = StaticCastSharedPtr<const FMesh>(meshAt->GetValue());
					TSharedPtr<const FSkeleton> SourceSkeleton = pMesh ? pMesh->GetSkeleton() : nullptr;
					if (SourceSkeleton)
					{
						ExtendSkeleton(FinalSkeleton.Get(), SourceSkeleton.Get());
					}
				}

				replacementsFound.Emplace( at, subtreeMeshes, FinalSkeleton );
			}
		}

		return processChildren;
	});

	// Iterate all meshes again and remap them to the unified skeleton
	ASTOp::Traverse_TopDown_Unique_Imprecise( roots, [&](mu::Ptr<ASTOp>& at)
	{
		if (at->GetOpType() == EOpType::ME_CONSTANT)
		{
			ASTOpConstantResource* typedOp = static_cast<ASTOpConstantResource*>(at.get());
			for (FAddMeshSkeleton& Rep : replacementsFound)
			{
				if (Rep.ContributingMeshes.Contains(at))
				{
					TSharedPtr<const FMesh> pMesh = StaticCastSharedPtr<const FMesh>(typedOp->GetValue());
					pMesh->CheckIntegrity();

					TSharedPtr<FMesh> NewMesh = MakeShared<FMesh>();
					bool bOutSuccess = false;
					MeshRemapSkeleton(NewMesh.Get(), pMesh.Get(), Rep.FinalSkeleton, bOutSuccess);
					if (bOutSuccess)
					{
						NewMesh->CheckIntegrity();

						mu::Ptr<ASTOpConstantResource> newOp = new ASTOpConstantResource();
						newOp->Type = EOpType::ME_CONSTANT;
						newOp->SetValue(NewMesh, options.DiskCacheContext);
						newOp->SourceDataDescriptor = at->GetSourceDataDescriptor();
						ASTOp::Replace(at, newOp);
					}
				}
			}
		}
		return true;
	});
}


void CodeOptimiser::Optimise()
{
	MUTABLE_CPUPROFILER_SCOPE(Optimise);

	// Gather all the roots (one for each state)
	TArray<Ptr<ASTOp>> roots;
	for (const FStateCompilationData& s : States)
	{
		roots.Add(s.root);
	}

	//UE_LOG(LogMutableCore, Verbose, TEXT("(int) %s : %ld"), TEXT("ast size"), int64(ASTOp::CountNodes(roots)));

	if ( Options->GetPrivate()->OptimisationOptions.bEnabled )
	{
		// We use 4 times the count because at the time we moved to sharing this count it
		// was being used 4 times, and we want to keep the tests consistent.
		int32 MaxIterations = Options->GetPrivate()->OptimisationOptions.MaxOptimisationLoopCount;
		OptimizeIterationsLeft = MaxIterations ? MaxIterations * 4 : TNumericLimits<int32>::Max();

		// The first duplicated data remover has the special mission of removing
		// duplicated data (meshes) that may have been specified in the source
		// data, before we make it diverge because of different uses, like layout
		// creation.
		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated data remover"));
		DuplicatedDataRemoverAST( roots );
		ASTOp::LogHistogram(roots);

		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated code remover"));
		DuplicatedCodeRemoverAST( roots );

		// Special optimisation stages
		if ( Options->GetPrivate()->OptimisationOptions.bUniformizeSkeleton )
		{
			UE_LOG(LogMutableCore, Verbose, TEXT(" - skeleton cleaner"));
			ASTOp::LogHistogram(roots);

			SkeletonCleanerAST( roots, Options->GetPrivate()->OptimisationOptions );
			ASTOp::LogHistogram(roots);
		}

		// First optimisation stage. It tries to resolve all the image sizes. This is necessary
		// because some operations cannot be applied correctly until the image size is known,
		// like the grow-map generation.
		bool bModified = true;
		int32 NumIterations = 0;
		while (bModified)
		{
			MUTABLE_CPUPROFILER_SCOPE(FirstStage);

			--OptimizeIterationsLeft;
			++NumIterations;
			UE_LOG(LogMutableCore, Verbose, TEXT("First optimise iteration %d, left %d"), NumIterations, OptimizeIterationsLeft);

			bModified = false;

			UE_LOG(LogMutableCore, Verbose, TEXT(" - size optimiser"));
			bModified |= SizeOptimiserAST( roots );
		}

		// Main optimisation stage
		{
			MUTABLE_CPUPROFILER_SCOPE(MainStage);
			FullOptimiseAST( roots, 0 );
			FullOptimiseAST( roots, 1 );
		}

		// Constant resolution stage: resolve referenced assets.
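		// Pass 2 lets ConstantGenerator treat force-loaded reference ops as resolvable, so referenced
		// meshes and images can be turned into real constants through the resource provider callbacks.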
		{
			MUTABLE_CPUPROFILER_SCOPE(ReferenceResolution);

			constexpr int32 Pass = 2;
			//FullOptimiseAST(roots, 2);

			// Generate constants
			for (Ptr<ASTOp>& Root : roots)
			{
				// Constant subtree generation
				bModified = ConstantGenerator(Options->GetPrivate(), Root, Pass);
			}

			DuplicatedDataRemoverAST(roots);
		}

		// Main optimisation stage again for data-aware optimisations
		{
			MUTABLE_CPUPROFILER_SCOPE(FinalStage);
			FullOptimiseAST(roots, 0);
			ASTOp::LogHistogram(roots);
			FullOptimiseAST(roots, 1);
			ASTOp::LogHistogram(roots);
		}

		// Analyse the mesh constants to see which of them are in optimised mesh formats, and set the flags.
		ASTOp::Traverse_BottomUp_Unique_NonReentrant( roots, [&](Ptr<ASTOp>& n)
		{
			if (n->GetOpType() == EOpType::ME_CONSTANT)
			{
				ASTOpConstantResource* typed = static_cast<ASTOpConstantResource*>(n.get());
				auto pMesh = StaticCastSharedPtr<FMesh>(typed->GetValue());
				pMesh->ResetStaticFormatFlags();
				typed->SetValue( pMesh, Options->GetPrivate()->OptimisationOptions.DiskCacheContext );
			}
		});

		ASTOp::LogHistogram(roots);

		// Reset the state root operations in case they have changed due to optimisation
		for (int32 RootIndex = 0; RootIndex < States.Num(); ++RootIndex)
		{
			States[RootIndex].root = roots[RootIndex];
		}

		{
			MUTABLE_CPUPROFILER_SCOPE(StatesStage);

			// Optimise for every state
			OptimiseStatesAST( );

			// Optimise the data formats (TODO)
			//OperationFlagGenerator flagGen( pResult.get() );
		}

		ASTOp::LogHistogram(roots);
	}

	// Minimal optimisation of constant subtrees
	else if ( Options->GetPrivate()->OptimisationOptions.bConstReduction )
	{
		// The first duplicated data remover has the special mission of removing
		// duplicated data (meshes) that may have been specified in the source
		// data, before we make it diverge because of different uses, like layout
		// creation.
		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated data remover"));
		DuplicatedDataRemoverAST( roots );

		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated code remover"));
		DuplicatedCodeRemoverAST( roots );

		// Constant resolution stage: resolve referenced assets.
		{
			MUTABLE_CPUPROFILER_SCOPE(ReferenceResolution);
			FullOptimiseAST(roots, 2);
		}

		for ( int32 StateIndex = 0; StateIndex < States.Num(); ++StateIndex )
		{
			// Constant subtree generation
			constexpr int32 Pass = 2;
			ConstantGenerator( Options->GetPrivate(), roots[StateIndex], Pass );
		}

		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated data remover"));
		DuplicatedDataRemoverAST( roots );

		UE_LOG(LogMutableCore, Verbose, TEXT(" - duplicated code remover"));
		DuplicatedCodeRemoverAST( roots );

		// Reset the state root operations in case they have changed due to optimisation
		for (int32 RootIndex = 0; RootIndex < States.Num(); ++RootIndex)
		{
			States[RootIndex].root = roots[RootIndex];
		}
	}

	ASTOp::LogHistogram(roots);
}

}