// Copyright Epic Games, Inc. All Rights Reserved. #include "NaniteSkinningSceneExtension.h" #include "ViewDefinitions.h" #include "ScenePrivate.h" #include "RenderUtils.h" #include "SkeletalRenderPublic.h" #include "SkinningDefinitions.h" #include "ViewData.h" #include "SceneCulling/SceneCullingRenderer.h" // TODO: these are prototype macros for how we might expose SceneUB for direct binding. // If this becomes the way we want to expose this, then we should move this to shared headers. // There's still some machinery we _could_ add to make it work nicely as an API, e.g., interface to get the associated sub-UB & register a provider (or something). #define IMPLEMENT_STATIC_UNIFORM_BUFFER_SCENE_UB(StructType, MangledName) \ IMPLEMENT_STATIC_UNIFORM_BUFFER_SLOT(MangledName) \ IMPLEMENT_STATIC_UNIFORM_BUFFER_STRUCT(StructType, #MangledName, MangledName); /** * Implement a Scene UB sub-struct _with_ a global UB definition for binding stand-alone. */ #define IMPLEMENT_SCENE_UB_STRUCT_EX(StructType, FieldName, DefaultValueFactoryType) \ TSceneUniformBufferMemberRegistration SceneUB::FieldName { TEXT(#FieldName), DefaultValueFactoryType }; \ IMPLEMENT_STATIC_UNIFORM_BUFFER_SCENE_UB(StructType, SceneUbEx##FieldName) static TAutoConsoleVariable CVarNaniteTransformDataBufferMinSizeBytes( TEXT("r.Nanite.SkinningBuffers.TransformDataMinSizeBytes"), 4 * 1024, TEXT("The smallest size (in bytes) of the Nanite bone transform data buffer."), ECVF_ReadOnly | ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarNanitePrimitiveSkinningDataBufferMinSizeBytes( TEXT("r.Nanite.SkinningBuffers.HeaderDataMinSizeBytes"), 4 * 1024, TEXT("The smallest size (in bytes) of the Nanite per-primitive skinning header data buffer."), ECVF_ReadOnly | ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarNaniteTransformBufferAsyncUpdates( TEXT("r.Nanite.SkinningBuffers.AsyncUpdates"), true, TEXT("When non-zero, Nanite transform data buffer updates are updated asynchronously."), 
ECVF_RenderThreadSafe ); static int32 GNaniteTransformBufferForceFullUpload = 0; static FAutoConsoleVariableRef CVarNaniteTransformBufferForceFullUpload( TEXT("r.Nanite.SkinningBuffers.ForceFullUpload"), GNaniteTransformBufferForceFullUpload, TEXT("0: Do not force a full upload.\n") TEXT("1: Force one full upload on the next update.\n") TEXT("2: Force a full upload every frame."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarNaniteTransformBufferDefrag( TEXT("r.Nanite.SkinningBuffers.Defrag"), true, TEXT("Whether or not to allow defragmentation of the Nanite skinning buffers."), ECVF_RenderThreadSafe ); static int32 GNaniteTransformBufferForceDefrag = 0; static FAutoConsoleVariableRef CVarNaniteTransformBufferDefragForce( TEXT("r.Nanite.SkinningBuffers.Defrag.Force"), GNaniteTransformBufferForceDefrag, TEXT("0: Do not force a full defrag.\n") TEXT("1: Force one full defrag on the next update.\n") TEXT("2: Force a full defrag every frame."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarNaniteTransformBufferDefragLowWaterMark( TEXT("r.Nanite.SkinningBuffers.Defrag.LowWaterMark"), 0.375f, TEXT("Ratio of used to allocated memory at which to decide to defrag the Nanite skinning buffers."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarSkinningTransformProviders( TEXT("r.Skinning.TransformProviders"), true, TEXT("When set, transform providers are enabled (if registered)."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarNaniteSkinningDefaultAnimationMinScreenSize( TEXT("r.Nanite.Skinning.DefaultAnimationMinScreenSize"), 0.1f, TEXT("Default animation screen size to stop animating at, applies when the per-component value is 0.0."), ECVF_RenderThreadSafe ); BEGIN_UNIFORM_BUFFER_STRUCT(FNaniteSkinningParameters, RENDERER_API) SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, SkinningHeaders) SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, BoneHierarchy) SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, BoneObjectSpace) 
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, BoneTransforms) END_UNIFORM_BUFFER_STRUCT() DECLARE_SCENE_UB_STRUCT(FNaniteSkinningParameters, NaniteSkinning, RENDERER_API) // Reference pose transform provider struct FTransformBlockHeader { uint32 BlockLocalIndex; uint32 BlockTransformCount; uint32 BlockTransformOffset; }; class FRefPoseTransformProviderCS : public FGlobalShader { public: static constexpr uint32 TransformsPerGroup = 64u; private: DECLARE_GLOBAL_SHADER(FRefPoseTransformProviderCS); SHADER_USE_PARAMETER_STRUCT(FRefPoseTransformProviderCS, FGlobalShader); BEGIN_SHADER_PARAMETER_STRUCT(FParameters, ) SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FSceneUniformParameters, Scene) SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, TransformBuffer) SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer, HeaderBuffer) END_SHADER_PARAMETER_STRUCT() static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters) { return DoesPlatformSupportNanite(Parameters.Platform); } static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment) { FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); OutEnvironment.CompilerFlags.Add(CFLAG_WarningsAsErrors); OutEnvironment.CompilerFlags.Add(CFLAG_HLSL2021); OutEnvironment.SetDefine(TEXT("TRANSFORMS_PER_GROUP"), TransformsPerGroup); } }; IMPLEMENT_GLOBAL_SHADER(FRefPoseTransformProviderCS, "/Engine/Private/Skinning/TransformProviders.usf", "RefPoseProviderCS", SF_Compute); static FGuid RefPoseProviderId(REF_POSE_TRANSFORM_PROVIDER_GUID); static FGuid AnimRuntimeProviderId(ANIM_RUNTIME_TRANSFORM_PROVIDER_GUID); namespace Nanite { static void GetDefaultSkinningParameters(FNaniteSkinningParameters& OutParameters, FRDGBuilder& GraphBuilder) { auto DefaultBuffer = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultByteAddressBuffer(GraphBuilder, 4u)); OutParameters.SkinningHeaders = DefaultBuffer; 
OutParameters.BoneHierarchy = DefaultBuffer; OutParameters.BoneObjectSpace = DefaultBuffer; OutParameters.BoneTransforms = DefaultBuffer; } IMPLEMENT_SCENE_EXTENSION(FSkinningSceneExtension); bool FSkinningSceneExtension::ShouldCreateExtension(FScene& InScene) { return NaniteSkinnedMeshesSupported() && DoesRuntimeSupportNanite(GetFeatureLevelShaderPlatform(InScene.GetFeatureLevel()), true, true); } FSkinningSceneExtension::FSkinningSceneExtension(FScene& InScene) : ISceneExtension(InScene) { UpdateTimerHandle = FTSTicker::GetCoreTicker().AddTicker(FTickerDelegate::CreateRaw(this, &FSkinningSceneExtension::Tick)); } FSkinningSceneExtension::~FSkinningSceneExtension() { FTSTicker::GetCoreTicker().RemoveTicker(UpdateTimerHandle); } void FSkinningSceneExtension::InitExtension(FScene& InScene) { // Determine if we want to be initially enabled or disabled const bool bNaniteEnabled = UseNanite(GetFeatureLevelShaderPlatform(InScene.GetFeatureLevel())); SetEnabled(bNaniteEnabled); // Register animation runtime and reference pose transform providers if (auto TransformProvider = Scene.GetExtensionPtr()) { TransformProvider->RegisterProvider( GetRefPoseProviderId(), FSkinningTransformProvider::FOnProvideTransforms::CreateStatic(&FSkinningSceneExtension::ProvideRefPoseTransforms) ); TransformProvider->RegisterProvider( GetAnimRuntimeProviderId(), FSkinningTransformProvider::FOnProvideTransforms::CreateStatic(&FSkinningSceneExtension::ProvideAnimRuntimeTransforms) ); } } ISceneExtensionUpdater* FSkinningSceneExtension::CreateUpdater() { return new FUpdater(*this); } ISceneExtensionRenderer* FSkinningSceneExtension::CreateRenderer(FSceneRendererBase& InSceneRenderer, const FEngineShowFlags& EngineShowFlags) { // We only need to create renderers when we're enabled if (!IsEnabled()) { return nullptr; } return new FRenderer(InSceneRenderer, *this); } void FSkinningSceneExtension::SetEnabled(bool bEnabled) { if (bEnabled != IsEnabled()) { if (bEnabled) { Buffers = MakeUnique(); } 
else { Buffers = nullptr; HierarchyAllocator.Reset(); TransformAllocator.Reset(); HeaderData.Reset(); } } } void FSkinningSceneExtension::FinishSkinningBufferUpload( FRDGBuilder& GraphBuilder, FNaniteSkinningParameters* OutParams ) { if (!IsEnabled()) { return; } FRDGBufferRef HeaderBuffer = nullptr; FRDGBufferRef BoneHierarchyBuffer = nullptr; FRDGBufferRef BoneObjectSpaceBuffer = nullptr; FRDGBufferRef TransformBuffer = nullptr; // Sync on upload tasks UE::Tasks::Wait( MakeArrayView( { TaskHandles[UploadHeaderDataTask], TaskHandles[UploadHierarchyDataTask], TaskHandles[UploadTransformDataTask] } ) ); const uint32 MinHeaderDataSize = (HeaderData.GetMaxIndex() + 1); const uint32 MinTransformDataSize = TransformAllocator.GetMaxSize(); const uint32 MinHierarchyDataSize = HierarchyAllocator.GetMaxSize(); const uint32 MinObjectSpaceDataSize = ObjectSpaceAllocator.GetMaxSize(); RDG_GPU_MASK_SCOPE(GraphBuilder, FRHIGPUMask::All()); if (Uploader.IsValid()) { HeaderBuffer = Uploader->HeaderDataUploader.ResizeAndUploadTo( GraphBuilder, Buffers->HeaderDataBuffer, MinHeaderDataSize ); BoneHierarchyBuffer = Uploader->BoneHierarchyUploader.ResizeAndUploadTo( GraphBuilder, Buffers->BoneHierarchyBuffer, MinHierarchyDataSize ); BoneObjectSpaceBuffer = Uploader->BoneObjectSpaceUploader.ResizeAndUploadTo( GraphBuilder, Buffers->BoneObjectSpaceBuffer, MinObjectSpaceDataSize ); TransformBuffer = Uploader->TransformDataUploader.ResizeAndUploadTo( GraphBuilder, Buffers->TransformDataBuffer, MinTransformDataSize ); Uploader = nullptr; } else { HeaderBuffer = Buffers->HeaderDataBuffer.ResizeBufferIfNeeded(GraphBuilder, MinHeaderDataSize); BoneHierarchyBuffer = Buffers->BoneHierarchyBuffer.ResizeBufferIfNeeded(GraphBuilder, MinHierarchyDataSize); BoneObjectSpaceBuffer = Buffers->BoneObjectSpaceBuffer.ResizeBufferIfNeeded(GraphBuilder, MinObjectSpaceDataSize); TransformBuffer = Buffers->TransformDataBuffer.ResizeBufferIfNeeded(GraphBuilder, MinTransformDataSize); } if (OutParams != nullptr) 
{ OutParams->SkinningHeaders = GraphBuilder.CreateSRV(HeaderBuffer); OutParams->BoneHierarchy = GraphBuilder.CreateSRV(BoneHierarchyBuffer); OutParams->BoneObjectSpace = GraphBuilder.CreateSRV(BoneObjectSpaceBuffer); OutParams->BoneTransforms = GraphBuilder.CreateSRV(TransformBuffer); } } void FSkinningSceneExtension::PerformSkinning( FNaniteSkinningParameters& Parameters, FRDGBuilder& GraphBuilder ) { RDG_EVENT_SCOPE(GraphBuilder, "NaniteSkinning"); const float CurrentDeltaTime = TickState->DeltaTime; TickState->DeltaTime = 0.0f; if (auto TransformProvider = Scene.GetExtensionPtr()) { if (HeaderData.Num() > 0 && CVarSkinningTransformProviders.GetValueOnRenderThread()) { FPrimitiveSceneInfo** Primitives = GraphBuilder.AllocPODArray(HeaderData.Num()); uint32* TransformOffsets = GraphBuilder.AllocPODArray(HeaderData.Num()); TArray PrimitivesToRangeIndex; PrimitivesToRangeIndex.AddUninitialized(HeaderData.Num()); uint32 TotalOffset = 0; // TODO: Optimize further (incremental tracking of primitives within provider extension?) // The current assumption is that skinned primitive counts should be fairly low, and heavy // instancing would be used. If we need a ton of primitives, revisit this algorithm. 
const TArray ProviderIds = TransformProvider->GetProviderIds(); checkf(ProviderIds.Num() < 256, TEXT("The number of provider ids exceeds storage capacity for PrimitivesToRangeIndex.")); TArray> Ranges; Ranges.Reserve(ProviderIds.Num()); for (const FGuid& ProviderId : ProviderIds) { FSkinningTransformProvider::FProviderRange& Range = Ranges.Emplace_GetRef(); Range.Id = ProviderId; Range.Count = 0; Range.Offset = 0; } uint32 PrimitiveCount = 0; for (typename TSparseArray::TConstIterator It(HeaderData); It; ++It) { const FHeaderData& Header = *It; int32 RangeIndex = 0; for (; RangeIndex < Ranges.Num(); ++RangeIndex) { FSkinningTransformProvider::FProviderRange& Range = Ranges[RangeIndex]; if (Header.ProviderId == Range.Id) { ++Range.Count; break; } } check(RangeIndex != Ranges.Num()); PrimitivesToRangeIndex[PrimitiveCount] = RangeIndex; Primitives[PrimitiveCount] = Header.PrimitiveSceneInfo; TransformOffsets[PrimitiveCount] = Header.TransformBufferOffset; ++PrimitiveCount; } uint32 IndirectionCount = 0; for (FSkinningTransformProvider::FProviderRange& Range : Ranges) { Range.Offset = IndirectionCount; IndirectionCount += Range.Count; Range.Count = 0; } FUintVector2* PrimitiveIndices = GraphBuilder.AllocPODArray(IndirectionCount); for (uint32 PrimitiveIndex = 0; PrimitiveIndex < PrimitiveCount; ++PrimitiveIndex) { FSkinningTransformProvider::FProviderRange& Range = Ranges[PrimitivesToRangeIndex[PrimitiveIndex]]; PrimitiveIndices[Range.Offset + Range.Count] = FUintVector2(PrimitiveIndex, TransformOffsets[PrimitiveIndex] * sizeof(FCompressedBoneTransform)); ++Range.Count; } TConstArrayView PrimitivesView(Primitives, PrimitiveCount); TConstArrayView IndiciesView(PrimitiveIndices, IndirectionCount); FSkinningTransformProvider::FProviderContext Context( PrimitivesView, IndiciesView, CurrentDeltaTime, GraphBuilder, Parameters.BoneTransforms->GetParent() ); TransformProvider->Broadcast(Ranges, Context); } } } bool FSkinningSceneExtension::ProcessBufferDefragmentation() { // 
Consolidate spans ObjectSpaceAllocator.Consolidate(); HierarchyAllocator.Consolidate(); TransformAllocator.Consolidate(); // Decide to defragment the buffer when the used size dips below a certain multiple of the max used size. // Since the buffer allocates in powers of two, we pick the mid point between 1/4 and 1/2 in hopes to prevent // thrashing when usage is close to a power of 2. // // NOTES: // * We only currently use the state of the transform buffer's fragmentation to decide to defrag all buffers // * Rather than trying to minimize number of moves/uploads, we just realloc and re-upload everything. This // could be implemented in a more efficient manner if the current method proves expensive. const bool bAllowDefrag = CVarNaniteTransformBufferDefrag.GetValueOnRenderThread(); static const int32 MinTransformBufferCount = CVarNaniteTransformDataBufferMinSizeBytes.GetValueOnRenderThread() / sizeof(FCompressedBoneTransform); const float LowWaterMarkRatio = CVarNaniteTransformBufferDefragLowWaterMark.GetValueOnRenderThread(); const int32 EffectiveMaxSize = FMath::RoundUpToPowerOfTwo(TransformAllocator.GetMaxSize()); const int32 LowWaterMark = uint32(EffectiveMaxSize * LowWaterMarkRatio); const int32 UsedSize = TransformAllocator.GetSparselyAllocatedSize(); if (!bAllowDefrag) { return false; } // Check to force a defrag const bool bForceDefrag = GNaniteTransformBufferForceDefrag != 0; if (GNaniteTransformBufferForceDefrag == 1) { GNaniteTransformBufferForceDefrag = 0; } if (!bForceDefrag && (EffectiveMaxSize <= MinTransformBufferCount || UsedSize > LowWaterMark)) { // No need to defragment return false; } ObjectSpaceAllocator.Reset(); HierarchyAllocator.Reset(); TransformAllocator.Reset(); for (auto& Data : HeaderData) { if (Data.TransformBufferOffset != INDEX_NONE) { Data.TransformBufferOffset = INDEX_NONE; Data.TransformBufferCount = 0; } if (Data.HierarchyBufferOffset != INDEX_NONE) { Data.HierarchyBufferOffset = INDEX_NONE; Data.HierarchyBufferCount = 0; } if 
(Data.ObjectSpaceBufferOffset != INDEX_NONE) { Data.ObjectSpaceBufferOffset = INDEX_NONE; Data.ObjectSpaceBufferCount = 0; } } return true; } bool FSkinningSceneExtension::Tick(float InDeltaTime) { TRACE_CPUPROFILER_EVENT_SCOPE(FSkinningSceneExtension::Tick); FVector NewCameraLocation = FVector::ZeroVector; if (UWorld* World = GetWorld()) { if (auto PlayerController = World->GetFirstPlayerController()) { FRotator CameraRotation; PlayerController->GetPlayerViewPoint(NewCameraLocation, CameraRotation); } else { FVector LocationSum = FVector::Zero(); if (World->ViewLocationsRenderedLastFrame.Num() > 0) { for (const auto& Location : World->ViewLocationsRenderedLastFrame) { LocationSum += Location; } NewCameraLocation = LocationSum / World->ViewLocationsRenderedLastFrame.Num(); } } } // Takes a reference to keep the timer around since the update happens on the GT timeline. ENQUEUE_RENDER_COMMAND(FTickSkinningSceneExtension) ([TickState = TickState, InDeltaTime, NewCameraLocation](FRHICommandListImmediate& RHICmdList) { TickState->DeltaTime += InDeltaTime; TickState->CameraLocation = NewCameraLocation; }); return true; } UWorld* FSkinningSceneExtension::GetWorld() const { return Scene.GetWorld(); } void FSkinningSceneExtension::WaitForHeaderDataUpdateTasks() const { UE::Tasks::Wait(MakeArrayView( { TaskHandles[FreeBufferSpaceTask], TaskHandles[InitHeaderDataTask] } )); } FSkinningSceneExtension::FBuffers::FBuffers() : HeaderDataBuffer(CVarNanitePrimitiveSkinningDataBufferMinSizeBytes.GetValueOnAnyThread() >> 2u, TEXT("Nanite.SkinningHeaders")) , BoneHierarchyBuffer(CVarNaniteTransformDataBufferMinSizeBytes.GetValueOnAnyThread() >> 2u, TEXT("Nanite.BoneHierarchy")) , BoneObjectSpaceBuffer(CVarNaniteTransformDataBufferMinSizeBytes.GetValueOnAnyThread() >> 2u, TEXT("Nanite.BoneObjectSpace")) , TransformDataBuffer(CVarNaniteTransformDataBufferMinSizeBytes.GetValueOnAnyThread() >> 2u, TEXT("Nanite.BoneTransforms")) { } 
/** Binds the updater to its scene extension and samples the async-updates cvar once for its lifetime. */
FSkinningSceneExtension::FUpdater::FUpdater(FSkinningSceneExtension& InSceneData)
:	SceneData(&InSceneData)
,	bEnableAsync(CVarNaniteTransformBufferAsyncUpdates.GetValueOnRenderThread())
{
}

/** Synchronizes all of the extension's tasks, since their lambdas capture this updater. */
void FSkinningSceneExtension::FUpdater::End()
{
	// Ensure these tasks finish before we fall out of scope.
	// NOTE: This should be unnecessary if the updater shares the graph builder's lifetime but we don't enforce that
	SceneData->SyncAllTasks();
}

/**
 * Flushes any pending upload, refreshes the enabled state, and kicks off the FreeBufferSpace task,
 * which releases buffer space of removed primitives and decides on full upload / defragmentation.
 */
void FSkinningSceneExtension::FUpdater::PreSceneUpdate(FRDGBuilder& GraphBuilder, const FScenePreUpdateChangeSet& ChangeSet, FSceneUniformBuffer& SceneUniforms)
{
	// If there was a pending upload from a prior update (due to the buffer never being used), finish the upload now.
	// This keeps the upload entries from growing unbounded and prevents any undefined behavior caused by any
	// updates that overlap primitives.
	SceneData->FinishSkinningBufferUpload(GraphBuilder);

	// Update whether or not we are enabled based on whether Nanite is enabled
	const bool bNaniteEnabled = UseNanite(GetFeatureLevelShaderPlatform(SceneData->Scene.GetFeatureLevel()));
	SceneData->SetEnabled(bNaniteEnabled);

	if (!SceneData->IsEnabled())
	{
		return;
	}

	SceneData->TaskHandles[FreeBufferSpaceTask] = GraphBuilder.AddSetupTask(
		[this, RemovedList = ChangeSet.RemovedPrimitiveIds]
		{
			TRACE_CPUPROFILER_EVENT_SCOPE(NaniteSkinning::FreeBufferSpace);

			// Remove and free transform data for removed primitives
			// NOTE: Using the ID list instead of the primitive list since we're in an async task
			for (const auto& PersistentIndex : RemovedList)
			{
				if (SceneData->HeaderData.IsValidIndex(PersistentIndex.Index))
				{
					FSkinningSceneExtension::FHeaderData& Data = SceneData->HeaderData[PersistentIndex.Index];
					if (Data.ObjectSpaceBufferOffset != INDEX_NONE)
					{
						SceneData->ObjectSpaceAllocator.Free(Data.ObjectSpaceBufferOffset, Data.ObjectSpaceBufferCount);
					}

					if (Data.HierarchyBufferOffset != INDEX_NONE)
					{
						SceneData->HierarchyAllocator.Free(Data.HierarchyBufferOffset, Data.HierarchyBufferCount);
					}

					if (Data.TransformBufferOffset != INDEX_NONE)
					{
						SceneData->TransformAllocator.Free(Data.TransformBufferOffset, Data.TransformBufferCount);
					}

					SceneData->HeaderData.RemoveAt(PersistentIndex.Index);
				}
			}

			// Check to force a full upload by CVar
			// NOTE: Doesn't currently discern which scene to affect
			bForceFullUpload = GNaniteTransformBufferForceFullUpload != 0;
			if (GNaniteTransformBufferForceFullUpload == 1)
			{
				// One-shot request: clear it now that it has been observed
				GNaniteTransformBufferForceFullUpload = 0;
			}

			// A defrag invalidates every allocation, so everything must be uploaded again
			bDefragging = SceneData->ProcessBufferDefragmentation();
			bForceFullUpload |= bDefragging;
		},
		UE::Tasks::ETaskPriority::Normal,
		bEnableAsync
	);
}

/**
 * Kicks off the InitHeaderData task, which creates a header entry for every added primitive that is
 * a skinned Nanite mesh visible in Nanite. The task is chained after the FreeBufferSpace task.
 */
void FSkinningSceneExtension::FUpdater::PostSceneUpdate(FRDGBuilder& GraphBuilder, const FScenePostUpdateChangeSet& ChangeSet)
{
	if (!SceneData->IsEnabled())
	{
		return;
	}

	TRACE_CPUPROFILER_EVENT_SCOPE(FSkinningSceneExtension::FUpdater::PostSceneUpdate);

	// Cache the updated PrimitiveSceneInfos (this is safe as long as we only access it in updater funcs and RDG setup tasks)
	AddedList = ChangeSet.AddedPrimitiveSceneInfos;

	// Kick off a task to initialize added transform ranges
	if (AddedList.Num() > 0)
	{
		SceneData->TaskHandles[InitHeaderDataTask] = GraphBuilder.AddSetupTask(
			[this]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(NaniteSkinning::InitHeaderData);

				// Skip any non-Nanite primitives, or rigid Nanite primitives
				for (auto PrimitiveSceneInfo : AddedList)
				{
					if (!PrimitiveSceneInfo->Proxy->IsNaniteMesh())
					{
						continue;
					}

					auto* NaniteProxy = static_cast(PrimitiveSceneInfo->Proxy);
					if (!NaniteProxy->IsSkinnedMesh() || !NaniteProxy->IsVisibleInNanite())
					{
						continue;
					}

					auto* SkinnedProxy = static_cast(NaniteProxy);

					const int32 PersistentIndex = PrimitiveSceneInfo->GetPersistentIndex().Index;

					// Buffer offsets/counts are not assigned here; space is allocated later by the
					// AllocBufferSpace task in PostMeshUpdate
					FHeaderData NewHeader;
					NewHeader.InstanceSceneDataOffset = PrimitiveSceneInfo->GetInstanceSceneDataOffset();
					NewHeader.NumInstanceSceneDataEntries = PrimitiveSceneInfo->GetNumInstanceSceneDataEntries();
					NewHeader.ProviderId = SkinnedProxy->GetTransformProviderId();
					NewHeader.PrimitiveSceneInfo = PrimitiveSceneInfo;
					NewHeader.MaxTransformCount = SkinnedProxy->GetMaxBoneTransformCount();
					NewHeader.MaxInfluenceCount = SkinnedProxy->GetMaxBoneInfluenceCount();
					NewHeader.UniqueAnimationCount = SkinnedProxy->GetUniqueAnimationCount();
					NewHeader.bHasScale = SkinnedProxy->HasScale();

					SceneData->HeaderData.EmplaceAt(PersistentIndex, NewHeader);

					// A full upload already covers every header, so no dirty entry is needed then
					if (!bForceFullUpload)
					{
						DirtyPrimitiveList.Add(PersistentIndex);
					}
				}
			},
			SceneData->TaskHandles[FreeBufferSpaceTask],
			UE::Tasks::ETaskPriority::Normal,
			bEnableAsync
		);
	}
}

/** Returns true only when SceneInfo and its proxy are non-null and the proxy is a skinned Nanite mesh. */
static bool IsValidSkinnedSceneInfo(const FPrimitiveSceneInfo* SceneInfo)
{
	if (SceneInfo == nullptr || SceneInfo->Proxy == nullptr)
	{
		return false;
	}

	if (!SceneInfo->Proxy->IsNaniteMesh() || !SceneInfo->Proxy->IsSkinnedMesh())
	{
		return false;
	}

	return true;
}

/**
 * Schedules the allocation task and the three upload tasks (header, hierarchy/object space,
 * transforms) for the updated primitives. The upload tasks all depend on the allocation task,
 * which itself depends on the FreeBufferSpace and InitHeaderData tasks.
 * When async updates are disabled the upload is finished immediately.
 */
void FSkinningSceneExtension::FUpdater::PostMeshUpdate(
	FRDGBuilder& GraphBuilder,
	const TConstArrayView& UpdatedSceneInfoList
)
{
	UpdateList = UpdatedSceneInfoList;

	if (SceneData->IsEnabled())
	{
		// Gets the information needed from the primitive for skinning and allocates the appropriate space in the buffer
		// for the primitive's bone transforms
		auto AllocSpaceForPrimitive = [this](FHeaderData& Data)
		{
			auto* NaniteProxy = static_cast(Data.PrimitiveSceneInfo->Proxy);
			check(NaniteProxy->IsSkinnedMesh());

			auto* SkinnedProxy = static_cast(NaniteProxy);

			Data.MaxTransformCount = SkinnedProxy->GetMaxBoneTransformCount();
			Data.MaxInfluenceCount = SkinnedProxy->GetMaxBoneInfluenceCount();
			Data.UniqueAnimationCount = SkinnedProxy->GetUniqueAnimationCount();

			bool bRequireUpload = false;

			// Object space: re-allocate only when the needed size changed
			const uint32 ObjectSpaceNeededSize = Data.MaxTransformCount * SkinnedProxy->GetObjectSpaceFloatCount();
			if (ObjectSpaceNeededSize != Data.ObjectSpaceBufferCount)
			{
				if (Data.ObjectSpaceBufferCount > 0)
				{
					SceneData->ObjectSpaceAllocator.Free(Data.ObjectSpaceBufferOffset, Data.ObjectSpaceBufferCount);
				}

				Data.ObjectSpaceBufferOffset = ObjectSpaceNeededSize > 0 ? SceneData->ObjectSpaceAllocator.Allocate(ObjectSpaceNeededSize) : INDEX_NONE;
				Data.ObjectSpaceBufferCount = ObjectSpaceNeededSize;

				if (!bForceFullUpload)
				{
					bRequireUpload = true;
				}
			}

			// Hierarchy: one entry per bone transform
			const uint32 HierarchyNeededSize = Data.MaxTransformCount;
			if (HierarchyNeededSize != Data.HierarchyBufferCount)
			{
				if (Data.HierarchyBufferCount > 0)
				{
					SceneData->HierarchyAllocator.Free(Data.HierarchyBufferOffset, Data.HierarchyBufferCount);
				}

				Data.HierarchyBufferOffset = HierarchyNeededSize > 0 ? SceneData->HierarchyAllocator.Allocate(HierarchyNeededSize) : INDEX_NONE;
				Data.HierarchyBufferCount = HierarchyNeededSize;

				if (!bForceFullUpload)
				{
					bRequireUpload = true;
				}
			}

			// Transforms: also re-allocated whenever either allocation above changed (bRequireUpload)
			const uint32 TransformNeededSize = Data.UniqueAnimationCount * Data.MaxTransformCount * 2u; // Current and Previous
			if (bRequireUpload || (TransformNeededSize != Data.TransformBufferCount))
			{
				if (Data.TransformBufferCount > 0)
				{
					SceneData->TransformAllocator.Free(Data.TransformBufferOffset, Data.TransformBufferCount);
				}

				Data.TransformBufferOffset = TransformNeededSize > 0 ? SceneData->TransformAllocator.Allocate(TransformNeededSize) : INDEX_NONE;
				Data.TransformBufferCount = TransformNeededSize;

				if (!bForceFullUpload)
				{
					bRequireUpload = true;
				}
			}

			// Offsets changed, so the header must be re-uploaded
			if (bRequireUpload)
			{
				DirtyPrimitiveList.Add(Data.PrimitiveSceneInfo->GetPersistentIndex().Index);
			}
		};

		// Kick off the allocate task (synced just prior to header uploads)
		SceneData->TaskHandles[AllocBufferSpaceTask] = GraphBuilder.AddSetupTask(
			[this, AllocSpaceForPrimitive]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(NaniteSkinning::AllocBufferSpace);

				if (bDefragging)
				{
					// All allocations were invalidated by the defrag: re-allocate every header
					for (auto& Data : SceneData->HeaderData)
					{
						AllocSpaceForPrimitive(Data);
					}
				}
				else
				{
					// Only check to reallocate space for primitives that have requested an update
					for (auto PrimitiveSceneInfo : UpdateList)
					{
						const int32 Index = PrimitiveSceneInfo->GetPersistentIndex().Index;
						if (!SceneData->HeaderData.IsValidIndex(Index))
						{
							// Primitive in update list is either non-Nanite and/or not skinned
							continue;
						}
						AllocSpaceForPrimitive(SceneData->HeaderData[Index]);
					}
				}

				// Only create a new uploader here if one of the dependent upload tasks will use it
				if (bForceFullUpload || DirtyPrimitiveList.Num() > 0 || UpdateList.Num() > 0)
				{
					SceneData->Uploader = MakeUnique();
				}
			},
			MakeArrayView(
				{
					SceneData->TaskHandles[FreeBufferSpaceTask],
					SceneData->TaskHandles[InitHeaderDataTask]
				}
			),
			UE::Tasks::ETaskPriority::Normal,
			bEnableAsync
		);

		// Queues the packed header of one primitive at its persistent index
		auto UploadHeaderData = [this](const FHeaderData& Data)
		{
			const int32 PersistentIndex = Data.PrimitiveSceneInfo->GetPersistentIndex().Index;

			// Catch when/if no transform buffer data is allocated for a primitive we're tracking.
			// This should be indicative of a bug.
			// NOTE(review): this tests the *Count* fields against INDEX_NONE, while elsewhere in this
			// file it is the *Offset* fields that are set to INDEX_NONE (counts are set to 0) - confirm
			// whether the offsets were intended here.
			ensure(Data.HierarchyBufferCount != INDEX_NONE && Data.TransformBufferCount != INDEX_NONE);

			check(SceneData->Uploader.IsValid()); // Sanity check
			SceneData->Uploader->HeaderDataUploader.Add(Data.Pack(), PersistentIndex);
		};

		// Kick off the header data upload task (synced when accessing the buffer)
		SceneData->TaskHandles[UploadHeaderDataTask] = GraphBuilder.AddSetupTask(
			[this, UploadHeaderData]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(NaniteSkinning::UploadHeaderData);

				if (bForceFullUpload)
				{
					for (auto& Data : SceneData->HeaderData)
					{
						UploadHeaderData(Data);
					}
				}
				else
				{
					// Sort the array so we can skip duplicate entries
					DirtyPrimitiveList.Sort();
					int32 LastPersistentIndex = INDEX_NONE;
					for (auto PersistentIndex : DirtyPrimitiveList)
					{
						if (PersistentIndex != LastPersistentIndex &&
							SceneData->HeaderData.IsValidIndex(PersistentIndex))
						{
							UploadHeaderData(SceneData->HeaderData[PersistentIndex]);
						}
						LastPersistentIndex = PersistentIndex;
					}
				}
			},
			MakeArrayView(
				{
					SceneData->TaskHandles[AllocBufferSpaceTask]
				}
			),
			UE::Tasks::ETaskPriority::Normal,
			bEnableAsync
		);

		// Copies one primitive's bone hierarchy and object-space data from its proxy into the uploaders
		auto UploadHierarchyData = [this](const FHeaderData& Data)
		{
			auto SkinnedProxy = static_cast(Data.PrimitiveSceneInfo->Proxy);
			const TArray& BoneHierarchy = SkinnedProxy->GetBoneHierarchy();
			const TArray& BoneObjectSpace = SkinnedProxy->GetBoneObjectSpace();

			const uint32 FloatCount = SkinnedProxy->GetObjectSpaceFloatCount();
			check(BoneHierarchy.Num() == Data.MaxTransformCount);
			check(BoneObjectSpace.Num() == Data.MaxTransformCount * FloatCount);

			check(SceneData->Uploader.IsValid());

			// Bone Hierarchy
			{
				auto UploadData = SceneData->Uploader->BoneHierarchyUploader.AddMultiple_GetRef(
					Data.HierarchyBufferOffset,
					Data.HierarchyBufferCount
				);

				uint32* DstBoneHierarchyPtr = UploadData.GetData();
				for (int32 BoneIndex = 0; BoneIndex < Data.MaxTransformCount; ++BoneIndex)
				{
					DstBoneHierarchyPtr[BoneIndex] = BoneHierarchy[BoneIndex];
				}
			}

			// Bone Object Space
			{
				auto UploadData = SceneData->Uploader->BoneObjectSpaceUploader.AddMultiple_GetRef(
					Data.ObjectSpaceBufferOffset,
					Data.ObjectSpaceBufferCount
				);

				float* DstBoneObjectSpacePtr = UploadData.GetData();
				for (uint32 BoneFloatIndex = 0; BoneFloatIndex < (Data.MaxTransformCount * FloatCount); ++BoneFloatIndex)
				{
					DstBoneObjectSpacePtr[BoneFloatIndex] = BoneObjectSpace[BoneFloatIndex];
				}
			}
		};

		// Fills one primitive's transform range with identity transforms, unless providers are enabled
		// and the primitive has a valid provider id (then the provider is expected to write the data)
		auto UploadTransformData = [this](const FHeaderData& Data, bool bProvidersEnabled)
		{
			auto SkinnedProxy = static_cast(Data.PrimitiveSceneInfo->Proxy);
			if (bProvidersEnabled && SkinnedProxy->GetTransformProviderId().IsValid())
			{
				return;
			}

			// NOTE: This path is purely for debugging now - should also set "r.Nanite.SkinningBuffers.ForceFullUpload 2" to avoid caching artifacts

			check(SceneData->Uploader.IsValid());

			auto UploadData = SceneData->Uploader->TransformDataUploader.AddMultiple_GetRef(
				Data.TransformBufferOffset,
				Data.TransformBufferCount
			);

			check(Data.UniqueAnimationCount* Data.MaxTransformCount * 2u == Data.TransformBufferCount);

			// Per unique animation the range holds MaxTransformCount current transforms followed by
			// MaxTransformCount previous transforms, hence the stride of 2 * MaxTransformCount
			FCompressedBoneTransform* DstCurrentBoneTransformsPtr = UploadData.GetData();
			FCompressedBoneTransform* DstPreviousBoneTransformsPtr = DstCurrentBoneTransformsPtr + Data.MaxTransformCount;

			const uint32 StridedPtrStep = Data.MaxTransformCount * 2u;

			for (int32 UniqueAnimation = 0; UniqueAnimation < Data.UniqueAnimationCount; ++UniqueAnimation)
			{
				for (int32 TransformIndex = 0; TransformIndex < Data.MaxTransformCount; ++TransformIndex)
				{
					SetCompressedBoneTransformIdentity(DstCurrentBoneTransformsPtr[TransformIndex]);
					SetCompressedBoneTransformIdentity(DstPreviousBoneTransformsPtr[TransformIndex]);
				}

				DstCurrentBoneTransformsPtr += StridedPtrStep;
				DstPreviousBoneTransformsPtr += StridedPtrStep;
			}
		};

		// Kick off the hierarchy data upload task (synced when accessing the buffer)
		SceneData->TaskHandles[UploadHierarchyDataTask] = GraphBuilder.AddSetupTask(
			[this, UploadHierarchyData]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(NaniteSkinning::UploadHierarchyData);

				if (bForceFullUpload)
				{
					for (auto& Data : SceneData->HeaderData)
					{
						UploadHierarchyData(Data);
					}
				}
				else
				{
					for (auto PrimitiveSceneInfo : UpdateList)
					{
						const int32 PersistentIndex = PrimitiveSceneInfo->GetPersistentIndex().Index;
						if (!SceneData->HeaderData.IsValidIndex(PersistentIndex))
						{
							// Primitive in update list is either non-Nanite and/or not skinned
							continue;
						}
						check(IsValidSkinnedSceneInfo(PrimitiveSceneInfo));
						UploadHierarchyData(SceneData->HeaderData[PersistentIndex]);
					}
				}
			},
			MakeArrayView({ SceneData->TaskHandles[AllocBufferSpaceTask] }),
			UE::Tasks::ETaskPriority::Normal,
			bEnableAsync
		);

		// Kick off the transform data upload task (synced when accessing the buffer)
		SceneData->TaskHandles[UploadTransformDataTask] = GraphBuilder.AddSetupTask(
			[this, UploadTransformData]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(NaniteSkinning::UploadTransformData);

				const bool bProvidersEnabled = CVarSkinningTransformProviders.GetValueOnRenderThread();

				if (bForceFullUpload)
				{
					for (auto& Data : SceneData->HeaderData)
					{
						UploadTransformData(Data, bProvidersEnabled);
					}
				}
				else
				{
					for (auto PrimitiveSceneInfo : UpdateList)
					{
						const int32 PersistentIndex = PrimitiveSceneInfo->GetPersistentIndex().Index;
						if (!SceneData->HeaderData.IsValidIndex(PersistentIndex))
						{
							// Primitive in update list is either non-Nanite and/or not skinned
							continue;
						}
						check(IsValidSkinnedSceneInfo(PrimitiveSceneInfo));
						UploadTransformData(SceneData->HeaderData[PersistentIndex], bProvidersEnabled);
					}
				}
			},
			MakeArrayView({ SceneData->TaskHandles[AllocBufferSpaceTask] }),
			UE::Tasks::ETaskPriority::Normal,
			bEnableAsync
		);

		if (!bEnableAsync)
		{
			// If disabling async, just finish the upload immediately
			SceneData->FinishSkinningBufferUpload(GraphBuilder);
		}
	}
}

/**
 * Compute shader "NaniteSkinningUpdateViewDataCS" (NaniteSkinningUpdateViewData.usf).
 * Reads the instance work groups and indirect args declared below; only compiled for platforms
 * that support Nanite.
 */
class FNaniteSkinningUpdateViewDataCS : public FGlobalShader
{
	DECLARE_GLOBAL_SHADER(FNaniteSkinningUpdateViewDataCS);
	SHADER_USE_PARAMETER_STRUCT(FNaniteSkinningUpdateViewDataCS, FGlobalShader)

public:
	BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
		SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FNaniteSkinningParameters, Scene_NaniteSkinning)
		SHADER_PARAMETER_STRUCT_INCLUDE(FGPUSceneResourceParameters, GPUScene)
		SHADER_PARAMETER_STRUCT_INCLUDE(RendererViewData::FWriterParameters, ViewDataParametersWriter)
		SHADER_PARAMETER_STRUCT_INCLUDE( FInstanceHierarchyParameters, InstanceHierarchyParameters )
		SHADER_PARAMETER_RDG_BUFFER_SRV( StructuredBuffer< FUintVector2 >, InstanceWorkGroups )
		SHADER_PARAMETER(float, DefaultAnimationMinScreenSize)
		RDG_BUFFER_ACCESS( IndirectArgs, ERHIAccess::IndirectArgs )
	END_SHADER_PARAMETER_STRUCT()

	// Exported to the shader as THREAD_GROUP_SIZE
	static constexpr int ThreadGroupSize = 64;

	static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
	{
		return DoesPlatformSupportNanite(Parameters.Platform);
	}

	static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
	{
		FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);

		OutEnvironment.SetDefine(TEXT("THREAD_GROUP_SIZE"), ThreadGroupSize);
		OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1);
		OutEnvironment.SetDefine(TEXT("USE_GLOBAL_GPU_SCENE_DATA"), 1);
		OutEnvironment.SetDefine(TEXT("VIEW_DATA_ACCESS_MODE"), VIEW_DATA_ACCESS_RW);
		// Don't access the global Scene uniform buffer but map to individual UBs for each used module.
		OutEnvironment.SetDefine(TEXT("USE_EXPLICIT_SCENE_UB_MODULES"), 1);

		OutEnvironment.CompilerFlags.Add(CFLAG_HLSL2021);
	}
};
IMPLEMENT_GLOBAL_SHADER(FNaniteSkinningUpdateViewDataCS, "/Engine/Private/Nanite/NaniteSkinningUpdateViewData.usf", "NaniteSkinningUpdateViewDataCS", SF_Compute);

/**
 * Compute shader "NaniteSkinningUpdateChunkCullCS" (NaniteSkinningUpdateViewData.usf).
 * Writes the instance work groups and work args UAVs declared below; only compiled for platforms
 * that support Nanite.
 */
class FNaniteSkinningUpdateChunkCullCS : public FGlobalShader
{
	DECLARE_GLOBAL_SHADER(FNaniteSkinningUpdateChunkCullCS);
	SHADER_USE_PARAMETER_STRUCT(FNaniteSkinningUpdateChunkCullCS, FGlobalShader)

public:
	BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
		SHADER_PARAMETER_STRUCT_INCLUDE(RendererViewData::FWriterParameters, ViewDataParametersWriter)
		SHADER_PARAMETER_STRUCT_INCLUDE( FInstanceHierarchyParameters, InstanceHierarchyParameters )
		SHADER_PARAMETER_RDG_BUFFER_UAV( RWStructuredBuffer< FUintVector2 >, OutInstanceWorkGroups )
		SHADER_PARAMETER_RDG_BUFFER_UAV( RWBuffer< uint >, OutInstanceWorkArgs )
		SHADER_PARAMETER(float, DefaultAnimationMinScreenSize)
	END_SHADER_PARAMETER_STRUCT()

	// Exported to the shader as THREAD_GROUP_SIZE
	static constexpr int ThreadGroupSize = 64;

	static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
	{
		return DoesPlatformSupportNanite(Parameters.Platform);
	}

	static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
	{
		FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);

		OutEnvironment.SetDefine(TEXT("THREAD_GROUP_SIZE"), ThreadGroupSize);
		OutEnvironment.SetDefine(TEXT("VIEW_DATA_ACCESS_MODE"), VIEW_DATA_ACCESS_RW);
		// Don't access the global Scene uniform buffer but map to individual UBs for each used module.
		OutEnvironment.SetDefine(TEXT("USE_EXPLICIT_SCENE_UB_MODULES"), 1);

		OutEnvironment.CompilerFlags.Add(CFLAG_HLSL2021);
	}
};
IMPLEMENT_GLOBAL_SHADER(FNaniteSkinningUpdateChunkCullCS, "/Engine/Private/Nanite/NaniteSkinningUpdateViewData.usf", "NaniteSkinningUpdateChunkCullCS", SF_Compute);

void FSkinningSceneExtension::FRenderer::UpdateViewData(FRDGBuilder& GraphBuilder, const FRendererViewDataManager& ViewDataManager)
{
	FSceneRendererBase& SceneRenderer = GetSceneRenderer();
	FSceneCullingRenderer& SceneCullingRenderer = SceneRenderer.GetSceneExtensionsRenderers().GetRenderer();
	FInstanceHierarchyParameters InstanceHierarchyParameters = SceneCullingRenderer.GetShaderParameters(GraphBuilder);
	int32 NumAllocatedChunks = InstanceHierarchyParameters.NumAllocatedChunks;

	// Create a buffer with enough space for all chunks
	FRDGBufferRef InstanceWorkGroupsRDG = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FUintVector2), NumAllocatedChunks), TEXT("NaniteSkinning.UpdateViewData.WorkGroups"));
	ERHIFeatureLevel::Type FeatureLevel = SceneData->Scene.GetFeatureLevel();
	FRDGBufferRef InstanceWorkArgsRDG = CreateAndClearIndirectDispatchArgs1D(GraphBuilder, FeatureLevel, TEXT("NaniteSkinning.UpdateViewData.IndirectArgs"));
	{
		FNaniteSkinningUpdateChunkCullCS::FParameters* PassParameters = GraphBuilder.AllocParameters< FNaniteSkinningUpdateChunkCullCS::FParameters >();
		PassParameters->InstanceHierarchyParameters = InstanceHierarchyParameters;
		PassParameters->DefaultAnimationMinScreenSize = CVarNaniteSkinningDefaultAnimationMinScreenSize.GetValueOnRenderThread();
		PassParameters->OutInstanceWorkGroups = GraphBuilder.CreateUAV(InstanceWorkGroupsRDG);
		PassParameters->OutInstanceWorkArgs = GraphBuilder.CreateUAV(InstanceWorkArgsRDG);
		PassParameters->ViewDataParametersWriter = ViewDataManager.GetWriterShaderParameters(GraphBuilder);

		auto ComputeShader = GetGlobalShaderMap(FeatureLevel)->GetShader();

		FComputeShaderUtils::AddPass(
			GraphBuilder,
			RDG_EVENT_NAME(
"NaniteSkinningUpdateViewDataChunks" ), ComputeShader, PassParameters, FComputeShaderUtils::GetGroupCount(NumAllocatedChunks, 64) ); } { FNaniteSkinningUpdateViewDataCS::FParameters* PassParameters = GraphBuilder.AllocParameters(); //PassParameters->Scene = SceneCullingInfo.SceneUniformBuffer; PassParameters->GPUScene = SceneData->Scene.GPUScene.GetShaderParameters(GraphBuilder); FNaniteSkinningParameters* NaniteSkinningParameters = GraphBuilder.AllocParameters(); SceneData->FinishSkinningBufferUpload(GraphBuilder, NaniteSkinningParameters); PassParameters->Scene_NaniteSkinning = GraphBuilder.CreateUniformBuffer(NaniteSkinningParameters); PassParameters->ViewDataParametersWriter = ViewDataManager.GetWriterShaderParameters(GraphBuilder); PassParameters->InstanceHierarchyParameters = InstanceHierarchyParameters; PassParameters->DefaultAnimationMinScreenSize = CVarNaniteSkinningDefaultAnimationMinScreenSize.GetValueOnRenderThread(); PassParameters->IndirectArgs = InstanceWorkArgsRDG; PassParameters->InstanceWorkGroups = GraphBuilder.CreateSRV(InstanceWorkGroupsRDG); auto ComputeShader = GetGlobalShaderMap(SceneData->Scene.GetFeatureLevel())->GetShader(); FComputeShaderUtils::AddPass( GraphBuilder, RDG_EVENT_NAME( "NaniteSkinningUpdateViewData" ), ComputeShader, PassParameters, PassParameters->IndirectArgs, 0 ); } } void FSkinningSceneExtension::FRenderer::UpdateSceneUniformBuffer( FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUniformBuffer ) { check(SceneData->IsEnabled()); FNaniteSkinningParameters Parameters; SceneData->FinishSkinningBufferUpload(GraphBuilder, &Parameters); SceneUniformBuffer.Set(SceneUB::NaniteSkinning, Parameters); SceneData->PerformSkinning(Parameters, GraphBuilder); } void FSkinningSceneExtension::GetSkinnedPrimitives(TArray& OutPrimitives) const { OutPrimitives.Reset(); if (!IsEnabled()) { return; } WaitForHeaderDataUpdateTasks(); OutPrimitives.Reserve(HeaderData.Num()); for (typename TSparseArray::TConstIterator It(HeaderData); It; 
++It) { const FHeaderData& Header = *It; OutPrimitives.Add(Header.PrimitiveSceneInfo); } } const FSkinningTransformProvider::FProviderId& FSkinningSceneExtension::GetRefPoseProviderId() { return RefPoseProviderId; } const FSkinningTransformProvider::FProviderId& FSkinningSceneExtension::GetAnimRuntimeProviderId() { return AnimRuntimeProviderId; } void FSkinningSceneExtension::ProvideRefPoseTransforms(FSkinningTransformProvider::FProviderContext& Context) { const uint32 TransformsPerGroup = FRefPoseTransformProviderCS::TransformsPerGroup; // TODO: Optimize further uint32 BlockCount = 0; for (const FUintVector2& Indirection : Context.Indirections) { const FPrimitiveSceneInfo* Primitive = Context.Primitives[Indirection.X]; auto* SkinnedProxy = static_cast(Primitive->Proxy); const uint32 TransformCount = SkinnedProxy->GetMaxBoneTransformCount(); const uint32 AnimationCount = SkinnedProxy->GetUniqueAnimationCount(); BlockCount += FMath::DivideAndRoundUp(TransformCount * AnimationCount, TransformsPerGroup); } if (BlockCount == 0) { return; } FRDGBuilder& GraphBuilder = Context.GraphBuilder; FTransformBlockHeader* BlockHeaders = GraphBuilder.AllocPODArray(BlockCount); uint32 BlockWrite = 0; for (const FUintVector2& Indirection : Context.Indirections) { const FPrimitiveSceneInfo* Primitive = Context.Primitives[Indirection.X]; auto* SkinnedProxy = static_cast(Primitive->Proxy); const uint32 TransformCount = SkinnedProxy->GetMaxBoneTransformCount(); const uint32 AnimationCount = SkinnedProxy->GetUniqueAnimationCount(); const uint32 TotalTransformCount = TransformCount * AnimationCount; uint32 TransformWrite = Indirection.Y; const uint32 FullBlockCount = TotalTransformCount / TransformsPerGroup; for (uint32 BlockIndex = 0; BlockIndex < FullBlockCount; ++BlockIndex) { BlockHeaders[BlockWrite].BlockLocalIndex = BlockIndex; BlockHeaders[BlockWrite].BlockTransformCount = TransformsPerGroup; BlockHeaders[BlockWrite].BlockTransformOffset = TransformWrite; ++BlockWrite; 
TransformWrite += (TransformsPerGroup * 2 * sizeof(FCompressedBoneTransform)); } const uint32 PartialTransformCount = TotalTransformCount - (FullBlockCount * TransformsPerGroup); if (PartialTransformCount > 0) { BlockHeaders[BlockWrite].BlockLocalIndex = FullBlockCount; BlockHeaders[BlockWrite].BlockTransformCount = PartialTransformCount; BlockHeaders[BlockWrite].BlockTransformOffset = TransformWrite; ++BlockWrite; } } FRDGBufferRef BlockHeaderBuffer = CreateStructuredBuffer( GraphBuilder, TEXT("Skinning.RefPoseHeaders"), sizeof(FTransformBlockHeader), FMath::RoundUpToPowerOfTwo(FMath::Max(BlockCount, 1u)), BlockHeaders, sizeof(FTransformBlockHeader) * BlockCount, // The buffer data is allocated above on the RDG timeline ERDGInitialDataFlags::NoCopy ); FRefPoseTransformProviderCS::FParameters* PassParameters = GraphBuilder.AllocParameters(); PassParameters->TransformBuffer = GraphBuilder.CreateUAV(Context.TransformBuffer); PassParameters->HeaderBuffer = GraphBuilder.CreateSRV(BlockHeaderBuffer); auto ComputeShader = GetGlobalShaderMap(GMaxRHIFeatureLevel)->GetShader(); FComputeShaderUtils::AddPass( GraphBuilder, RDG_EVENT_NAME("RefPoseProvider"), ComputeShader, PassParameters, FIntVector(BlockCount, 1, 1) ); } BEGIN_SHADER_PARAMETER_STRUCT(FCopyBufferParameters, ) RDG_BUFFER_ACCESS(SrcBuffer, ERHIAccess::CopySrc) RDG_BUFFER_ACCESS(DstBuffer, ERHIAccess::CopyDest) END_SHADER_PARAMETER_STRUCT() void FSkinningSceneExtension::ProvideAnimRuntimeTransforms(FSkinningTransformProvider::FProviderContext& Context) { TRACE_CPUPROFILER_EVENT_SCOPE(FSkinningSceneExtension::ProvideAnimRuntimeTransforms); uint32 GlobalTransformCount = 0; for (const FUintVector2& Indirection : Context.Indirections) { const FPrimitiveSceneInfo* Primitive = Context.Primitives[Indirection.X]; auto* SkinnedProxy = static_cast(Primitive->Proxy); const uint32 TransformCount = SkinnedProxy->GetMaxBoneTransformCount(); const uint32 AnimationCount = SkinnedProxy->GetUniqueAnimationCount(); 
GlobalTransformCount += (TransformCount * AnimationCount) * 2; // Current and Previous } if (GlobalTransformCount == 0) { return; } FRDGBuilder& GraphBuilder = Context.GraphBuilder; struct FCopyCommand { uint32 DstOffset = 0; uint32 SrcOffset = 0; uint32 NumBytes = 0; }; auto& CopyCommands = *GraphBuilder.AllocObject>(); CopyCommands.Reserve(Context.Indirections.Num()); FRDGBufferRef SrcTransformBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateUploadDesc(sizeof(FCompressedBoneTransform), GlobalTransformCount), TEXT("Skinning.AnimTransforms")); FRHIBuffer* SrcTransformBufferRHI = GraphBuilder.ConvertToExternalBuffer(SrcTransformBuffer)->GetRHI(); GraphBuilder.AddCommandListSetupTask([&CopyCommands, Indirections = Context.Indirections, Primitives = Context.Primitives, SrcTransformBufferRHI, GlobalTransformCount] (FRHICommandList& RHICmdList) { TRACE_CPUPROFILER_EVENT_SCOPE(FSkinningSceneExtension::ProvideAnimRuntimeTransformsTask); FCompressedBoneTransform* Transforms = reinterpret_cast(RHICmdList.LockBuffer(SrcTransformBufferRHI, 0, sizeof(FCompressedBoneTransform) * GlobalTransformCount, RLM_WriteOnly)); uint32 TransformWrite = 0; for (const FUintVector2& Indirection : Indirections) { const FPrimitiveSceneInfo* Primitive = Primitives[Indirection.X]; auto* SkinnedProxy = static_cast(Primitive->Proxy); const uint32 TransformCount = SkinnedProxy->GetMaxBoneTransformCount(); const uint32 AnimationCount = SkinnedProxy->GetUniqueAnimationCount(); const uint32 FrameTransformCount = (TransformCount * AnimationCount); const uint32 TotalTransformCount = FrameTransformCount * 2; // Current and Previous // Fetch bone transforms from Nanite mesh object and upload to GPU (3x4 transposed) const TArray* SrcCurrentTransformsArray= SkinnedProxy->GetMeshObject()->GetCurrentBoneTransforms(); const TArray* SrcPreviousTransformsArray = SkinnedProxy->GetMeshObject()->GetPreviousBoneTransforms(); const bool bHasValidCurrentTransforms = SrcCurrentTransformsArray != nullptr && 
SrcCurrentTransformsArray->Num() == FrameTransformCount; const bool bHasValidPreviousTransforms = SrcPreviousTransformsArray != nullptr && SrcPreviousTransformsArray->Num() == FrameTransformCount; FCompressedBoneTransform* DstCurrentTransforms = Transforms + TransformWrite; FCompressedBoneTransform* DstPreviousTransforms = DstCurrentTransforms + TransformCount; FCopyCommand& Command = CopyCommands.Emplace_GetRef(); Command.SrcOffset = TransformWrite * sizeof(FCompressedBoneTransform); Command.DstOffset = Indirection.Y; Command.NumBytes = TotalTransformCount * sizeof(FCompressedBoneTransform); if (bHasValidCurrentTransforms) { const FMatrix3x4* SrcCurrentTransforms = SrcCurrentTransformsArray->GetData(); const FMatrix3x4* SrcPreviousTransforms = bHasValidPreviousTransforms ? SrcPreviousTransformsArray->GetData() : nullptr; const uint32 StridedPtrStep = TransformCount * 2u; for (uint32 UniqueAnimation = 0; UniqueAnimation < AnimationCount; ++UniqueAnimation) { #if USE_COMPRESSED_BONE_TRANSFORM for (uint32 i = 0; i < TransformCount; i++) { StoreCompressedBoneTransform(DstCurrentTransforms[i], SrcCurrentTransforms[i]); if (bHasValidPreviousTransforms) { StoreCompressedBoneTransform(DstPreviousTransforms[i], SrcPreviousTransforms[i]); } } #else FMemory::Memcpy(DstCurrentTransforms, SrcCurrentTransforms, sizeof(FCompressedBoneTransform) * TransformCount); if (bHasValidPreviousTransforms) { FMemory::Memcpy(DstPreviousTransforms, SrcPreviousTransforms, sizeof(FCompressedBoneTransform) * TransformCount); } #endif DstCurrentTransforms += StridedPtrStep; DstPreviousTransforms += StridedPtrStep; } // For the single animations without previous transforms we can avoid writing the previous transform part of the data since it's on the end. 
if (AnimationCount == 1 && !bHasValidPreviousTransforms) { Command.NumBytes = FrameTransformCount * sizeof(FCompressedBoneTransform); } } else { // Data is invalid, replace with reference pose for (uint32 TransformIndex = 0; TransformIndex < (TransformCount * AnimationCount); ++TransformIndex) { SetCompressedBoneTransformIdentity(DstCurrentTransforms[TransformIndex]); SetCompressedBoneTransformIdentity(DstPreviousTransforms[TransformIndex]); } } TransformWrite += TotalTransformCount; } RHICmdList.UnlockBuffer(SrcTransformBufferRHI); }, UE::Tasks::ETaskPriority::High); FCopyBufferParameters* PassParameters = GraphBuilder.AllocParameters(); PassParameters->SrcBuffer = SrcTransformBuffer; PassParameters->DstBuffer = Context.TransformBuffer; GraphBuilder.AddPass( RDG_EVENT_NAME("CopyBuffer (%s Size=%ubytes)", SrcTransformBuffer->Name, SrcTransformBuffer->Desc.GetSize()), PassParameters, ERDGPassFlags::Copy, [PassParameters, &CopyCommands](FRDGAsyncTask, FRHICommandList& RHICmdList) { for (const FCopyCommand& Command : CopyCommands) { RHICmdList.CopyBufferRegion(PassParameters->DstBuffer->GetRHI(), Command.DstOffset, PassParameters->SrcBuffer->GetRHI(), Command.SrcOffset, Command.NumBytes); } }); } } // Nanite IMPLEMENT_SCENE_UB_STRUCT_EX(FNaniteSkinningParameters, NaniteSkinning, Nanite::GetDefaultSkinningParameters);