// Copyright Epic Games, Inc. All Rights Reserved. // Implementation of Memory Allocation Strategies //----------------------------------------------------------------------------- // Include Files //----------------------------------------------------------------------------- #include "D3D12Allocation.h" #include "D3D12RHIPrivate.h" #include "Misc/BufferedOutputDevice.h" #include "HAL/PlatformStackWalk.h" #include "HAL/LowLevelMemTracker.h" #include "HAL/LowLevelMemStats.h" #include "ProfilingDebugging/MemoryTrace.h" // Fix for random GPU crashes on draw indirects on multiple IHVs. Force all indirect arg buffers as committed resources (see UE-115982) static int32 GD3D12AllowPoolAllocateIndirectArgBuffers = 1; static FAutoConsoleVariableRef CVarD3D12AllowPoolAllocateIndirectArgBuffers( TEXT("d3d12.AllowPoolAllocateIndirectArgBuffers"), GD3D12AllowPoolAllocateIndirectArgBuffers, TEXT("Allow indirect args to be pool allocated (otherwise they will be committed resources) (default: 0)"), ECVF_ReadOnly); #if D3D12RHI_SEGREGATED_TEXTURE_ALLOC static int32 GD3D12ReadOnlyTextureAllocatorMinPoolSize = 4 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12ReadOnlyTextureAllocatorMinPoolSize( TEXT("d3d12.ReadOnlyTextureAllocator.MinPoolSize"), GD3D12ReadOnlyTextureAllocatorMinPoolSize, TEXT("Minimum allocation granularity (in bytes) of each size list"), ECVF_ReadOnly); static int32 GD3D12ReadOnlyTextureAllocatorMinNumToPool = 8; static FAutoConsoleVariableRef CVarD3D12ReadOnlyTextureAllocatorMinNumToPool( TEXT("d3d12.ReadOnlyTextureAllocator.MinNumToPool"), GD3D12ReadOnlyTextureAllocatorMinNumToPool, TEXT("Texture pool of each size list must be large enough to store this") TEXT("many textures unless constrained by maximum allocation granularity"), ECVF_ReadOnly); static int32 GD3D12ReadOnlyTextureAllocatorMaxPoolSize = 20 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12ReadOnlyTextureAllocatorMaxPoolSize( TEXT("d3d12.ReadOnlyTextureAllocator.MaxPoolSize"), 
GD3D12ReadOnlyTextureAllocatorMaxPoolSize, TEXT("Maximum allocation granularity (in bytes) of each size list"), ECVF_ReadOnly); #endif #if USE_BUFFER_POOL_ALLOCATOR #if !defined(BUFFER_POOL_DEFRAG_MAX_COPY_SIZE_PER_FRAME) #define BUFFER_POOL_DEFRAG_MAX_COPY_SIZE_PER_FRAME 32*1024*1024 #endif #if !defined(BUFFER_POOL_DEFAULT_POOL_SIZE) #define BUFFER_POOL_DEFAULT_POOL_SIZE 32*1024*1024 #endif #if !defined(BUFFER_POOL_DEFAULT_POOL_MAX_ALLOC_SIZE) #define BUFFER_POOL_DEFAULT_POOL_MAX_ALLOC_SIZE 16*1024*1024 #endif #if !defined(BUFFER_POOL_RT_ACCELERATION_STRUCTURE_POOL_SIZE) #define BUFFER_POOL_RT_ACCELERATION_STRUCTURE_POOL_SIZE 32*1024*1024 #endif #if !defined(BUFFER_POOL_RT_ACCELERATION_STRUCTURE_MAX_ALLOC_SIZE) #define BUFFER_POOL_RT_ACCELERATION_STRUCTURE_MAX_ALLOC_SIZE 16*1024*1024 #endif static int32 GD3D12VRAMBufferPoolDefrag = 1; static FAutoConsoleVariableRef CVarD3D12VRAMBufferPoolDefrag( TEXT("d3d12.VRAMBufferPoolDefrag"), GD3D12VRAMBufferPoolDefrag, TEXT("Defrag the VRAM buffer pool"), ECVF_ReadOnly); static int32 GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame = BUFFER_POOL_DEFRAG_MAX_COPY_SIZE_PER_FRAME; static FAutoConsoleVariableRef CVarD3D12VRAMBufferPoolDefragMaxCopySizePerFrame( TEXT("d3d12.VRAMBufferPoolDefrag.MaxCopySizePerFrame"), GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame, TEXT("Max amount of data to copy during defragmentation in a single frame (default 32MB)"), ECVF_ReadOnly); #endif // USE_BUFFER_POOL_ALLOCATOR #if USE_TEXTURE_POOL_ALLOCATOR static int32 GD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize = 64 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize( TEXT("d3d12.PoolAllocator.ReadOnlyTextureVRAMPoolSize"), GD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize, TEXT("Pool size of a single VRAM ReadOnly Texture memory pool (default 64MB)"), ECVF_ReadOnly); static int32 GD3D12PoolAllocatorReadOnlyTextureVRAMMaxAllocationSize = 64 * 1024 * 1024; static FAutoConsoleVariableRef 
CVarD3D12PoolAllocatorReadOnlyTextureMaxAllocationSize( TEXT("d3d12.PoolAllocator.ReadOnlyTextureMaxAllocationSize"), GD3D12PoolAllocatorReadOnlyTextureVRAMMaxAllocationSize, TEXT("Maximum size of a single allocation in the VRAM ReadOnly Texture pool allocator (default 64MB)"), ECVF_ReadOnly); static int32 GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize = 0 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12PoolAllocatorRTUAVTextureVRAMPoolSize( TEXT("d3d12.PoolAllocator.RTUAVTextureVRAMPoolSize"), GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize, TEXT("Pool size of a single VRAM RTUAV Texture memory pool (default 0MB - disabled)"), ECVF_ReadOnly); static int32 GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize = 0 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12PoolAllocatorRTUAVTextureMaxAllocationSize( TEXT("d3d12.PoolAllocator.RTUAVTextureMaxAllocationSize"), GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize, TEXT("Maximum size of a single allocation in the VRAM RTUAV Texture pool allocator (default 0MB - disabled)"), ECVF_ReadOnly); static int32 GD3D12VRAMTexturePoolDefrag = 1; static FAutoConsoleVariableRef CVarD3D12VRAMTexturePoolDefrag( TEXT("d3d12.VRAMTexturePoolDefrag"), GD3D12VRAMTexturePoolDefrag, TEXT("Defrag the VRAM Texture pool (enabled by default)"), ECVF_ReadOnly); static int32 GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame = 32 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12VRAMTexturePoolDefragMaxCopySizePerFrame( TEXT("d3d12.VRAMTexturePoolDefrag.MaxCopySizePerFrame"), GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame, TEXT("Max amount of data to copy during defragmentation in a single frame (default 32MB)"), ECVF_ReadOnly); #endif // USE_TEXTURE_POOL_ALLOCATOR static int32 GD3D12UploadHeapSmallBlockMaxAllocationSize = 64 * 1024; static FAutoConsoleVariableRef CVarD3D12UploadHeapSmallBlockMaxAllocationSize( TEXT("d3d12.UploadHeap.SmallBlock.MaxAllocationSize"), GD3D12UploadHeapSmallBlockMaxAllocationSize, TEXT("Maximum allocation 
size on the small block allocator for upload memory"), ECVF_ReadOnly); static int32 GD3D12UploadHeapSmallBlockPoolSize = 4 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12UploadHeapSmallBlockPoolSize( TEXT("d3d12.UploadHeap.SmallBlock.PoolSize"), GD3D12UploadHeapSmallBlockPoolSize, TEXT("Pool size for the upload memory small block allocator"), ECVF_ReadOnly); static int32 GD3D12UploadHeapBigBlockMaxAllocationSize = 64 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12UploadHeapBigBlockMaxAllocationSize( TEXT("d3d12.UploadHeap.BigBlock.MaxAllocationSize"), GD3D12UploadHeapBigBlockMaxAllocationSize, TEXT("Maximum allocation size on the big block allocator for upload memory"), ECVF_ReadOnly); static int32 GD3D12UploadHeapBigBlockPoolSize = 8 * 1024 * 1024; static FAutoConsoleVariableRef CVarD3D12UploadHeapBigBlockPoolSize( TEXT("d3d12.UploadHeap.BigBlock.PoolSize"), GD3D12UploadHeapBigBlockPoolSize, TEXT("Pool size for the upload memory big block allocator"), ECVF_ReadOnly); static int32 GD3D12FastConstantAllocatorPageSize = 64 * 1024; static FAutoConsoleVariableRef CVarD3D12FastConstantAllocatorPageSize( TEXT("d3d12.FastConstantAllocatorPageSize"), GD3D12FastConstantAllocatorPageSize, TEXT("Page size for the fast constant allocator"), ECVF_ReadOnly); #if D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE static int32 GD3D12SegListTrackLeaks = 0; static FAutoConsoleVariableRef CVarD3D12SegListTrackLeaks( TEXT("d3d12.SegListTrackLeaks"), GD3D12SegListTrackLeaks, TEXT("1: Enable leak tracking in d3d12 seglist's"), ECVF_ReadOnly); #endif static int32 GD3D12FastAllocatorMinPagesToRetain = 5; static FAutoConsoleVariableRef CVarD3D12FastAllocatorMinPagesToRetain( TEXT("d3d12.FastAllocator.MinPagesToRetain"), GD3D12FastAllocatorMinPagesToRetain, TEXT("Minimum number of pages to retain. Pages below this limit will never be released. 
Pages above can be released after being unused for a certain number of frames."), ECVF_Default); DECLARE_LLM_MEMORY_STAT(TEXT("D3D12AllocatorUnused"), STAT_D3D12AllocatorUnusedLLM, STATGROUP_LLMFULL); LLM_DEFINE_TAG(D3D12AllocatorUnused, NAME_None, NAME_None, GET_STATFNAME(STAT_D3D12AllocatorUnusedLLM), GET_STATFNAME(STAT_EngineSummaryLLM)); DECLARE_LLM_MEMORY_STAT(TEXT("D3D12AllocatorWasted"), STAT_D3D12AllocatorWastedLLM, STATGROUP_LLMFULL); LLM_DEFINE_TAG(D3D12AllocatorWasted, NAME_None, NAME_None, GET_STATFNAME(STAT_D3D12AllocatorWastedLLM), GET_STATFNAME(STAT_EngineSummaryLLM)); namespace ED3D12AllocatorID { enum Type { DefaultBufferAllocator, DynamicHeapAllocator, TextureAllocator, DefaultBufferAllocatorFullResources }; }; //----------------------------------------------------------------------------- // Allocator Base //----------------------------------------------------------------------------- FD3D12ResourceAllocator::FD3D12ResourceAllocator(FD3D12Device* ParentDevice, FRHIGPUMask VisibleNodes, const FD3D12ResourceInitConfig& InInitConfig, const FString& Name, uint32 MaxSizeForPooling) : FD3D12DeviceChild(ParentDevice) , FD3D12MultiNodeGPUObject(ParentDevice->GetGPUMask(), VisibleNodes) , InitConfig(InInitConfig) , DebugName(Name) , Initialized(false) , MaximumAllocationSizeForPooling(MaxSizeForPooling) #if D3D12RHI_TRACK_DETAILED_STATS , SpaceAlignedUsed(0) , SpaceActualUsed(0) , NumBlocksInDeferredDeletionQueue(0) , PeakUsage(0) , FailedAllocationSpace(0) #endif { } FD3D12ResourceAllocator::~FD3D12ResourceAllocator() { } //----------------------------------------------------------------------------- // Buddy Allocator //----------------------------------------------------------------------------- FD3D12BuddyAllocator::FD3D12BuddyAllocator(FD3D12Device* ParentDevice, FRHIGPUMask VisibleNodes, const FD3D12ResourceInitConfig& InInitConfig, const FString& Name, EResourceAllocationStrategy InAllocationStrategy, uint32 MaxSizeForPooling, uint32 
InMaxBlockSize, uint32 InMinBlockSize, HeapId InTraceParentHeapId) : FD3D12ResourceAllocator(ParentDevice, VisibleNodes, InInitConfig, Name, MaxSizeForPooling) , MaxBlockSize(InMaxBlockSize) , MinBlockSize(InMinBlockSize) , AllocationStrategy(InAllocationStrategy) , BackingHeap(nullptr) , LastUsedFrameFence(0) , TotalSizeUsed(0) , HeapFullMessageDisplayed(false) { #if UE_MEMORY_TRACE_ENABLED TraceHeapId = MemoryTrace_HeapSpec(InTraceParentHeapId, AllocationStrategy == EResourceAllocationStrategy::kPlacedResource ? TEXT("BuddyAllocator (PlacedResource)") : TEXT("BuddyAllocator (ManualSubAllocation)")); #endif // maxBlockSize should be evenly dividable by MinBlockSize and // maxBlockSize / MinBlockSize should be a power of two check((MaxBlockSize / MinBlockSize) * MinBlockSize == MaxBlockSize); // Evenly dividable check(0 == ((MaxBlockSize / MinBlockSize) & ((MaxBlockSize / MinBlockSize) - 1))); // Power of two MaxOrder = UnitSizeToOrder(SizeToUnitSize(MaxBlockSize)); Reset(); } void FD3D12BuddyAllocator::Initialize() { FD3D12Device* Device = GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); if (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource) { D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(InitConfig.HeapType); HeapProps.CreationNodeMask = GetGPUMask().GetNative(); HeapProps.VisibleNodeMask = GetVisibilityMask().GetNative(); D3D12_HEAP_DESC Desc = {}; Desc.SizeInBytes = MaxBlockSize; Desc.Properties = HeapProps; Desc.Alignment = 0; Desc.Flags = InitConfig.HeapFlags; if (Adapter->IsHeapNotZeroedSupported()) { Desc.Flags |= FD3D12_HEAP_FLAG_CREATE_NOT_ZEROED; } ID3D12Heap* Heap = nullptr; { LLM_PLATFORM_SCOPE(ELLMTag::GraphicsPlatform); #if PLATFORM_WINDOWS // we are tracking allocations ourselves LLM_SCOPED_PAUSE_TRACKING_FOR_TRACKER(ELLMTracker::Default, ELLMAllocType::System); VERIFYD3D12RESULT(Adapter->GetD3DDevice()->CreateHeap(&Desc, IID_PPV_ARGS(&Heap))); 
LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, MaxBlockSize, ELLMTracker::Platform, ELLMAllocType::System); #else LLM_SCOPE_BYTAG(D3D12AllocatorUnused); VERIFYD3D12RESULT(Adapter->GetD3DDevice()->CreateHeap(&Desc, IID_PPV_ARGS(&Heap))); #endif } BackingHeap = new FD3D12Heap(GetParentDevice(), GetVisibilityMask(), TraceHeapId); BackingHeap->SetHeap(Heap, TEXT("BuddyAllocator Heap")); // Only track resources that cannot be accessed on the CPU. if (IsGPUOnly(InitConfig.HeapType)) { BackingHeap->BeginTrackingResidency(Desc.SizeInBytes); } } else { { const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(InitConfig.HeapType, GetGPUMask().GetNative(), GetVisibilityMask().GetNative()); #if PLATFORM_WINDOWS LLM_SCOPED_PAUSE_TRACKING_FOR_TRACKER(ELLMTracker::Default, ELLMAllocType::System); VERIFYD3D12RESULT(Adapter->CreateBuffer(HeapProps, GetGPUMask(), InitConfig.InitialResourceState, ED3D12ResourceStateMode::SingleState, InitConfig.InitialResourceState, MaxBlockSize, BackingResource.GetInitReference(), TEXT("Resource Allocator Underlying Buffer"), InitConfig.ResourceFlags)); LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, MaxBlockSize, ELLMTracker::Platform, ELLMAllocType::System); #else LLM_SCOPE_BYTAG(D3D12AllocatorUnused); VERIFYD3D12RESULT(Adapter->CreateBuffer(HeapProps, GetGPUMask(), InitConfig.InitialResourceState, ED3D12ResourceStateMode::SingleState, InitConfig.InitialResourceState, MaxBlockSize, BackingResource.GetInitReference(), TEXT("Resource Allocator Underlying Buffer"), InitConfig.ResourceFlags)); #endif #if UE_MEMORY_TRACE_ENABLED MemoryTrace_MarkAllocAsHeap(BackingResource->GetGPUVirtualAddress(), TraceHeapId); #endif } if (IsCPUAccessible(InitConfig.HeapType)) { BackingResource->Map(); } } } void FD3D12BuddyAllocator::Destroy() { ReleaseAllResources(); } uint32 FD3D12BuddyAllocator::AllocateBlock(uint32 order) { uint32 offset; if (order > MaxOrder) { // Can't allocate a block that large 
check(false); // Crash to avoid infinite recursivity UE_LOG(LogD3D12RHI, Fatal, TEXT("Buddy Allocator cant allocate a block that large (order %d)"), order); } if (FreeBlocks[order].Num() == 0) { // No free nodes in the requested pool. Try to find a higher-order block and split it. uint32 left = AllocateBlock(order + 1); uint32 size = OrderToUnitSize(order); uint32 right = left + size; FreeBlocks[order].Add(right); // Add the right block to the free pool offset = left; // Return the left block } else { TSet::TConstIterator it(FreeBlocks[order]); offset = *it; // Remove the block from the free list FreeBlocks[order].Remove(*it); } return offset; } void FD3D12BuddyAllocator::DeallocateBlock(uint32 offset, uint32 order) { // See if the buddy block is free uint32 size = OrderToUnitSize(order); uint32 buddy = GetBuddyOffset(offset, size); uint32* it = FreeBlocks[order].Find(buddy); if (it != nullptr) { // Deallocate merged blocks DeallocateBlock(FMath::Min(offset, buddy), order + 1); // Remove the buddy from the free list FreeBlocks[order].Remove(*it); } else { // Add the block to the free list FreeBlocks[order].Add(offset); } } void FD3D12BuddyAllocator::Allocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation) { FScopeLock Lock(&CS); if (Initialized == false) { Initialize(); Initialized = true; } uint32 SizeToAllocate = SizeInBytes; // If the alignment doesn't match the block size if (Alignment != 0 && MinBlockSize % Alignment != 0) { SizeToAllocate = SizeInBytes + Alignment; } // Work out what size block is needed and allocate one const uint32 UnitSize = SizeToUnitSize(SizeToAllocate); const uint32 Order = UnitSizeToOrder(UnitSize); const uint32 Offset = AllocateBlock(Order); // This is the offset in MinBlockSize units const uint32 AllocSize = uint32(OrderToUnitSize(Order) * MinBlockSize); const uint32 AllocationBlockOffset = uint32(Offset * MinBlockSize); uint32 Padding = 0; if (Alignment != 0 && AllocationBlockOffset % Alignment != 0) 
{ uint32 AlignedBlockOffset = AlignArbitrary(AllocationBlockOffset, Alignment); Padding = AlignedBlockOffset - AllocationBlockOffset; check((Padding + SizeInBytes) <= AllocSize) } INCREASE_ALLOC_COUNTER(SpaceAlignedUsed, AllocSize); INCREASE_ALLOC_COUNTER(SpaceActualUsed, SizeInBytes); #if PLATFORM_WINDOWS // Decrease only texture size so wasted amount stays in D3D12AllocatorUnused LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, 0 - int64(SizeInBytes), ELLMTracker::Platform, ELLMAllocType::System); LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorWasted, int64(AllocSize - SizeInBytes), ELLMTracker::Platform, ELLMAllocType::System); #endif TotalSizeUsed += AllocSize; #if D3D12RHI_TRACK_DETAILED_STATS if (SpaceActualUsed > PeakUsage) { PeakUsage = SpaceActualUsed; } #endif const uint32 AlignedOffsetFromResourceBase = AllocationBlockOffset + Padding; check((AlignedOffsetFromResourceBase % Alignment) == 0); // Setup the info that this allocator FD3D12BuddyAllocatorPrivateData& PrivateData = ResourceLocation.GetBuddyAllocatorPrivateData(); PrivateData.Order = Order; PrivateData.Offset = Offset; ResourceLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation); ResourceLocation.SetAllocator((FD3D12BaseAllocatorType*)this); ResourceLocation.SetSize(SizeInBytes); if (AllocationStrategy == EResourceAllocationStrategy::kManualSubAllocation) { ResourceLocation.SetOffsetFromBaseOfResource(AlignedOffsetFromResourceBase); ResourceLocation.SetResource(BackingResource); ResourceLocation.SetGPUVirtualAddress(BackingResource->GetGPUVirtualAddress() + AlignedOffsetFromResourceBase); if (IsCPUAccessible(InitConfig.HeapType)) { ResourceLocation.SetMappedBaseAddress((uint8*)BackingResource->GetResourceBaseAddress() + AlignedOffsetFromResourceBase); } } else { // Place resources are intialized elsewhere } // track the allocation #if !PLATFORM_WINDOWS { 
LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, 0 - int64(AllocSize), ELLMTracker::Default, ELLMAllocType::System); } LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelAlloc(ELLMTracker::Default, ResourceLocation.GetAddressForLLMTracking(), AllocSize)); // Note: Disabling this LLM hook for Windows is due to a work-around in the way that d3d12 buffers are tracked // by LLM. LLM tracks buffer data in the UpdateBufferStats function because that is the easiest place to ensure that LLM // can be updated whenever a buffer is created or released. Unfortunately, some buffers allocate from this allocator // which means that the memory would be counted twice. Because of this the tracking had to be disabled here. // This does mean that non-buffer memory that goes through this allocator won't be tracked, so this does need a better solution. // see UpdateBufferStats for a more detailed explanation. #endif } bool FD3D12BuddyAllocator::TryAllocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation) { FScopeLock Lock(&CS); if (CanAllocate(SizeInBytes, Alignment)) { Allocate(SizeInBytes, Alignment, ResourceLocation); return true; } else { INCREASE_ALLOC_COUNTER(FailedAllocationSpace, SizeInBytes); return false; } } void FD3D12BuddyAllocator::Deallocate(FD3D12ResourceLocation& ResourceLocation) { check(IsOwner(ResourceLocation)); // Blocks are cleaned up async so need a lock FScopeLock Lock(&CS); FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); RetiredBlock& Block = DeferredDeletionQueue.Emplace_GetRef(); Block.FrameFence = FrameFence.GetNextFenceToSignal(); FD3D12BuddyAllocatorPrivateData& PrivateData = ResourceLocation.GetBuddyAllocatorPrivateData(); Block.Data.Order = PrivateData.Order; Block.Data.Offset = PrivateData.Offset; Block.AllocationSize = ResourceLocation.GetSize(); // update the last used framce fence used during garbage collection 
LastUsedFrameFence = FMath::Max(LastUsedFrameFence, Block.FrameFence); if (ResourceLocation.GetResource()->IsPlacedResource()) { Block.PlacedResource = ResourceLocation.GetResource(); } INCREASE_ALLOC_COUNTER(NumBlocksInDeferredDeletionQueue, 1); // track the allocation #if !PLATFORM_WINDOWS // Note: Disabling this LLM hook for Windows is due to a work-around in the way that d3d12 buffers are tracked // by LLM. LLM tracks buffer data in the UpdateBufferStats function because that is the easiest place to ensure that LLM // can be updated whenever a buffer is created or released. Unfortunately, some buffers allocate from this allocator // which means that the memory would be counted twice. Because of this the tracking had to be disabled here. // This does mean that non-buffer memory that goes through this allocator won't be tracked, so this does need a better solution. // see UpdateBufferStats for a more detailed explanation. LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelFree(ELLMTracker::Default, ResourceLocation.GetAddressForLLMTracking())); { const uint32 AllocSize = uint32(OrderToUnitSize(Block.Data.Order) * MinBlockSize); LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, int64(AllocSize), ELLMTracker::Default, ELLMAllocType::System); } #endif } void FD3D12BuddyAllocator::DeallocateInternal(RetiredBlock& Block) { DeallocateBlock(Block.Data.Offset, Block.Data.Order); const uint32 Size = uint32(OrderToUnitSize(Block.Data.Order) * MinBlockSize); DECREASE_ALLOC_COUNTER(SpaceAlignedUsed, Size); DECREASE_ALLOC_COUNTER(SpaceActualUsed, Block.AllocationSize); #if PLATFORM_WINDOWS LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, int64(Block.AllocationSize), ELLMTracker::Platform, ELLMAllocType::System); LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorWasted, 0 - int64(Size - Block.AllocationSize), ELLMTracker::Platform, ELLMAllocType::System); #endif TotalSizeUsed -= Size; if (AllocationStrategy 
== EResourceAllocationStrategy::kPlacedResource) { // Release the resource check(Block.PlacedResource != nullptr); Block.PlacedResource->Release(); Block.PlacedResource = nullptr; } }; void FD3D12BuddyAllocator::CleanUpAllocations() { FScopeLock Lock(&CS); FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); uint32 PopCount = 0; for (int32 i = 0; i < DeferredDeletionQueue.Num(); i++) { RetiredBlock& Block = DeferredDeletionQueue[i]; if (FrameFence.IsFenceComplete(Block.FrameFence, /* bUpdateCachedFenceValue */ false)) { DeallocateInternal(Block); DECREASE_ALLOC_COUNTER(NumBlocksInDeferredDeletionQueue, 1); PopCount = i + 1; } else { break; } } if (PopCount) { // clear out all of the released blocks, don't allow the array to shrink DeferredDeletionQueue.RemoveAt(0, PopCount, EAllowShrinking::No); } } void FD3D12BuddyAllocator::ReleaseAllResources() { #if PLATFORM_WINDOWS LLM_SCOPED_PAUSE_TRACKING_FOR_TRACKER(ELLMTracker::Default, ELLMAllocType::System); LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, 0 - int64(MaxBlockSize), ELLMTracker::Platform, ELLMAllocType::System); #endif #if UE_MEMORY_TRACE_ENABLED if (AllocationStrategy != EResourceAllocationStrategy::kPlacedResource) { // Free memory & heap to match alloc operations D3D12_GPU_VIRTUAL_ADDRESS GPUAddress = BackingResource ? 
BackingResource->GetGPUVirtualAddress() : 0; if (GPUAddress > 0) { MemoryTrace_UnmarkAllocAsHeap(GPUAddress, TraceHeapId); MemoryTrace_Free(GPUAddress, EMemoryTraceRootHeap::VideoMemory); } } #endif for (RetiredBlock& Block : DeferredDeletionQueue) { DeallocateInternal(Block); DECREASE_ALLOC_COUNTER(NumBlocksInDeferredDeletionQueue, 1); } DeferredDeletionQueue.Empty(); if (BackingResource) { ensure(BackingResource->GetRefCount() == 1 || GNumExplicitGPUsForRendering > 1); BackingResource = nullptr; } } void FD3D12BuddyAllocator::DumpAllocatorStats(class FOutputDevice& Ar) { #if defined(UE_BUILD_DEBUG) FBufferedOutputDevice BufferedOutput; { // This is the memory tracked inside individual allocation pools FD3D12DynamicRHI* D3DRHI = FD3D12DynamicRHI::GetD3DRHI(); FName categoryName(&DebugName.GetCharArray()[0]); BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("")); BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("Heap Size | MinBlock Size | Space Used | Peak Usage | Unpooled Allocations | Internal Fragmentation | Blocks in Deferred Delete Queue ")); BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("----------")); uint64 InternalFragmentation = SpaceAlignedUsed - SpaceActualUsed; BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("% 10i % 10i % 16i % 12i % 13i % 8i % 10I"), MaxBlockSize, MinBlockSize, SpaceAlignedUsed, PeakUsage, FailedAllocationSpace, InternalFragmentation, NumBlocksInDeferredDeletionQueue); } BufferedOutput.RedirectTo(Ar); #endif } void FD3D12BuddyAllocator::UpdateMemoryStats(uint32& IOMemoryAllocated, uint32& IOMemoryUsed, uint32& IOMemoryFree, uint32& IOAlignmentWaste, uint32& IOAllocatedPageCount, uint32& IOFullPageCount) { #if D3D12RHI_TRACK_DETAILED_STATS IOMemoryAllocated += MaxBlockSize; IOMemoryUsed += SpaceActualUsed; IOMemoryFree += (MaxBlockSize - SpaceAlignedUsed); IOAlignmentWaste += SpaceAlignedUsed - SpaceActualUsed; IOAllocatedPageCount++; if 
(MaxBlockSize == SpaceAlignedUsed) IOFullPageCount++; #endif } bool FD3D12BuddyAllocator::CanAllocate(uint32 size, uint32 alignment) { if (TotalSizeUsed == MaxBlockSize) { return false; } uint32 sizeToAllocate = size; // If the alignment doesn't match the block size if (alignment != 0 && MinBlockSize % alignment != 0) { sizeToAllocate = size + alignment; } uint32 blockSize = MaxBlockSize; for (int32 i = FreeBlocks.Num() - 1; i >= 0; i--) { if (FreeBlocks[i].Num() && blockSize >= sizeToAllocate) { return true; } // Halve the block size; blockSize = blockSize >> 1; if (blockSize < sizeToAllocate) return false; } return false; } void FD3D12BuddyAllocator::Reset() { // Clear the free blocks collection FreeBlocks.Empty(); // Initialize the pool with a free inner block of max inner block size FreeBlocks.SetNum(MaxOrder + 1); FreeBlocks[MaxOrder].Add((uint32)0); } //----------------------------------------------------------------------------- // Multi-Buddy Allocator //----------------------------------------------------------------------------- FD3D12MultiBuddyAllocator::FD3D12MultiBuddyAllocator(FD3D12Device* ParentDevice, FRHIGPUMask VisibleNodes, const FD3D12ResourceInitConfig& InInitConfig, const FString& Name, EResourceAllocationStrategy InAllocationStrategy, uint32 InMaxAllocationSize, uint32 InDefaultPoolSize, uint32 InMinBlockSize, HeapId InTraceParentHeapId) : FD3D12ResourceAllocator(ParentDevice, VisibleNodes, InInitConfig, Name, InMaxAllocationSize) , AllocationStrategy(InAllocationStrategy) , MinBlockSize(InMinBlockSize) , DefaultPoolSize(InDefaultPoolSize) { #if UE_MEMORY_TRACE_ENABLED TraceHeapId = MemoryTrace_HeapSpec(InTraceParentHeapId, *Name); #endif } FD3D12MultiBuddyAllocator::~FD3D12MultiBuddyAllocator() { Destroy(); } bool FD3D12MultiBuddyAllocator::TryAllocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation) { FScopeLock Lock(&CS); for (int32 i = 0; i < Allocators.Num(); i++) { if 
(Allocators[i]->TryAllocate(SizeInBytes, Alignment, ResourceLocation)) { return true; } } Allocators.Add(CreateNewAllocator(SizeInBytes)); return Allocators.Last()->TryAllocate(SizeInBytes, Alignment, ResourceLocation); } void FD3D12MultiBuddyAllocator::Deallocate(FD3D12ResourceLocation& ResourceLocation) { //The sub-allocators should handle the deallocation check(false); } FD3D12BuddyAllocator* FD3D12MultiBuddyAllocator::CreateNewAllocator(uint32 InMinSizeInBytes) { check(InMinSizeInBytes <= MaximumAllocationSizeForPooling); uint32 AllocationSize = (InMinSizeInBytes > DefaultPoolSize) ? FMath::RoundUpToPowerOfTwo(InMinSizeInBytes) : DefaultPoolSize; return new FD3D12BuddyAllocator(GetParentDevice(), GetVisibilityMask(), InitConfig, DebugName, AllocationStrategy, AllocationSize, AllocationSize, MinBlockSize, TraceHeapId); } void FD3D12MultiBuddyAllocator::Initialize() { Allocators.Add(CreateNewAllocator(DefaultPoolSize)); } void FD3D12MultiBuddyAllocator::Destroy() { ReleaseAllResources(); } void FD3D12MultiBuddyAllocator::CleanUpAllocations(uint64 InFrameLag) { FScopeLock Lock(&CS); for (auto*& Allocator : Allocators) { Allocator->CleanUpAllocations(); } // Trim empty allocators if not used in last n frames FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); const uint64 CompletedFence = FrameFence.GetCompletedFenceValue(/* bUpdateCachedFenceValue */ true); for (int32 i = (Allocators.Num() - 1); i >= 0; i--) { if (Allocators[i]->IsEmpty() && (Allocators[i]->GetLastUsedFrameFence() + InFrameLag <= CompletedFence)) { Allocators[i]->Destroy(); delete(Allocators[i]); Allocators.RemoveAt(i); } } } void FD3D12MultiBuddyAllocator::DumpAllocatorStats(class FOutputDevice& Ar) { //TODO } void FD3D12MultiBuddyAllocator::UpdateMemoryStats(uint32& IOMemoryAllocated, uint32& IOMemoryUsed, uint32& IOMemoryFree, uint32& IOAlignmentWaste, uint32& IOAllocatedPageCount, uint32& IOFullPageCount) { #if 
D3D12RHI_TRACK_DETAILED_STATS FScopeLock Lock(&CS); for (FD3D12BuddyAllocator* Allocator : Allocators) { Allocator->UpdateMemoryStats(IOMemoryAllocated, IOMemoryUsed, IOMemoryFree, IOAlignmentWaste, IOAllocatedPageCount, IOFullPageCount); } #endif } void FD3D12MultiBuddyAllocator::ReleaseAllResources() { for (int32 i = (Allocators.Num() - 1); i >= 0; i--) { if (Allocators[i]) { Allocators[i]->Destroy(); delete(Allocators[i]); } } Allocators.Empty(); } void FD3D12MultiBuddyAllocator::Reset() { } //----------------------------------------------------------------------------- // Bucket Allocator //----------------------------------------------------------------------------- FD3D12BucketAllocator::FD3D12BucketAllocator(FD3D12Device* ParentDevice, FRHIGPUMask VisibleNodes, const FD3D12ResourceInitConfig& InInitConfig, const FString& Name, uint64 InBlockRetentionFrameCount) : FD3D12ResourceAllocator(ParentDevice, VisibleNodes, InInitConfig, Name, 32 * 1024 * 1024), BlockRetentionFrameCount(InBlockRetentionFrameCount) {} bool FD3D12BucketAllocator::TryAllocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation) { FScopeLock Lock(&CS); FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); // Size cannot be smaller than the requested alignment SizeInBytes = FMath::Max(SizeInBytes, Alignment); uint32 Bucket = BucketFromSize(SizeInBytes, BucketShift); check(Bucket < NumBuckets); uint32 BlockSize = BlockSizeFromBufferSize(SizeInBytes, BucketShift); // If some odd alignment is requested, make sure the block can fulfill it. 
if (BlockSize % Alignment != 0) { const uint32 AlignedSizeInBytes = SizeInBytes + Alignment; Bucket = BucketFromSize(AlignedSizeInBytes, BucketShift); BlockSize = BlockSizeFromBufferSize(AlignedSizeInBytes, BucketShift); } FD3D12BlockAllocatorPrivateData& Block = ResourceLocation.GetBlockAllocatorPrivateData(); // See if a block is already available in the bucket if (AvailableBlocks[Bucket].Dequeue(Block)) { check(Block.ResourceHeap); } else { // No blocks of the requested size are available so make one FD3D12Resource* Resource = nullptr; void* BaseAddress = nullptr; // Allocate a block check(BlockSize >= SizeInBytes); if (FAILED(Adapter->CreateBuffer(InitConfig.HeapType, GetGPUMask(), GetVisibilityMask(), SizeInBytes < MIN_HEAP_SIZE ? MIN_HEAP_SIZE : SizeInBytes, &Resource, TEXT("BucketAllocator"), InitConfig.ResourceFlags))) { return false; } // Track the resource so we know when to delete it SubAllocatedResources.Add(Resource); if (IsCPUAccessible(InitConfig.HeapType)) { BaseAddress = Resource->Map(); check(BaseAddress); check(BaseAddress == (uint8*)(((uint64)BaseAddress + Alignment - 1) & ~((uint64)Alignment - 1))); } // Init the block we will return Block.BucketIndex = Bucket; Block.Offset = 0; Block.ResourceHeap = Resource; Block.ResourceHeap->AddRef(); // Chop up the rest of the resource into reusable blocks if (BlockSize < MIN_HEAP_SIZE) { // Create additional available blocks that can be sub-allocated from the same resource for (uint32 Offset = BlockSize; Offset <= MIN_HEAP_SIZE - BlockSize; Offset += BlockSize) { FD3D12BlockAllocatorPrivateData NewBlock = {}; NewBlock.BucketIndex = Bucket; NewBlock.Offset = Offset; NewBlock.ResourceHeap = Resource; NewBlock.ResourceHeap->AddRef(); // Add the bucket to the available list AvailableBlocks[Bucket].Enqueue(NewBlock); } } } uint64 AlignedBlockOffset = Block.Offset; if (Alignment != 0 && AlignedBlockOffset % Alignment != 0) { AlignedBlockOffset = AlignArbitrary(AlignedBlockOffset, Alignment); } 
ResourceLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation); ResourceLocation.SetAllocator((FD3D12BaseAllocatorType*)this); ResourceLocation.SetResource(Block.ResourceHeap); ResourceLocation.SetSize(SizeInBytes); ResourceLocation.SetOffsetFromBaseOfResource(AlignedBlockOffset); ResourceLocation.SetGPUVirtualAddress(Block.ResourceHeap->GetGPUVirtualAddress() + AlignedBlockOffset); if (IsCPUAccessible(InitConfig.HeapType)) { ResourceLocation.SetMappedBaseAddress((void*)((uint64)Block.ResourceHeap->GetResourceBaseAddress() + AlignedBlockOffset)); } // Check that when the offset is aligned that it doesn't go passed the end of the block check(ResourceLocation.GetOffsetFromBaseOfResource() - Block.Offset + SizeInBytes <= BlockSize); return true; } void FD3D12BucketAllocator::Deallocate(FD3D12ResourceLocation& ResourceLocation) { FScopeLock Lock(&CS); FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); FD3D12BlockAllocatorPrivateData& Block = ResourceLocation.GetBlockAllocatorPrivateData(); Block.FrameFence = FrameFence.GetNextFenceToSignal(); ExpiredBlocks.Enqueue(Block); } void FD3D12BucketAllocator::Initialize() { } void FD3D12BucketAllocator::Destroy() { ReleaseAllResources(); } void FD3D12BucketAllocator::CleanUpAllocations(uint64 InFrameLag) { FScopeLock Lock(&CS); FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); #if SUB_ALLOCATED_DEFAULT_ALLOCATIONS const static uint32 MinCleanupBucket = FMath::Max(0, BucketFromSize(MIN_HEAP_SIZE, BucketShift) - 4); #else const static uint32 MinCleanupBucket = 0; #endif // Start at bucket 8 since smaller buckets are sub-allocated resources // and would be fragmented by deleting blocks for (uint32 bucket = MinCleanupBucket; bucket < NumBuckets; bucket++) { FD3D12BlockAllocatorPrivateData BlockInQueue = {}; const uint32 RetentionCount = BlockRetentionFrameCount; 
const auto& Functor = [&FrameFence, RetentionCount](const FD3D12BlockAllocatorPrivateData& Block) { return FrameFence.IsFenceComplete(Block.FrameFence + RetentionCount, /* bUpdateCachedFenceValue */ false); }; while (AvailableBlocks[bucket].Dequeue(BlockInQueue, Functor)) { SAFE_RELEASE(BlockInQueue.ResourceHeap); } } FD3D12BlockAllocatorPrivateData BlockInQueue = {}; const auto& Functor = [&FrameFence](const FD3D12BlockAllocatorPrivateData& Block) { return FrameFence.IsFenceComplete(Block.FrameFence, /* bUpdateCachedFenceValue */ false); }; while (ExpiredBlocks.Dequeue(BlockInQueue, Functor)) { // Add the bucket to the available list AvailableBlocks[BlockInQueue.BucketIndex].Enqueue(BlockInQueue); } } void FD3D12BucketAllocator::DumpAllocatorStats(class FOutputDevice& Ar) { //TODO: } void FD3D12BucketAllocator::ReleaseAllResources() { const static uint32 MinCleanupBucket = 0; // Start at bucket 8 since smaller buckets are sub-allocated resources // and would be fragmented by deleting blocks for (uint32 bucket = MinCleanupBucket; bucket < NumBuckets; bucket++) { FD3D12BlockAllocatorPrivateData Block = {}; while (AvailableBlocks[bucket].Dequeue(Block)) { SAFE_RELEASE(Block.ResourceHeap); } } FD3D12BlockAllocatorPrivateData Block = {}; while (ExpiredBlocks.Dequeue(Block)) { if (Block.BucketIndex >= MinCleanupBucket) //-V547 { SAFE_RELEASE(Block.ResourceHeap); } } for (FD3D12Resource*& Resource : SubAllocatedResources) { Resource->Release(); delete(Resource); } } void FD3D12BucketAllocator::Reset() { } //----------------------------------------------------------------------------- // Dynamic Buffer Allocator //----------------------------------------------------------------------------- FD3D12UploadHeapAllocator::FD3D12UploadHeapAllocator(FD3D12Adapter* InParent, FD3D12Device* InParentDevice, const FString& InName) : FD3D12AdapterChild(InParent) , FD3D12DeviceChild(InParentDevice) , FD3D12MultiNodeGPUObject(InParentDevice->GetGPUMask(), FRHIGPUMask::All()) // Upload 
memory, thus they can be trivially visibile to all GPUs , TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, *FString(InName + TEXT(" (UploadHeapAllocator)")))) , SmallBlockAllocator( InParentDevice , GetVisibilityMask() , FD3D12ResourceInitConfig::CreateUpload() , TEXT("Small Block Multi Buddy Allocator") , EResourceAllocationStrategy::kManualSubAllocation , GD3D12UploadHeapSmallBlockMaxAllocationSize , GD3D12UploadHeapSmallBlockPoolSize , D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT , TraceHeapId) , BigBlockAllocator( InParentDevice , GetVisibilityMask() , FD3D12ResourceInitConfig::CreateUpload() , TEXT("Big Block Pool Allocator") , EResourceAllocationStrategy::kManualSubAllocation , GD3D12UploadHeapBigBlockPoolSize , D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT , GD3D12UploadHeapBigBlockMaxAllocationSize , FRHIMemoryPool::EFreeListOrder::SortByOffset , false /*defrag*/ , TraceHeapId) , FastConstantPageAllocator( InParentDevice , GetVisibilityMask() , FD3D12ResourceInitConfig::CreateUpload() , TEXT("Fast Constant Page Multi Buddy Allocator") , EResourceAllocationStrategy::kManualSubAllocation , GD3D12FastConstantAllocatorPageSize * 64 , GD3D12UploadHeapSmallBlockPoolSize , GD3D12FastConstantAllocatorPageSize , TraceHeapId) { } void FD3D12UploadHeapAllocator::Destroy() { SmallBlockAllocator.Destroy(); BigBlockAllocator.Destroy(); FastConstantPageAllocator.Destroy(); } void* FD3D12UploadHeapAllocator::AllocUploadResource(uint32 InSize, uint32 InAlignment, FD3D12ResourceLocation& ResourceLocation) { TRACE_CPUPROFILER_EVENT_SCOPE(FD3D12UploadHeapAllocator::AllocUploadResource); check(InSize > 0); ResourceLocation.Clear(); // Fit in small block allocator? 
if (InSize <= SmallBlockAllocator.GetMaximumAllocationSizeForPooling())
	{
		verify(SmallBlockAllocator.TryAllocate(InSize, InAlignment, ResourceLocation));
	}
	else
	{
		// The big block allocator is only touched while holding BigBlockCS
		FD3D12ScopeLock Lock(&BigBlockCS);

		// Forward to the big block allocator
		const D3D12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(InSize, D3D12_RESOURCE_FLAG_NONE);
		BigBlockAllocator.AllocateResource(GetParentDevice()->GetGPUIndex(), D3D12_HEAP_TYPE_UPLOAD, ResourceDesc, InSize, InAlignment, ED3D12ResourceStateMode::SingleState, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, nullptr, ResourceLocation);
		ResourceLocation.UnlockPoolData();
	}

	return ResourceLocation.GetMappedBaseAddress();
}

// Allocates a page for the fast constant allocator and returns its mapped base address.
// InSize must be non-zero and no larger than the page allocator's pooling limit (both are check()ed).
void* FD3D12UploadHeapAllocator::AllocFastConstantAllocationPage(uint32 InSize, uint32 InAlignment, FD3D12ResourceLocation& ResourceLocation)
{
	check(InSize > 0);
	check(InSize <= FastConstantPageAllocator.GetMaximumAllocationSizeForPooling());

	// Any allocation the location already owns is released first
	ResourceLocation.Clear();

	verify(FastConstantPageAllocator.TryAllocate(InSize, InAlignment, ResourceLocation));

	return ResourceLocation.GetMappedBaseAddress();
}

// Forwards the cleanup request (with the given frame lag) to all three sub-allocators.
void FD3D12UploadHeapAllocator::CleanUpAllocations(uint64 InFrameLag)
{
	SmallBlockAllocator.CleanUpAllocations(InFrameLag);

	{
		// Same lock that guards big block allocations in AllocUploadResource()
		FD3D12ScopeLock Lock(&BigBlockCS);
		BigBlockAllocator.CleanUpAllocations(InFrameLag);
	}

	FastConstantPageAllocator.CleanUpAllocations(InFrameLag);
}

// Publishes the upload pool stats. The counters are only accumulated from the
// sub-allocators when D3D12RHI_TRACK_DETAILED_STATS is enabled; otherwise zeros are published.
void FD3D12UploadHeapAllocator::UpdateMemoryStats()
{
	uint32 MemoryAllocated = 0;
	uint32 MemoryUsed = 0;
	uint32 FreeMemory = 0;
	uint32 EndFreeMemory = 0;
	uint32 AlignmentWaste = 0;
	uint32 AllocatedPageCount = 0;
	uint32 FullPageCount = 0;

#if D3D12RHI_TRACK_DETAILED_STATS
	SmallBlockAllocator.UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount);

	{
		FD3D12ScopeLock Lock(&BigBlockCS);
		BigBlockAllocator.UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, EndFreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount);
	}

	FastConstantPageAllocator.UpdateMemoryStats(MemoryAllocated,
MemoryUsed, FreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount); #endif SET_MEMORY_STAT(STAT_D3D12UploadPoolMemoryAllocated, MemoryAllocated); SET_MEMORY_STAT(STAT_D3D12UploadPoolMemoryUsed, MemoryUsed); SET_MEMORY_STAT(STAT_D3D12UploadPoolMemoryFree, FreeMemory); SET_MEMORY_STAT(STAT_D3D12UploadPoolAlignmentWaste, AlignmentWaste); SET_DWORD_STAT(STAT_D3D12UploadPoolPageCount, AllocatedPageCount); SET_DWORD_STAT(STAT_D3D12UploadPoolFullPages, FullPageCount); } //----------------------------------------------------------------------------- // Default Buffer Allocator //----------------------------------------------------------------------------- FD3D12ResourceInitConfig FD3D12DefaultBufferPool::GetResourceAllocatorInitConfig(D3D12_HEAP_TYPE InHeapType, D3D12_RESOURCE_FLAGS InResourceFlags, EBufferUsageFlags InBufferUsage) { FD3D12ResourceInitConfig InitConfig; InitConfig.HeapType = InHeapType; InitConfig.ResourceFlags = InResourceFlags; #if D3D12_RHI_RAYTRACING // Setup initial resource state depending on the requested buffer flags if (EnumHasAnyFlags(InBufferUsage, BUF_AccelerationStructure)) { // should only have this flag and no other flags check(InBufferUsage == BUF_AccelerationStructure); InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE; } else #endif // D3D12_RHI_RAYTRACING if (InitConfig.HeapType == D3D12_HEAP_TYPE_READBACK) { InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_COPY_DEST; } else if (EnumHasAnyFlags(InBufferUsage, BUF_UnorderedAccess)) { check(InResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; } else { InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_GENERIC_READ; } InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; if (EnumHasAnyFlags(InBufferUsage, BUF_DrawIndirect)) { check(InResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); InitConfig.HeapFlags |= 
D3D12RHI_HEAP_FLAG_ALLOW_INDIRECT_BUFFERS; } return InitConfig; } EResourceAllocationStrategy FD3D12DefaultBufferPool::GetResourceAllocationStrategy(D3D12_RESOURCE_FLAGS InResourceFlags, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment) { if (Alignment > kD3D12ManualSubAllocationAlignment) { return EResourceAllocationStrategy::kPlacedResource; } // Does the resource need state tracking and transitions ED3D12ResourceStateMode ResourceStateMode = InResourceStateMode; if (ResourceStateMode == ED3D12ResourceStateMode::Default) { ResourceStateMode = (InResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) ? ED3D12ResourceStateMode::MultiState : ED3D12ResourceStateMode::SingleState; } // multi state resource need to placed because each allocation can be in a different state return (ResourceStateMode == ED3D12ResourceStateMode::MultiState) ? EResourceAllocationStrategy::kPlacedResource : EResourceAllocationStrategy::kManualSubAllocation; } //----------------------------------------------------------------------------- // Default Buffer Pool //----------------------------------------------------------------------------- FD3D12DefaultBufferPool::FD3D12DefaultBufferPool(FD3D12Device* InParent, FD3D12MultiBuddyAllocator* InAllocator) : FD3D12DeviceChild(InParent) , FD3D12MultiNodeGPUObject(InAllocator->GetGPUMask(), InAllocator->GetVisibilityMask()) , Allocator(InAllocator) { } bool FD3D12DefaultBufferPool::SupportsAllocation(D3D12_HEAP_TYPE InHeapType, D3D12_RESOURCE_FLAGS InResourceFlags, EBufferUsageFlags InBufferUsage, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment) const { FD3D12ResourceInitConfig InitConfig = GetResourceAllocatorInitConfig(InHeapType, InResourceFlags, InBufferUsage); EResourceAllocationStrategy AllocationStrategy = GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment); return (Allocator->GetInitConfig() == InitConfig && Allocator->GetAllocationStrategy() == AllocationStrategy); } void 
FD3D12DefaultBufferPool::CleanUpAllocations(uint64 FrameLag)
{
	// Straight forward to the wrapped allocator
	Allocator->CleanUpAllocations(FrameLag);
}

// Grab a buffer from the available buffers or create a new buffer if none are available.
//
// Requests with Desc.Width == 0 leave ResourceLocation cleared and return immediately.
// Requests strictly smaller than the allocator's pooling limit are sub-allocated (or placed)
// from the pool; everything else, and any pool allocation that fails, becomes a stand-alone buffer.
void FD3D12DefaultBufferPool::AllocDefaultResource(D3D12_HEAP_TYPE InHeapType, const D3D12_RESOURCE_DESC& Desc, EBufferUsageFlags InUsage, ED3D12ResourceStateMode InResourceStateMode, D3D12_RESOURCE_STATES InCreateState, uint32 Alignment, const TCHAR* Name, FD3D12ResourceLocation& ResourceLocation)
{
	FD3D12Device* Device = GetParentDevice();
	FD3D12Adapter* Adapter = Device->GetParentAdapter();

	// If the resource location owns a block, this will deallocate it.
	ResourceLocation.Clear();

	if (Desc.Width == 0)
	{
		return;
	}

#if DO_CHECK
	// Validate the create state
	if (InHeapType == D3D12_HEAP_TYPE_READBACK)
	{
		check(InCreateState == D3D12_RESOURCE_STATE_COPY_DEST);
	}
	else if (InHeapType == D3D12_HEAP_TYPE_UPLOAD)
	{
		check(InCreateState == D3D12_RESOURCE_STATE_GENERIC_READ);
	}
#if D3D12_RHI_RAYTRACING
	else if (EnumHasAnyFlags(InUsage, BUF_AccelerationStructure))
	{
		// RayTracing acceleration structures must be created in a particular state and may never transition out of it.
		check(InResourceStateMode == ED3D12ResourceStateMode::SingleState);
		check(InCreateState == D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE);
	}
#endif // D3D12_RHI_RAYTRACING
#endif // DO_CHECK

	// Note the strict '<': a request exactly at the pooling limit is allocated stand-alone
	const bool PoolResource = Desc.Width < Allocator->GetMaximumAllocationSizeForPooling()/* && ((Desc.Width % (1024 * 64)) != 0)*/;
	if (PoolResource)
	{
		const bool bPlacedResource = (Allocator->GetAllocationStrategy() == EResourceAllocationStrategy::kPlacedResource);

		// Ensure we're allocating from the correct pool
		if (bPlacedResource)
		{
			// Writeable resources get separate ID3D12Resource* with their own resource state by using placed resources. Just make sure it's UAV, other flags are free to differ.
			check((Desc.Flags & Allocator->GetInitConfig().ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) != 0 || InHeapType == D3D12_HEAP_TYPE_READBACK || Alignment > kD3D12ManualSubAllocationAlignment || InResourceStateMode == ED3D12ResourceStateMode::MultiState);
		}
		else
		{
			// Read-only resources get suballocated from big resources, thus share ID3D12Resource* and resource state with other resources. Ensure it's suballocated from a resource with identical flags.
			check(Desc.Flags == Allocator->GetInitConfig().ResourceFlags);
		}

		if (Allocator->TryAllocate(Desc.Width, Alignment, ResourceLocation))
		{
			if (bPlacedResource)
			{
				// The buddy allocator only reserved heap space; the resource itself is created here
				check(ResourceLocation.GetResource() == nullptr);

				FD3D12Heap* BackingHeap = ((FD3D12BuddyAllocator*) ResourceLocation.GetAllocator())->GetBackingHeap();
				uint64 HeapOffset = ResourceLocation.GetAllocator()->GetAllocationOffsetInBytes(ResourceLocation.GetBuddyAllocatorPrivateData());

				// The placed resource is always created as MultiState, independent of InResourceStateMode
				FD3D12Resource* NewResource = nullptr;
				VERIFYD3D12RESULT(Adapter->CreatePlacedResource(Desc, BackingHeap, HeapOffset, InCreateState, ED3D12ResourceStateMode::MultiState, D3D12_RESOURCE_STATE_TBD, nullptr, &NewResource, Name));

				ResourceLocation.SetResource(NewResource);
			}
			else
			{
				// Nothing to do for suballocated resources
			}

			// Successfully sub-allocated
			return;
		}
	}

	// Allocate Standalone
	// Todo: track stand alone allocations and see how much memory we use by this and how many we have
	FD3D12Resource* NewResource = nullptr;
	VERIFYD3D12RESULT(Adapter->CreateBuffer(InHeapType, GetGPUMask(), GetVisibilityMask(), InCreateState, InResourceStateMode, Desc.Width, &NewResource, Name, Desc.Flags));

	ResourceLocation.AsStandAlone(NewResource, Desc.Width);
}

// Accumulates the wrapped allocator's stats into the IO counters.
// IOMemoryEndFree is not passed on to the allocator and is left unmodified by this function.
void FD3D12DefaultBufferPool::UpdateMemoryStats(uint32& IOMemoryAllocated, uint32& IOMemoryUsed, uint32& IOMemoryFree, uint32& IOMemoryEndFree, uint32& IOAlignmentWaste, uint32& IOAllocatedPageCount, uint32& IOFullPageCount)
{
	Allocator->UpdateMemoryStats(IOMemoryAllocated, IOMemoryUsed, IOMemoryFree, IOAlignmentWaste,
IOAllocatedPageCount, IOFullPageCount); } //----------------------------------------------------------------------------- // Default Buffer Allocator //----------------------------------------------------------------------------- FD3D12DefaultBufferAllocator::FD3D12DefaultBufferAllocator(FD3D12Device* InParent, FRHIGPUMask VisibleNodes) : FD3D12DeviceChild(InParent) , FD3D12MultiNodeGPUObject(InParent->GetGPUMask(), VisibleNodes) , TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, TEXT("Default Buffer Allocator"))) { FMemory::Memset(DefaultBufferPools, 0); } FD3D12BufferPool* FD3D12DefaultBufferAllocator::CreateBufferPool(D3D12_HEAP_TYPE InHeapType, D3D12_RESOURCE_FLAGS InResourceFlags, EBufferUsageFlags InBufferUsage, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment) { FD3D12Device* Device = GetParentDevice(); FD3D12ResourceInitConfig InitConfig = FD3D12BufferPool::GetResourceAllocatorInitConfig(InHeapType, InResourceFlags, InBufferUsage); // NNE resources must be in heaps visible on GPU0 only. Required by DirectML. Note that in single GPU mode, GetVisibilityMask() will // be the same as GPU0(), so no extra heap fragmentation occurs in that case. FRHIGPUMask VisibleNodes = EnumHasAnyFlags(InBufferUsage, EBufferUsageFlags::NNE) ? FRHIGPUMask::GPU0() : GetVisibilityMask(); #if USE_BUFFER_POOL_ALLOCATOR const FString Name(L"D3D12 Pool Allocator"); EResourceAllocationStrategy AllocationStrategy = FD3D12PoolAllocator::GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment); uint64 PoolSize = InHeapType == D3D12_HEAP_TYPE_READBACK ? READBACK_BUFFER_POOL_DEFAULT_POOL_SIZE : BUFFER_POOL_DEFAULT_POOL_SIZE; uint64 PoolAlignment = (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource) ? MIN_PLACED_RESOURCE_SIZE : kD3D12ManualSubAllocationAlignment; uint64 MaxAllocationSize = InHeapType == D3D12_HEAP_TYPE_READBACK ? 
READBACK_BUFFER_POOL_MAX_ALLOC_SIZE : BUFFER_POOL_DEFAULT_POOL_MAX_ALLOC_SIZE; FRHIMemoryPool::EFreeListOrder FreeListOrder = FRHIMemoryPool::EFreeListOrder::SortBySize; // Disable defrag if not Default memory bool bDefragEnabled = (InitConfig.HeapType == D3D12_HEAP_TYPE_DEFAULT); #if D3D12_RHI_RAYTRACING // Disable defrag on the RT Acceleration pool - #kenzo_todo // Acceleration structure buffers may be created, but not built immediately. // If CopyRaytracingAccelerationStructure is called on such buffer, the GPU will crash. if (InitConfig.InitialResourceState == D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE) { bDefragEnabled = false; // Use custom pool and allocation size for RT structures because they don't defrag and will thus 'waste' more memory PoolSize = BUFFER_POOL_RT_ACCELERATION_STRUCTURE_POOL_SIZE; MaxAllocationSize = BUFFER_POOL_RT_ACCELERATION_STRUCTURE_MAX_ALLOC_SIZE; } #endif // D3D12_RHI_RAYTRACING FD3D12BufferPool* NewPool = new FD3D12PoolAllocator(Device, VisibleNodes, InitConfig, Name, AllocationStrategy, PoolSize, PoolAlignment, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId); #else // USE_BUFFER_POOL_ALLOCATOR EResourceAllocationStrategy AllocationStrategy = FD3D12DefaultBufferPool::GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment); // if placed then 64KB alignment required :( uint32 MinBlockSize = (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource) ? MIN_PLACED_RESOURCE_SIZE : 16; const FString Name(L"Default Buffer Multi Buddy Allocator"); FD3D12MultiBuddyAllocator* Allocator = new FD3D12MultiBuddyAllocator(Device, VisibleNodes, InitConfig, Name, AllocationStrategy, InHeapType == D3D12_HEAP_TYPE_READBACK ? READBACK_BUFFER_POOL_MAX_ALLOC_SIZE : DEFAULT_BUFFER_POOL_MAX_ALLOC_SIZE, InHeapType == D3D12_HEAP_TYPE_READBACK ? 
READBACK_BUFFER_POOL_DEFAULT_POOL_SIZE : DEFAULT_BUFFER_POOL_DEFAULT_POOL_SIZE, MinBlockSize, TraceHeapId ); FD3D12DefaultBufferPool* NewPool = new FD3D12DefaultBufferPool(Device, Allocator); #endif // USE_BUFFER_POOL_ALLOCATOR DefaultBufferPools.Add(NewPool); return NewPool; } bool FD3D12DefaultBufferAllocator::IsPlacedResource(D3D12_RESOURCE_FLAGS InResourceFlags, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment) { EResourceAllocationStrategy AllocationStrategy = FD3D12BufferPool::GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment); return (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource); } D3D12_RESOURCE_STATES FD3D12DefaultBufferAllocator::GetDefaultInitialResourceState(D3D12_HEAP_TYPE InHeapType, EBufferUsageFlags InBufferFlags, ED3D12ResourceStateMode InResourceStateMode) { // Validate the create state if (InHeapType == D3D12_HEAP_TYPE_READBACK) { return D3D12_RESOURCE_STATE_COPY_DEST; } else if (InHeapType == D3D12_HEAP_TYPE_UPLOAD) { return D3D12_RESOURCE_STATE_GENERIC_READ; } else if (InBufferFlags == BUF_UnorderedAccess && InResourceStateMode == ED3D12ResourceStateMode::SingleState) { check(InHeapType == D3D12_HEAP_TYPE_DEFAULT); return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; } #if D3D12_RHI_RAYTRACING else if (EnumHasAnyFlags(InBufferFlags, BUF_AccelerationStructure)) { check(InHeapType == D3D12_HEAP_TYPE_DEFAULT); return D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE; } #endif // D3D12_RHI_RAYTRACING else { return D3D12_RESOURCE_STATE_GENERIC_READ; } } // Grab a buffer from the available buffers or create a new buffer if none are available void FD3D12DefaultBufferAllocator::AllocDefaultResource(D3D12_HEAP_TYPE InHeapType, const D3D12_RESOURCE_DESC& InResourceDesc, EBufferUsageFlags InBufferUsage, ED3D12ResourceStateMode InResourceStateMode, D3D12_RESOURCE_STATES InCreateState, FD3D12ResourceLocation& ResourceLocation, uint32 Alignment, const TCHAR* Name) { FD3D12Adapter* Adapter = 
GetParentDevice()->GetParentAdapter(); // Force indirect args to stand alone allocations instead of pooled if (!GD3D12AllowPoolAllocateIndirectArgBuffers && EnumHasAnyFlags(InBufferUsage, BUF_DrawIndirect)) { ResourceLocation.Clear(); // NNE resources must be in heaps visible on GPU0 only. Required by DirectML. FD3D12Resource* NewResource = nullptr; const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(InHeapType, GetGPUMask().GetNative(), EnumHasAnyFlags(InBufferUsage, EBufferUsageFlags::NNE) ? FRHIGPUMask::GPU0().GetNative() : GetVisibilityMask().GetNative()); D3D12_RESOURCE_DESC Desc = InResourceDesc; Desc.Alignment = 0; VERIFYD3D12RESULT(Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InCreateState, InResourceStateMode, InCreateState, nullptr, &NewResource, Name, false)); ResourceLocation.AsStandAlone(NewResource, InResourceDesc.Width); return; } if (EnumHasAnyFlags(InBufferUsage, BUF_ReservedResource)) { ResourceLocation.Clear(); FD3D12Resource* NewResource = nullptr; checkf(Alignment % GRHIGlobals.ReservedResources.TileSizeInBytes == 0, TEXT("Reserved buffer alignment is expected to be a multiple of the reserved resource tile size")); FD3D12ResourceDesc Desc = InResourceDesc; Desc.Alignment = Alignment; Desc.bReservedResource = true; VERIFYD3D12RESULT(Adapter->CreateReservedResource(Desc, GetGPUMask(), InCreateState, InResourceStateMode, InCreateState, nullptr, &NewResource, Name, false)); ResourceLocation.AsStandAlone(NewResource, InResourceDesc.Width); return; } FScopeLock Lock(&CS); // Patch out deny shader resource because it doesn't add anything for buffers and allows more pool sharing // TODO: check if this is different on Xbox? D3D12_RESOURCE_DESC ResourceDesc = InResourceDesc; ResourceDesc.Flags = ResourceDesc.Flags & (~D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); // Do we already have a default pool which support this allocation? 
FD3D12BufferPool* BufferPool = nullptr; for (FD3D12BufferPool* Pool : DefaultBufferPools) { if (Pool->SupportsAllocation(InHeapType, ResourceDesc.Flags, InBufferUsage, InResourceStateMode, Alignment)) { BufferPool = Pool; break; } } // No pool yet, then create one if (BufferPool == nullptr) { BufferPool = CreateBufferPool(InHeapType, ResourceDesc.Flags, InBufferUsage, InResourceStateMode, Alignment); } // Perform actual allocation BufferPool->AllocDefaultResource(InHeapType, ResourceDesc, InBufferUsage, InResourceStateMode, InCreateState, Alignment, Name, ResourceLocation); } void FD3D12DefaultBufferAllocator::FreeDefaultBufferPools() { FScopeLock Lock(&CS); for (FD3D12BufferPool*& DefaultBufferPool : DefaultBufferPools) { if (DefaultBufferPool) { // No frame lag, delete all unused pages immediatly DefaultBufferPool->CleanUpAllocations(0); delete DefaultBufferPool; DefaultBufferPool = nullptr; } } } void FD3D12DefaultBufferAllocator::BeginFrame(FD3D12ContextArray const& Contexts) { #if USE_BUFFER_POOL_ALLOCATOR FScopeLock Lock(&CS); if (GD3D12VRAMBufferPoolDefrag > 0 && GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame > 0) { TRACE_CPUPROFILER_EVENT_SCOPE(DefragBufferPool); uint32 MaxCopySize = GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame; uint32 CopySize = 0; for (FD3D12BufferPool* DefaultBufferPool : DefaultBufferPools) { if (DefaultBufferPool) { DefaultBufferPool->Defrag(Contexts, MaxCopySize, CopySize); // break when we reach the max copy size if (CopySize >= MaxCopySize) { break; } } } } { // @todo dev-pr - explicit use of graphics context - nothing is synchronizing async compute - needs refactor FD3D12CommandContext& CommandContext = *Contexts[ERHIPipeline::Graphics]->GetSingleDeviceContext(GetParentDevice()->GetGPUIndex()); TRACE_CPUPROFILER_EVENT_SCOPE(FlushPendingBufferCopyOps); RHI_BREADCRUMB_EVENT(CommandContext, "BufferPoolCopyOps"); for (FD3D12BufferPool* DefaultBufferPool : DefaultBufferPools) { if (DefaultBufferPool) { 
DefaultBufferPool->FlushPendingCopyOps(CommandContext); } } } #endif // USE_BUFFER_POOL_ALLOCATOR } void FD3D12DefaultBufferAllocator::CleanupFreeBlocks(uint64 InFrameLag) { FScopeLock Lock(&CS); for (FD3D12BufferPool* DefaultBufferPool : DefaultBufferPools) { if (DefaultBufferPool) { DefaultBufferPool->CleanUpAllocations(InFrameLag); } } } void FD3D12DefaultBufferAllocator::UpdateMemoryStats() { FScopeLock Lock(&CS); uint32 MemoryAllocated = 0; uint32 MemoryUsed = 0; uint32 FreeMemory = 0; uint32 EndFreeMemory = 0; uint32 AlignmentWaste = 0; uint32 AllocatedPageCount = 0; uint32 FullPageCount = 0; #if D3D12RHI_TRACK_DETAILED_STATS for (FD3D12BufferPool* DefaultBufferPool : DefaultBufferPools) { if (DefaultBufferPool) { DefaultBufferPool->UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, EndFreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount); } } #endif //check((MemoryUsed + AlignmentWaste + FreeMemory) == MemoryAllocated); // compute fragmentation percentage stats: uint32 Fragmentation = FreeMemory - EndFreeMemory; uint32 FragmentationUsed = MemoryUsed + AlignmentWaste + Fragmentation; float FragmentationPercentage = FragmentationUsed == 0 ? 
0.0 : float(Fragmentation) / float(FragmentationUsed);

	SET_MEMORY_STAT(STAT_D3D12BufferPoolMemoryAllocated, MemoryAllocated);
	SET_MEMORY_STAT(STAT_D3D12BufferPoolMemoryUsed, MemoryUsed);
	SET_MEMORY_STAT(STAT_D3D12BufferPoolMemoryFree, FreeMemory);
	SET_MEMORY_STAT(STAT_D3D12BufferPoolAlignmentWaste, AlignmentWaste);
	SET_DWORD_STAT(STAT_D3D12BufferPoolPageCount, AllocatedPageCount);
	SET_DWORD_STAT(STAT_D3D12BufferPoolFullPages, FullPageCount);
	SET_MEMORY_STAT(STAT_D3D12BufferPoolFragmentation, Fragmentation);
	SET_FLOAT_STAT(STAT_D3D12BufferPoolFragmentationPercentage, FragmentationPercentage);
}

//-----------------------------------------------------------------------------
//	Texture Allocator
//-----------------------------------------------------------------------------

#if USE_TEXTURE_POOL_ALLOCATOR

// Creates the four texture pools (ReadOnly4K, ReadOnly, RenderTarget, UAV).
// All pools live in default (VRAM) heaps, use placed resources and a size-sorted free list;
// they differ in heap flags, pool size, placement alignment and whether defrag is enabled.
FD3D12TextureAllocatorPool::FD3D12TextureAllocatorPool(FD3D12Device* Device, FRHIGPUMask VisibilityNode) :
	FD3D12DeviceChild(Device),
	FD3D12MultiNodeGPUObject(Device->GetGPUMask(), VisibilityNode),
	TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, TEXT("Texture Allocator Pool")))
{
	// Settings shared by every pool below
	FD3D12ResourceInitConfig SharedInitConfig;
	// texture only interesting in VRAM for now
	SharedInitConfig.HeapType = D3D12_HEAP_TYPE_DEFAULT;
	// unused for textures because placed and not suballocated
	SharedInitConfig.ResourceFlags = D3D12_RESOURCE_FLAG_NONE;
	SharedInitConfig.InitialResourceState = D3D12_RESOURCE_STATE_COMMON;

	EResourceAllocationStrategy AllocationStrategy = EResourceAllocationStrategy::kPlacedResource;
	FRHIMemoryPool::EFreeListOrder FreeListOrder = FRHIMemoryPool::EFreeListOrder::SortBySize;

	// Read-only textures that qualify for small (4K) placement alignment: fixed 4MB pools
	{
		FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
		InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;

		const FString Name(L"D3D12 ReadOnly4K Texture Pool Allocator");
		uint64 PoolSize = 4 * 1024 * 1024;
		uint64 MaxAllocationSize = PoolSize;
		bool bDefragEnabled = false; // Disable defrag on 4K pool because it shouldn't really fragment - all allocations are 4K or multiple of 4K and pretty small

		FD3D12PoolAllocator* ReadOnly4KPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
		ReadOnly4KPool->Initialize();
		PoolAllocators[(int)EPoolType::ReadOnly4K] = ReadOnly4KPool;
	}

	// Remaining read-only textures: sizes come from the ReadOnlyTexture console variables; the only pool with defrag enabled
	{
		FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
		InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;

		const FString Name(L"D3D12 ReadOnly Texture Pool Allocator");
		uint64 PoolSize = GD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize;
		uint64 MaxAllocationSize = GD3D12PoolAllocatorReadOnlyTextureVRAMMaxAllocationSize;
		bool bDefragEnabled = true;

		FD3D12PoolAllocator* ReadOnlyPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
		ReadOnlyPool->Initialize();
		PoolAllocators[(int)EPoolType::ReadOnly] = ReadOnlyPool;
	}

	// Render target / depth stencil textures
	{
		FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
		InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;

		const FString Name(L"D3D12 RT Texture Pool Allocator");
		uint64 PoolSize = GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize;
		uint64 MaxAllocationSize = GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize;

		// FD3D12ResourceLocation::OnAllocationMoved doesn't correctly retrieve the clear value when recreating moved resources, so we need to disable defrag for this pool for the time being.
		bool bDefragEnabled = false;

		FD3D12PoolAllocator* RTPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
		RTPool->Initialize();
		PoolAllocators[(int)EPoolType::RenderTarget] = RTPool;
	}

	// UAV textures: same size settings as the RT pool, but placed in non-RT/DS heaps
	{
		FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
		InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;

		const FString Name(L"D3D12 UAV Texture Pool Allocator");
		uint64 PoolSize = GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize;
		uint64 MaxAllocationSize = GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize;

		// Defrag doesn't correctly handle resources which need the BCn/UINT UAV aliasing workaround, so we'll turn off defrag for this heap for now.
		bool bDefragEnabled = false;

		FD3D12PoolAllocator* UAVPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
		UAVPool->Initialize();
		PoolAllocators[(int)EPoolType::UAV] = UAVPool;
	}
}

// Cleans up (with no frame lag) and deletes every pool allocator.
// NOTE(review): the pointers are dereferenced unconditionally, so calling Destroy() a second
// time would dereference the nulled entries - confirm it is only ever called once.
void FD3D12TextureAllocatorPool::Destroy()
{
	for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex)
	{
		PoolAllocators[PoolIndex]->CleanUpAllocations(0);
		delete PoolAllocators[PoolIndex];
		PoolAllocators[PoolIndex] = nullptr;
	}
}

// Allocates a texture from the pool matching its flags (read-only 4K / read-only / render target / UAV).
// Desc is taken by value because its Alignment field is overwritten before querying the allocation info.
// Always returns S_OK.
HRESULT FD3D12TextureAllocatorPool::AllocateTexture(
	FD3D12ResourceDesc Desc,
	const D3D12_CLEAR_VALUE* ClearValue,
	EPixelFormat UEFormat,
	FD3D12ResourceLocation& TextureLocation,
	const D3D12_RESOURCE_STATES InitialState,
	const TCHAR* Name)
{
	// The top mip level must be less than 64 KB to use 4 KB alignment
	bool b4KAligment = FD3D12Texture::CanBe4KAligned(Desc, (EPixelFormat)UEFormat);
	Desc.Alignment = b4KAligment ? D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT : (Desc.SampleDesc.Count > 1 ?
D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); const D3D12_RESOURCE_ALLOCATION_INFO Info = GetParentDevice()->GetResourceAllocationInfoUncached(Desc); const bool bIsRenderTarget = EnumHasAnyFlags(Desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); const bool bIsReadOnly = !bIsRenderTarget && !EnumHasAnyFlags(Desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) && !Desc.NeedsUAVAliasWorkarounds(); ED3D12ResourceStateMode ResourceStateMode = bIsReadOnly ? ED3D12ResourceStateMode::Default : ED3D12ResourceStateMode::MultiState; EPoolType PoolType = bIsReadOnly ? (b4KAligment ? EPoolType::ReadOnly4K : EPoolType::ReadOnly) : (bIsRenderTarget ? EPoolType::RenderTarget : EPoolType::UAV); PoolAllocators[(int)PoolType]->AllocateResource(GetParentDevice()->GetGPUIndex(), D3D12_HEAP_TYPE_DEFAULT, Desc, Info.SizeInBytes, Info.Alignment, ResourceStateMode, InitialState, ClearValue, Name, TextureLocation); return S_OK; } void FD3D12TextureAllocatorPool::BeginFrame(FD3D12ContextArray const& Contexts) { if (GD3D12VRAMTexturePoolDefrag > 0 && GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame > 0) { TRACE_CPUPROFILER_EVENT_SCOPE(DefragTexturePool); uint32 MaxCopySize = GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame; uint32 CopySize = 0; for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex) { PoolAllocators[PoolIndex]->Defrag(Contexts, MaxCopySize, CopySize); } } { // @todo dev-pr - explicit use of graphics context - nothing is synchronizing async compute - needs refactor FD3D12CommandContext& CommandContext = *Contexts[ERHIPipeline::Graphics]->GetSingleDeviceContext(GetParentDevice()->GetGPUIndex()); TRACE_CPUPROFILER_EVENT_SCOPE(FlushPendingTextureCopyOps); RHI_BREADCRUMB_EVENT(CommandContext, "TexturePoolCopyOps"); for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex) { PoolAllocators[PoolIndex]->FlushPendingCopyOps(CommandContext); } } } void 
FD3D12TextureAllocatorPool::CleanUpAllocations() { for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex) { PoolAllocators[PoolIndex]->CleanUpAllocations(20); } } bool FD3D12TextureAllocatorPool::GetMemoryStats(uint64& OutTotalAllocated, uint64& OutTotalUnused) const { uint32 MemoryAllocated = 0; uint32 MemoryUsed = 0; uint32 FreeMemory = 0; uint32 EndFreeMemory = 0; uint32 AlignmentWaste = 0; uint32 AllocatedPageCount = 0; uint32 FullPageCount = 0; for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex) { PoolAllocators[PoolIndex]->UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, EndFreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount); } OutTotalAllocated = MemoryAllocated; OutTotalUnused = FreeMemory; return true; } #elif D3D12RHI_SEGREGATED_TEXTURE_ALLOC FD3D12TextureAllocatorPool::FD3D12TextureAllocatorPool(FD3D12Device* Device, FRHIGPUMask VisibilityNode) : FD3D12DeviceChild(Device), FD3D12MultiNodeGPUObject(Device->GetGPUMask(), VisibilityNode), ReadOnlyTexturePool( Device, VisibilityNode, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES, GD3D12ReadOnlyTextureAllocatorMinPoolSize, GD3D12ReadOnlyTextureAllocatorMinNumToPool, GD3D12ReadOnlyTextureAllocatorMaxPoolSize) { } HRESULT FD3D12TextureAllocatorPool::AllocateTexture( FD3D12ResourceDesc Desc, const D3D12_CLEAR_VALUE* ClearValue, EPixelFormat UEFormat, FD3D12ResourceLocation& TextureLocation, const D3D12_RESOURCE_STATES InitialState, const TCHAR* Name) { HRESULT RetCode = S_OK; FD3D12Resource* NewResource = nullptr; FD3D12Device* Device = GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); TextureLocation.Clear(); if (!EnumHasAnyFlags(Desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET|D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL|D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) && !Desc.NeedsUAVAliasWorkarounds() && // 4K align with NV12 causes a crash on HoloLens 2. 
Desc.Format != DXGI_FORMAT_NV12 && Desc.SampleDesc.Count == 1) { // The top mip level must be less than 64 KB to use 4 KB alignment Desc.Alignment = FD3D12Texture::CanBe4KAligned(Desc, UEFormat) ? D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; const D3D12_RESOURCE_ALLOCATION_INFO Info = Device->GetResourceAllocationInfoUncached(Desc); TRefCountPtr BackingHeap; const uint32 Offset = ReadOnlyTexturePool.Allocate(Info.SizeInBytes, Info.Alignment, BackingHeap); if (Offset != FD3D12SegListAllocator::InvalidOffset) { RetCode = Adapter->CreatePlacedResource(Desc, BackingHeap.GetReference(), Offset, InitialState, ClearValue, &NewResource, Name, false); if (SUCCEEDED(RetCode)) { FD3D12SegListAllocatorPrivateData& PrivateData = TextureLocation.GetSegListAllocatorPrivateData(); PrivateData.Offset = Offset; TextureLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation); TextureLocation.SetSegListAllocator(&ReadOnlyTexturePool); TextureLocation.SetSize(Info.SizeInBytes); TextureLocation.SetOffsetFromBaseOfResource(Offset); TextureLocation.SetResource(NewResource); INC_DWORD_STAT(STAT_D3D12TextureAllocatorCount); } return RetCode; } } const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, GetGPUMask().GetNative(), GetVisibilityMask().GetNative()); Desc.Alignment = 0; VERIFYD3D12RESULT(RetCode = Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InitialState, ClearValue, &NewResource, Name, false)); TextureLocation.AsStandAlone(NewResource); return RetCode; } #else FD3D12TextureAllocator::FD3D12TextureAllocator(FD3D12Device* Device, FRHIGPUMask VisibleNodes, const FString& Name, uint32 HeapSize, D3D12_HEAP_FLAGS Flags, HeapId InTraceParentHeapId) : FD3D12MultiBuddyAllocator(Device, VisibleNodes, FD3D12ResourceInitConfig { D3D12_HEAP_TYPE_DEFAULT, Flags | D3D12_HEAP_FLAG_DENY_BUFFERS, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_GENERIC_READ }, Name, 
EResourceAllocationStrategy::kPlacedResource, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, HeapSize, D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT, InTraceParentHeapId) { } FD3D12TextureAllocator::~FD3D12TextureAllocator() { } HRESULT FD3D12TextureAllocator::AllocateTexture(FD3D12ResourceDesc Desc, const D3D12_CLEAR_VALUE* ClearValue, FD3D12ResourceLocation& TextureLocation, const D3D12_RESOURCE_STATES InitialState, const TCHAR* Name) { FD3D12Device* Device = GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); HRESULT hr = S_OK; FD3D12Resource* NewResource = nullptr; TextureLocation.Clear(); D3D12_RESOURCE_ALLOCATION_INFO Info = Device->GetResourceAllocationInfoUncached(Desc); if (Info.SizeInBytes < D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT) { if (TryAllocate(Info.SizeInBytes, Info.Alignment, TextureLocation)) { FD3D12Heap* BackingHeap = ((FD3D12BuddyAllocator*)TextureLocation.GetAllocator())->GetBackingHeap(); uint64 HeapOffset = TextureLocation.GetAllocator()->GetAllocationOffsetInBytes(TextureLocation.GetBuddyAllocatorPrivateData()); hr = Adapter->CreatePlacedResource(Desc, BackingHeap, HeapOffset, InitialState, ClearValue, &NewResource, Name, false); TextureLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation); TextureLocation.SetResource(NewResource); return hr; } } // Request default alignment for stand alone textures Desc.Alignment = 0; const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, GetGPUMask().GetNative(), (uint32)GetVisibilityMask().GetNative()); hr = Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InitialState, ClearValue, &NewResource, Name, false); TextureLocation.AsStandAlone(NewResource, Info.SizeInBytes); return hr; } FD3D12TextureAllocatorPool::FD3D12TextureAllocatorPool(FD3D12Device* Device, FRHIGPUMask VisibilityNode) : FD3D12DeviceChild(Device), FD3D12MultiNodeGPUObject(Device->GetGPUMask(), VisibilityNode), 
TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, TEXT("Texture Allocator Pool"))), ReadOnlyTexturePool(Device, VisibilityNode, FString(L"Small Non-DS/RT Texture allocator"), TEXTURE_POOL_SIZE, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES, TraceHeapId) {}; HRESULT FD3D12TextureAllocatorPool::AllocateTexture(FD3D12ResourceDesc Desc, const D3D12_CLEAR_VALUE* ClearValue, EPixelFormat UEFormat, FD3D12ResourceLocation& TextureLocation, const D3D12_RESOURCE_STATES InitialState, const TCHAR* Name) { if (FD3D12Texture::CanBe4KAligned(Desc, (EPixelFormat)UEFormat)) { Desc.Alignment = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT; // request 4k alignment return ReadOnlyTexturePool.AllocateTexture(Desc, ClearValue, TextureLocation, InitialState, Name); } FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12Resource* Resource = nullptr; const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, GetGPUMask().GetNative(), GetVisibilityMask().GetNative()); const D3D12_RESOURCE_ALLOCATION_INFO Info = GetParentDevice()->GetResourceAllocationInfoUncached(Desc); HRESULT hr = Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InitialState, ClearValue, &Resource, Name, false); if (SUCCEEDED(hr)) { TextureLocation.AsStandAlone(Resource, Info.SizeInBytes); } return hr; } #endif //----------------------------------------------------------------------------- // Fast Allocation //----------------------------------------------------------------------------- FD3D12FastAllocator::FD3D12FastAllocator(FD3D12Device* Parent, FRHIGPUMask VisibiltyMask, D3D12_HEAP_TYPE InHeapType, uint32 PageSize) : FD3D12DeviceChild(Parent) , FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask) , PagePool(Parent, VisibiltyMask, InHeapType, PageSize) , CurrentAllocatorPage(nullptr) {} FD3D12FastAllocator::FD3D12FastAllocator(FD3D12Device* Parent, FRHIGPUMask VisibiltyMask, const D3D12_HEAP_PROPERTIES& InHeapProperties, uint32 
PageSize) : FD3D12DeviceChild(Parent) , FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask) , PagePool(Parent, VisibiltyMask, InHeapProperties, PageSize) , CurrentAllocatorPage(nullptr) {} void* FD3D12FastAllocator::Allocate(uint32 Size, uint32 Alignment, class FD3D12ResourceLocation* ResourceLocation) { // Check to make sure our assumption that we don't need a ResourceLocation->Clear() here is valid. checkf(!ResourceLocation->IsValid(), TEXT("The supplied resource location already has a valid resource. You should Clear() it first or it may leak.")); if (Size > PagePool.GetPageSize()) { FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); // If upload memory then fallback to the shader upload heap allocator which support dynamic sized allocation of bigger sizes if (PagePool.GetHeapType() == D3D12_HEAP_TYPE_UPLOAD) { return Adapter->GetUploadHeapAllocator(GetGPUMask().ToIndex()).AllocUploadResource(Size, Alignment, *ResourceLocation); } //Allocations are 64k aligned if (Alignment) { Alignment = (D3D_BUFFER_ALIGNMENT % Alignment) == 0 ? 0 : Alignment; } FD3D12Resource* Resource = nullptr; FString ResourceName; #if NAME_OBJECTS static int64 ID = 0; const int64 UniqueID = FPlatformAtomics::InterlockedIncrement(&ID); ResourceName = FString::Printf(TEXT("Stand Alone Fast Allocation %lld"), UniqueID); #endif VERIFYD3D12RESULT(Adapter->CreateBuffer(PagePool.GetHeapType(), GetGPUMask(), GetVisibilityMask(), Size + Alignment, &Resource, *ResourceName)); ResourceLocation->AsStandAlone(Resource, Size + Alignment); return PagePool.IsCPUWritable() ? Resource->GetResourceBaseAddress() : nullptr; } else { FD3D12ScopeLock Lock(&CS); const uint32 Offset = (CurrentAllocatorPage) ? 
CurrentAllocatorPage->NextFastAllocOffset : 0; uint32 CurrentOffset = AlignArbitrary(Offset, Alignment); // See if there is room in the current pool if (CurrentAllocatorPage == nullptr || PagePool.GetPageSize() < CurrentOffset + Size) { if (CurrentAllocatorPage) { PagePool.ReturnFastAllocatorPage(CurrentAllocatorPage); } CurrentAllocatorPage = PagePool.RequestFastAllocatorPage(); CurrentOffset = AlignArbitrary(CurrentAllocatorPage->NextFastAllocOffset, Alignment); } check(PagePool.GetPageSize() - Size >= CurrentOffset); // Create a FD3D12ResourceLocation representing a sub-section of the pool resource ResourceLocation->AsFastAllocation(CurrentAllocatorPage->FastAllocBuffer.GetReference(), Size, CurrentAllocatorPage->FastAllocBuffer->GetGPUVirtualAddress(), CurrentAllocatorPage->FastAllocData, 0, CurrentOffset); CurrentAllocatorPage->NextFastAllocOffset = CurrentOffset + Size; CurrentAllocatorPage->UpdateFence(); check(ResourceLocation->GetMappedBaseAddress()); return ResourceLocation->GetMappedBaseAddress(); } } void FD3D12FastAllocator::CleanupPages(uint64 FrameLag) { FD3D12ScopeLock Lock(&CS); PagePool.CleanupPages(FrameLag); } void FD3D12FastAllocator::Destroy() { FD3D12ScopeLock Lock(&CS); if (CurrentAllocatorPage) { PagePool.ReturnFastAllocatorPage(CurrentAllocatorPage); CurrentAllocatorPage = nullptr; } PagePool.Destroy(); } FD3D12FastAllocatorPagePool::FD3D12FastAllocatorPagePool(FD3D12Device* Parent, FRHIGPUMask VisibiltyMask, D3D12_HEAP_TYPE InHeapType, uint32 Size) : FD3D12DeviceChild(Parent) , FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask) , PageSize(Size) , HeapProperties(CD3DX12_HEAP_PROPERTIES(InHeapType, Parent->GetGPUMask().GetNative(), VisibiltyMask.GetNative())) {}; FD3D12FastAllocatorPagePool::FD3D12FastAllocatorPagePool(FD3D12Device* Parent, FRHIGPUMask VisibiltyMask, const D3D12_HEAP_PROPERTIES& InHeapProperties, uint32 Size) : FD3D12DeviceChild(Parent) , FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask) , 
PageSize(Size) , HeapProperties(InHeapProperties) {}; FD3D12FastAllocatorPage* FD3D12FastAllocatorPagePool::RequestFastAllocatorPage() { FD3D12Device* Device = GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); FD3D12ManualFence& Fence = Adapter->GetFrameFence(); const uint64 CompletedFence = Fence.GetCompletedFenceValue(/* bUpdateCachedFenceValue */ true); for (int32 Index = 0; Index < Pool.Num(); Index++) { FD3D12FastAllocatorPage* Page = Pool[Index]; //If the GPU is done with it and no-one has a lock on it if (Page->FastAllocBuffer->GetRefCount() == 1 && Page->FrameFence <= CompletedFence) { Page->Reset(); Pool.RemoveAt(Index); return Page; } } FD3D12FastAllocatorPage* Page = new FD3D12FastAllocatorPage(PageSize); const D3D12_RESOURCE_STATES InitialState = DetermineInitialResourceState(HeapProperties.Type, &HeapProperties); VERIFYD3D12RESULT(Adapter->CreateBuffer(HeapProperties, GetGPUMask(), InitialState, ED3D12ResourceStateMode::SingleState, InitialState, PageSize, Page->FastAllocBuffer.GetInitReference(), TEXT("Fast Allocator Page"))); Page->FastAllocBuffer->DoNotDeferDelete(); Page->FastAllocData = Page->FastAllocBuffer->Map(); return Page; } FD3D12FastAllocatorPage::~FD3D12FastAllocatorPage() { #if UE_MEMORY_TRACE_ENABLED // Matches MemoryTrace_Alloc issued from CreateBuffer call inside RequestFastAllocatorPage() function MemoryTrace_Free(FastAllocBuffer->GetGPUVirtualAddress(), EMemoryTraceRootHeap::VideoMemory); #endif } void FD3D12FastAllocatorPage::UpdateFence() { // Fence value must be updated every time the page is used to service an allocation. // Max() is required as fast allocator may be used from Render or RHI thread, // which have different fence values. See FD3D12ManualFence::GetCurrentFence() implementation. 
FD3D12Adapter* Adapter = FastAllocBuffer->GetParentDevice()->GetParentAdapter(); FrameFence = FMath::Max(FrameFence, Adapter->GetFrameFence().GetNextFenceToSignal()); } void FD3D12FastAllocatorPagePool::ReturnFastAllocatorPage(FD3D12FastAllocatorPage* Page) { // TODO: AFR has been removed, but I don't understand the comment, so I'm leaving it. What did this code have to do with AFR? // Extend the lifetime of these resources when in AFR as other nodes might be relying on this Page->UpdateFence(); Pool.Add(Page); } void FD3D12FastAllocatorPagePool::CleanupPages(uint64 FrameLag) { if (Pool.Num() <= GD3D12FastAllocatorMinPagesToRetain) { return; } FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); const uint64 CompletedFence = FrameFence.GetCompletedFenceValue(/* bUpdateCachedFenceValue */ true); // Pages get returned to end of list, so we'll look for pages to delete, starting from the LRU for (int32 Index = 0; Index < Pool.Num(); Index++) { FD3D12FastAllocatorPage* Page = Pool[Index]; //If the GPU is done with it and no-one has a lock on it if (Page->FastAllocBuffer->GetRefCount() == 1 && Page->FrameFence + FrameLag <= CompletedFence) { Pool.RemoveAt(Index); delete(Page); // Only release at most one page per frame return; } } } void FD3D12FastAllocatorPagePool::Destroy() { for (int32 i = 0; i < Pool.Num(); i++) { //check(Pool[i]->FastAllocBuffer->GetRefCount() == 1); { FD3D12FastAllocatorPage *Page = Pool[i]; delete(Page); Page = nullptr; } } Pool.Empty(); } FD3D12FastConstantAllocator::FD3D12FastConstantAllocator(FD3D12Device* Parent, FRHIGPUMask VisibiltyMask) : FD3D12DeviceChild(Parent) , FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask) , UnderlyingResource(Parent) , Offset(GD3D12FastConstantAllocatorPageSize) // Initial offset is at end of page so that first Allocate() call triggers a page allocation , PageSize(GD3D12FastConstantAllocatorPageSize) { check(PageSize % 
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT == 0); } void* FD3D12FastConstantAllocator::Allocate(uint32 Bytes, FD3D12ResourceLocation& OutLocation, FD3D12ConstantBufferView* OutCBView) { check(Bytes <= PageSize); const uint32 AlignedSize = Align(Bytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); if (Offset + AlignedSize > PageSize) { Offset = 0; FD3D12Device* Device = GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); FD3D12UploadHeapAllocator& Allocator = Adapter->GetUploadHeapAllocator(Device->GetGPUIndex()); Allocator.AllocFastConstantAllocationPage(PageSize, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, UnderlyingResource); } OutLocation.AsFastAllocation(UnderlyingResource.GetResource(), AlignedSize, UnderlyingResource.GetGPUVirtualAddress(), UnderlyingResource.GetMappedBaseAddress(), UnderlyingResource.GetOffsetFromBaseOfResource(), // AllocUploadResource returns a suballocated resource where we're suballocating (again) from Offset); if (OutCBView) { OutCBView->CreateView(&UnderlyingResource, Offset, AlignedSize); } Offset += AlignedSize; return OutLocation.GetMappedBaseAddress(); } FD3D12SegHeap* FD3D12SegList::CreateBackingHeap( FD3D12Device* Parent, FRHIGPUMask VisibleNodeMask, D3D12_HEAP_TYPE HeapType, D3D12_HEAP_FLAGS HeapFlags) { // CS can be unlocked at this point and re-locked before adding it to FreeHeaps // but doing so may cause multiple heaps to be created ID3D12Heap* D3DHeap = nullptr; D3D12_HEAP_DESC Desc = {}; Desc.SizeInBytes = HeapSize; Desc.Properties = CD3DX12_HEAP_PROPERTIES(HeapType, Parent->GetGPUMask().GetNative(), VisibleNodeMask.GetNative()); Desc.Flags = HeapFlags; if (Parent->GetParentAdapter()->IsHeapNotZeroedSupported()) { Desc.Flags |= FD3D12_HEAP_FLAG_CREATE_NOT_ZEROED; } VERIFYD3D12RESULT(Parent->GetDevice()->CreateHeap(&Desc, IID_PPV_ARGS(&D3DHeap))); FD3D12SegHeap* Ret = new FD3D12SegHeap(Parent, VisibleNodeMask, D3DHeap, HeapSize, this, FreeHeaps.Num()); FreeHeaps.Add(Ret); return Ret; } 
FD3D12SegListAllocator::FD3D12SegListAllocator( FD3D12Device* Parent, FRHIGPUMask VisibilityMask, D3D12_HEAP_TYPE InHeapType, D3D12_HEAP_FLAGS InHeapFlags, uint32 InMinPoolSize, uint32 InMinNumToPool, uint32 InMaxPoolSize) : FD3D12DeviceChild(Parent) , FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibilityMask) , HeapType(InHeapType) , HeapFlags(InHeapFlags) , MinPoolSize(InMinPoolSize) , MinNumToPool(InMinNumToPool) , MaxPoolSize(InMaxPoolSize) #if D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE , TotalBytesRequested(0) #endif {} void FD3D12SegListAllocator::Deallocate( FD3D12Resource* PlacedResource, uint32 Offset, uint32 SizeInBytes) { FD3D12Device* Device = this->GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); uint64 CurFenceValue = Adapter->GetFrameFence().GetNextFenceToSignal(); { FScopeLock Lock(&DeferredDeletionCS); check(FenceValues.Num() == DeferredDeletionQueue.Num()); check(!FenceValues.Num() || FenceValues.Last() <= CurFenceValue); int32 LastIdx = FenceValues.Num() - 1; if (LastIdx < 0 || FenceValues[LastIdx] != CurFenceValue) { ++LastIdx; FenceValues.Add(CurFenceValue); DeferredDeletionQueue.AddDefaulted(); } new (DeferredDeletionQueue[LastIdx]) FRetiredBlock(PlacedResource, Offset, SizeInBytes); } DEC_DWORD_STAT(STAT_D3D12TextureAllocatorCount); } template void FD3D12SegListAllocator::FreeRetiredBlocks(TArray, AllocY>& PendingDeletes) { for (int32 Y = 0; Y < PendingDeletes.Num(); ++Y) { TArray& RetiredBlocks = PendingDeletes[Y]; for (int32 X = 0; X < RetiredBlocks.Num(); ++X) { FRetiredBlock& Block = RetiredBlocks[X]; if (ensureAlwaysMsgf(Block.PlacedResource->GetRefCount() == 1, TEXT("Invalid refcount while releasing %s"), *Block.PlacedResource->GetName().ToString())) { FD3D12SegHeap* BackingHeap = static_cast(Block.PlacedResource->GetHeap()); Block.PlacedResource->Release(); FD3D12SegList* Owner = BackingHeap->OwnerList; check(!!Owner); Owner->FreeBlock(BackingHeap, Block.Offset); OnFree(Block.Offset, BackingHeap, 
Block.ResourceSize); } } } } void FD3D12SegListAllocator::CleanUpAllocations() { TArray, TInlineAllocator<1>> PendingDeletes; { int32 NumToRemove = 0; FD3D12Device* Device = this->GetParentDevice(); FD3D12Adapter* Adapter = Device->GetParentAdapter(); FD3D12ManualFence& FrameFence = Adapter->GetFrameFence(); FScopeLock Lock(&DeferredDeletionCS); for (int32 Idx = 0; Idx < DeferredDeletionQueue.Num(); ++Idx) { if (FrameFence.IsFenceComplete(FenceValues[Idx], /* bUpdateCachedFenceValue */ false)) { ++NumToRemove; PendingDeletes.Add(MoveTemp(DeferredDeletionQueue[Idx])); } else { break; } } if (!!NumToRemove) { FenceValues.RemoveAt(0, NumToRemove); DeferredDeletionQueue.RemoveAt(0, NumToRemove); } } FreeRetiredBlocks(PendingDeletes); } void FD3D12SegListAllocator::Destroy() { { FScopeLock Lock(&DeferredDeletionCS); check(FenceValues.Num() == DeferredDeletionQueue.Num()); FreeRetiredBlocks(DeferredDeletionQueue); FenceValues.Empty(); DeferredDeletionQueue.Empty(); VerifyEmpty(); } { FRWScopeLock Lock(SegListsRWLock, SLT_Write); for (auto& Pair : SegLists) { FD3D12SegList*& SegList = Pair.Value; check(!!SegList); delete SegList; SegList = nullptr; } SegLists.Empty(); } } #if D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE void FD3D12SegListAllocator::VerifyEmpty() { FScopeLock Lock(&SegListTrackedAllocationCS); if(SegListTrackedAllocations.Num() != 0) { UE_LOG(LogD3D12RHI, Warning, TEXT("Dumping leaked SegListAllocations\n")); for (FD3D12SegListAllocatorLeakTrack& LeakTrack : SegListTrackedAllocations) { DumpStack(LeakTrack); } } ensureMsgf(TotalBytesRequested == 0, TEXT("FD3D12SegListAllocator contains %lld allocated bytes but is expected to be empty. This likely means a memory leak. 
Use d3d12.SegListTrackLeaks=1 CVar to print allocations to the log."), (uint64)TotalBytesRequested); } void FD3D12SegListAllocator::DumpStack(const FD3D12SegListAllocatorLeakTrack& LeakTrack) { UE_LOG(LogD3D12RHI, Warning, TEXT("Leaking Allocation Heap %p Offset %d\nStack Dump\n"), LeakTrack.Heap, LeakTrack.Offset); for(uint32 Index = 0; Index < LeakTrack.StackDepth; ++Index) { const size_t STRING_SIZE = 16 * 1024; ANSICHAR StackTrace[STRING_SIZE]; StackTrace[0] = 0; FPlatformStackWalk::ProgramCounterToHumanReadableString(Index, LeakTrack.Stack[Index], StackTrace, STRING_SIZE, 0); UE_LOG(LogD3D12RHI, Warning, TEXT("%d %S\n"), Index, StackTrace); } } void FD3D12SegListAllocator::OnAlloc(uint32 Offset, void* Heap, uint32 Size) { TotalBytesRequested += Size; if(GD3D12SegListTrackLeaks == 0) return; FD3D12SegListAllocatorLeakTrack LeakTrack; LeakTrack.Offset = Offset; LeakTrack.Heap = Heap; LeakTrack.Size = Size; LeakTrack.StackDepth = FPlatformStackWalk::CaptureStackBackTrace(&LeakTrack.Stack[0], D3D12RHI_SEGLIST_ALLOC_TRACK_LEAK_STACK_DEPTH); FScopeLock Lock(&SegListTrackedAllocationCS); check(!SegListTrackedAllocations.Contains(LeakTrack)); SegListTrackedAllocations.Add(LeakTrack); } void FD3D12SegListAllocator::OnFree(uint32 Offset, void* Heap, uint32 Size) { TotalBytesRequested -= Size; if (GD3D12SegListTrackLeaks == 0) return; FD3D12SegListAllocatorLeakTrack LeakTrack; LeakTrack.Offset = Offset; LeakTrack.Heap = Heap; FScopeLock Lock(&SegListTrackedAllocationCS); FD3D12SegListAllocatorLeakTrack* Element = SegListTrackedAllocations.Find(LeakTrack); check(Element); // element being freed was not found. if(Element->Size != Size) { UE_LOG(LogD3D12RHI, Warning, TEXT("Mismatched alloc/free size %d != %d, %p/%08x"), Element->Size, Size, Element->Heap, Element->Offset); DumpStack(*Element); check(0); //element being freed had incorrect size. } SegListTrackedAllocations.Remove(LeakTrack); check(!SegListTrackedAllocations.Contains(LeakTrack)); } #endif