Files
UnrealEngine/Engine/Source/Runtime/D3D12RHI/Private/D3D12Allocation.cpp
2025-05-18 13:04:45 +08:00

2520 lines
90 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
// Implementation of Memory Allocation Strategies
//-----------------------------------------------------------------------------
// Include Files
//-----------------------------------------------------------------------------
#include "D3D12Allocation.h"
#include "D3D12RHIPrivate.h"
#include "Misc/BufferedOutputDevice.h"
#include "HAL/PlatformStackWalk.h"
#include "HAL/LowLevelMemTracker.h"
#include "HAL/LowLevelMemStats.h"
#include "ProfilingDebugging/MemoryTrace.h"
// Fix for random GPU crashes on draw indirects on multiple IHVs. Force all indirect arg buffers as committed resources (see UE-115982)
// The initial value must be 0 (pool allocation disabled) for the workaround above to be in effect,
// which is also what the help text documents as the default.
static int32 GD3D12AllowPoolAllocateIndirectArgBuffers = 0;
static FAutoConsoleVariableRef CVarD3D12AllowPoolAllocateIndirectArgBuffers(
	TEXT("d3d12.AllowPoolAllocateIndirectArgBuffers"),
	GD3D12AllowPoolAllocateIndirectArgBuffers,
	TEXT("Allow indirect args to be pool allocated (otherwise they will be committed resources) (default: 0)"),
	ECVF_ReadOnly);
// Tunables for the read-only texture allocator. Only compiled when
// D3D12RHI_SEGREGATED_TEXTURE_ALLOC is enabled; each is exposed through a console variable
// registered with ECVF_ReadOnly.
#if D3D12RHI_SEGREGATED_TEXTURE_ALLOC
// Lower bound for the allocation granularity of a size list (initial value 4 MiB).
static int32 GD3D12ReadOnlyTextureAllocatorMinPoolSize = 4 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12ReadOnlyTextureAllocatorMinPoolSize(
TEXT("d3d12.ReadOnlyTextureAllocator.MinPoolSize"),
GD3D12ReadOnlyTextureAllocatorMinPoolSize,
TEXT("Minimum allocation granularity (in bytes) of each size list"),
ECVF_ReadOnly);
// Minimum number of textures a pool of a size list should be able to hold (initial value 8).
static int32 GD3D12ReadOnlyTextureAllocatorMinNumToPool = 8;
static FAutoConsoleVariableRef CVarD3D12ReadOnlyTextureAllocatorMinNumToPool(
TEXT("d3d12.ReadOnlyTextureAllocator.MinNumToPool"),
GD3D12ReadOnlyTextureAllocatorMinNumToPool,
TEXT("Texture pool of each size list must be large enough to store this")
TEXT("many textures unless constrained by maximum allocation granularity"),
ECVF_ReadOnly);
// Upper bound for the allocation granularity of a size list (initial value 20 MiB).
static int32 GD3D12ReadOnlyTextureAllocatorMaxPoolSize = 20 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12ReadOnlyTextureAllocatorMaxPoolSize(
TEXT("d3d12.ReadOnlyTextureAllocator.MaxPoolSize"),
GD3D12ReadOnlyTextureAllocatorMaxPoolSize,
TEXT("Maximum allocation granularity (in bytes) of each size list"),
ECVF_ReadOnly);
#endif
#if USE_BUFFER_POOL_ALLOCATOR
// Compile-time defaults for the buffer pool allocator. Each one can be overridden by defining the
// macro before this point. The replacement lists are parenthesized so that a macro always expands
// to a single value, even when it is used inside a larger expression (e.g. `X / 2` or `A % X`).
#if !defined(BUFFER_POOL_DEFRAG_MAX_COPY_SIZE_PER_FRAME)
#define BUFFER_POOL_DEFRAG_MAX_COPY_SIZE_PER_FRAME (32 * 1024 * 1024)
#endif
#if !defined(BUFFER_POOL_DEFAULT_POOL_SIZE)
#define BUFFER_POOL_DEFAULT_POOL_SIZE (32 * 1024 * 1024)
#endif
#if !defined(BUFFER_POOL_DEFAULT_POOL_MAX_ALLOC_SIZE)
#define BUFFER_POOL_DEFAULT_POOL_MAX_ALLOC_SIZE (16 * 1024 * 1024)
#endif
#if !defined(BUFFER_POOL_RT_ACCELERATION_STRUCTURE_POOL_SIZE)
#define BUFFER_POOL_RT_ACCELERATION_STRUCTURE_POOL_SIZE (32 * 1024 * 1024)
#endif
#if !defined(BUFFER_POOL_RT_ACCELERATION_STRUCTURE_MAX_ALLOC_SIZE)
#define BUFFER_POOL_RT_ACCELERATION_STRUCTURE_MAX_ALLOC_SIZE (16 * 1024 * 1024)
#endif

// Non-zero enables defragmentation of the VRAM buffer pool (initial value 1).
static int32 GD3D12VRAMBufferPoolDefrag = 1;
static FAutoConsoleVariableRef CVarD3D12VRAMBufferPoolDefrag(
	TEXT("d3d12.VRAMBufferPoolDefrag"),
	GD3D12VRAMBufferPoolDefrag,
	TEXT("Defrag the VRAM buffer pool"),
	ECVF_ReadOnly);

// Per-frame copy budget for buffer pool defragmentation, in bytes.
static int32 GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame = BUFFER_POOL_DEFRAG_MAX_COPY_SIZE_PER_FRAME;
static FAutoConsoleVariableRef CVarD3D12VRAMBufferPoolDefragMaxCopySizePerFrame(
	TEXT("d3d12.VRAMBufferPoolDefrag.MaxCopySizePerFrame"),
	GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame,
	TEXT("Max amount of data to copy during defragmentation in a single frame (default 32MB)"),
	ECVF_ReadOnly);
#endif // USE_BUFFER_POOL_ALLOCATOR
// Tunables for the texture pool allocator. Only compiled when USE_TEXTURE_POOL_ALLOCATOR is
// enabled; each is exposed through a console variable registered with ECVF_ReadOnly.
#if USE_TEXTURE_POOL_ALLOCATOR
// Size in bytes of one VRAM pool for read-only textures (initial value 64 MiB).
static int32 GD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize = 64 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize(
TEXT("d3d12.PoolAllocator.ReadOnlyTextureVRAMPoolSize"),
GD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize,
TEXT("Pool size of a single VRAM ReadOnly Texture memory pool (default 64MB)"),
ECVF_ReadOnly);
// Largest single allocation served from the read-only texture pool (initial value 64 MiB).
static int32 GD3D12PoolAllocatorReadOnlyTextureVRAMMaxAllocationSize = 64 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12PoolAllocatorReadOnlyTextureMaxAllocationSize(
TEXT("d3d12.PoolAllocator.ReadOnlyTextureMaxAllocationSize"),
GD3D12PoolAllocatorReadOnlyTextureVRAMMaxAllocationSize,
TEXT("Maximum size of a single allocation in the VRAM ReadOnly Texture pool allocator (default 64MB)"),
ECVF_ReadOnly);
// Size in bytes of one VRAM pool for RT/UAV textures; the initial value 0 is documented as "disabled".
static int32 GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize = 0 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12PoolAllocatorRTUAVTextureVRAMPoolSize(
TEXT("d3d12.PoolAllocator.RTUAVTextureVRAMPoolSize"),
GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize,
TEXT("Pool size of a single VRAM RTUAV Texture memory pool (default 0MB - disabled)"),
ECVF_ReadOnly);
// Largest single allocation served from the RT/UAV texture pool; the initial value 0 is documented as "disabled".
static int32 GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize = 0 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12PoolAllocatorRTUAVTextureMaxAllocationSize(
TEXT("d3d12.PoolAllocator.RTUAVTextureMaxAllocationSize"),
GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize,
TEXT("Maximum size of a single allocation in the VRAM RTUAV Texture pool allocator (default 0MB - disabled)"),
ECVF_ReadOnly);
// Non-zero enables defragmentation of the VRAM texture pool (initial value 1).
static int32 GD3D12VRAMTexturePoolDefrag = 1;
static FAutoConsoleVariableRef CVarD3D12VRAMTexturePoolDefrag(
TEXT("d3d12.VRAMTexturePoolDefrag"),
GD3D12VRAMTexturePoolDefrag,
TEXT("Defrag the VRAM Texture pool (enabled by default)"),
ECVF_ReadOnly);
// Per-frame copy budget for texture pool defragmentation, in bytes (initial value 32 MiB).
static int32 GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame = 32 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12VRAMTexturePoolDefragMaxCopySizePerFrame(
TEXT("d3d12.VRAMTexturePoolDefrag.MaxCopySizePerFrame"),
GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame,
TEXT("Max amount of data to copy during defragmentation in a single frame (default 32MB)"),
ECVF_ReadOnly);
#endif // USE_TEXTURE_POOL_ALLOCATOR
// Tunables for upload-heap allocation and the fast constant allocator. Each is exposed through a
// console variable registered with ECVF_ReadOnly.
// Largest allocation served by the upload small block allocator (initial value 64 KiB).
static int32 GD3D12UploadHeapSmallBlockMaxAllocationSize = 64 * 1024;
static FAutoConsoleVariableRef CVarD3D12UploadHeapSmallBlockMaxAllocationSize(
TEXT("d3d12.UploadHeap.SmallBlock.MaxAllocationSize"),
GD3D12UploadHeapSmallBlockMaxAllocationSize,
TEXT("Maximum allocation size on the small block allocator for upload memory"),
ECVF_ReadOnly);
// Pool size of the upload small block allocator (initial value 4 MiB).
static int32 GD3D12UploadHeapSmallBlockPoolSize = 4 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12UploadHeapSmallBlockPoolSize(
TEXT("d3d12.UploadHeap.SmallBlock.PoolSize"),
GD3D12UploadHeapSmallBlockPoolSize,
TEXT("Pool size for the upload memory small block allocator"),
ECVF_ReadOnly);
// Largest allocation served by the upload big block allocator (initial value 64 MiB).
static int32 GD3D12UploadHeapBigBlockMaxAllocationSize = 64 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12UploadHeapBigBlockMaxAllocationSize(
TEXT("d3d12.UploadHeap.BigBlock.MaxAllocationSize"),
GD3D12UploadHeapBigBlockMaxAllocationSize,
TEXT("Maximum allocation size on the big block allocator for upload memory"),
ECVF_ReadOnly);
// Pool size of the upload big block allocator (initial value 8 MiB).
static int32 GD3D12UploadHeapBigBlockPoolSize = 8 * 1024 * 1024;
static FAutoConsoleVariableRef CVarD3D12UploadHeapBigBlockPoolSize(
TEXT("d3d12.UploadHeap.BigBlock.PoolSize"),
GD3D12UploadHeapBigBlockPoolSize,
TEXT("Pool size for the upload memory big block allocator"),
ECVF_ReadOnly);
// Page size of the fast constant allocator (initial value 64 KiB).
static int32 GD3D12FastConstantAllocatorPageSize = 64 * 1024;
static FAutoConsoleVariableRef CVarD3D12FastConstantAllocatorPageSize(
TEXT("d3d12.FastConstantAllocatorPageSize"),
GD3D12FastConstantAllocatorPageSize,
TEXT("Page size for the fast constant allocator"),
ECVF_ReadOnly);
// Leak-tracking switch for the seglist allocator; only compiled when
// D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE is enabled. Initial value 0; per the help text, 1 enables tracking.
#if D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE
static int32 GD3D12SegListTrackLeaks = 0;
static FAutoConsoleVariableRef CVarD3D12SegListTrackLeaks(
TEXT("d3d12.SegListTrackLeaks"),
GD3D12SegListTrackLeaks,
TEXT("1: Enable leak tracking in d3d12 seglist's"),
ECVF_ReadOnly);
#endif
// Number of fast-allocator pages that are always kept (initial value 5). Unlike the other
// console variables in this file, this one is registered with ECVF_Default rather than ECVF_ReadOnly.
static int32 GD3D12FastAllocatorMinPagesToRetain = 5;
static FAutoConsoleVariableRef CVarD3D12FastAllocatorMinPagesToRetain(
TEXT("d3d12.FastAllocator.MinPagesToRetain"),
GD3D12FastAllocatorMinPagesToRetain,
TEXT("Minimum number of pages to retain. Pages below this limit will never be released. Pages above can be released after being unused for a certain number of frames."),
ECVF_Default);
// LLM stats/tags used by the allocators in this file.
// D3D12AllocatorUnused: charged with the full backing size when a buddy allocator creates its
// heap/buffer and reduced as sub-allocations are handed out.
// D3D12AllocatorWasted: charged with the difference between the block size reserved for an
// allocation and the size that was actually requested.
DECLARE_LLM_MEMORY_STAT(TEXT("D3D12AllocatorUnused"), STAT_D3D12AllocatorUnusedLLM, STATGROUP_LLMFULL);
LLM_DEFINE_TAG(D3D12AllocatorUnused, NAME_None, NAME_None, GET_STATFNAME(STAT_D3D12AllocatorUnusedLLM), GET_STATFNAME(STAT_EngineSummaryLLM));
DECLARE_LLM_MEMORY_STAT(TEXT("D3D12AllocatorWasted"), STAT_D3D12AllocatorWastedLLM, STATGROUP_LLMFULL);
LLM_DEFINE_TAG(D3D12AllocatorWasted, NAME_None, NAME_None, GET_STATFNAME(STAT_D3D12AllocatorWastedLLM), GET_STATFNAME(STAT_EngineSummaryLLM));
// Numeric identifiers for the different allocator kinds. The values are sequential starting at
// zero and are spelled out so that they remain stable if entries are reordered in the source.
namespace ED3D12AllocatorID
{
	enum Type
	{
		DefaultBufferAllocator = 0,
		DynamicHeapAllocator = 1,
		TextureAllocator = 2,
		DefaultBufferAllocatorFullResources = 3
	};
}
//-----------------------------------------------------------------------------
// Allocator Base
//-----------------------------------------------------------------------------
/**
 * Base constructor shared by the resource allocators in this file.
 *
 * @param ParentDevice       Device that owns the allocator; also supplies the GPU mask.
 * @param VisibleNodes       GPU nodes the allocated memory is visible to.
 * @param InInitConfig       Heap type / flags / initial state configuration, stored in InitConfig.
 * @param Name               Debug name, stored in DebugName.
 * @param MaxSizeForPooling  Stored in MaximumAllocationSizeForPooling.
 *
 * The allocator starts out uninitialized (Initialized == false); the detailed statistic counters
 * only exist, and are zeroed, when D3D12RHI_TRACK_DETAILED_STATS is enabled.
 */
FD3D12ResourceAllocator::FD3D12ResourceAllocator(FD3D12Device* ParentDevice,
FRHIGPUMask VisibleNodes,
const FD3D12ResourceInitConfig& InInitConfig,
const FString& Name,
uint32 MaxSizeForPooling)
: FD3D12DeviceChild(ParentDevice)
, FD3D12MultiNodeGPUObject(ParentDevice->GetGPUMask(), VisibleNodes)
, InitConfig(InInitConfig)
, DebugName(Name)
, Initialized(false)
, MaximumAllocationSizeForPooling(MaxSizeForPooling)
#if D3D12RHI_TRACK_DETAILED_STATS
, SpaceAlignedUsed(0)
, SpaceActualUsed(0)
, NumBlocksInDeferredDeletionQueue(0)
, PeakUsage(0)
, FailedAllocationSpace(0)
#endif
{
}
/** Destructor; intentionally does no work of its own. */
FD3D12ResourceAllocator::~FD3D12ResourceAllocator()
{
}
//-----------------------------------------------------------------------------
// Buddy Allocator
//-----------------------------------------------------------------------------
/**
 * Constructs a buddy allocator managing a single range of InMaxBlockSize bytes that is split
 * into blocks no smaller than InMinBlockSize bytes. No GPU memory is created here; see Initialize().
 *
 * @param ParentDevice          Device that owns the allocator.
 * @param VisibleNodes          GPU nodes the memory is visible to.
 * @param InInitConfig          Heap/resource configuration used when the backing memory is created.
 * @param Name                  Debug name.
 * @param InAllocationStrategy  Placed resources in a heap, or manual sub-allocation of one buffer.
 * @param MaxSizeForPooling     Forwarded to the base allocator.
 * @param InMaxBlockSize        Total size managed; must be a power-of-two multiple of InMinBlockSize.
 * @param InMinBlockSize        Smallest block size (the allocation unit); must be non-zero because it is used as a divisor below.
 * @param InTraceParentHeapId   Parent heap for memory tracing (only used when UE_MEMORY_TRACE_ENABLED).
 */
FD3D12BuddyAllocator::FD3D12BuddyAllocator(FD3D12Device* ParentDevice,
FRHIGPUMask VisibleNodes,
const FD3D12ResourceInitConfig& InInitConfig,
const FString& Name,
EResourceAllocationStrategy InAllocationStrategy,
uint32 MaxSizeForPooling,
uint32 InMaxBlockSize,
uint32 InMinBlockSize,
HeapId InTraceParentHeapId)
: FD3D12ResourceAllocator(ParentDevice, VisibleNodes, InInitConfig, Name, MaxSizeForPooling)
, MaxBlockSize(InMaxBlockSize)
, MinBlockSize(InMinBlockSize)
, AllocationStrategy(InAllocationStrategy)
, BackingHeap(nullptr)
, LastUsedFrameFence(0)
, TotalSizeUsed(0)
, HeapFullMessageDisplayed(false)
{
#if UE_MEMORY_TRACE_ENABLED
// Register a trace heap whose name reflects the allocation strategy.
TraceHeapId = MemoryTrace_HeapSpec(InTraceParentHeapId, AllocationStrategy == EResourceAllocationStrategy::kPlacedResource ? TEXT("BuddyAllocator (PlacedResource)") : TEXT("BuddyAllocator (ManualSubAllocation)"));
#endif
// maxBlockSize should be evenly dividable by MinBlockSize and
// maxBlockSize / MinBlockSize should be a power of two
check((MaxBlockSize / MinBlockSize) * MinBlockSize == MaxBlockSize); // Evenly dividable
check(0 == ((MaxBlockSize / MinBlockSize) & ((MaxBlockSize / MinBlockSize) - 1))); // Power of two
// The largest order corresponds to one block covering the whole range.
MaxOrder = UnitSizeToOrder(SizeToUnitSize(MaxBlockSize));
// Start with the whole range available as a single free block.
Reset();
}
/**
 * Creates the GPU memory backing this allocator. Invoked lazily by Allocate() the first time an
 * allocation is made.
 *
 * - kPlacedResource: creates an ID3D12Heap of MaxBlockSize bytes and wraps it in BackingHeap.
 * - otherwise: creates one buffer of MaxBlockSize bytes (BackingResource) that is sub-allocated manually.
 *
 * On Windows the default LLM tracker is paused while the memory is created and MaxBlockSize is
 * charged to the D3D12AllocatorUnused tag on the platform tracker instead.
 */
void FD3D12BuddyAllocator::Initialize()
{
FD3D12Device* Device = GetParentDevice();
FD3D12Adapter* Adapter = Device->GetParentAdapter();
if (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource)
{
D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(InitConfig.HeapType);
HeapProps.CreationNodeMask = GetGPUMask().GetNative();
HeapProps.VisibleNodeMask = GetVisibilityMask().GetNative();
D3D12_HEAP_DESC Desc = {};
Desc.SizeInBytes = MaxBlockSize;
Desc.Properties = HeapProps;
Desc.Alignment = 0;
Desc.Flags = InitConfig.HeapFlags;
// Skip zero-initialization of the heap when the adapter reports support for it.
if (Adapter->IsHeapNotZeroedSupported())
{
Desc.Flags |= FD3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
}
ID3D12Heap* Heap = nullptr;
{
LLM_PLATFORM_SCOPE(ELLMTag::GraphicsPlatform);
#if PLATFORM_WINDOWS
// we are tracking allocations ourselves
LLM_SCOPED_PAUSE_TRACKING_FOR_TRACKER(ELLMTracker::Default, ELLMAllocType::System);
VERIFYD3D12RESULT(Adapter->GetD3DDevice()->CreateHeap(&Desc, IID_PPV_ARGS(&Heap)));
LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, MaxBlockSize, ELLMTracker::Platform, ELLMAllocType::System);
#else
LLM_SCOPE_BYTAG(D3D12AllocatorUnused);
VERIFYD3D12RESULT(Adapter->GetD3DDevice()->CreateHeap(&Desc, IID_PPV_ARGS(&Heap)));
#endif
}
// Wrap the raw heap so placed resources can reference it.
BackingHeap = new FD3D12Heap(GetParentDevice(), GetVisibilityMask(), TraceHeapId);
BackingHeap->SetHeap(Heap, TEXT("BuddyAllocator Heap"));
// Only track resources that cannot be accessed on the CPU.
if (IsGPUOnly(InitConfig.HeapType))
{
BackingHeap->BeginTrackingResidency(Desc.SizeInBytes);
}
}
else
{
{
const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(InitConfig.HeapType, GetGPUMask().GetNative(), GetVisibilityMask().GetNative());
#if PLATFORM_WINDOWS
LLM_SCOPED_PAUSE_TRACKING_FOR_TRACKER(ELLMTracker::Default, ELLMAllocType::System);
VERIFYD3D12RESULT(Adapter->CreateBuffer(HeapProps, GetGPUMask(), InitConfig.InitialResourceState, ED3D12ResourceStateMode::SingleState, InitConfig.InitialResourceState, MaxBlockSize, BackingResource.GetInitReference(), TEXT("Resource Allocator Underlying Buffer"), InitConfig.ResourceFlags));
LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, MaxBlockSize, ELLMTracker::Platform, ELLMAllocType::System);
#else
LLM_SCOPE_BYTAG(D3D12AllocatorUnused);
VERIFYD3D12RESULT(Adapter->CreateBuffer(HeapProps, GetGPUMask(), InitConfig.InitialResourceState, ED3D12ResourceStateMode::SingleState, InitConfig.InitialResourceState, MaxBlockSize, BackingResource.GetInitReference(), TEXT("Resource Allocator Underlying Buffer"), InitConfig.ResourceFlags));
#endif
#if UE_MEMORY_TRACE_ENABLED
// Tell the memory tracer that this buffer's address range acts as a heap for sub-allocations.
MemoryTrace_MarkAllocAsHeap(BackingResource->GetGPUVirtualAddress(), TraceHeapId);
#endif
}
// Map once up front so sub-allocations can hand out CPU addresses into the buffer.
if (IsCPUAccessible(InitConfig.HeapType))
{
BackingResource->Map();
}
}
}
/** Tears the allocator down by releasing everything it still holds (see ReleaseAllResources()). */
void FD3D12BuddyAllocator::Destroy()
{
ReleaseAllResources();
}
/**
 * Removes a free block of the given order from the free lists and returns its offset.
 * When no block of that order is free, a block of the next order up is obtained recursively and
 * split in two: the left half is returned and the right half is put on this order's free list.
 *
 * @param order  Order of the block wanted. Asking for more than MaxOrder is a fatal error.
 * @return Offset of the block (the caller multiplies it by MinBlockSize to get bytes).
 */
uint32 FD3D12BuddyAllocator::AllocateBlock(uint32 order)
{
	if (order > MaxOrder)
	{
		// Can't allocate a block that large
		check(false);
		// Crash to avoid infinite recursivity
		UE_LOG(LogD3D12RHI, Fatal, TEXT("Buddy Allocator cant allocate a block that large (order %d)"), order);
	}

	// Fast path: hand out any block that is already free at this order.
	if (FreeBlocks[order].Num() != 0)
	{
		TSet<uint32>::TConstIterator FirstFree(FreeBlocks[order]);
		const uint32 FreeOffset = *FirstFree;
		FreeBlocks[order].Remove(*FirstFree);
		return FreeOffset;
	}

	// Nothing free here: take a block one order up and split it in half.
	const uint32 LeftHalf = AllocateBlock(order + 1);
	const uint32 HalfSize = OrderToUnitSize(order);
	const uint32 RightHalf = LeftHalf + HalfSize;
	FreeBlocks[order].Add(RightHalf);
	return LeftHalf;
}
/**
 * Returns a block to the free lists, merging it with its buddy whenever the buddy is free too.
 *
 * @param offset  Offset of the block being freed (same units as returned by AllocateBlock).
 * @param order   Order of the block being freed.
 */
void FD3D12BuddyAllocator::DeallocateBlock(uint32 offset, uint32 order)
{
	const uint32 UnitSize = OrderToUnitSize(order);
	const uint32 BuddyOffset = GetBuddyOffset(offset, UnitSize);

	uint32* FreeBuddy = FreeBlocks[order].Find(BuddyOffset);
	if (FreeBuddy == nullptr)
	{
		// The buddy is still in use, so this block simply becomes free at its own order.
		FreeBlocks[order].Add(offset);
		return;
	}

	// Both halves are free: release the merged block one order up, then drop the buddy from this order.
	DeallocateBlock(FMath::Min(offset, BuddyOffset), order + 1);
	FreeBlocks[order].Remove(*FreeBuddy);
}
/**
 * Sub-allocates SizeInBytes from this allocator and fills in ResourceLocation.
 *
 * The backing heap/buffer is created on the first call. This function does not check capacity:
 * AllocateBlock() is fatal when no large enough block exists, so callers should go through
 * TryAllocate() (which consults CanAllocate()) unless they know the request fits.
 *
 * @param SizeInBytes       Number of bytes requested.
 * @param Alignment         Required alignment, in bytes, of the offset from the base of the backing
 *                          memory. 0 means "no alignment requirement".
 * @param ResourceLocation  Receives the allocator, size and buddy block data; for manual
 *                          sub-allocation it also receives the resource, offset and addresses.
 */
void FD3D12BuddyAllocator::Allocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation)
{
	FScopeLock Lock(&CS);

	// Create the backing memory lazily on first use.
	if (Initialized == false)
	{
		Initialize();
		Initialized = true;
	}

	uint32 SizeToAllocate = SizeInBytes;

	// If the alignment doesn't match the block size, reserve extra room so the start can be padded.
	if (Alignment != 0 && MinBlockSize % Alignment != 0)
	{
		SizeToAllocate = SizeInBytes + Alignment;
	}

	// Work out what size block is needed and allocate one
	const uint32 UnitSize = SizeToUnitSize(SizeToAllocate);
	const uint32 Order = UnitSizeToOrder(UnitSize);
	const uint32 Offset = AllocateBlock(Order); // This is the offset in MinBlockSize units
	const uint32 AllocSize = uint32(OrderToUnitSize(Order) * MinBlockSize);
	const uint32 AllocationBlockOffset = uint32(Offset * MinBlockSize);

	// Padding needed in front of the allocation to honor the requested alignment.
	uint32 Padding = 0;
	if (Alignment != 0 && AllocationBlockOffset % Alignment != 0)
	{
		uint32 AlignedBlockOffset = AlignArbitrary(AllocationBlockOffset, Alignment);
		Padding = AlignedBlockOffset - AllocationBlockOffset;
		check((Padding + SizeInBytes) <= AllocSize);
	}

	INCREASE_ALLOC_COUNTER(SpaceAlignedUsed, AllocSize);
	INCREASE_ALLOC_COUNTER(SpaceActualUsed, SizeInBytes);

#if PLATFORM_WINDOWS
	// Decrease only texture size so wasted amount stays in D3D12AllocatorUnused
	LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, 0 - int64(SizeInBytes), ELLMTracker::Platform, ELLMAllocType::System);
	LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorWasted, int64(AllocSize - SizeInBytes), ELLMTracker::Platform, ELLMAllocType::System);
#endif

	TotalSizeUsed += AllocSize;

#if D3D12RHI_TRACK_DETAILED_STATS
	if (SpaceActualUsed > PeakUsage)
	{
		PeakUsage = SpaceActualUsed;
	}
#endif

	const uint32 AlignedOffsetFromResourceBase = AllocationBlockOffset + Padding;
	// Alignment == 0 means "no requirement"; skip the modulo in that case to avoid dividing by zero.
	check(Alignment == 0 || (AlignedOffsetFromResourceBase % Alignment) == 0);

	// Record the block so Deallocate() can hand it back to the free lists later.
	FD3D12BuddyAllocatorPrivateData& PrivateData = ResourceLocation.GetBuddyAllocatorPrivateData();
	PrivateData.Order = Order;
	PrivateData.Offset = Offset;

	ResourceLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation);
	ResourceLocation.SetAllocator((FD3D12BaseAllocatorType*)this);
	ResourceLocation.SetSize(SizeInBytes);

	if (AllocationStrategy == EResourceAllocationStrategy::kManualSubAllocation)
	{
		ResourceLocation.SetOffsetFromBaseOfResource(AlignedOffsetFromResourceBase);
		ResourceLocation.SetResource(BackingResource);
		ResourceLocation.SetGPUVirtualAddress(BackingResource->GetGPUVirtualAddress() + AlignedOffsetFromResourceBase);

		if (IsCPUAccessible(InitConfig.HeapType))
		{
			ResourceLocation.SetMappedBaseAddress((uint8*)BackingResource->GetResourceBaseAddress() + AlignedOffsetFromResourceBase);
		}
	}
	else
	{
		// Placed resources are initialized elsewhere
	}

	// track the allocation
#if !PLATFORM_WINDOWS
	{
		LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, 0 - int64(AllocSize), ELLMTracker::Default, ELLMAllocType::System);
	}
	LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelAlloc(ELLMTracker::Default, ResourceLocation.GetAddressForLLMTracking(), AllocSize));
	// Note: Disabling this LLM hook for Windows is due to a work-around in the way that d3d12 buffers are tracked
	// by LLM. LLM tracks buffer data in the UpdateBufferStats function because that is the easiest place to ensure that LLM
	// can be updated whenever a buffer is created or released. Unfortunately, some buffers allocate from this allocator
	// which means that the memory would be counted twice. Because of this the tracking had to be disabled here.
	// This does mean that non-buffer memory that goes through this allocator won't be tracked, so this does need a better solution.
	// see UpdateBufferStats for a more detailed explanation.
#endif
}
/**
 * Allocates only if the request currently fits in this allocator.
 *
 * @param SizeInBytes       Number of bytes requested.
 * @param Alignment         Required alignment in bytes.
 * @param ResourceLocation  Filled in by Allocate() on success; untouched on failure.
 * @return true when the allocation was made, false when there is no room for it.
 */
bool FD3D12BuddyAllocator::TryAllocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation)
{
	FScopeLock Lock(&CS);

	if (!CanAllocate(SizeInBytes, Alignment))
	{
		// Keep a running total of the space that could not be served.
		INCREASE_ALLOC_COUNTER(FailedAllocationSpace, SizeInBytes);
		return false;
	}

	Allocate(SizeInBytes, Alignment, ResourceLocation);
	return true;
}
/**
 * Queues the block behind ResourceLocation for deferred release. The block is not returned to
 * the free lists here; that happens in CleanUpAllocations() once the recorded frame fence has
 * completed (or in ReleaseAllResources()).
 *
 * @param ResourceLocation  A location previously allocated from this allocator.
 */
void FD3D12BuddyAllocator::Deallocate(FD3D12ResourceLocation& ResourceLocation)
{
	check(IsOwner(ResourceLocation));

	// Blocks are cleaned up async so need a lock
	FScopeLock Lock(&CS);

	FD3D12ManualFence& FrameFence = GetParentDevice()->GetParentAdapter()->GetFrameFence();

	RetiredBlock& Block = DeferredDeletionQueue.Emplace_GetRef();
	Block.FrameFence = FrameFence.GetNextFenceToSignal();

	FD3D12BuddyAllocatorPrivateData& BuddyData = ResourceLocation.GetBuddyAllocatorPrivateData();
	Block.Data.Order = BuddyData.Order;
	Block.Data.Offset = BuddyData.Offset;
	Block.AllocationSize = ResourceLocation.GetSize();

	// Remember the most recent fence this allocator was used with; garbage collection relies on it.
	LastUsedFrameFence = FMath::Max(LastUsedFrameFence, Block.FrameFence);

	// Placed resources are kept with the retired block so they can be released together with it.
	auto* Resource = ResourceLocation.GetResource();
	if (Resource->IsPlacedResource())
	{
		Block.PlacedResource = Resource;
	}

	INCREASE_ALLOC_COUNTER(NumBlocksInDeferredDeletionQueue, 1);

	// track the allocation
#if !PLATFORM_WINDOWS
	// Note: Disabling this LLM hook for Windows is due to a work-around in the way that d3d12 buffers are tracked
	// by LLM. LLM tracks buffer data in the UpdateBufferStats function because that is the easiest place to ensure that LLM
	// can be updated whenever a buffer is created or released. Unfortunately, some buffers allocate from this allocator
	// which means that the memory would be counted twice. Because of this the tracking had to be disabled here.
	// This does mean that non-buffer memory that goes through this allocator won't be tracked, so this does need a better solution.
	// see UpdateBufferStats for a more detailed explanation.
	LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelFree(ELLMTracker::Default, ResourceLocation.GetAddressForLLMTracking()));
	{
		const uint32 AllocSize = uint32(OrderToUnitSize(Block.Data.Order) * MinBlockSize);
		LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, int64(AllocSize), ELLMTracker::Default, ELLMAllocType::System);
	}
#endif
}
/**
 * Actually frees a retired block: returns it to the free lists, updates the usage counters and,
 * for the placed-resource strategy, releases the resource that lived in the block.
 *
 * @param Block  Retired block taken from the deferred deletion queue.
 */
void FD3D12BuddyAllocator::DeallocateInternal(RetiredBlock& Block)
{
	DeallocateBlock(Block.Data.Offset, Block.Data.Order);

	// Size of the whole buddy block in bytes (may exceed what the caller originally asked for).
	const uint32 Size = uint32(OrderToUnitSize(Block.Data.Order) * MinBlockSize);

	DECREASE_ALLOC_COUNTER(SpaceAlignedUsed, Size);
	DECREASE_ALLOC_COUNTER(SpaceActualUsed, Block.AllocationSize);

#if PLATFORM_WINDOWS
	LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, int64(Block.AllocationSize), ELLMTracker::Platform, ELLMAllocType::System);
	LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorWasted, 0 - int64(Size - Block.AllocationSize), ELLMTracker::Platform, ELLMAllocType::System);
#endif

	TotalSizeUsed -= Size;

	if (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource)
	{
		// Release the resource
		check(Block.PlacedResource != nullptr);
		Block.PlacedResource->Release();
		Block.PlacedResource = nullptr;
	}
}
void FD3D12BuddyAllocator::CleanUpAllocations()
{
FScopeLock Lock(&CS);
FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter();
FD3D12ManualFence& FrameFence = Adapter->GetFrameFence();
uint32 PopCount = 0;
for (int32 i = 0; i < DeferredDeletionQueue.Num(); i++)
{
RetiredBlock& Block = DeferredDeletionQueue[i];
if (FrameFence.IsFenceComplete(Block.FrameFence, /* bUpdateCachedFenceValue */ false))
{
DeallocateInternal(Block);
DECREASE_ALLOC_COUNTER(NumBlocksInDeferredDeletionQueue, 1);
PopCount = i + 1;
}
else
{
break;
}
}
if (PopCount)
{
// clear out all of the released blocks, don't allow the array to shrink
DeferredDeletionQueue.RemoveAt(0, PopCount, EAllowShrinking::No);
}
}
/**
 * Releases everything the allocator holds: every block still waiting in the deferred deletion
 * queue is freed immediately (no fence check) and the reference to the backing buffer is dropped.
 * NOTE(review): BackingHeap is not touched here; presumably it is released elsewhere - confirm.
 */
void FD3D12BuddyAllocator::ReleaseAllResources()
{
#if PLATFORM_WINDOWS
// Undo the MaxBlockSize that Initialize() charged to the D3D12AllocatorUnused tag.
LLM_SCOPED_PAUSE_TRACKING_FOR_TRACKER(ELLMTracker::Default, ELLMAllocType::System);
LLM_SCOPED_PAUSE_TRACKING_WITH_ENUM_AND_AMOUNT_BYTAG(D3D12AllocatorUnused, 0 - int64(MaxBlockSize), ELLMTracker::Platform, ELLMAllocType::System);
#endif
#if UE_MEMORY_TRACE_ENABLED
if (AllocationStrategy != EResourceAllocationStrategy::kPlacedResource)
{
// Free memory & heap to match alloc operations
D3D12_GPU_VIRTUAL_ADDRESS GPUAddress = BackingResource ? BackingResource->GetGPUVirtualAddress() : 0;
if (GPUAddress > 0)
{
MemoryTrace_UnmarkAllocAsHeap(GPUAddress, TraceHeapId);
MemoryTrace_Free(GPUAddress, EMemoryTraceRootHeap::VideoMemory);
}
}
#endif
// Flush the deferred deletion queue regardless of fence state.
for (RetiredBlock& Block : DeferredDeletionQueue)
{
DeallocateInternal(Block);
DECREASE_ALLOC_COUNTER(NumBlocksInDeferredDeletionQueue, 1);
}
DeferredDeletionQueue.Empty();
if (BackingResource)
{
// Nobody else is expected to still reference the backing buffer (the check is relaxed when rendering on multiple explicit GPUs).
ensure(BackingResource->GetRefCount() == 1 || GNumExplicitGPUsForRendering > 1);
BackingResource = nullptr;
}
}
/**
 * Writes a one-row table with this allocator's usage statistics to Ar, using the allocator's
 * debug name as the log category.
 *
 * @param Ar  Output device that receives the buffered log lines.
 *
 * NOTE(review): the guard uses defined(UE_BUILD_DEBUG), which is also true when the macro is
 * defined as 0, while the counters read below are only initialized under
 * D3D12RHI_TRACK_DETAILED_STATS - confirm the intended condition.
 */
void FD3D12BuddyAllocator::DumpAllocatorStats(class FOutputDevice& Ar)
{
#if defined(UE_BUILD_DEBUG)
	FBufferedOutputDevice BufferedOutput;
	{
		// This is the memory tracked inside individual allocation pools
		FName categoryName(&DebugName.GetCharArray()[0]);

		BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT(""));
		BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("Heap Size | MinBlock Size | Space Used | Peak Usage | Unpooled Allocations | Internal Fragmentation | Blocks in Deferred Delete Queue "));
		BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("----------"));

		// Bytes reserved by buddy blocks beyond what callers actually requested.
		const uint64 InternalFragmentation = SpaceAlignedUsed - SpaceActualUsed;

		// The 64-bit fragmentation value needs a 64-bit conversion (%llu); every column uses a valid
		// lower-case conversion specifier.
		BufferedOutput.CategorizedLogf(categoryName, ELogVerbosity::Log, TEXT("% 10i % 10i % 16i % 12i % 13i %8llu % 10i"),
			MaxBlockSize,
			MinBlockSize,
			SpaceAlignedUsed,
			PeakUsage,
			FailedAllocationSpace,
			InternalFragmentation,
			NumBlocksInDeferredDeletionQueue);
	}
	BufferedOutput.RedirectTo(Ar);
#endif
}
/**
 * Adds this allocator's numbers to the running totals passed in. Does nothing unless
 * D3D12RHI_TRACK_DETAILED_STATS is enabled.
 *
 * @param IOMemoryAllocated     Incremented by the size of the backing memory (MaxBlockSize).
 * @param IOMemoryUsed          Incremented by the bytes actually requested by callers.
 * @param IOMemoryFree          Incremented by the bytes not covered by any allocated block.
 * @param IOAlignmentWaste      Incremented by the bytes lost to block rounding.
 * @param IOAllocatedPageCount  Incremented by one (this allocator counts as one page).
 * @param IOFullPageCount       Incremented by one when the allocator is completely full.
 */
void FD3D12BuddyAllocator::UpdateMemoryStats(uint32& IOMemoryAllocated, uint32& IOMemoryUsed, uint32& IOMemoryFree, uint32& IOAlignmentWaste, uint32& IOAllocatedPageCount, uint32& IOFullPageCount)
{
#if D3D12RHI_TRACK_DETAILED_STATS
	// Page counts first.
	IOAllocatedPageCount++;
	if (SpaceAlignedUsed == MaxBlockSize)
	{
		IOFullPageCount++;
	}

	// Then the byte totals.
	IOMemoryAllocated += MaxBlockSize;
	IOMemoryUsed += SpaceActualUsed;
	IOMemoryFree += (MaxBlockSize - SpaceAlignedUsed);
	IOAlignmentWaste += SpaceAlignedUsed - SpaceActualUsed;
#endif
}
/**
 * Reports whether a free block large enough for the request currently exists.
 *
 * @param size       Number of bytes requested.
 * @param alignment  Required alignment in bytes; 0 means no requirement.
 * @return true if some free block can hold the (possibly alignment-padded) request.
 */
bool FD3D12BuddyAllocator::CanAllocate(uint32 size, uint32 alignment)
{
	// Completely full: nothing can fit.
	if (TotalSizeUsed == MaxBlockSize)
	{
		return false;
	}

	// When the alignment doesn't divide the minimum block size, the request is padded by one alignment unit.
	const bool bNeedsPadding = (alignment != 0) && (MinBlockSize % alignment != 0);
	const uint32 RequiredSize = bNeedsPadding ? (size + alignment) : size;

	// Walk the orders from largest to smallest, tracking the block size of the current order.
	uint32 OrderBlockSize = MaxBlockSize;
	for (int32 OrderIndex = FreeBlocks.Num() - 1; OrderIndex >= 0; --OrderIndex)
	{
		if (OrderBlockSize >= RequiredSize && FreeBlocks[OrderIndex].Num())
		{
			return true;
		}

		// The next order down has blocks half this size; stop once they are too small.
		OrderBlockSize >>= 1;
		if (OrderBlockSize < RequiredSize)
		{
			return false;
		}
	}

	return false;
}
void FD3D12BuddyAllocator::Reset()
{
// Clear the free blocks collection
FreeBlocks.Empty();
// Initialize the pool with a free inner block of max inner block size
FreeBlocks.SetNum(MaxOrder + 1);
FreeBlocks[MaxOrder].Add((uint32)0);
}
//-----------------------------------------------------------------------------
// Multi-Buddy Allocator
//-----------------------------------------------------------------------------
/**
 * Constructs an allocator that manages a growing list of buddy allocators.
 *
 * @param ParentDevice          Device that owns the allocator.
 * @param VisibleNodes          GPU nodes the memory is visible to.
 * @param InInitConfig          Heap/resource configuration passed on to each buddy allocator.
 * @param Name                  Debug name; also used as the trace heap name.
 * @param InAllocationStrategy  Allocation strategy passed on to each buddy allocator.
 * @param InMaxAllocationSize   Largest allocation this allocator pools (forwarded to the base as MaxSizeForPooling).
 * @param InDefaultPoolSize     Size of each buddy allocator unless a request needs more.
 * @param InMinBlockSize        Minimum block size of each buddy allocator.
 * @param InTraceParentHeapId   Parent heap for memory tracing (only used when UE_MEMORY_TRACE_ENABLED).
 */
FD3D12MultiBuddyAllocator::FD3D12MultiBuddyAllocator(FD3D12Device* ParentDevice,
FRHIGPUMask VisibleNodes,
const FD3D12ResourceInitConfig& InInitConfig,
const FString& Name,
EResourceAllocationStrategy InAllocationStrategy,
uint32 InMaxAllocationSize,
uint32 InDefaultPoolSize,
uint32 InMinBlockSize,
HeapId InTraceParentHeapId)
: FD3D12ResourceAllocator(ParentDevice, VisibleNodes, InInitConfig, Name, InMaxAllocationSize)
, AllocationStrategy(InAllocationStrategy)
, MinBlockSize(InMinBlockSize)
, DefaultPoolSize(InDefaultPoolSize)
{
#if UE_MEMORY_TRACE_ENABLED
TraceHeapId = MemoryTrace_HeapSpec(InTraceParentHeapId, *Name);
#endif
}
/** Destroys all owned buddy allocators via Destroy(). */
FD3D12MultiBuddyAllocator::~FD3D12MultiBuddyAllocator()
{
Destroy();
}
/**
 * Tries each existing buddy allocator in order; if none can serve the request, a new buddy
 * allocator is created, added to the list and asked instead.
 *
 * @param SizeInBytes       Number of bytes requested.
 * @param Alignment         Required alignment in bytes.
 * @param ResourceLocation  Filled in by the buddy allocator that serves the request.
 * @return true if one of the allocators (existing or newly created) served the request.
 */
bool FD3D12MultiBuddyAllocator::TryAllocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation)
{
	FScopeLock Lock(&CS);

	// First fit among the allocators that already exist.
	for (FD3D12BuddyAllocator* Existing : Allocators)
	{
		if (Existing->TryAllocate(SizeInBytes, Alignment, ResourceLocation))
		{
			return true;
		}
	}

	// Nobody had room: grow by one allocator sized for this request.
	Allocators.Add(CreateNewAllocator(SizeInBytes));
	FD3D12BuddyAllocator* Newest = Allocators.Last();
	return Newest->TryAllocate(SizeInBytes, Alignment, ResourceLocation);
}
/**
 * Must never be called: allocations are owned by the individual buddy allocators, which handle
 * their own deallocation. Reaching this function trips a check.
 */
void FD3D12MultiBuddyAllocator::Deallocate(FD3D12ResourceLocation& ResourceLocation)
{
//The sub-allocators should handle the deallocation
check(false);
}
/**
 * Creates a buddy allocator big enough for a request of InMinSizeInBytes.
 *
 * @param InMinSizeInBytes  Smallest size the new allocator must be able to hold; must not exceed
 *                          MaximumAllocationSizeForPooling.
 * @return A newly heap-allocated buddy allocator; the caller takes ownership.
 */
FD3D12BuddyAllocator* FD3D12MultiBuddyAllocator::CreateNewAllocator(uint32 InMinSizeInBytes)
{
	check(InMinSizeInBytes <= MaximumAllocationSizeForPooling);

	// Use the default pool size unless the request is larger, in which case round it up to a power of two.
	uint32 PoolSize = DefaultPoolSize;
	if (InMinSizeInBytes > DefaultPoolSize)
	{
		PoolSize = FMath::RoundUpToPowerOfTwo(InMinSizeInBytes);
	}

	// The pool size serves both as the pooling limit and as the maximum block size of the new allocator.
	return new FD3D12BuddyAllocator(
		GetParentDevice(),
		GetVisibilityMask(),
		InitConfig,
		DebugName,
		AllocationStrategy,
		PoolSize,
		PoolSize,
		MinBlockSize,
		TraceHeapId);
}
/** Adds one buddy allocator of the default pool size to the list. */
void FD3D12MultiBuddyAllocator::Initialize()
{
Allocators.Add(CreateNewAllocator(DefaultPoolSize));
}
/** Destroys and deletes every owned buddy allocator (see ReleaseAllResources()). */
void FD3D12MultiBuddyAllocator::Destroy()
{
ReleaseAllResources();
}
/**
 * Lets every buddy allocator free its completed retired blocks, then destroys the allocators
 * that are empty and have not been used for at least InFrameLag frame fences.
 *
 * @param InFrameLag  Number of frame fences an empty allocator must have been idle before it is trimmed.
 */
void FD3D12MultiBuddyAllocator::CleanUpAllocations(uint64 InFrameLag)
{
	FScopeLock Lock(&CS);

	for (int32 Index = 0; Index < Allocators.Num(); ++Index)
	{
		Allocators[Index]->CleanUpAllocations();
	}

	// Trim empty allocators if not used in last n frames
	FD3D12ManualFence& FrameFence = GetParentDevice()->GetParentAdapter()->GetFrameFence();
	const uint64 CompletedFence = FrameFence.GetCompletedFenceValue(/* bUpdateCachedFenceValue */ true);

	// Walk backwards so removing an entry does not disturb the indices still to be visited.
	for (int32 Index = Allocators.Num() - 1; Index >= 0; --Index)
	{
		FD3D12BuddyAllocator* Candidate = Allocators[Index];
		if (!Candidate->IsEmpty())
		{
			continue;
		}
		if (Candidate->GetLastUsedFrameFence() + InFrameLag > CompletedFence)
		{
			continue;
		}

		Candidate->Destroy();
		delete(Candidate);
		Allocators.RemoveAt(Index);
	}
}
/** Not implemented yet; currently writes nothing to Ar. */
void FD3D12MultiBuddyAllocator::DumpAllocatorStats(class FOutputDevice& Ar)
{
//TODO
}
/**
 * Accumulates the statistics of every owned buddy allocator into the running totals passed in.
 * Does nothing unless D3D12RHI_TRACK_DETAILED_STATS is enabled. The parameters have the same
 * meaning as in FD3D12BuddyAllocator::UpdateMemoryStats.
 */
void FD3D12MultiBuddyAllocator::UpdateMemoryStats(uint32& IOMemoryAllocated, uint32& IOMemoryUsed, uint32& IOMemoryFree, uint32& IOAlignmentWaste, uint32& IOAllocatedPageCount, uint32& IOFullPageCount)
{
#if D3D12RHI_TRACK_DETAILED_STATS
	FScopeLock Lock(&CS);

	const int32 NumAllocators = Allocators.Num();
	for (int32 Index = 0; Index < NumAllocators; ++Index)
	{
		Allocators[Index]->UpdateMemoryStats(IOMemoryAllocated, IOMemoryUsed, IOMemoryFree, IOAlignmentWaste, IOAllocatedPageCount, IOFullPageCount);
	}
#endif
}
void FD3D12MultiBuddyAllocator::ReleaseAllResources()
{
for (int32 i = (Allocators.Num() - 1); i >= 0; i--)
{
if (Allocators[i])
{
Allocators[i]->Destroy();
delete(Allocators[i]);
}
}
Allocators.Empty();
}
/** Intentionally a no-op for the multi-buddy allocator. */
void FD3D12MultiBuddyAllocator::Reset()
{
}
//-----------------------------------------------------------------------------
// Bucket Allocator
//-----------------------------------------------------------------------------
/**
 * Constructs a bucket allocator. The pooling limit passed to the base allocator is fixed at 32 MiB.
 *
 * @param ParentDevice                Device that owns the allocator.
 * @param VisibleNodes                GPU nodes the memory is visible to.
 * @param InInitConfig                Heap/resource configuration for the buffers it creates.
 * @param Name                        Debug name.
 * @param InBlockRetentionFrameCount  Stored in BlockRetentionFrameCount.
 */
FD3D12BucketAllocator::FD3D12BucketAllocator(FD3D12Device* ParentDevice,
FRHIGPUMask VisibleNodes,
const FD3D12ResourceInitConfig& InInitConfig,
const FString& Name,
uint64 InBlockRetentionFrameCount) :
FD3D12ResourceAllocator(ParentDevice, VisibleNodes, InInitConfig, Name, 32 * 1024 * 1024),
BlockRetentionFrameCount(InBlockRetentionFrameCount)
{}
/**
 * Allocates from the bucket matching the request size. A block already available in that bucket
 * is reused when possible; otherwise a new buffer is created and, when the block size is smaller
 * than MIN_HEAP_SIZE, the remainder of the buffer is chopped into further blocks for the bucket.
 *
 * @param SizeInBytes       Number of bytes requested (raised to Alignment if smaller).
 * @param Alignment         Required alignment in bytes; 0 means "no alignment requirement".
 * @param ResourceLocation  Receives the resource, offset, size and addresses of the allocation.
 * @return false if a new backing buffer was needed and could not be created, true otherwise.
 */
bool FD3D12BucketAllocator::TryAllocate(uint32 SizeInBytes, uint32 Alignment, FD3D12ResourceLocation& ResourceLocation)
{
	FScopeLock Lock(&CS);

	FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter();

	// Size cannot be smaller than the requested alignment
	SizeInBytes = FMath::Max(SizeInBytes, Alignment);

	uint32 Bucket = BucketFromSize(SizeInBytes, BucketShift);
	check(Bucket < NumBuckets);

	uint32 BlockSize = BlockSizeFromBufferSize(SizeInBytes, BucketShift);

	// If some odd alignment is requested, make sure the block can fulfill it.
	// Alignment == 0 means "no requirement" and must not reach the modulo (division by zero).
	if (Alignment != 0 && BlockSize % Alignment != 0)
	{
		const uint32 AlignedSizeInBytes = SizeInBytes + Alignment;
		Bucket = BucketFromSize(AlignedSizeInBytes, BucketShift);
		BlockSize = BlockSizeFromBufferSize(AlignedSizeInBytes, BucketShift);

		// The padded size may map to a larger bucket, so validate the index again.
		check(Bucket < NumBuckets);
	}

	FD3D12BlockAllocatorPrivateData& Block = ResourceLocation.GetBlockAllocatorPrivateData();

	// See if a block is already available in the bucket
	if (AvailableBlocks[Bucket].Dequeue(Block))
	{
		check(Block.ResourceHeap);
	}
	else
	{
		// No blocks of the requested size are available so make one
		FD3D12Resource* Resource = nullptr;
		void* BaseAddress = nullptr;

		// Allocate a block
		check(BlockSize >= SizeInBytes);

		// NOTE(review): the buffer is sized from SizeInBytes (or MIN_HEAP_SIZE), not BlockSize. If a
		// block from this bucket is later reused for a larger request that maps to the same bucket,
		// the buffer could be smaller than BlockSize - confirm against BucketFromSize/BlockSizeFromBufferSize.
		if (FAILED(Adapter->CreateBuffer(InitConfig.HeapType, GetGPUMask(), GetVisibilityMask(), SizeInBytes < MIN_HEAP_SIZE ? MIN_HEAP_SIZE : SizeInBytes, &Resource, TEXT("BucketAllocator"), InitConfig.ResourceFlags)))
		{
			return false;
		}

		// Track the resource so we know when to delete it
		SubAllocatedResources.Add(Resource);

		if (IsCPUAccessible(InitConfig.HeapType))
		{
			BaseAddress = Resource->Map();
			check(BaseAddress);
			// The mapped base address itself must satisfy the requested alignment (skipped when none was requested).
			check(Alignment == 0 || BaseAddress == (uint8*)(((uint64)BaseAddress + Alignment - 1) & ~((uint64)Alignment - 1)));
		}

		// Init the block we will return
		Block.BucketIndex = Bucket;
		Block.Offset = 0;
		Block.ResourceHeap = Resource;
		Block.ResourceHeap->AddRef();

		// Chop up the rest of the resource into reusable blocks
		if (BlockSize < MIN_HEAP_SIZE)
		{
			// Create additional available blocks that can be sub-allocated from the same resource
			for (uint32 Offset = BlockSize; Offset <= MIN_HEAP_SIZE - BlockSize; Offset += BlockSize)
			{
				FD3D12BlockAllocatorPrivateData NewBlock = {};
				NewBlock.BucketIndex = Bucket;
				NewBlock.Offset = Offset;
				NewBlock.ResourceHeap = Resource;
				NewBlock.ResourceHeap->AddRef();

				// Add the bucket to the available list
				AvailableBlocks[Bucket].Enqueue(NewBlock);
			}
		}
	}

	// Push the start of the allocation forward to the next aligned offset inside the block if needed.
	uint64 AlignedBlockOffset = Block.Offset;
	if (Alignment != 0 && AlignedBlockOffset % Alignment != 0)
	{
		AlignedBlockOffset = AlignArbitrary(AlignedBlockOffset, Alignment);
	}

	ResourceLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation);
	ResourceLocation.SetAllocator((FD3D12BaseAllocatorType*)this);
	ResourceLocation.SetResource(Block.ResourceHeap);
	ResourceLocation.SetSize(SizeInBytes);
	ResourceLocation.SetOffsetFromBaseOfResource(AlignedBlockOffset);
	ResourceLocation.SetGPUVirtualAddress(Block.ResourceHeap->GetGPUVirtualAddress() + AlignedBlockOffset);

	if (IsCPUAccessible(InitConfig.HeapType))
	{
		ResourceLocation.SetMappedBaseAddress((void*)((uint64)Block.ResourceHeap->GetResourceBaseAddress() + AlignedBlockOffset));
	}

	// Check that when the offset is aligned that it doesn't go past the end of the block
	check(ResourceLocation.GetOffsetFromBaseOfResource() - Block.Offset + SizeInBytes <= BlockSize);

	return true;
}
/**
 * Returns a block to the allocator.
 *
 * The block is stamped with the next frame fence value to be signaled and parked on the expired
 * queue; CleanUpAllocations() moves it back to its bucket once that fence has completed.
 */
void FD3D12BucketAllocator::Deallocate(FD3D12ResourceLocation& ResourceLocation)
{
	FScopeLock Lock(&CS);

	FD3D12BlockAllocatorPrivateData& ReturnedBlock = ResourceLocation.GetBlockAllocatorPrivateData();
	ReturnedBlock.FrameFence = GetParentDevice()->GetParentAdapter()->GetFrameFence().GetNextFenceToSignal();

	ExpiredBlocks.Enqueue(ReturnedBlock);
}
// Initialization hook. The bucket allocator performs no work here; backing buffers are created
// on demand by TryAllocate().
void FD3D12BucketAllocator::Initialize()
{
}
// Tears the allocator down by releasing every queued block and every backing resource it tracks.
void FD3D12BucketAllocator::Destroy()
{
ReleaseAllResources();
}
/**
 * Per-frame housekeeping for the bucket allocator.
 *
 * 1. Releases free blocks (in buckets at or above MinCleanupBucket) whose fence value plus the
 *    retention frame count has completed.
 * 2. Moves deallocated blocks whose fence has completed from the expired queue back to the free
 *    list of their bucket.
 *
 * @param InFrameLag  Not used by this allocator; retention is driven by BlockRetentionFrameCount.
 */
void FD3D12BucketAllocator::CleanUpAllocations(uint64 InFrameLag)
{
	FScopeLock Lock(&CS);

	FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter();
	FD3D12ManualFence& FrameFence = Adapter->GetFrameFence();

#if SUB_ALLOCATED_DEFAULT_ALLOCATIONS
	// Skip the smallest buckets since they are sub-allocated from shared resources and would be
	// fragmented by deleting blocks. The subtraction is done with saturation: the operands are
	// unsigned, so "x - 4" would wrap to a huge value (and skip all cleanup) whenever x < 4.
	const static uint32 MinHeapBucket = BucketFromSize(MIN_HEAP_SIZE, BucketShift);
	const static uint32 MinCleanupBucket = (MinHeapBucket > 4) ? (MinHeapBucket - 4) : 0;
#else
	const static uint32 MinCleanupBucket = 0;
#endif

	// Keep the full 64-bit retention count; fence values are compared as 64-bit quantities.
	const uint64 RetentionCount = BlockRetentionFrameCount;
	const auto& RetentionElapsed = [&FrameFence, RetentionCount](const FD3D12BlockAllocatorPrivateData& Block) { return FrameFence.IsFenceComplete(Block.FrameFence + RetentionCount, /* bUpdateCachedFenceValue */ false); };

	for (uint32 bucket = MinCleanupBucket; bucket < NumBuckets; bucket++)
	{
		FD3D12BlockAllocatorPrivateData BlockInQueue = {};
		while (AvailableBlocks[bucket].Dequeue(BlockInQueue, RetentionElapsed))
		{
			SAFE_RELEASE(BlockInQueue.ResourceHeap);
		}
	}

	FD3D12BlockAllocatorPrivateData BlockInQueue = {};
	const auto& FenceComplete = [&FrameFence](const FD3D12BlockAllocatorPrivateData& Block) { return FrameFence.IsFenceComplete(Block.FrameFence, /* bUpdateCachedFenceValue */ false); };
	while (ExpiredBlocks.Dequeue(BlockInQueue, FenceComplete))
	{
		// Add the block back to the available list of its bucket
		AvailableBlocks[BlockInQueue.BucketIndex].Enqueue(BlockInQueue);
	}
}
// Stats dump hook for the bucket allocator. Not implemented: nothing is written to Ar.
void FD3D12BucketAllocator::DumpAllocatorStats(class FOutputDevice& Ar)
{
//TODO: report per-bucket statistics
}
void FD3D12BucketAllocator::ReleaseAllResources()
{
const static uint32 MinCleanupBucket = 0;
// Start at bucket 8 since smaller buckets are sub-allocated resources
// and would be fragmented by deleting blocks
for (uint32 bucket = MinCleanupBucket; bucket < NumBuckets; bucket++)
{
FD3D12BlockAllocatorPrivateData Block = {};
while (AvailableBlocks[bucket].Dequeue(Block))
{
SAFE_RELEASE(Block.ResourceHeap);
}
}
FD3D12BlockAllocatorPrivateData Block = {};
while (ExpiredBlocks.Dequeue(Block))
{
if (Block.BucketIndex >= MinCleanupBucket) //-V547
{
SAFE_RELEASE(Block.ResourceHeap);
}
}
for (FD3D12Resource*& Resource : SubAllocatedResources)
{
Resource->Release();
delete(Resource);
}
}
// Reset hook. The bucket allocator keeps no state that is reset here, so this is a no-op.
void FD3D12BucketAllocator::Reset()
{
}
//-----------------------------------------------------------------------------
// Dynamic Buffer Allocator
//-----------------------------------------------------------------------------
// Builds the upload heap allocator, which is made of three sub-allocators that all use an upload
// init config and manual sub-allocation:
//  - SmallBlockAllocator: serves requests up to GD3D12UploadHeapSmallBlockMaxAllocationSize
//  - BigBlockAllocator: pool allocator for everything larger (defrag disabled)
//  - FastConstantPageAllocator: serves pages for the fast constant allocator
// All three report to the memory trace heap created from InName.
// NOTE(review): TraceHeapId is consumed by the later member initializers, so it presumably is
// declared before them in the class - confirm in the header.
FD3D12UploadHeapAllocator::FD3D12UploadHeapAllocator(FD3D12Adapter* InParent, FD3D12Device* InParentDevice, const FString& InName)
: FD3D12AdapterChild(InParent)
, FD3D12DeviceChild(InParentDevice)
, FD3D12MultiNodeGPUObject(InParentDevice->GetGPUMask(), FRHIGPUMask::All()) // Upload memory, thus it can be trivially visible to all GPUs
, TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, *FString(InName + TEXT(" (UploadHeapAllocator)"))))
// Small block allocator; the three size arguments are, in order: max allocation size, pool size, min block size
, SmallBlockAllocator(
InParentDevice
, GetVisibilityMask()
, FD3D12ResourceInitConfig::CreateUpload()
, TEXT("Small Block Multi Buddy Allocator")
, EResourceAllocationStrategy::kManualSubAllocation
, GD3D12UploadHeapSmallBlockMaxAllocationSize
, GD3D12UploadHeapSmallBlockPoolSize
, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT
, TraceHeapId)
// Big block allocator; the three size arguments are, in order: pool size, pool alignment, max allocation size
, BigBlockAllocator(
InParentDevice
, GetVisibilityMask()
, FD3D12ResourceInitConfig::CreateUpload()
, TEXT("Big Block Pool Allocator")
, EResourceAllocationStrategy::kManualSubAllocation
, GD3D12UploadHeapBigBlockPoolSize
, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT
, GD3D12UploadHeapBigBlockMaxAllocationSize
, FRHIMemoryPool::EFreeListOrder::SortByOffset
, false /*defrag*/
, TraceHeapId)
// Fast constant page allocator; max allocation is 64 pages and the min block size is one page
, FastConstantPageAllocator(
InParentDevice
, GetVisibilityMask()
, FD3D12ResourceInitConfig::CreateUpload()
, TEXT("Fast Constant Page Multi Buddy Allocator")
, EResourceAllocationStrategy::kManualSubAllocation
, GD3D12FastConstantAllocatorPageSize * 64
, GD3D12UploadHeapSmallBlockPoolSize
, GD3D12FastConstantAllocatorPageSize
, TraceHeapId)
{
}
// Destroys the three sub-allocators owned by the upload heap allocator.
void FD3D12UploadHeapAllocator::Destroy()
{
SmallBlockAllocator.Destroy();
BigBlockAllocator.Destroy();
FastConstantPageAllocator.Destroy();
}
/**
 * Allocates upload memory and returns its mapped address.
 *
 * Requests that fit the small block allocator's pooling limit are served by it; anything larger
 * goes to the big block pool allocator under BigBlockCS.
 *
 * @param InSize            Size in bytes; must be greater than zero.
 * @param InAlignment       Required alignment of the allocation.
 * @param ResourceLocation  Cleared first, then filled with the new allocation.
 * @return The mapped base address reported by ResourceLocation after the allocation.
 */
void* FD3D12UploadHeapAllocator::AllocUploadResource(uint32 InSize, uint32 InAlignment, FD3D12ResourceLocation& ResourceLocation)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(FD3D12UploadHeapAllocator::AllocUploadResource);

	check(InSize > 0);

	ResourceLocation.Clear();

	const bool bNeedsBigBlock = InSize > SmallBlockAllocator.GetMaximumAllocationSizeForPooling();
	if (bNeedsBigBlock)
	{
		FD3D12ScopeLock Lock(&BigBlockCS);

		// Too large for the small block allocator: forward to the big block pool
		const D3D12_RESOURCE_DESC BufferDesc = CD3DX12_RESOURCE_DESC::Buffer(InSize, D3D12_RESOURCE_FLAG_NONE);
		BigBlockAllocator.AllocateResource(GetParentDevice()->GetGPUIndex(), D3D12_HEAP_TYPE_UPLOAD, BufferDesc, InSize, InAlignment, ED3D12ResourceStateMode::SingleState, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, nullptr, ResourceLocation);
		ResourceLocation.UnlockPoolData();
	}
	else
	{
		// Small enough to be pooled by the small block allocator
		verify(SmallBlockAllocator.TryAllocate(InSize, InAlignment, ResourceLocation));
	}

	return ResourceLocation.GetMappedBaseAddress();
}
// Allocates a page for the fast constant allocator from FastConstantPageAllocator and returns the
// mapped base address of the resulting location. ResourceLocation is cleared before allocating.
void* FD3D12UploadHeapAllocator::AllocFastConstantAllocationPage(uint32 InSize, uint32 InAlignment, FD3D12ResourceLocation& ResourceLocation)
{
check(InSize > 0);
// There is no fallback allocator on this path, so the request must fit the pooling limit.
check(InSize <= FastConstantPageAllocator.GetMaximumAllocationSizeForPooling());
ResourceLocation.Clear();
// Failure is asserted via verify() rather than reported to the caller.
verify(FastConstantPageAllocator.TryAllocate(InSize, InAlignment, ResourceLocation));
return ResourceLocation.GetMappedBaseAddress();
}
// Forwards cleanup with the given frame lag to the three sub-allocators.
void FD3D12UploadHeapAllocator::CleanUpAllocations(uint64 InFrameLag)
{
SmallBlockAllocator.CleanUpAllocations(InFrameLag);
{
// The big block allocator is guarded by BigBlockCS here, as it is in AllocUploadResource().
FD3D12ScopeLock Lock(&BigBlockCS);
BigBlockAllocator.CleanUpAllocations(InFrameLag);
}
FastConstantPageAllocator.CleanUpAllocations(InFrameLag);
}
// Publishes the upload pool stats. The counters are accumulated across the three sub-allocators
// only when D3D12RHI_TRACK_DETAILED_STATS is enabled; otherwise every stat is set to zero.
void FD3D12UploadHeapAllocator::UpdateMemoryStats()
{
uint32 MemoryAllocated = 0;
uint32 MemoryUsed = 0;
uint32 FreeMemory = 0;
// Only filled in by the big block allocator and not published as a stat below.
uint32 EndFreeMemory = 0;
uint32 AlignmentWaste = 0;
uint32 AllocatedPageCount = 0;
uint32 FullPageCount = 0;
#if D3D12RHI_TRACK_DETAILED_STATS
SmallBlockAllocator.UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount);
{
FD3D12ScopeLock Lock(&BigBlockCS);
BigBlockAllocator.UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, EndFreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount);
}
FastConstantPageAllocator.UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount);
#endif
SET_MEMORY_STAT(STAT_D3D12UploadPoolMemoryAllocated, MemoryAllocated);
SET_MEMORY_STAT(STAT_D3D12UploadPoolMemoryUsed, MemoryUsed);
SET_MEMORY_STAT(STAT_D3D12UploadPoolMemoryFree, FreeMemory);
SET_MEMORY_STAT(STAT_D3D12UploadPoolAlignmentWaste, AlignmentWaste);
SET_DWORD_STAT(STAT_D3D12UploadPoolPageCount, AllocatedPageCount);
SET_DWORD_STAT(STAT_D3D12UploadPoolFullPages, FullPageCount);
}
//-----------------------------------------------------------------------------
// Default Buffer Allocator
//-----------------------------------------------------------------------------
// Builds the allocator init config (heap type, resource flags, initial resource state and heap
// flags) that a pool must have in order to serve buffers with the given heap type, resource flags
// and usage. SupportsAllocation() compares this against a pool's own init config.
FD3D12ResourceInitConfig FD3D12DefaultBufferPool::GetResourceAllocatorInitConfig(D3D12_HEAP_TYPE InHeapType, D3D12_RESOURCE_FLAGS InResourceFlags, EBufferUsageFlags InBufferUsage)
{
FD3D12ResourceInitConfig InitConfig;
InitConfig.HeapType = InHeapType;
InitConfig.ResourceFlags = InResourceFlags;
#if D3D12_RHI_RAYTRACING
// Setup initial resource state depending on the requested buffer flags
if (EnumHasAnyFlags(InBufferUsage, BUF_AccelerationStructure))
{
// should only have this flag and no other flags
check(InBufferUsage == BUF_AccelerationStructure);
InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
}
else
#endif // D3D12_RHI_RAYTRACING
// When ray tracing is compiled in, the "else" above binds to this "if" and forms a single chain.
if (InitConfig.HeapType == D3D12_HEAP_TYPE_READBACK)
{
InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_COPY_DEST;
}
else if (EnumHasAnyFlags(InBufferUsage, BUF_UnorderedAccess))
{
check(InResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
}
else
{
InitConfig.InitialResourceState = D3D12_RESOURCE_STATE_GENERIC_READ;
}
// Buffer pools only ever hold buffers
InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
if (EnumHasAnyFlags(InBufferUsage, BUF_DrawIndirect))
{
// Indirect argument buffers are expected to be UAV capable and get the extra indirect heap flag
check(InResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
InitConfig.HeapFlags |= D3D12RHI_HEAP_FLAG_ALLOW_INDIRECT_BUFFERS;
}
return InitConfig;
}
/**
 * Decides whether a buffer is served as a placed resource or by manual sub-allocation.
 *
 * Placed resources are used when the alignment exceeds kD3D12ManualSubAllocationAlignment or when
 * the buffer is multi state. With ED3D12ResourceStateMode::Default, the UAV resource flag selects
 * multi state.
 */
EResourceAllocationStrategy FD3D12DefaultBufferPool::GetResourceAllocationStrategy(D3D12_RESOURCE_FLAGS InResourceFlags, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment)
{
	// Alignments beyond what manual sub-allocation provides always need a placed resource
	if (Alignment > kD3D12ManualSubAllocationAlignment)
	{
		return EResourceAllocationStrategy::kPlacedResource;
	}

	// Does the resource need state tracking and transitions?
	bool bMultiState;
	if (InResourceStateMode == ED3D12ResourceStateMode::Default)
	{
		bMultiState = (InResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) != 0;
	}
	else
	{
		bMultiState = (InResourceStateMode == ED3D12ResourceStateMode::MultiState);
	}

	// Multi state resources need to be placed because each allocation can be in a different state
	if (bMultiState)
	{
		return EResourceAllocationStrategy::kPlacedResource;
	}
	return EResourceAllocationStrategy::kManualSubAllocation;
}
//-----------------------------------------------------------------------------
// Default Buffer Pool
//-----------------------------------------------------------------------------
// Wraps an existing multi buddy allocator as a default buffer pool. The pool takes its GPU mask
// and visibility mask from the allocator.
// NOTE(review): no code in this part of the file deletes InAllocator - confirm who owns it.
FD3D12DefaultBufferPool::FD3D12DefaultBufferPool(FD3D12Device* InParent, FD3D12MultiBuddyAllocator* InAllocator)
: FD3D12DeviceChild(InParent)
, FD3D12MultiNodeGPUObject(InAllocator->GetGPUMask(), InAllocator->GetVisibilityMask())
, Allocator(InAllocator)
{
}
/**
 * Tells whether this pool can serve a buffer with the given properties: both the init config and
 * the allocation strategy derived from the request must match those of the wrapped allocator.
 */
bool FD3D12DefaultBufferPool::SupportsAllocation(D3D12_HEAP_TYPE InHeapType, D3D12_RESOURCE_FLAGS InResourceFlags, EBufferUsageFlags InBufferUsage, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment) const
{
	FD3D12ResourceInitConfig RequiredConfig = GetResourceAllocatorInitConfig(InHeapType, InResourceFlags, InBufferUsage);
	if (!(Allocator->GetInitConfig() == RequiredConfig))
	{
		return false;
	}

	EResourceAllocationStrategy RequiredStrategy = GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment);
	return Allocator->GetAllocationStrategy() == RequiredStrategy;
}
// Forwards cleanup with the given frame lag to the wrapped allocator.
void FD3D12DefaultBufferPool::CleanUpAllocations(uint64 FrameLag)
{
Allocator->CleanUpAllocations(FrameLag);
}
// Grab a buffer from the available buffers or create a new buffer if none are available
//
// ResourceLocation is always cleared first. A zero-width request leaves it cleared and returns.
// Requests smaller than the allocator's pooling limit are sub-allocated from the pool (creating a
// placed resource on top of the backing heap when the pool uses the placed strategy). Everything
// else, including pooled requests the allocator could not serve, becomes a stand alone buffer.
void FD3D12DefaultBufferPool::AllocDefaultResource(D3D12_HEAP_TYPE InHeapType, const D3D12_RESOURCE_DESC& Desc, EBufferUsageFlags InUsage, ED3D12ResourceStateMode InResourceStateMode,
D3D12_RESOURCE_STATES InCreateState, uint32 Alignment, const TCHAR* Name, FD3D12ResourceLocation& ResourceLocation)
{
FD3D12Device* Device = GetParentDevice();
FD3D12Adapter* Adapter = Device->GetParentAdapter();
// If the resource location owns a block, this will deallocate it.
ResourceLocation.Clear();
if (Desc.Width == 0)
{
return;
}
#if DO_CHECK
// Validate the create state
if (InHeapType == D3D12_HEAP_TYPE_READBACK)
{
check(InCreateState == D3D12_RESOURCE_STATE_COPY_DEST);
}
else if (InHeapType == D3D12_HEAP_TYPE_UPLOAD)
{
check(InCreateState == D3D12_RESOURCE_STATE_GENERIC_READ);
}
#if D3D12_RHI_RAYTRACING
else if (EnumHasAnyFlags(InUsage, BUF_AccelerationStructure))
{
// RayTracing acceleration structures must be created in a particular state and may never transition out of it.
check(InResourceStateMode == ED3D12ResourceStateMode::SingleState);
check(InCreateState == D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE);
}
#endif // D3D12_RHI_RAYTRACING
#endif // DO_CHECK
// Only requests strictly below the pooling limit are pooled
const bool PoolResource = Desc.Width < Allocator->GetMaximumAllocationSizeForPooling()/* && ((Desc.Width % (1024 * 64)) != 0)*/;
if (PoolResource)
{
const bool bPlacedResource = (Allocator->GetAllocationStrategy() == EResourceAllocationStrategy::kPlacedResource);
// Ensure we're allocating from the correct pool
if (bPlacedResource)
{
// Writeable resources get separate ID3D12Resource* with their own resource state by using placed resources. Just make sure it's UAV, other flags are free to differ.
check((Desc.Flags & Allocator->GetInitConfig().ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) != 0 || InHeapType == D3D12_HEAP_TYPE_READBACK || Alignment > kD3D12ManualSubAllocationAlignment || InResourceStateMode == ED3D12ResourceStateMode::MultiState);
}
else
{
// Read-only resources get suballocated from big resources, thus share ID3D12Resource* and resource state with other resources. Ensure it's suballocated from a resource with identical flags.
check(Desc.Flags == Allocator->GetInitConfig().ResourceFlags);
}
if (Allocator->TryAllocate(Desc.Width, Alignment, ResourceLocation))
{
if (bPlacedResource)
{
// The allocator only reserved a heap range; create the actual resource on top of it
check(ResourceLocation.GetResource() == nullptr);
FD3D12Heap* BackingHeap = ((FD3D12BuddyAllocator*) ResourceLocation.GetAllocator())->GetBackingHeap();
uint64 HeapOffset = ResourceLocation.GetAllocator()->GetAllocationOffsetInBytes(ResourceLocation.GetBuddyAllocatorPrivateData());
FD3D12Resource* NewResource = nullptr;
// Note: the placed resource is always created as MultiState, independent of InResourceStateMode
VERIFYD3D12RESULT(Adapter->CreatePlacedResource(Desc, BackingHeap, HeapOffset, InCreateState, ED3D12ResourceStateMode::MultiState, D3D12_RESOURCE_STATE_TBD, nullptr, &NewResource, Name));
ResourceLocation.SetResource(NewResource);
}
else
{
// Nothing to do for suballocated resources
}
// Successfully sub-allocated
return;
}
// Pool allocation failed: fall through to the stand alone path below
}
// Allocate Standalone
// Todo: track stand alone allocations and see how much memory we use by this and how many we have
FD3D12Resource* NewResource = nullptr;
VERIFYD3D12RESULT(Adapter->CreateBuffer(InHeapType, GetGPUMask(), GetVisibilityMask(), InCreateState, InResourceStateMode, Desc.Width, &NewResource, Name, Desc.Flags));
ResourceLocation.AsStandAlone(NewResource, Desc.Width);
}
// Accumulates the wrapped allocator's statistics into the IO counters.
// IOMemoryEndFree is not forwarded to the allocator and is left unmodified by this function.
void FD3D12DefaultBufferPool::UpdateMemoryStats(uint32& IOMemoryAllocated, uint32& IOMemoryUsed, uint32& IOMemoryFree, uint32& IOMemoryEndFree, uint32& IOAlignmentWaste, uint32& IOAllocatedPageCount, uint32& IOFullPageCount)
{
Allocator->UpdateMemoryStats(IOMemoryAllocated, IOMemoryUsed, IOMemoryFree, IOAlignmentWaste, IOAllocatedPageCount, IOFullPageCount);
}
//-----------------------------------------------------------------------------
// Default Buffer Allocator
//-----------------------------------------------------------------------------
/**
 * Creates the default buffer allocator for a device. No buffer pools exist initially; they are
 * created lazily by AllocDefaultResource() through CreateBufferPool().
 *
 * @param InParent      Device the allocator (and every pool it creates) belongs to.
 * @param VisibleNodes  GPU visibility mask used for the pools.
 */
FD3D12DefaultBufferAllocator::FD3D12DefaultBufferAllocator(FD3D12Device* InParent, FRHIGPUMask VisibleNodes)
	: FD3D12DeviceChild(InParent)
	, FD3D12MultiNodeGPUObject(InParent->GetGPUMask(), VisibleNodes)
	, TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, TEXT("Default Buffer Allocator")))
{
	// DefaultBufferPools is a container object (pools are appended with Add()), not a plain array,
	// so it must not be cleared with a raw memset over its internal state. It is already empty
	// after default construction.
}
// Creates a new buffer pool matching the given heap type, resource flags, usage, state mode and
// alignment, appends it to DefaultBufferPools and returns it.
// Depending on USE_BUFFER_POOL_ALLOCATOR the pool is either a FD3D12PoolAllocator or a
// FD3D12DefaultBufferPool wrapping a new multi buddy allocator.
// This function takes no lock itself; AllocDefaultResource() calls it while holding CS.
FD3D12BufferPool* FD3D12DefaultBufferAllocator::CreateBufferPool(D3D12_HEAP_TYPE InHeapType, D3D12_RESOURCE_FLAGS InResourceFlags, EBufferUsageFlags InBufferUsage, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment)
{
FD3D12Device* Device = GetParentDevice();
FD3D12ResourceInitConfig InitConfig = FD3D12BufferPool::GetResourceAllocatorInitConfig(InHeapType, InResourceFlags, InBufferUsage);
// NNE resources must be in heaps visible on GPU0 only. Required by DirectML. Note that in single GPU mode, GetVisibilityMask() will
// be the same as GPU0(), so no extra heap fragmentation occurs in that case.
FRHIGPUMask VisibleNodes = EnumHasAnyFlags(InBufferUsage, EBufferUsageFlags::NNE) ? FRHIGPUMask::GPU0() : GetVisibilityMask();
#if USE_BUFFER_POOL_ALLOCATOR
const FString Name(L"D3D12 Pool Allocator");
EResourceAllocationStrategy AllocationStrategy = FD3D12PoolAllocator::GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment);
// Readback heaps use their own pool size and maximum allocation size
uint64 PoolSize = InHeapType == D3D12_HEAP_TYPE_READBACK ? READBACK_BUFFER_POOL_DEFAULT_POOL_SIZE : BUFFER_POOL_DEFAULT_POOL_SIZE;
uint64 PoolAlignment = (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource) ? MIN_PLACED_RESOURCE_SIZE : kD3D12ManualSubAllocationAlignment;
uint64 MaxAllocationSize = InHeapType == D3D12_HEAP_TYPE_READBACK ? READBACK_BUFFER_POOL_MAX_ALLOC_SIZE : BUFFER_POOL_DEFAULT_POOL_MAX_ALLOC_SIZE;
FRHIMemoryPool::EFreeListOrder FreeListOrder = FRHIMemoryPool::EFreeListOrder::SortBySize;
// Disable defrag if not Default memory
bool bDefragEnabled = (InitConfig.HeapType == D3D12_HEAP_TYPE_DEFAULT);
#if D3D12_RHI_RAYTRACING
// Disable defrag on the RT Acceleration pool - #kenzo_todo
// Acceleration structure buffers may be created, but not built immediately.
// If CopyRaytracingAccelerationStructure is called on such buffer, the GPU will crash.
if (InitConfig.InitialResourceState == D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE)
{
bDefragEnabled = false;
// Use custom pool and allocation size for RT structures because they don't defrag and will thus 'waste' more memory
PoolSize = BUFFER_POOL_RT_ACCELERATION_STRUCTURE_POOL_SIZE;
MaxAllocationSize = BUFFER_POOL_RT_ACCELERATION_STRUCTURE_MAX_ALLOC_SIZE;
}
#endif // D3D12_RHI_RAYTRACING
FD3D12BufferPool* NewPool = new FD3D12PoolAllocator(Device, VisibleNodes, InitConfig, Name, AllocationStrategy, PoolSize, PoolAlignment, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
#else // USE_BUFFER_POOL_ALLOCATOR
EResourceAllocationStrategy AllocationStrategy = FD3D12DefaultBufferPool::GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment);
// if placed then 64KB alignment required :(
uint32 MinBlockSize = (AllocationStrategy == EResourceAllocationStrategy::kPlacedResource) ? MIN_PLACED_RESOURCE_SIZE : 16;
const FString Name(L"Default Buffer Multi Buddy Allocator");
FD3D12MultiBuddyAllocator* Allocator = new FD3D12MultiBuddyAllocator(Device,
VisibleNodes,
InitConfig,
Name,
AllocationStrategy,
InHeapType == D3D12_HEAP_TYPE_READBACK ? READBACK_BUFFER_POOL_MAX_ALLOC_SIZE : DEFAULT_BUFFER_POOL_MAX_ALLOC_SIZE,
InHeapType == D3D12_HEAP_TYPE_READBACK ? READBACK_BUFFER_POOL_DEFAULT_POOL_SIZE : DEFAULT_BUFFER_POOL_DEFAULT_POOL_SIZE,
MinBlockSize,
TraceHeapId
);
FD3D12DefaultBufferPool* NewPool = new FD3D12DefaultBufferPool(Device, Allocator);
#endif // USE_BUFFER_POOL_ALLOCATOR
// The pool list keeps the raw pointer; FreeDefaultBufferPools() deletes it
DefaultBufferPools.Add(NewPool);
return NewPool;
}
/** Returns true when a buffer with the given flags, state mode and alignment is allocated as a placed resource. */
bool FD3D12DefaultBufferAllocator::IsPlacedResource(D3D12_RESOURCE_FLAGS InResourceFlags, ED3D12ResourceStateMode InResourceStateMode, uint32 Alignment)
{
	return FD3D12BufferPool::GetResourceAllocationStrategy(InResourceFlags, InResourceStateMode, Alignment) == EResourceAllocationStrategy::kPlacedResource;
}
/**
 * Picks the initial D3D12 resource state for a buffer.
 *
 * The rules are evaluated in order: readback heaps start as copy destination, upload heaps as
 * generic read, single state buffers whose usage is exactly BUF_UnorderedAccess as unordered
 * access, acceleration structures (when ray tracing is compiled in) in the acceleration structure
 * state, and everything else as generic read.
 */
D3D12_RESOURCE_STATES FD3D12DefaultBufferAllocator::GetDefaultInitialResourceState(D3D12_HEAP_TYPE InHeapType, EBufferUsageFlags InBufferFlags, ED3D12ResourceStateMode InResourceStateMode)
{
	// The heap type dictates the state for readback and upload memory
	if (InHeapType == D3D12_HEAP_TYPE_READBACK)
	{
		return D3D12_RESOURCE_STATE_COPY_DEST;
	}

	if (InHeapType == D3D12_HEAP_TYPE_UPLOAD)
	{
		return D3D12_RESOURCE_STATE_GENERIC_READ;
	}

	// Note: this is an exact match on the usage flags, not a "has flag" test
	if (InBufferFlags == BUF_UnorderedAccess && InResourceStateMode == ED3D12ResourceStateMode::SingleState)
	{
		check(InHeapType == D3D12_HEAP_TYPE_DEFAULT);
		return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
	}

#if D3D12_RHI_RAYTRACING
	if (EnumHasAnyFlags(InBufferFlags, BUF_AccelerationStructure))
	{
		check(InHeapType == D3D12_HEAP_TYPE_DEFAULT);
		return D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
	}
#endif // D3D12_RHI_RAYTRACING

	return D3D12_RESOURCE_STATE_GENERIC_READ;
}
// Grab a buffer from the available buffers or create a new buffer if none are available
/**
 * Allocates a default buffer into ResourceLocation.
 *
 * - Indirect argument buffers become committed stand alone resources when pooling them is disabled
 *   (GD3D12AllowPoolAllocateIndirectArgBuffers == 0).
 * - Reserved buffers (BUF_ReservedResource) always become stand alone reserved resources.
 * - Everything else is forwarded to the first existing pool that supports the request, or to a
 *   pool created on the spot when no pool matches.
 *
 * @param InHeapType           Heap type the buffer lives in.
 * @param InResourceDesc       Description of the buffer.
 * @param InBufferUsage        RHI usage flags of the buffer.
 * @param InResourceStateMode  State tracking mode of the buffer.
 * @param InCreateState        Initial resource state.
 * @param ResourceLocation     Receives the allocation.
 * @param Alignment            Required alignment; for reserved buffers it must be a multiple of the reserved resource tile size.
 * @param Name                 Debug name for newly created resources.
 */
void FD3D12DefaultBufferAllocator::AllocDefaultResource(D3D12_HEAP_TYPE InHeapType, const D3D12_RESOURCE_DESC& InResourceDesc, EBufferUsageFlags InBufferUsage, ED3D12ResourceStateMode InResourceStateMode, D3D12_RESOURCE_STATES InCreateState, FD3D12ResourceLocation& ResourceLocation, uint32 Alignment, const TCHAR* Name)
{
	FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter();

	// Force indirect args to stand alone allocations instead of pooled
	if (!GD3D12AllowPoolAllocateIndirectArgBuffers && EnumHasAnyFlags(InBufferUsage, BUF_DrawIndirect))
	{
		ResourceLocation.Clear();

		FD3D12Resource* NewResource = nullptr;

		// NNE resources must be in heaps visible on GPU0 only. Required by DirectML.
		const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(InHeapType, GetGPUMask().GetNative(), EnumHasAnyFlags(InBufferUsage, EBufferUsageFlags::NNE) ? FRHIGPUMask::GPU0().GetNative() : GetVisibilityMask().GetNative());
		D3D12_RESOURCE_DESC Desc = InResourceDesc;
		Desc.Alignment = 0;
		VERIFYD3D12RESULT(Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InCreateState, InResourceStateMode, InCreateState, nullptr, &NewResource, Name, false));

		ResourceLocation.AsStandAlone(NewResource, InResourceDesc.Width);
		return;
	}

	if (EnumHasAnyFlags(InBufferUsage, BUF_ReservedResource))
	{
		ResourceLocation.Clear();

		FD3D12Resource* NewResource = nullptr;

		checkf(Alignment % GRHIGlobals.ReservedResources.TileSizeInBytes == 0,
			TEXT("Reserved buffer alignment is expected to be a multiple of the reserved resource tile size"));

		FD3D12ResourceDesc Desc = InResourceDesc;
		Desc.Alignment = Alignment;
		Desc.bReservedResource = true;
		VERIFYD3D12RESULT(Adapter->CreateReservedResource(Desc, GetGPUMask(), InCreateState, InResourceStateMode, InCreateState, nullptr, &NewResource, Name, false));

		ResourceLocation.AsStandAlone(NewResource, InResourceDesc.Width);
		return;
	}

	// The pool list is shared state; everything below runs under the allocator lock
	FScopeLock Lock(&CS);

	// Patch out deny shader resource because it doesn't add anything for buffers and allows more pool sharing
	// TODO: check if this is different on Xbox?
	D3D12_RESOURCE_DESC ResourceDesc = InResourceDesc;
	ResourceDesc.Flags = ResourceDesc.Flags & (~D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);

	// Do we already have a default pool which support this allocation?
	FD3D12BufferPool* BufferPool = nullptr;
	for (FD3D12BufferPool* Pool : DefaultBufferPools)
	{
		// FreeDefaultBufferPools() leaves null entries in the list, so skip those like every other
		// loop over DefaultBufferPools does.
		if (Pool != nullptr && Pool->SupportsAllocation(InHeapType, ResourceDesc.Flags, InBufferUsage, InResourceStateMode, Alignment))
		{
			BufferPool = Pool;
			break;
		}
	}

	// No pool yet, then create one
	if (BufferPool == nullptr)
	{
		BufferPool = CreateBufferPool(InHeapType, ResourceDesc.Flags, InBufferUsage, InResourceStateMode, Alignment);
	}

	// Perform actual allocation
	BufferPool->AllocDefaultResource(InHeapType, ResourceDesc, InBufferUsage, InResourceStateMode, InCreateState, Alignment, Name, ResourceLocation);
}
void FD3D12DefaultBufferAllocator::FreeDefaultBufferPools()
{
FScopeLock Lock(&CS);
for (FD3D12BufferPool*& DefaultBufferPool : DefaultBufferPools)
{
if (DefaultBufferPool)
{
// No frame lag, delete all unused pages immediatly
DefaultBufferPool->CleanUpAllocations(0);
delete DefaultBufferPool;
DefaultBufferPool = nullptr;
}
}
}
/**
 * Start-of-frame work for the buffer pools (only when USE_BUFFER_POOL_ALLOCATOR is enabled):
 * optionally defragments the pools within the per-frame copy budget, then flushes the pending
 * copy operations of every pool on the graphics context of this device.
 */
void FD3D12DefaultBufferAllocator::BeginFrame(FD3D12ContextArray const& Contexts)
{
#if USE_BUFFER_POOL_ALLOCATOR
	FScopeLock Lock(&CS);

	const bool bDefragThisFrame = (GD3D12VRAMBufferPoolDefrag > 0) && (GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame > 0);
	if (bDefragThisFrame)
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(DefragBufferPool);

		uint32 CopyBudget = GD3D12VRAMBufferPoolDefragMaxCopySizePerFrame;
		uint32 CopiedSoFar = 0;
		for (FD3D12BufferPool* Pool : DefaultBufferPools)
		{
			if (Pool == nullptr)
			{
				continue;
			}

			Pool->Defrag(Contexts, CopyBudget, CopiedSoFar);

			// Stop as soon as the per-frame copy budget is used up
			if (CopiedSoFar >= CopyBudget)
			{
				break;
			}
		}
	}

	{
		// @todo dev-pr - explicit use of graphics context - nothing is synchronizing async compute - needs refactor
		FD3D12CommandContext& CommandContext = *Contexts[ERHIPipeline::Graphics]->GetSingleDeviceContext(GetParentDevice()->GetGPUIndex());

		TRACE_CPUPROFILER_EVENT_SCOPE(FlushPendingBufferCopyOps);
		RHI_BREADCRUMB_EVENT(CommandContext, "BufferPoolCopyOps");

		for (FD3D12BufferPool* Pool : DefaultBufferPools)
		{
			if (Pool != nullptr)
			{
				Pool->FlushPendingCopyOps(CommandContext);
			}
		}
	}
#endif // USE_BUFFER_POOL_ALLOCATOR
}
/** Runs allocation cleanup with the given frame lag on every live buffer pool, under the allocator lock. */
void FD3D12DefaultBufferAllocator::CleanupFreeBlocks(uint64 InFrameLag)
{
	FScopeLock Lock(&CS);

	for (FD3D12BufferPool* Pool : DefaultBufferPools)
	{
		// Entries may be null after FreeDefaultBufferPools()
		if (Pool == nullptr)
		{
			continue;
		}

		Pool->CleanUpAllocations(InFrameLag);
	}
}
void FD3D12DefaultBufferAllocator::UpdateMemoryStats()
{
FScopeLock Lock(&CS);
uint32 MemoryAllocated = 0;
uint32 MemoryUsed = 0;
uint32 FreeMemory = 0;
uint32 EndFreeMemory = 0;
uint32 AlignmentWaste = 0;
uint32 AllocatedPageCount = 0;
uint32 FullPageCount = 0;
#if D3D12RHI_TRACK_DETAILED_STATS
for (FD3D12BufferPool* DefaultBufferPool : DefaultBufferPools)
{
if (DefaultBufferPool)
{
DefaultBufferPool->UpdateMemoryStats(MemoryAllocated, MemoryUsed, FreeMemory, EndFreeMemory, AlignmentWaste, AllocatedPageCount, FullPageCount);
}
}
#endif
//check((MemoryUsed + AlignmentWaste + FreeMemory) == MemoryAllocated);
// compute fragmentation percentage stats:
uint32 Fragmentation = FreeMemory - EndFreeMemory;
uint32 FragmentationUsed = MemoryUsed + AlignmentWaste + Fragmentation;
float FragmentationPercentage = FragmentationUsed == 0 ? 0.0 : float(Fragmentation) / float(FragmentationUsed);
SET_MEMORY_STAT(STAT_D3D12BufferPoolMemoryAllocated, MemoryAllocated);
SET_MEMORY_STAT(STAT_D3D12BufferPoolMemoryUsed, MemoryUsed);
SET_MEMORY_STAT(STAT_D3D12BufferPoolMemoryFree, FreeMemory);
SET_MEMORY_STAT(STAT_D3D12BufferPoolAlignmentWaste, AlignmentWaste);
SET_DWORD_STAT(STAT_D3D12BufferPoolPageCount, AllocatedPageCount);
SET_DWORD_STAT(STAT_D3D12BufferPoolFullPages, FullPageCount);
SET_MEMORY_STAT(STAT_D3D12BufferPoolFragmentation, Fragmentation);
SET_FLOAT_STAT(STAT_D3D12BufferPoolFragmentationPercentage, FragmentationPercentage);
}
//-----------------------------------------------------------------------------
// Texture Allocator
//-----------------------------------------------------------------------------
#if USE_TEXTURE_POOL_ALLOCATOR
// Creates the four texture pool allocators (ReadOnly4K, ReadOnly, RenderTarget, UAV).
// All pools share the same base config (default heap, placed resources, free list sorted by size)
// and differ in heap flags, pool size, placement alignment, maximum allocation size and whether
// defragmentation is enabled. Each pool is initialized right after construction and stored in
// PoolAllocators under its EPoolType index.
FD3D12TextureAllocatorPool::FD3D12TextureAllocatorPool(FD3D12Device* Device, FRHIGPUMask VisibilityNode) :
FD3D12DeviceChild(Device),
FD3D12MultiNodeGPUObject(Device->GetGPUMask(), VisibilityNode),
TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, TEXT("Texture Allocator Pool")))
{
FD3D12ResourceInitConfig SharedInitConfig;
// texture only interesting in VRAM for now
SharedInitConfig.HeapType = D3D12_HEAP_TYPE_DEFAULT;
// unused for textures because placed and not suballocated
SharedInitConfig.ResourceFlags = D3D12_RESOURCE_FLAG_NONE;
SharedInitConfig.InitialResourceState = D3D12_RESOURCE_STATE_COMMON;
EResourceAllocationStrategy AllocationStrategy = EResourceAllocationStrategy::kPlacedResource;
FRHIMemoryPool::EFreeListOrder FreeListOrder = FRHIMemoryPool::EFreeListOrder::SortBySize;
// Read-only textures that qualify for the small (4K) placement alignment
{
FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
const FString Name(L"D3D12 ReadOnly4K Texture Pool Allocator");
uint64 PoolSize = 4 * 1024 * 1024;
uint64 MaxAllocationSize = PoolSize;
bool bDefragEnabled = false; // Disable defrag on 4K pool because it shouldn't really fragment - all allocations are 4K or multiple of 4K and pretty small
FD3D12PoolAllocator* ReadOnly4KPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
ReadOnly4KPool->Initialize();
PoolAllocators[(int)EPoolType::ReadOnly4K] = ReadOnly4KPool;
}
// All other read-only textures; the only pool with defrag enabled
{
FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
const FString Name(L"D3D12 ReadOnly Texture Pool Allocator");
uint64 PoolSize = GD3D12PoolAllocatorReadOnlyTextureVRAMPoolSize;
uint64 MaxAllocationSize = GD3D12PoolAllocatorReadOnlyTextureVRAMMaxAllocationSize;
bool bDefragEnabled = true;
FD3D12PoolAllocator* ReadOnlyPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
ReadOnlyPool->Initialize();
PoolAllocators[(int)EPoolType::ReadOnly] = ReadOnlyPool;
}
// Render target / depth stencil textures
{
FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
const FString Name(L"D3D12 RT Texture Pool Allocator");
uint64 PoolSize = GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize;
uint64 MaxAllocationSize = GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize;
// FD3D12ResourceLocation::OnAllocationMoved doesn't correctly retrieve the clear value when recreating moved resources, so we need to disable defrag for this pool for the time being.
bool bDefragEnabled = false;
FD3D12PoolAllocator* RTPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
RTPool->Initialize();
PoolAllocators[(int)EPoolType::RenderTarget] = RTPool;
}
// Remaining writable textures (UAV, or needing the UAV alias workaround); shares the RT/UAV size settings
{
FD3D12ResourceInitConfig InitConfig = SharedInitConfig;
InitConfig.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
const FString Name(L"D3D12 UAV Texture Pool Allocator");
uint64 PoolSize = GD3D12PoolAllocatorRTUAVTextureVRAMPoolSize;
uint64 MaxAllocationSize = GD3D12PoolAllocatorRTUAVTextureVRAMMaxAllocationSize;
// Defrag doesn't correctly handle resources which need the BCn/UINT UAV aliasing workaround, so we'll turn off defrag for this heap for now.
bool bDefragEnabled = false;
FD3D12PoolAllocator* UAVPool = new FD3D12PoolAllocator(Device, GetVisibilityMask(), InitConfig, Name, AllocationStrategy, PoolSize, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, MaxAllocationSize, FreeListOrder, bDefragEnabled, TraceHeapId);
UAVPool->Initialize();
PoolAllocators[(int)EPoolType::UAV] = UAVPool;
}
}
void FD3D12TextureAllocatorPool::Destroy()
{
for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex)
{
PoolAllocators[PoolIndex]->CleanUpAllocations(0);
delete PoolAllocators[PoolIndex];
PoolAllocators[PoolIndex] = nullptr;
}
}
/**
 * Allocates a texture from the pool that matches its usage.
 *
 * The placement alignment is chosen first (small, MSAA or default), then the texture is routed to
 * the ReadOnly4K, ReadOnly, RenderTarget or UAV pool. Read-only textures use the default state
 * mode, all others are multi state.
 *
 * @param Desc             Texture description; taken by value because its Alignment is overwritten.
 * @param ClearValue       Optional clear value forwarded to the pool.
 * @param UEFormat         Engine pixel format, used to decide whether 4K alignment is possible.
 * @param TextureLocation  Receives the allocation.
 * @param InitialState     Initial resource state.
 * @param Name             Debug name.
 * @return Always S_OK.
 */
HRESULT FD3D12TextureAllocatorPool::AllocateTexture(
	FD3D12ResourceDesc Desc,
	const D3D12_CLEAR_VALUE* ClearValue,
	EPixelFormat UEFormat,
	FD3D12ResourceLocation& TextureLocation,
	const D3D12_RESOURCE_STATES InitialState,
	const TCHAR* Name)
{
	// The top mip level must be less than 64 KB to use 4 KB alignment
	const bool bSmallAlignment = FD3D12Texture::CanBe4KAligned(Desc, (EPixelFormat)UEFormat);
	if (bSmallAlignment)
	{
		Desc.Alignment = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
	}
	else if (Desc.SampleDesc.Count > 1)
	{
		Desc.Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT;
	}
	else
	{
		Desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
	}

	const D3D12_RESOURCE_ALLOCATION_INFO Info = GetParentDevice()->GetResourceAllocationInfoUncached(Desc);

	const bool bIsRenderTarget = EnumHasAnyFlags(Desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
	const bool bIsWritable = bIsRenderTarget || EnumHasAnyFlags(Desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) || Desc.NeedsUAVAliasWorkarounds();

	// Route the texture to its pool and pick the matching state mode
	ED3D12ResourceStateMode ResourceStateMode;
	EPoolType PoolType;
	if (bIsWritable)
	{
		ResourceStateMode = ED3D12ResourceStateMode::MultiState;
		PoolType = bIsRenderTarget ? EPoolType::RenderTarget : EPoolType::UAV;
	}
	else
	{
		ResourceStateMode = ED3D12ResourceStateMode::Default;
		PoolType = bSmallAlignment ? EPoolType::ReadOnly4K : EPoolType::ReadOnly;
	}

	PoolAllocators[(int)PoolType]->AllocateResource(GetParentDevice()->GetGPUIndex(), D3D12_HEAP_TYPE_DEFAULT, Desc, Info.SizeInBytes, Info.Alignment, ResourceStateMode, InitialState, ClearValue, Name, TextureLocation);

	return S_OK;
}
// Per-frame maintenance: optionally runs defragmentation on every pool (when both defrag
// console variables are positive) and then flushes each pool's pending copy operations
// on the graphics context of this device.
void FD3D12TextureAllocatorPool::BeginFrame(FD3D12ContextArray const& Contexts)
{
	constexpr uint32 NumPools = static_cast<uint32>(EPoolType::Count);

	const bool bDefragRequested = GD3D12VRAMTexturePoolDefrag > 0 && GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame > 0;
	if (bDefragRequested)
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(DefragTexturePool);

		// The same limit / running-total pair is handed to every pool in turn.
		uint32 CopySizeLimit = GD3D12VRAMTexturePoolDefragMaxCopySizePerFrame;
		uint32 RunningCopySize = 0;
		for (uint32 Index = 0; Index != NumPools; ++Index)
		{
			PoolAllocators[Index]->Defrag(Contexts, CopySizeLimit, RunningCopySize);
		}
	}

	{
		// @todo dev-pr - explicit use of graphics context - nothing is synchronizing async compute - needs refactor
		FD3D12CommandContext& CommandContext = *Contexts[ERHIPipeline::Graphics]->GetSingleDeviceContext(GetParentDevice()->GetGPUIndex());

		TRACE_CPUPROFILER_EVENT_SCOPE(FlushPendingTextureCopyOps);
		RHI_BREADCRUMB_EVENT(CommandContext, "TexturePoolCopyOps");

		for (uint32 Index = 0; Index != NumPools; ++Index)
		{
			PoolAllocators[Index]->FlushPendingCopyOps(CommandContext);
		}
	}
}
void FD3D12TextureAllocatorPool::CleanUpAllocations()
{
for (uint32 PoolIndex = 0; PoolIndex < (uint32)EPoolType::Count; ++PoolIndex)
{
PoolAllocators[PoolIndex]->CleanUpAllocations(20);
}
}
// Gathers memory statistics from every pool allocator.
//
// OutTotalAllocated receives the accumulated "memory allocated" counter and OutTotalUnused
// the accumulated "free memory" counter; the remaining counters are collected but not reported.
// Always returns true.
//
// NOTE(review): the accumulators are 32-bit, so combined totals above 4 GiB would wrap -
// confirm against the UpdateMemoryStats signature before widening them.
bool FD3D12TextureAllocatorPool::GetMemoryStats(uint64& OutTotalAllocated, uint64& OutTotalUnused) const
{
	uint32 TotalAllocated = 0;
	uint32 TotalUsed = 0;
	uint32 TotalFree = 0;
	uint32 TotalEndFree = 0;
	uint32 TotalAlignmentWaste = 0;
	uint32 TotalAllocatedPages = 0;
	uint32 TotalFullPages = 0;

	constexpr uint32 NumPools = static_cast<uint32>(EPoolType::Count);
	for (uint32 Index = 0; Index != NumPools; ++Index)
	{
		auto* Allocator = PoolAllocators[Index];
		Allocator->UpdateMemoryStats(TotalAllocated, TotalUsed, TotalFree, TotalEndFree, TotalAlignmentWaste, TotalAllocatedPages, TotalFullPages);
	}

	OutTotalUnused = TotalFree;
	OutTotalAllocated = TotalAllocated;
	return true;
}
#elif D3D12RHI_SEGREGATED_TEXTURE_ALLOC
// Segregated-list variant: sets up a single read-only texture pool on default heaps that
// only allows non-RT/DS textures, sized from the d3d12.ReadOnlyTextureAllocator.* variables.
FD3D12TextureAllocatorPool::FD3D12TextureAllocatorPool(FD3D12Device* Device, FRHIGPUMask VisibilityNode)
	: FD3D12DeviceChild(Device)
	, FD3D12MultiNodeGPUObject(Device->GetGPUMask(), VisibilityNode)
	, ReadOnlyTexturePool(Device, VisibilityNode, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES, GD3D12ReadOnlyTextureAllocatorMinPoolSize, GD3D12ReadOnlyTextureAllocatorMinNumToPool, GD3D12ReadOnlyTextureAllocatorMaxPoolSize)
{
}
// Segregated-list variant of texture allocation.
//
// Single-sample textures without RT/DS/UAV flags, without the UAV alias workaround and not
// in NV12 format are placed in ReadOnlyTexturePool. Everything else, and any texture the
// pool could not find room for, is created as a committed resource.
//
// Returns the HRESULT of the resource creation call that was attempted.
// NOTE(review): when CreatePlacedResource fails the pool offset obtained from Allocate()
// does not appear to be handed back - confirm whether that path can occur in practice.
HRESULT FD3D12TextureAllocatorPool::AllocateTexture(
	FD3D12ResourceDesc Desc,
	const D3D12_CLEAR_VALUE* ClearValue,
	EPixelFormat UEFormat,
	FD3D12ResourceLocation& TextureLocation,
	const D3D12_RESOURCE_STATES InitialState,
	const TCHAR* Name)
{
	FD3D12Device* Device = GetParentDevice();
	FD3D12Adapter* Adapter = Device->GetParentAdapter();
	FD3D12Resource* NewResource = nullptr;
	HRESULT RetCode = S_OK;

	TextureLocation.Clear();

	const bool bHasWritableFlags = EnumHasAnyFlags(Desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET|D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL|D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
	const bool bPoolEligible =
		!bHasWritableFlags &&
		!Desc.NeedsUAVAliasWorkarounds() &&
		// 4K align with NV12 causes a crash on HoloLens 2.
		Desc.Format != DXGI_FORMAT_NV12 &&
		Desc.SampleDesc.Count == 1;

	if (bPoolEligible)
	{
		// The top mip level must be less than 64 KB to use 4 KB alignment
		if (FD3D12Texture::CanBe4KAligned(Desc, UEFormat))
		{
			Desc.Alignment = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
		}
		else
		{
			Desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
		}

		const D3D12_RESOURCE_ALLOCATION_INFO Info = Device->GetResourceAllocationInfoUncached(Desc);

		TRefCountPtr<FD3D12SegHeap> BackingHeap;
		const uint32 PoolOffset = ReadOnlyTexturePool.Allocate(Info.SizeInBytes, Info.Alignment, BackingHeap);

		const bool bGotPoolSpace = PoolOffset != FD3D12SegListAllocator::InvalidOffset;
		if (bGotPoolSpace)
		{
			RetCode = Adapter->CreatePlacedResource(Desc, BackingHeap.GetReference(), PoolOffset, InitialState, ClearValue, &NewResource, Name, false);
			if (SUCCEEDED(RetCode))
			{
				// Record where the texture lives so it can be returned to the pool later.
				TextureLocation.GetSegListAllocatorPrivateData().Offset = PoolOffset;
				TextureLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation);
				TextureLocation.SetSegListAllocator(&ReadOnlyTexturePool);
				TextureLocation.SetSize(Info.SizeInBytes);
				TextureLocation.SetOffsetFromBaseOfResource(PoolOffset);
				TextureLocation.SetResource(NewResource);
				INC_DWORD_STAT(STAT_D3D12TextureAllocatorCount);
			}
			return RetCode;
		}
	}

	// Fallback: stand-alone committed resource with the alignment field reset to 0.
	const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, GetGPUMask().GetNative(), GetVisibilityMask().GetNative());
	Desc.Alignment = 0;
	VERIFYD3D12RESULT(RetCode = Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InitialState, ClearValue, &NewResource, Name, false));
	TextureLocation.AsStandAlone(NewResource);
	return RetCode;
}
#else
// Configures the underlying multi-buddy allocator for placed texture resources:
// default heap type, caller-supplied heap flags plus DENY_BUFFERS, no resource flags,
// GENERIC_READ initial state, 64 KB default placement alignment, HeapSize per heap and a
// 4 KB (small placement) minimum block size.
FD3D12TextureAllocator::FD3D12TextureAllocator(
	FD3D12Device* Device,
	FRHIGPUMask VisibleNodes,
	const FString& Name,
	uint32 HeapSize,
	D3D12_HEAP_FLAGS Flags,
	HeapId InTraceParentHeapId)
	: FD3D12MultiBuddyAllocator(
		Device,
		VisibleNodes,
		FD3D12ResourceInitConfig{ D3D12_HEAP_TYPE_DEFAULT, Flags | D3D12_HEAP_FLAG_DENY_BUFFERS, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_GENERIC_READ },
		Name,
		EResourceAllocationStrategy::kPlacedResource,
		D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
		HeapSize,
		D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT,
		InTraceParentHeapId)
{
}
// Nothing to release here beyond what the base class and members clean up themselves.
FD3D12TextureAllocator::~FD3D12TextureAllocator() = default;
// Allocates a texture, preferring a placed resource inside one of the buddy allocator heaps.
//
// Textures whose allocation size is below D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT are
// sub-allocated via TryAllocate() and created as placed resources. Larger textures, and
// small ones for which TryAllocate() fails, become stand-alone committed resources.
//
// Desc is taken by value because its Alignment is reset for the committed fallback.
// Returns the HRESULT of the resource creation call. On a failed committed creation
// TextureLocation is left cleared instead of being bound to a null resource, matching
// FD3D12TextureAllocatorPool::AllocateTexture in this build configuration.
HRESULT FD3D12TextureAllocator::AllocateTexture(FD3D12ResourceDesc Desc, const D3D12_CLEAR_VALUE* ClearValue, FD3D12ResourceLocation& TextureLocation, const D3D12_RESOURCE_STATES InitialState, const TCHAR* Name)
{
	FD3D12Device* Device = GetParentDevice();
	FD3D12Adapter* Adapter = Device->GetParentAdapter();
	HRESULT hr = S_OK;
	FD3D12Resource* NewResource = nullptr;

	TextureLocation.Clear();

	D3D12_RESOURCE_ALLOCATION_INFO Info = Device->GetResourceAllocationInfoUncached(Desc);
	if (Info.SizeInBytes < D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT)
	{
		if (TryAllocate(Info.SizeInBytes, Info.Alignment, TextureLocation))
		{
			FD3D12Heap* BackingHeap = ((FD3D12BuddyAllocator*)TextureLocation.GetAllocator())->GetBackingHeap();
			uint64 HeapOffset = TextureLocation.GetAllocator()->GetAllocationOffsetInBytes(TextureLocation.GetBuddyAllocatorPrivateData());
			hr = Adapter->CreatePlacedResource(Desc, BackingHeap, HeapOffset, InitialState, ClearValue, &NewResource, Name, false);

			// NOTE(review): hr is not checked before the location is bound to NewResource.
			// Left unchanged because the correct way to give the buddy block back on
			// failure is not visible from here - confirm and handle separately.
			TextureLocation.SetType(FD3D12ResourceLocation::ResourceLocationType::eSubAllocation);
			TextureLocation.SetResource(NewResource);
			return hr;
		}
	}

	// Request default alignment for stand alone textures
	Desc.Alignment = 0;
	const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, GetGPUMask().GetNative(), (uint32)GetVisibilityMask().GetNative());
	hr = Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InitialState, ClearValue, &NewResource, Name, false);

	// Only bind the location when a resource was actually created; NewResource stays null on failure.
	if (SUCCEEDED(hr))
	{
		TextureLocation.AsStandAlone(NewResource, Info.SizeInBytes);
	}
	return hr;
}
// Buddy-allocator variant: registers a "Texture Allocator Pool" memory-trace heap under
// video memory and creates the read-only pool for small non-RT/DS textures beneath it.
FD3D12TextureAllocatorPool::FD3D12TextureAllocatorPool(FD3D12Device* Device, FRHIGPUMask VisibilityNode)
	: FD3D12DeviceChild(Device)
	, FD3D12MultiNodeGPUObject(Device->GetGPUMask(), VisibilityNode)
	, TraceHeapId(MemoryTrace_HeapSpec(EMemoryTraceRootHeap::VideoMemory, TEXT("Texture Allocator Pool")))
	, ReadOnlyTexturePool(Device, VisibilityNode, FString(L"Small Non-DS/RT Texture allocator"), TEXTURE_POOL_SIZE, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES, TraceHeapId)
{
}
// Buddy-allocator variant of texture allocation.
//
// Textures that qualify for 4 KB alignment are routed to ReadOnlyTexturePool; all others
// are created directly as committed resources. TextureLocation is only filled in for the
// committed path when creation succeeds. Returns the HRESULT of whichever path was taken.
HRESULT FD3D12TextureAllocatorPool::AllocateTexture(FD3D12ResourceDesc Desc, const D3D12_CLEAR_VALUE* ClearValue, EPixelFormat UEFormat, FD3D12ResourceLocation& TextureLocation, const D3D12_RESOURCE_STATES InitialState, const TCHAR* Name)
{
	const bool bSmallAlignmentAllowed = FD3D12Texture::CanBe4KAligned(Desc, UEFormat);
	if (!bSmallAlignmentAllowed)
	{
		FD3D12Device* const Device = GetParentDevice();
		FD3D12Adapter* const Adapter = Device->GetParentAdapter();

		const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, GetGPUMask().GetNative(), GetVisibilityMask().GetNative());
		const D3D12_RESOURCE_ALLOCATION_INFO Info = Device->GetResourceAllocationInfoUncached(Desc);

		FD3D12Resource* CommittedResource = nullptr;
		const HRESULT Result = Adapter->CreateCommittedResource(Desc, GetGPUMask(), HeapProps, InitialState, ClearValue, &CommittedResource, Name, false);
		if (SUCCEEDED(Result))
		{
			TextureLocation.AsStandAlone(CommittedResource, Info.SizeInBytes);
		}
		return Result;
	}

	// request 4k alignment
	Desc.Alignment = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
	return ReadOnlyTexturePool.AllocateTexture(Desc, ClearValue, TextureLocation, InitialState, Name);
}
#endif
//-----------------------------------------------------------------------------
// Fast Allocation
//-----------------------------------------------------------------------------
// Creates a fast allocator whose page pool is described by a heap type and a page size.
// No page is requested up front; CurrentAllocatorPage starts out null.
FD3D12FastAllocator::FD3D12FastAllocator(
	FD3D12Device* Parent,
	FRHIGPUMask VisibiltyMask,
	D3D12_HEAP_TYPE InHeapType,
	uint32 PageSize)
	: FD3D12DeviceChild(Parent)
	, FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask)
	, PagePool(Parent, VisibiltyMask, InHeapType, PageSize)
	, CurrentAllocatorPage(nullptr)
{
}
// Creates a fast allocator whose page pool is described by explicit heap properties and a
// page size. No page is requested up front; CurrentAllocatorPage starts out null.
FD3D12FastAllocator::FD3D12FastAllocator(
	FD3D12Device* Parent,
	FRHIGPUMask VisibiltyMask,
	const D3D12_HEAP_PROPERTIES& InHeapProperties,
	uint32 PageSize)
	: FD3D12DeviceChild(Parent)
	, FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask)
	, PagePool(Parent, VisibiltyMask, InHeapProperties, PageSize)
	, CurrentAllocatorPage(nullptr)
{
}
// Allocates Size bytes with the requested Alignment and describes the result in ResourceLocation.
//
// Requests larger than one page do not go through the page pool:
//  - for upload heaps they are forwarded to the adapter's upload heap allocator;
//  - otherwise a dedicated stand-alone buffer of Size + Alignment bytes is created.
// Smaller requests are carved linearly out of the current page under the CS lock; when the
// current page cannot fit the request it is returned to the pool and a new page is requested.
//
// Alignment may be 0, meaning "no alignment requirement". The stand-alone path already
// tolerated that; the paged path now does too instead of passing 0 on to AlignArbitrary,
// which divides by the alignment.
//
// ResourceLocation must not already reference a valid resource.
// Returns the CPU address of the allocation. The stand-alone path returns nullptr when the
// page pool's heap is not CPU writable.
void* FD3D12FastAllocator::Allocate(uint32 Size, uint32 Alignment, class FD3D12ResourceLocation* ResourceLocation)
{
	// Check to make sure our assumption that we don't need a ResourceLocation->Clear() here is valid.
	checkf(!ResourceLocation->IsValid(), TEXT("The supplied resource location already has a valid resource. You should Clear() it first or it may leak."));

	if (Size > PagePool.GetPageSize())
	{
		FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter();

		// If upload memory then fallback to the shader upload heap allocator which support dynamic sized allocation of bigger sizes
		if (PagePool.GetHeapType() == D3D12_HEAP_TYPE_UPLOAD)
		{
			return Adapter->GetUploadHeapAllocator(GetGPUMask().ToIndex()).AllocUploadResource(Size, Alignment, *ResourceLocation);
		}

		//Allocations are 64k aligned
		if (Alignment)
		{
			// No padding is needed when the buffer alignment is already a multiple of the request.
			Alignment = (D3D_BUFFER_ALIGNMENT % Alignment) == 0 ? 0 : Alignment;
		}

		FD3D12Resource* Resource = nullptr;
		FString ResourceName;
#if NAME_OBJECTS
		static int64 ID = 0;
		const int64 UniqueID = FPlatformAtomics::InterlockedIncrement(&ID);
		ResourceName = FString::Printf(TEXT("Stand Alone Fast Allocation %lld"), UniqueID);
#endif
		VERIFYD3D12RESULT(Adapter->CreateBuffer(PagePool.GetHeapType(), GetGPUMask(), GetVisibilityMask(), Size + Alignment, &Resource, *ResourceName));
		ResourceLocation->AsStandAlone(Resource, Size + Alignment);
		return PagePool.IsCPUWritable() ? Resource->GetResourceBaseAddress() : nullptr;
	}
	else
	{
		FD3D12ScopeLock Lock(&CS);

		// Rounds an offset up to Alignment; a zero Alignment leaves the offset untouched.
		const auto AlignOffset = [Alignment](uint32 InOffset) -> uint32
		{
			return (Alignment != 0) ? AlignArbitrary(InOffset, Alignment) : InOffset;
		};

		const uint32 Offset = (CurrentAllocatorPage) ? CurrentAllocatorPage->NextFastAllocOffset : 0;
		uint32 CurrentOffset = AlignOffset(Offset);

		// See if there is room in the current pool
		if (CurrentAllocatorPage == nullptr || PagePool.GetPageSize() < CurrentOffset + Size)
		{
			if (CurrentAllocatorPage)
			{
				PagePool.ReturnFastAllocatorPage(CurrentAllocatorPage);
			}
			CurrentAllocatorPage = PagePool.RequestFastAllocatorPage();
			CurrentOffset = AlignOffset(CurrentAllocatorPage->NextFastAllocOffset);
		}

		check(PagePool.GetPageSize() - Size >= CurrentOffset);

		// Create a FD3D12ResourceLocation representing a sub-section of the pool resource
		ResourceLocation->AsFastAllocation(CurrentAllocatorPage->FastAllocBuffer.GetReference(),
			Size,
			CurrentAllocatorPage->FastAllocBuffer->GetGPUVirtualAddress(),
			CurrentAllocatorPage->FastAllocData,
			0,
			CurrentOffset);

		// Advance the page cursor and refresh the page's fence for this use.
		CurrentAllocatorPage->NextFastAllocOffset = CurrentOffset + Size;
		CurrentAllocatorPage->UpdateFence();

		check(ResourceLocation->GetMappedBaseAddress());
		return ResourceLocation->GetMappedBaseAddress();
	}
}
// Forwards to the page pool's CleanupPages while holding the allocator lock.
void FD3D12FastAllocator::CleanupPages(uint64 FrameLag)
{
	FD3D12ScopeLock ScopedLock(&CS);
	PagePool.CleanupPages(FrameLag);
}
// Hands the page currently in use (if any) back to the pool and then destroys the pool,
// all under the allocator lock.
void FD3D12FastAllocator::Destroy()
{
	FD3D12ScopeLock ScopedLock(&CS);

	if (CurrentAllocatorPage != nullptr)
	{
		PagePool.ReturnFastAllocatorPage(CurrentAllocatorPage);
		CurrentAllocatorPage = nullptr;
	}

	PagePool.Destroy();
}
// Creates a page pool for pages of Size bytes; the heap properties are derived from the
// heap type, the parent device's GPU mask and the visibility mask.
FD3D12FastAllocatorPagePool::FD3D12FastAllocatorPagePool(
	FD3D12Device* Parent,
	FRHIGPUMask VisibiltyMask,
	D3D12_HEAP_TYPE InHeapType,
	uint32 Size)
	: FD3D12DeviceChild(Parent)
	, FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask)
	, PageSize(Size)
	, HeapProperties(CD3DX12_HEAP_PROPERTIES(InHeapType, Parent->GetGPUMask().GetNative(), VisibiltyMask.GetNative()))
{
}
// Creates a page pool for pages of Size bytes using caller-supplied heap properties.
FD3D12FastAllocatorPagePool::FD3D12FastAllocatorPagePool(
	FD3D12Device* Parent,
	FRHIGPUMask VisibiltyMask,
	const D3D12_HEAP_PROPERTIES& InHeapProperties,
	uint32 Size)
	: FD3D12DeviceChild(Parent)
	, FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask)
	, PageSize(Size)
	, HeapProperties(InHeapProperties)
{
}
// Returns a page ready for new allocations.
//
// The first pooled page whose buffer has a ref count of 1 and whose fence is at or below
// the completed frame fence is reset, removed from the pool and reused. When no pooled
// page qualifies, a new page is created, its buffer is mapped and the page is returned
// without being added to the pool.
FD3D12FastAllocatorPage* FD3D12FastAllocatorPagePool::RequestFastAllocatorPage()
{
	FD3D12Adapter* Adapter = GetParentDevice()->GetParentAdapter();
	const uint64 CompletedFence = Adapter->GetFrameFence().GetCompletedFenceValue(/* bUpdateCachedFenceValue */ true);

	for (int32 PageIndex = 0; PageIndex < Pool.Num(); ++PageIndex)
	{
		FD3D12FastAllocatorPage* const Candidate = Pool[PageIndex];

		// Skip pages somebody still holds a reference to, or that the GPU may still be using.
		if (Candidate->FastAllocBuffer->GetRefCount() != 1 || Candidate->FrameFence > CompletedFence)
		{
			continue;
		}

		Candidate->Reset();
		Pool.RemoveAt(PageIndex);
		return Candidate;
	}

	// Nothing reusable: build a fresh page.
	FD3D12FastAllocatorPage* Page = new FD3D12FastAllocatorPage(PageSize);

	const D3D12_RESOURCE_STATES InitialState = DetermineInitialResourceState(HeapProperties.Type, &HeapProperties);
	VERIFYD3D12RESULT(Adapter->CreateBuffer(HeapProperties, GetGPUMask(), InitialState, ED3D12ResourceStateMode::SingleState, InitialState, PageSize, Page->FastAllocBuffer.GetInitReference(), TEXT("Fast Allocator Page")));

	Page->FastAllocBuffer->DoNotDeferDelete();
	Page->FastAllocData = Page->FastAllocBuffer->Map();
	return Page;
}
// When memory tracing is compiled in, reports the page buffer's GPU address as freed.
FD3D12FastAllocatorPage::~FD3D12FastAllocatorPage()
{
#if UE_MEMORY_TRACE_ENABLED
	// Matches MemoryTrace_Alloc issued from CreateBuffer call inside RequestFastAllocatorPage() function
	const auto TracedAddress = FastAllocBuffer->GetGPUVirtualAddress();
	MemoryTrace_Free(TracedAddress, EMemoryTraceRootHeap::VideoMemory);
#endif
}
void FD3D12FastAllocatorPage::UpdateFence()
{
// Fence value must be updated every time the page is used to service an allocation.
// Max() is required as fast allocator may be used from Render or RHI thread,
// which have different fence values. See FD3D12ManualFence::GetCurrentFence() implementation.
FD3D12Adapter* Adapter = FastAllocBuffer->GetParentDevice()->GetParentAdapter();
FrameFence = FMath::Max(FrameFence, Adapter->GetFrameFence().GetNextFenceToSignal());
}
// Puts a page back at the end of the pool after refreshing its fence value.
void FD3D12FastAllocatorPagePool::ReturnFastAllocatorPage(FD3D12FastAllocatorPage* Page)
{
	// TODO: AFR has been removed, but I don't understand the comment, so I'm leaving it. What did this code have to do with AFR?
	// Extend the lifetime of these resources when in AFR as other nodes might be relying on this
	Page->UpdateFence();

	Pool.Add(Page);
}
// Deletes at most one pooled page per call.
//
// Nothing happens while the pool holds GD3D12FastAllocatorMinPagesToRetain pages or fewer.
// Otherwise the first page whose buffer has a ref count of 1 and whose fence plus FrameLag
// is at or below the completed frame fence is removed from the pool and deleted.
void FD3D12FastAllocatorPagePool::CleanupPages(uint64 FrameLag)
{
	const bool bAboveRetainCount = Pool.Num() > GD3D12FastAllocatorMinPagesToRetain;
	if (!bAboveRetainCount)
	{
		return;
	}

	FD3D12ManualFence& FrameFence = GetParentDevice()->GetParentAdapter()->GetFrameFence();
	const uint64 CompletedFence = FrameFence.GetCompletedFenceValue(/* bUpdateCachedFenceValue */ true);

	// Pages get returned to end of list, so we'll look for pages to delete, starting from the LRU
	for (int32 PageIndex = 0; PageIndex < Pool.Num(); ++PageIndex)
	{
		FD3D12FastAllocatorPage* const Candidate = Pool[PageIndex];

		// Skip pages that are still referenced or that the GPU has not finished with yet.
		if (Candidate->FastAllocBuffer->GetRefCount() != 1 || Candidate->FrameFence + FrameLag > CompletedFence)
		{
			continue;
		}

		Pool.RemoveAt(PageIndex);
		delete Candidate;

		// Only release at most one page per frame
		return;
	}
}
void FD3D12FastAllocatorPagePool::Destroy()
{
for (int32 i = 0; i < Pool.Num(); i++)
{
//check(Pool[i]->FastAllocBuffer->GetRefCount() == 1);
{
FD3D12FastAllocatorPage *Page = Pool[i];
delete(Page);
Page = nullptr;
}
}
Pool.Empty();
}
// Sets up the constant allocator with a page size taken from
// GD3D12FastConstantAllocatorPageSize, which must be a multiple of the constant buffer
// placement alignment.
FD3D12FastConstantAllocator::FD3D12FastConstantAllocator(FD3D12Device* Parent, FRHIGPUMask VisibiltyMask)
	: FD3D12DeviceChild(Parent)
	, FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibiltyMask)
	, UnderlyingResource(Parent)
	// Initial offset is at end of page so that first Allocate() call triggers a page allocation
	, Offset(GD3D12FastConstantAllocatorPageSize)
	, PageSize(GD3D12FastConstantAllocatorPageSize)
{
	check(PageSize % D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT == 0);
}
// Sub-allocates constant buffer memory from the current page.
//
// Bytes is rounded up to the constant buffer placement alignment and must not exceed
// PageSize. When the rounded size does not fit behind the current Offset, a new page is
// obtained from the upload heap allocator and Offset restarts at 0.
//
// OutLocation is set up as a fast allocation inside the page. When OutCBView is non-null,
// a view is created on it covering the same range.
// Returns the mapped base address reported by OutLocation.
void* FD3D12FastConstantAllocator::Allocate(uint32 Bytes, FD3D12ResourceLocation& OutLocation, FD3D12ConstantBufferView* OutCBView)
{
	check(Bytes <= PageSize);

	const uint32 AlignedSize = Align(Bytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);

	const bool bPageExhausted = (Offset + AlignedSize) > PageSize;
	if (bPageExhausted)
	{
		Offset = 0;

		FD3D12Device* const Device = GetParentDevice();
		FD3D12UploadHeapAllocator& UploadAllocator = Device->GetParentAdapter()->GetUploadHeapAllocator(Device->GetGPUIndex());
		UploadAllocator.AllocFastConstantAllocationPage(PageSize, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, UnderlyingResource);
	}

	OutLocation.AsFastAllocation(
		UnderlyingResource.GetResource(),
		AlignedSize,
		UnderlyingResource.GetGPUVirtualAddress(),
		UnderlyingResource.GetMappedBaseAddress(),
		UnderlyingResource.GetOffsetFromBaseOfResource(), // AllocUploadResource returns a suballocated resource where we're suballocating (again) from
		Offset);

	if (OutCBView != nullptr)
	{
		OutCBView->CreateView(&UnderlyingResource, Offset, AlignedSize);
	}

	// Advance the cursor past the range just handed out.
	Offset += AlignedSize;

	return OutLocation.GetMappedBaseAddress();
}
// Creates a new D3D12 heap of HeapSize bytes, wraps it in an FD3D12SegHeap owned by this
// list and appends it to FreeHeaps. The wrapper is given the FreeHeaps count from before
// the append. CREATE_NOT_ZEROED is added to the heap flags when the adapter reports
// support for it. Heap creation failure is handled by VERIFYD3D12RESULT.
FD3D12SegHeap* FD3D12SegList::CreateBackingHeap(
	FD3D12Device* Parent,
	FRHIGPUMask VisibleNodeMask,
	D3D12_HEAP_TYPE HeapType,
	D3D12_HEAP_FLAGS HeapFlags)
{
	// CS can be unlocked at this point and re-locked before adding it to FreeHeaps
	// but doing so may cause multiple heaps to be created
	D3D12_HEAP_FLAGS CreationFlags = HeapFlags;
	if (Parent->GetParentAdapter()->IsHeapNotZeroedSupported())
	{
		CreationFlags |= FD3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
	}

	D3D12_HEAP_DESC Desc = {};
	Desc.SizeInBytes = HeapSize;
	Desc.Properties = CD3DX12_HEAP_PROPERTIES(HeapType, Parent->GetGPUMask().GetNative(), VisibleNodeMask.GetNative());
	Desc.Flags = CreationFlags;

	ID3D12Heap* D3DHeap = nullptr;
	VERIFYD3D12RESULT(Parent->GetDevice()->CreateHeap(&Desc, IID_PPV_ARGS(&D3DHeap)));

	FD3D12SegHeap* const NewHeap = new FD3D12SegHeap(Parent, VisibleNodeMask, D3DHeap, HeapSize, this, FreeHeaps.Num());
	FreeHeaps.Add(NewHeap);
	return NewHeap;
}
// Stores the heap type/flags and the pool sizing parameters; no heaps are created here.
// When wastage tracking is compiled in, the requested-bytes counter starts at zero.
FD3D12SegListAllocator::FD3D12SegListAllocator(
	FD3D12Device* Parent,
	FRHIGPUMask VisibilityMask,
	D3D12_HEAP_TYPE InHeapType,
	D3D12_HEAP_FLAGS InHeapFlags,
	uint32 InMinPoolSize,
	uint32 InMinNumToPool,
	uint32 InMaxPoolSize)
	: FD3D12DeviceChild(Parent)
	, FD3D12MultiNodeGPUObject(Parent->GetGPUMask(), VisibilityMask)
	, HeapType(InHeapType)
	, HeapFlags(InHeapFlags)
	, MinPoolSize(InMinPoolSize)
	, MinNumToPool(InMinNumToPool)
	, MaxPoolSize(InMaxPoolSize)
#if D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE
	, TotalBytesRequested(0)
#endif
{
}
// Queues a placed resource for deferred release.
//
// Retired blocks are grouped by the frame fence value that is current when they are
// retired: FenceValues[i] is the fence for the group DeferredDeletionQueue[i]. A new group
// is opened whenever the current fence differs from the last recorded one.
void FD3D12SegListAllocator::Deallocate(
	FD3D12Resource* PlacedResource,
	uint32 Offset,
	uint32 SizeInBytes)
{
	FD3D12Adapter* const Adapter = this->GetParentDevice()->GetParentAdapter();
	const uint64 CurFenceValue = Adapter->GetFrameFence().GetNextFenceToSignal();

	{
		FScopeLock ScopedLock(&DeferredDeletionCS);

		check(FenceValues.Num() == DeferredDeletionQueue.Num());
		check(!FenceValues.Num() || FenceValues.Last() <= CurFenceValue);

		const bool bNeedsNewGroup = FenceValues.Num() == 0 || FenceValues.Last() != CurFenceValue;
		if (bNeedsNewGroup)
		{
			FenceValues.Add(CurFenceValue);
			DeferredDeletionQueue.AddDefaulted();
		}

		// The group for the current fence is always the last one at this point.
		const int32 GroupIndex = FenceValues.Num() - 1;
		new (DeferredDeletionQueue[GroupIndex]) FRetiredBlock(PlacedResource, Offset, SizeInBytes);
	}

	DEC_DWORD_STAT(STAT_D3D12TextureAllocatorCount);
}
// Releases every retired block in PendingDeletes and gives its space back to the owning
// segregated list.
//
// AllocX is the allocator of the inner (per-fence) arrays and AllocY that of the outer
// array; the inner reference uses AllocX so any combination of allocators can be passed.
// A block whose resource ref count is not exactly 1 trips the ensure and is skipped.
template <typename AllocX, typename AllocY>
void FD3D12SegListAllocator::FreeRetiredBlocks(TArray<TArray<FRetiredBlock, AllocX>, AllocY>& PendingDeletes)
{
	for (TArray<FRetiredBlock, AllocX>& RetiredBlocks : PendingDeletes)
	{
		for (FRetiredBlock& Block : RetiredBlocks)
		{
			if (ensureAlwaysMsgf(Block.PlacedResource->GetRefCount() == 1, TEXT("Invalid refcount while releasing %s"), *Block.PlacedResource->GetName().ToString()))
			{
				// Fetch the heap before releasing the resource that refers to it.
				FD3D12SegHeap* BackingHeap = static_cast<FD3D12SegHeap*>(Block.PlacedResource->GetHeap());
				Block.PlacedResource->Release();

				FD3D12SegList* Owner = BackingHeap->OwnerList;
				check(!!Owner);
				Owner->FreeBlock(BackingHeap, Block.Offset);
				OnFree(Block.Offset, BackingHeap, Block.ResourceSize);
			}
		}
	}
}
void FD3D12SegListAllocator::CleanUpAllocations()
{
TArray<TArray<FRetiredBlock>, TInlineAllocator<1>> PendingDeletes;
{
int32 NumToRemove = 0;
FD3D12Device* Device = this->GetParentDevice();
FD3D12Adapter* Adapter = Device->GetParentAdapter();
FD3D12ManualFence& FrameFence = Adapter->GetFrameFence();
FScopeLock Lock(&DeferredDeletionCS);
for (int32 Idx = 0; Idx < DeferredDeletionQueue.Num(); ++Idx)
{
if (FrameFence.IsFenceComplete(FenceValues[Idx], /* bUpdateCachedFenceValue */ false))
{
++NumToRemove;
PendingDeletes.Add(MoveTemp(DeferredDeletionQueue[Idx]));
}
else
{
break;
}
}
if (!!NumToRemove)
{
FenceValues.RemoveAt(0, NumToRemove);
DeferredDeletionQueue.RemoveAt(0, NumToRemove);
}
}
FreeRetiredBlocks(PendingDeletes);
}
// Shuts the allocator down in two steps:
//  1. under DeferredDeletionCS, frees every retired block regardless of its fence, empties
//     the deferred queues and runs VerifyEmpty();
//  2. under the SegLists write lock, deletes every segregated list and empties the map.
void FD3D12SegListAllocator::Destroy()
{
	{
		FScopeLock DeletionLock(&DeferredDeletionCS);

		check(FenceValues.Num() == DeferredDeletionQueue.Num());
		FreeRetiredBlocks(DeferredDeletionQueue);

		DeferredDeletionQueue.Empty();
		FenceValues.Empty();

		VerifyEmpty();
	}

	{
		FRWScopeLock ListsLock(SegListsRWLock, SLT_Write);

		for (auto& Entry : SegLists)
		{
			FD3D12SegList*& SegList = Entry.Value;
			check(!!SegList);
			delete SegList;
			SegList = nullptr;
		}

		SegLists.Empty();
	}
}
#if D3D12RHI_SEGLIST_ALLOC_TRACK_WASTAGE
void FD3D12SegListAllocator::VerifyEmpty()
{
FScopeLock Lock(&SegListTrackedAllocationCS);
if(SegListTrackedAllocations.Num() != 0)
{
UE_LOG(LogD3D12RHI, Warning, TEXT("Dumping leaked SegListAllocations\n"));
for (FD3D12SegListAllocatorLeakTrack& LeakTrack : SegListTrackedAllocations)
{
DumpStack(LeakTrack);
}
}
ensureMsgf(TotalBytesRequested == 0,
TEXT("FD3D12SegListAllocator contains %lld allocated bytes but is expected to be empty. This likely means a memory leak. Use d3d12.SegListTrackLeaks=1 CVar to print allocations to the log."),
(uint64)TotalBytesRequested);
}
void FD3D12SegListAllocator::DumpStack(const FD3D12SegListAllocatorLeakTrack& LeakTrack)
{
UE_LOG(LogD3D12RHI, Warning, TEXT("Leaking Allocation Heap %p Offset %d\nStack Dump\n"), LeakTrack.Heap, LeakTrack.Offset);
for(uint32 Index = 0; Index < LeakTrack.StackDepth; ++Index)
{
const size_t STRING_SIZE = 16 * 1024;
ANSICHAR StackTrace[STRING_SIZE];
StackTrace[0] = 0;
FPlatformStackWalk::ProgramCounterToHumanReadableString(Index, LeakTrack.Stack[Index], StackTrace, STRING_SIZE, 0);
UE_LOG(LogD3D12RHI, Warning, TEXT("%d %S\n"), Index, StackTrace);
}
}
// Accounting hook for an allocation: adds Size to the requested-bytes counter and, when
// GD3D12SegListTrackLeaks is non-zero, records the allocation with a captured call stack.
void FD3D12SegListAllocator::OnAlloc(uint32 Offset, void* Heap, uint32 Size)
{
	TotalBytesRequested += Size;

	const bool bTrackLeaks = GD3D12SegListTrackLeaks != 0;
	if (bTrackLeaks)
	{
		FD3D12SegListAllocatorLeakTrack LeakTrack;
		LeakTrack.Offset = Offset;
		LeakTrack.Heap = Heap;
		LeakTrack.Size = Size;
		LeakTrack.StackDepth = FPlatformStackWalk::CaptureStackBackTrace(&LeakTrack.Stack[0], D3D12RHI_SEGLIST_ALLOC_TRACK_LEAK_STACK_DEPTH);

		FScopeLock TrackingLock(&SegListTrackedAllocationCS);
		// The same entry must not be recorded twice.
		check(!SegListTrackedAllocations.Contains(LeakTrack));
		SegListTrackedAllocations.Add(LeakTrack);
	}
}
// Accounting hook for a free: subtracts Size from the requested-bytes counter and, when
// GD3D12SegListTrackLeaks is non-zero, removes the matching tracked allocation. A missing
// entry or a size that differs from the recorded one is treated as a fatal error.
void FD3D12SegListAllocator::OnFree(uint32 Offset, void* Heap, uint32 Size)
{
	TotalBytesRequested -= Size;

	const bool bTrackLeaks = GD3D12SegListTrackLeaks != 0;
	if (bTrackLeaks)
	{
		// Only Offset and Heap are filled in for the lookup key.
		FD3D12SegListAllocatorLeakTrack LeakTrack;
		LeakTrack.Offset = Offset;
		LeakTrack.Heap = Heap;

		FScopeLock TrackingLock(&SegListTrackedAllocationCS);

		FD3D12SegListAllocatorLeakTrack* Element = SegListTrackedAllocations.Find(LeakTrack);
		check(Element); // element being freed was not found.

		const bool bSizeMatches = Element->Size == Size;
		if (!bSizeMatches)
		{
			UE_LOG(LogD3D12RHI, Warning, TEXT("Mismatched alloc/free size %d != %d, %p/%08x"), Element->Size, Size, Element->Heap, Element->Offset);
			DumpStack(*Element);
			check(0); //element being freed had incorrect size.
		}

		SegListTrackedAllocations.Remove(LeakTrack);
		check(!SegListTrackedAllocations.Contains(LeakTrack));
	}
}
#endif