Files
UnrealEngine/Engine/Source/Runtime/D3D12RHI/Private/D3D12TransientResourceAllocator.cpp
2025-05-18 13:04:45 +08:00

248 lines
11 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "D3D12TransientResourceAllocator.h"
#include "D3D12RHIPrivate.h"
#include "D3D12Stats.h"
#include "ProfilingDebugging/MemoryTrace.h"
#include "ProfilingDebugging/AssetMetadataTrace.h"
// Picks the D3D12 state a resource starts in, based on its creation flags and the queue type.
// Render-target and depth-stencil states are only chosen for the direct queue; unordered access
// is chosen on any queue. When nothing matches, the result is D3D12_RESOURCE_STATE_COMMON.
D3D12_RESOURCE_STATES GetInitialResourceState(const D3D12_RESOURCE_DESC& InDesc, ED3D12QueueType QueueType)
{
	const bool bIsDirectQueue = (QueueType == ED3D12QueueType::Direct);

	// Checked in priority order: render target, then depth-stencil, then UAV.
	if (bIsDirectQueue && EnumHasAnyFlags(InDesc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET))
	{
		return D3D12_RESOURCE_STATE_RENDER_TARGET;
	}

	if (bIsDirectQueue && EnumHasAnyFlags(InDesc.Flags, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
	{
		return D3D12_RESOURCE_STATE_DEPTH_WRITE;
	}

	if (EnumHasAnyFlags(InDesc.Flags, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS))
	{
		return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
	}

	return D3D12_RESOURCE_STATE_COMMON;
}
// Creates the native D3D12 heap backing this transient heap and wraps it in an FD3D12Heap.
//
// Initializer     - supplies the heap size and the usage flags that select the D3D12 heap flags.
// Adapter         - queried for not-zeroed heap support; on Windows also used to raise the residency priority.
// Device          - device that creates the heap; its GPU index forms the creation node mask.
// VisibleNodeMask - GPUs the heap is made visible to.
FD3D12TransientHeap::FD3D12TransientHeap(const FInitializer& Initializer, FD3D12Adapter* Adapter, FD3D12Device* Device, FRHIGPUMask VisibleNodeMask)
	: FRHITransientHeap(Initializer)
{
	const static FLazyName D3D12TransientHeapName(TEXT("FD3D12TransientHeap"));
	UE_TRACE_METADATA_SCOPE_ASSET_FNAME(D3D12TransientHeapName, D3D12TransientHeapName, NAME_None);

	D3D12_HEAP_DESC Desc = {};
	Desc.SizeInBytes = Initializer.Size;
	Desc.Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT;

	// Default-type heap, created on this device's node and visible to the requested nodes.
	Desc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
	Desc.Properties.CreationNodeMask = FRHIGPUMask::FromIndex(Device->GetGPUIndex()).GetNative();
	Desc.Properties.VisibleNodeMask = VisibleNodeMask.GetNative();

	// Restrict the heap to a single resource category when exactly one was requested;
	// every other flag combination (including AllowAll) gets an unrestricted heap.
	switch (Initializer.Flags)
	{
	case ERHITransientHeapFlags::AllowBuffers:
		Desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
		break;

	case ERHITransientHeapFlags::AllowTextures:
		Desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
		break;

	case ERHITransientHeapFlags::AllowRenderTargets:
		Desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
		break;

	default:
		Desc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES;
		break;
	}

	if (Adapter->IsHeapNotZeroedSupported())
	{
		Desc.Flags |= FD3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
	}

	ID3D12Heap* NativeHeap = nullptr;
	{
		ID3D12Device* NativeDevice = Device->GetDevice();

		LLM_PLATFORM_SCOPE(ELLMTag::GraphicsPlatform);
		VERIFYD3D12RESULT(NativeDevice->CreateHeap(&Desc, IID_PPV_ARGS(&NativeHeap)));

#if PLATFORM_WINDOWS
		// Windows offers no hook into the low level d3d allocations and frees,
		// so the heap has to be registered with the memory trackers by hand.
		LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelAlloc(ELLMTracker::Platform, NativeHeap, Desc.SizeInBytes, ELLMTag::GraphicsPlatform));
		MemoryTrace_Alloc((uint64)NativeHeap, Desc.SizeInBytes, 0, EMemoryTraceRootHeap::VideoMemory);

		// Raise the residency priority so the heap is not paged out.
		// Only done when the device exposes ID3D12Device5.
		TRefCountPtr<ID3D12Device5> NativeDevice5;
		if (SUCCEEDED(NativeDevice->QueryInterface(IID_PPV_ARGS(NativeDevice5.GetInitReference()))))
		{
			Adapter->SetResidencyPriority(NativeHeap, D3D12_RESIDENCY_PRIORITY_HIGH, Device->GetGPUIndex());
		}
#endif // PLATFORM_WINDOWS
	}

	Heap = new FD3D12Heap(Device, VisibleNodeMask);
	Heap->SetHeap(NativeHeap, TEXT("TransientResourceAllocator Backing Heap"), true, true);
	Heap->SetIsTransient(true);

	// UE-174791: there appears to be a residency bug where transient heaps get evicted but are not
	// restored correctly before a resource is needed, which shows up as GPU page faults such as:
	//
	//     PageFault: Found 1 active heaps containing page fault address
	//         GPU Address : "0x1008800000" - Size : 128.00 MB - Name : TransientResourceAllocator Backing Heap
	//
	// Evicting these heaps is not really needed anyway: they are used throughout the frame and are
	// garbage-collected after a few frames once no longer required. Turning residency tracking off
	// does not fix the underlying bug, but should make it less likely to occur and might make the
	// GPU crash data more useful when it does happen.
	//Heap->BeginTrackingResidency(Desc.SizeInBytes);
	Heap->DisallowTrackingResidency(); // Remove this when the above workaround is not needed

	SetGpuVirtualAddress(Heap->GetGPUVirtualAddress());

	INC_MEMORY_STAT_BY(STAT_D3D12TransientHeaps, Desc.SizeInBytes);
	INC_MEMORY_STAT_BY(STAT_D3D12MemoryCurrentTotal, Desc.SizeInBytes);
}
// Decrements the heap memory stats by the heap's size, removes the heap from the Windows
// LLM / memory-trace tracking, and hands the heap to DeferDelete(). Does nothing without a heap.
FD3D12TransientHeap::~FD3D12TransientHeap()
{
	if (!Heap)
	{
		return;
	}

	const D3D12_HEAP_DESC HeapDesc = Heap->GetHeapDesc();

	DEC_MEMORY_STAT_BY(STAT_D3D12TransientHeaps, HeapDesc.SizeInBytes);
	DEC_MEMORY_STAT_BY(STAT_D3D12MemoryCurrentTotal, HeapDesc.SizeInBytes);

#if PLATFORM_WINDOWS
	// Counterpart of the manual tracking performed when the heap was created.
	LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelFree(ELLMTracker::Platform, Heap->GetHeap()));
	MemoryTrace_Free((uint64)Heap->GetHeap(), EMemoryTraceRootHeap::VideoMemory);
#endif

	Heap->DeferDelete();
}
// Builds a heap cache for the given adapter, starting from the default cache settings.
// Returns the newly allocated cache, owned by the returned TUniquePtr.
TUniquePtr<FD3D12TransientHeapCache> FD3D12TransientHeapCache::Create(FD3D12Adapter* ParentAdapter)
{
	// Tier2 hardware is able to mix resource types onto the same heap.
	const bool bIsHeapTier2 = (ParentAdapter->GetResourceHeapTier() == D3D12_RESOURCE_HEAP_TIER_2);

	FRHITransientHeapCache::FInitializer CacheInitializer = FRHITransientHeapCache::FInitializer::CreateDefault();
	CacheInitializer.bSupportsAllHeapFlags = bIsHeapTier2;

	return TUniquePtr<FD3D12TransientHeapCache>(new FD3D12TransientHeapCache(CacheInitializer, ParentAdapter));
}
// Forwards the cache settings to the RHI base class and the adapter to the adapter-child base.
FD3D12TransientHeapCache::FD3D12TransientHeapCache(const FRHITransientHeapCache::FInitializer& Initializer, FD3D12Adapter* ParentAdapter)
	: FRHITransientHeapCache(Initializer)
	, FD3D12AdapterChild(ParentAdapter)
{
}
// Creates a transient heap as a linked object through the parent adapter, constructing one
// FD3D12TransientHeap for each device the adapter hands to the factory callback.
FRHITransientHeap* FD3D12TransientHeapCache::CreateHeap(const FRHITransientHeap::FInitializer& HeapInitializer)
{
	FD3D12Adapter* Adapter = GetParentAdapter();

	// If heap is flagged for NNE buffers, make it visible on first GPU only. Required by DirectML.
	FRHIGPUMask VisibleNodeMask = FRHIGPUMask::All();
	if (HeapInitializer.Flags == ERHITransientHeapFlags::AllowNNEBuffers)
	{
		VisibleNodeMask = FRHIGPUMask::GPU0();
	}

	// Per-device factory; the first linked object is not needed to build the others.
	auto MakeHeapForDevice = [&HeapInitializer, &VisibleNodeMask, Adapter](FD3D12Device* Device, FD3D12TransientHeap* /*FirstLinkedObject*/)
	{
		return new FD3D12TransientHeap(HeapInitializer, Adapter, Device, VisibleNodeMask);
	};

	return Adapter->CreateLinkedObject<FD3D12TransientHeap>(VisibleNodeMask, MakeHeapForDevice);
}
// Binds the allocator to a heap cache and to that cache's parent adapter.
// Resource allocation-info queries are routed through device 0 of the adapter.
FD3D12TransientResourceHeapAllocator::FD3D12TransientResourceHeapAllocator(FD3D12TransientHeapCache& InHeapCache)
	: FRHITransientResourceHeapAllocator(InHeapCache)
	, FD3D12AdapterChild(InHeapCache.GetParentAdapter())
	, AllocationInfoQueryDevice(GetParentAdapter()->GetDevice(0))
{
}
// Creates a transient texture. The size and alignment are queried from D3D12 for the texture's
// resource desc, and the texture itself is created as a placed resource on whichever heap
// CreateTextureInternal passes to the placement callback.
//
// InCreateInfo - description of the texture to create.
// InDebugName  - name given to the created texture.
// Fences       - forwarded unchanged to CreateTextureInternal.
FRHITransientTexture* FD3D12TransientResourceHeapAllocator::CreateTexture(const FRHITextureCreateInfo& InCreateInfo, const TCHAR* InDebugName, const FRHITransientAllocationFences& Fences)
{
	FD3D12DynamicRHI* DynamicRHI = FD3D12DynamicRHI::GetD3DRHI();
	FD3D12ResourceDesc TextureDesc = DynamicRHI->GetResourceDesc(InCreateInfo);

	D3D12_RESOURCE_ALLOCATION_INFO AllocationInfo = AllocationInfoQueryDevice->GetResourceAllocationInfo(TextureDesc);

	// Never use less than the default placement alignment.
	AllocationInfo.Alignment = FMath::Max<uint32>(D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, AllocationInfo.Alignment);

	// Invoked with the heap and the allocation chosen for this texture.
	auto PlaceTextureOnHeap = [&](const FRHITransientHeap::FResourceInitializer& Initializer)
	{
		FD3D12TransientHeap& Heap = static_cast<FD3D12TransientHeap&>(Initializer.Heap);

		// Everything the creation task needs is captured by value (the heap by reference).
		return CreateTransientResource<FRHITransientTexture>(
			[DynamicRHI, Adapter = GetParentAdapter(), &Heap, Allocation = Initializer.Allocation, Desc = TextureDesc, InCreateInfo, InDebugName]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(AllocatePlacedTexture);

				FResourceAllocatorAdapter ResourceAllocatorAdapter(Adapter, Heap, Allocation, Desc);
				FRHITextureCreateDesc CreateDesc(InCreateInfo, ERHIAccess::Discard, InDebugName);
				FD3D12Texture* Texture = DynamicRHI->CreateD3D12Texture(CreateDesc, &ResourceAllocatorAdapter);

				return FRHITransientResource::FResourceTaskResult{ Texture, ResourceAllocatorAdapter.GpuVirtualAddress };
			},
			Initializer.Hash, AllocationInfo.SizeInBytes, InCreateInfo);
	};

	return CreateTextureInternal(InCreateInfo, InDebugName, Fences, AllocationInfo.SizeInBytes, AllocationInfo.Alignment, PlaceTextureOnHeap);
}
// Creates a placed D3D12 resource on this adapter's transient heap, at the offset of the
// allocation this adapter was constructed with, and binds it to ResourceLocation.
//
// GPUIndex            - selects which linked heap object the resource is placed on.
// (D3D12_HEAP_TYPE)   - unnamed and ignored.
// InDesc              - resource description used for creation; checked against the stored Desc.
// InSize              - size recorded on ResourceLocation.
// (uint32)            - unnamed and ignored (presumably an alignment - confirm against the base interface).
// InResourceStateMode, InCreateState, InClearValue, InName - forwarded to CreatePlacedResource.
// ResourceLocation    - must not be valid yet; receives the new resource and is marked transient.
//
// Also stores the new resource's GPU virtual address in GpuVirtualAddress.
// NOTE(review): presumably invoked from CreateD3D12Texture / CreateD3D12Buffer, which are handed
// a pointer to this adapter - confirm against those functions.
void FD3D12TransientResourceHeapAllocator::FResourceAllocatorAdapter::AllocateResource(
uint32 GPUIndex, D3D12_HEAP_TYPE, const FD3D12ResourceDesc& InDesc, uint64 InSize, uint32, ED3D12ResourceStateMode InResourceStateMode,
D3D12_RESOURCE_STATES InCreateState, const D3D12_CLEAR_VALUE* InClearValue, const TCHAR* InName, FD3D12ResourceLocation& ResourceLocation)
{
// The D3D12_RESOURCE_DESC's are built in two different functions right now. This checks that they actually match what we expect.
#if DO_CHECK
{
CD3DX12_RESOURCE_DESC CreatedDesc(InDesc);
CD3DX12_RESOURCE_DESC DerivedDesc(Desc);
check(CreatedDesc == DerivedDesc);
}
#endif
FD3D12Resource* NewResource = nullptr;
FD3D12Adapter* Adapter = GetParentAdapter();
// Place the resource on the heap linked for GPUIndex, at the pre-computed allocation offset.
VERIFYD3D12RESULT(Adapter->CreatePlacedResource(InDesc, Heap.GetLinkedObject(GPUIndex)->Get(), Allocation.Offset, InCreateState, InResourceStateMode, D3D12_RESOURCE_STATE_TBD, InClearValue, &NewResource, InName));
// The caller is expected to hand in a location that has not been set up yet.
check(!ResourceLocation.IsValid());
ResourceLocation.AsHeapAliased(NewResource);
ResourceLocation.SetSize(InSize);
ResourceLocation.SetTransient(true);
// Kept on the adapter so the code that triggered the allocation can read it afterwards.
GpuVirtualAddress = NewResource->GetGPUVirtualAddress();
#if TRACK_RESOURCE_ALLOCATIONS
if (Adapter->IsTrackingAllAllocations())
{
// Record the allocation using the heap allocation's size, without collecting a callstack.
bool bCollectCallstack = false;
Adapter->TrackAllocationData(&ResourceLocation, Allocation.Size, bCollectCallstack);
}
#endif
}
// Creates a transient buffer. The buffer is sized to its resource desc width rounded up to the
// default placement alignment, and is created as a placed resource on whichever heap
// CreateBufferInternal passes to the placement callback.
//
// InCreateInfo - description of the buffer to create.
// InDebugName  - name given to the created buffer.
// Fences       - forwarded unchanged to CreateBufferInternal.
FRHITransientBuffer* FD3D12TransientResourceHeapAllocator::CreateBuffer(const FRHIBufferDesc& InCreateInfo, const TCHAR* InDebugName, const FRHITransientAllocationFences& Fences)
{
	const FRHIBufferCreateDesc CreateDesc =
		FRHIBufferCreateDesc::Create(InDebugName, InCreateInfo)
		.SetInitialState(ERHIAccess::Discard);

	// The alignment reported alongside the desc is not used: placement always goes through
	// the default placement alignment below.
	D3D12_RESOURCE_DESC BufferDesc{};
	uint32 ReportedAlignment{};
	FD3D12Buffer::GetResourceDescAndAlignment(CreateDesc, BufferDesc, ReportedAlignment);

	const uint32 PlacementAlignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
	const uint64 PlacedSize = Align(BufferDesc.Width, PlacementAlignment);

	// Invoked with the heap and the allocation chosen for this buffer.
	auto PlaceBufferOnHeap = [&](const FRHITransientHeap::FResourceInitializer& Initializer)
	{
		FD3D12TransientHeap& Heap = static_cast<FD3D12TransientHeap&>(Initializer.Heap);

		// Everything the creation task needs is captured by value (the heap by reference).
		return CreateTransientResource<FRHITransientBuffer>(
			[Adapter = GetParentAdapter(), &Heap, Allocation = Initializer.Allocation, Desc = BufferDesc, CreateDesc, InDebugName]
			{
				TRACE_CPUPROFILER_EVENT_SCOPE(AllocatePlacedBuffer);

				FResourceAllocatorAdapter ResourceAllocatorAdapter(Adapter, Heap, Allocation, Desc);
				FRHIBuffer* Buffer = FD3D12DynamicRHI::GetD3DRHI()->CreateD3D12Buffer(CreateDesc, &ResourceAllocatorAdapter);

				return FRHITransientResource::FResourceTaskResult{ Buffer, ResourceAllocatorAdapter.GpuVirtualAddress };
			},
			Initializer.Hash, PlacedSize, CreateDesc);
	};

	return CreateBufferInternal(CreateDesc, InDebugName, Fences, PlacedSize, PlacementAlignment, PlaceBufferOnHeap);
}