Files
UnrealEngine/Engine/Source/Runtime/D3D12RHI/Private/D3D12Device.h
2025-05-18 13:04:45 +08:00

382 lines
14 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
D3D12Device.h: D3D12 Device Interfaces
=============================================================================*/
#pragma once
#include "RHIBreadcrumbs.h"
#include "RHIDiagnosticBuffer.h"
#include "D3D12BindlessDescriptors.h"
#include "D3D12CommandContext.h"
#include "D3D12Descriptors.h"
#include "D3D12Query.h"
#include "D3D12Queue.h"
#include "D3D12Resources.h"
#include "D3D12Submission.h"
#include "D3D12GPUProfiler.h"
#include "Containers/LruCache.h"
class FD3D12Device;
class FD3D12DynamicRHI;
class FD3D12Buffer;
class FD3D12Queue;
class FD3D12ExplicitDescriptorHeapCache;
class FD3D12RayTracingPipelineCache;
class FD3D12RayTracingCompactionRequestHandler;
struct FD3D12RayTracingPipelineInfo;
//
// Diagnostic buffer that lives in a virtual heap. Because it remains accessible after a GPU crash,
// diagnostic messages can still be read back from it. It also carries the breadcrumb markers that
// are used to follow how far the GPU has progressed.
//
class FD3D12DiagnosticBuffer : public FRHIDiagnosticBuffer
{
private:
	TRefCountPtr<FD3D12Heap> Heap;
	TRefCountPtr<FD3D12Resource> Resource;

	// GPU virtual address that corresponds to the CPU pointer 'Data'.
	D3D12_GPU_VIRTUAL_ADDRESS GpuAddress = 0;

	// Converts a CPU pointer into the GPU virtual address at the same byte offset from 'Data'.
	D3D12_GPU_VIRTUAL_ADDRESS ToGPUAddress(void* Ptr) const
	{
		const uintptr_t ByteOffset = uintptr_t(Ptr) - uintptr_t(Data);
		return GpuAddress + ByteOffset;
	}

public:
	FD3D12DiagnosticBuffer(FD3D12Queue& Queue);
	~FD3D12DiagnosticBuffer();

	// GPU address of the queue data block (the start of 'Data').
	D3D12_GPU_VIRTUAL_ADDRESS GetGPUQueueData() const
	{
		return ToGPUAddress(Data);
	}

#if WITH_RHI_BREADCRUMBS
	// GPU addresses of the breadcrumb "in" / "out" marker fields.
	D3D12_GPU_VIRTUAL_ADDRESS GetGPUQueueMarkerIn() const
	{
		return ToGPUAddress(&Data->MarkerIn);
	}

	D3D12_GPU_VIRTUAL_ADDRESS GetGPUQueueMarkerOut() const
	{
		return ToGPUAddress(&Data->MarkerOut);
	}

	// CPU-side reads of the current breadcrumb marker values.
	uint32 ReadMarkerIn() const
	{
		return Data->MarkerIn;
	}

	uint32 ReadMarkerOut() const
	{
		return Data->MarkerOut;
	}
#endif

	// True when the backing resource reference is valid.
	bool IsValid() const
	{
		return Resource.IsValid();
	}
};
// Holds everything needed to track the GPU performance of one queue over the course of a frame.
class FD3D12Timing
{
public:
	// The queue this timing data belongs to.
	FD3D12Queue& Queue;

	D3D12_QUERY_DATA_PIPELINE_STATISTICS PipelineStats {};

#if RHI_NEW_GPU_PROFILER
	// Timer calibration data
	uint64 GPUFrequency = 0;
	uint64 GPUTimestamp = 0;
	uint64 CPUFrequency = 0;
	uint64 CPUTimestamp = 0;

	UE::RHI::GPUProfiler::FEventStream EventStream;
#else
	TArray<uint64> Timestamps;

	// Cursor into 'Timestamps'.
	int32 TimestampIndex = 0;

	uint64 BusyCycles = 0;

	// Timestamp at the cursor.
	uint64 GetCurrentTimestamp() const
	{
		return Timestamps[TimestampIndex];
	}

	// Timestamp immediately before the cursor.
	uint64 GetPreviousTimestamp() const
	{
		return Timestamps[TimestampIndex - 1];
	}

	// True while the cursor has not yet reached the end of 'Timestamps'.
	bool HasMoreTimestamps() const
	{
		return Timestamps.Num() > TimestampIndex;
	}

	// True when the cursor sits on an even index.
	bool IsStartingWork() const
	{
		return (TimestampIndex & 1) == 0;
	}

	// Moves the cursor forward by one entry.
	void AdvanceTimestamp()
	{
		++TimestampIndex;
	}
#endif

	FD3D12Timing(FD3D12Queue& Queue);
};
// Wraps one D3D command queue together with the state the submission thread
// needs in order to manage that queue.
class FD3D12Queue final
{
public:
	FD3D12Device* const Device;
	ED3D12QueueType const QueueType;

	// The underlying D3D queue object
	TRefCountPtr<ID3D12CommandQueue> D3DCommandQueue;

	// The single D3D fence used to track completion of work on this queue
	FD3D12Fence Fence;

	// Payload queues. Peek() is wrapped so it hands back the head payload pointer itself,
	// or nullptr when the underlying TQueue::Peek() returns nothing.
	struct : public TQueue<FD3D12Payload*, EQueueMode::Mpsc>
	{
		FD3D12Payload* Peek()
		{
			FD3D12Payload** Head = TQueue::Peek();
			return Head ? *Head : nullptr;
		}
	} PendingSubmission, PendingInterrupt;

	FD3D12Payload* PayloadToSubmit = nullptr;

	FD3D12CommandAllocator* BarrierAllocator = nullptr;
	FD3D12QueryAllocator BarrierTimestamps;

	uint32 NumCommandListsInBatch = 0;
	FD3D12BatchedPayloadObjects BatchedObjects;

	// Pools of reusable context / command allocator / command list objects
	struct
	{
		TD3D12ObjectPool<FD3D12ContextCommon> Contexts;
		TD3D12ObjectPool<FD3D12CommandAllocator> Allocators;
		TD3D12ObjectPool<FD3D12CommandList> Lists;
	} ObjectPool;

	// The timing struct currently active on this queue. Updated / accessed by the interrupt thread.
	FD3D12Timing* Timing = nullptr;

	TUniquePtr<FD3D12DiagnosticBuffer> DiagnosticBuffer;

#if D3D12_RHI_RAYTRACING
	FD3D12Buffer* RayTracingDispatchRaysDescBuffer = nullptr;
#endif

	// Some hardware has auxiliary queue types without tile mapping support; a separate queue must be used there
	bool bSupportsTileMapping = true;

	static constexpr uint32 MaxBatchedPayloads = 128;
	using FPayloadArray = TArray<FD3D12Payload*, TInlineAllocator<MaxBatchedPayloads>>;

	// Batches the command lists of the current payload and returns the latest fence value signaled for this queue.
	uint64 FinalizePayload(bool bRequiresSignal, FPayloadArray& PayloadsToHandDown);

	// Calls the underlying ID3D12Queue::ExecuteCommandLists function
	void ExecuteCommandLists(
		TArrayView<ID3D12CommandList*> D3DCommandLists
#if ENABLE_RESIDENCY_MANAGEMENT
		, TArrayView<FD3D12ResidencySet*> ResidencySets
#endif
	);

	FD3D12Queue(FD3D12Device* Device, ED3D12QueueType QueueType);
	~FD3D12Queue();

#if RHI_NEW_GPU_PROFILER
	UE::RHI::GPUProfiler::FQueue GetProfilerQueue() const;
#endif

private:
	// Internal fence that may be used before ExecuteCommandLists is called
	FD3D12Fence ExecuteCommandListsFence;
};
class FD3D12Device final : public FD3D12SingleNodeGPUObject, public FNoncopyable, public FD3D12AdapterChild
{
public:
FD3D12Device(FRHIGPUMask InGPUMask, FD3D12Adapter* InAdapter);
~FD3D12Device();
ID3D12Device* GetDevice();
#if (RHI_NEW_GPU_PROFILER == 0)
void RegisterGPUWork(uint32 NumPrimitives = 0, uint32 NumVertices = 0) { GPUProfilingData.RegisterGPUWork(NumPrimitives, NumVertices); }
void RegisterGPUDispatch(FIntVector GroupCount) { GPUProfilingData.RegisterGPUDispatch(GroupCount); }
// GPU Profiler
FORCEINLINE FD3D12GPUProfiler& GetGPUProfiler() { return GPUProfilingData; }
#endif
uint64 GetTimestampFrequency(ED3D12QueueType QueueType);
#if (RHI_NEW_GPU_PROFILER == 0)
FGPUTimingCalibrationTimestamp GetCalibrationTimestamp(ED3D12QueueType QueueType);
#endif
// Misc
void BlockUntilIdle();
D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoUncached(const FD3D12ResourceDesc& InDesc);
D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(const FD3D12ResourceDesc& InDesc);
// Specialized wrapper of ID3D12Device::CopyDescriptors for a common case of a single descriptor range. Similar to CopyDescriptorsSimple(), except source is provided as an array.
static void CopyDescriptors(ID3D12Device* D3DDevice, D3D12_CPU_DESCRIPTOR_HANDLE Destination, const D3D12_CPU_DESCRIPTOR_HANDLE* Source, uint32 NumSourceDescriptors, D3D12_DESCRIPTOR_HEAP_TYPE Type);
void CopyDescriptors(D3D12_CPU_DESCRIPTOR_HANDLE Destination, const D3D12_CPU_DESCRIPTOR_HANDLE* Source, uint32 NumSourceDescriptors, D3D12_DESCRIPTOR_HEAP_TYPE Type)
{
CopyDescriptors(GetDevice(), Destination, Source, NumSourceDescriptors, Type);
}
void InitExplicitDescriptorHeap();
FD3D12ExplicitDescriptorHeapCache* GetExplicitDescriptorHeapCache() { return ExplicitDescriptorHeapCache; }
// Ray Tracing
#if D3D12_RHI_RAYTRACING
void InitRayTracing();
void CleanupRayTracing();
ID3D12Device5* GetDevice5();
ID3D12Device7* GetDevice7();
ID3D12Device9* GetDevice9();
FD3D12RayTracingPipelineCache* GetRayTracingPipelineCache () { return RayTracingPipelineCache; }
FD3D12Buffer* GetRayTracingDispatchRaysDescBuffer (ED3D12QueueType QueueType) { return Queues[(uint32)QueueType].RayTracingDispatchRaysDescBuffer; }
FD3D12RayTracingCompactionRequestHandler* GetRayTracingCompactionRequestHandler() { return RayTracingCompactionRequestHandler; }
TRefCountPtr<ID3D12StateObject> DeserializeRayTracingStateObject(D3D12_SHADER_BYTECODE Bytecode, ID3D12RootSignature* RootSignature);
void GetRaytracingAccelerationStructurePrebuildInfo(
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS* pDesc,
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO* pInfo);
// Queries ray tracing pipeline state object metrics such as VGPR usage (if available/supported). Returns true if query succeeded.
bool GetRayTracingPipelineInfo(ID3D12StateObject* Pipeline, FD3D12RayTracingPipelineInfo* OutInfo);
#endif // D3D12_RHI_RAYTRACING
// Heaps
inline FD3D12GlobalOnlineSamplerHeap& GetGlobalSamplerHeap() { return GlobalSamplerHeap; }
inline const D3D12_HEAP_PROPERTIES& GetConstantBufferPageProperties() { return ConstantBufferPageProperties; }
// Descriptor Managers
inline FD3D12DescriptorHeapManager& GetDescriptorHeapManager () { return DescriptorHeapManager; }
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
FD3D12BindlessDescriptorAllocator& GetBindlessDescriptorAllocator() { return BindlessDescriptorAllocator; }
FD3D12BindlessDescriptorManager& GetBindlessDescriptorManager() { return BindlessDescriptorManager; }
#endif
inline FD3D12OnlineDescriptorManager& GetOnlineDescriptorManager () { return OnlineDescriptorManager; }
inline FD3D12OfflineDescriptorManager& GetOfflineDescriptorManager (ERHIDescriptorHeapType InType)
{
check(InType < ERHIDescriptorHeapType::Count);
return OfflineDescriptorManagers[static_cast<int>(InType)];
}
const FD3D12DefaultViews& GetDefaultViews() const { return DefaultViews; }
// Memory Allocators
inline FD3D12DefaultBufferAllocator& GetDefaultBufferAllocator() { return DefaultBufferAllocator; }
inline FD3D12FastAllocator& GetDefaultFastAllocator () { return DefaultFastAllocator; }
inline FD3D12TextureAllocatorPool& GetTextureAllocator () { return TextureAllocator; }
// Residency
inline FD3D12ResidencyManager& GetResidencyManager() { return ResidencyManager; }
// Samplers
FD3D12SamplerState* CreateSampler(const FSamplerStateInitializerRHI& Initializer, FD3D12SamplerState* FirstLinkedObject);
void CreateSamplerInternal(const D3D12_SAMPLER_DESC& Desc, D3D12_CPU_DESCRIPTOR_HANDLE Descriptor);
// Command Allocators
FD3D12CommandAllocator* ObtainCommandAllocator (ED3D12QueueType QueueType);
void ReleaseCommandAllocator(FD3D12CommandAllocator* Allocator);
// Contexts
FD3D12CommandContext& GetDefaultCommandContext() { return *ImmediateCommandContext; }
FD3D12ContextCommon* ObtainContext (ED3D12QueueType QueueType);
FD3D12ContextCopy* ObtainContextCopy () { return static_cast<FD3D12ContextCopy* >(ObtainContext(ED3D12QueueType::Copy )); }
FD3D12CommandContext* ObtainContextCompute () { return static_cast<FD3D12CommandContext*>(ObtainContext(ED3D12QueueType::Async )); }
FD3D12CommandContext* ObtainContextGraphics () { return static_cast<FD3D12CommandContext*>(ObtainContext(ED3D12QueueType::Direct)); }
void ReleaseContext (FD3D12ContextCommon* Context);
// Queries
TRefCountPtr<FD3D12QueryHeap> ObtainQueryHeap (ED3D12QueueType QueueType, D3D12_QUERY_TYPE QueryType);
void ReleaseQueryHeap(FD3D12QueryHeap* QueryHeap);
// Command Lists
FD3D12CommandList* ObtainCommandList (FD3D12CommandAllocator* CommandAllocator, FD3D12QueryAllocator* TimestampAllocator, FD3D12QueryAllocator* PipelineStatsAllocator);
void ReleaseCommandList(FD3D12CommandList* CommandList);
// Queues
FD3D12Queue& GetQueue(ED3D12QueueType QueueType) { return Queues[(uint32)QueueType]; }
TArrayView<FD3D12Queue> GetQueues() { return Queues; }
// shared code for different D3D12 devices (e.g. PC DirectX12 and XboxOne) called
// after device creation and GRHISupportsAsyncTextureCreation was set and before resource init
void SetupAfterDeviceCreation();
void CleanupResources();
// Wrapper of ID3D12Device::CreateCommandList
HRESULT CreateCommandList(
UINT nodeMask,
D3D12_COMMAND_LIST_TYPE type,
ID3D12CommandAllocator* pCommandAllocator,
ID3D12PipelineState* pInitialState,
REFIID riid,
void** ppCommandList
);
#if !D3D12RHI_SUPPORTS_UNCOMPRESSED_UAV
void CreateUnorderedAccessViewAlias(
ID3D12Resource* InResource,
ID3D12Resource* InCounterResource,
const D3D12_RESOURCE_DESC& InAliasResourceDesc,
const D3D12_UNORDERED_ACCESS_VIEW_DESC& InAliasViewDesc,
D3D12_CPU_DESCRIPTOR_HANDLE InOfflineCpuHandle);
#endif
TRefCountPtr<ID3D12CommandQueue> TileMappingQueue;
FD3D12Fence TileMappingFence;
private:
// called by SetupAfterDeviceCreation() when the device gets initialized
void CreateDefaultViews();
void UpdateMSAASettings();
void UpdateConstantBufferPageProperties();
#if (RHI_NEW_GPU_PROFILER == 0)
FD3D12GPUProfiler GPUProfilingData;
#endif
struct FResidencyManager : public FD3D12ResidencyManager
{
FResidencyManager(FD3D12Device& Parent);
~FResidencyManager();
} ResidencyManager;
FD3D12DescriptorHeapManager DescriptorHeapManager;
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
FD3D12BindlessDescriptorAllocator& BindlessDescriptorAllocator;
FD3D12BindlessDescriptorManager BindlessDescriptorManager;
#endif
TArray<FD3D12OfflineDescriptorManager, TInlineAllocator<(uint32)ERHIDescriptorHeapType::Count>> OfflineDescriptorManagers;
FD3D12GlobalOnlineSamplerHeap GlobalSamplerHeap;
FD3D12OnlineDescriptorManager OnlineDescriptorManager;
FD3D12DefaultViews DefaultViews;
TStaticArray<TD3D12ObjectPool<FD3D12QueryHeap>, 4> QueryHeapPool { InPlace };
FD3D12CommandContext* ImmediateCommandContext = nullptr;
TArray<FD3D12Queue, TInlineAllocator<(uint32)ED3D12QueueType::Count>> Queues;
TLruCache<D3D12_SAMPLER_DESC, TRefCountPtr<FD3D12SamplerState>> SamplerCache;
uint32 SamplerID = 0;
/** Hashmap used to cache resource allocation size information */
FRWLock ResourceAllocationInfoMapMutex;
TMap<uint64, D3D12_RESOURCE_ALLOCATION_INFO> ResourceAllocationInfoMap;
// set by UpdateMSAASettings(), get by GetMSAAQuality()
// [SampleCount] = Quality, 0xffffffff if not supported
uint32 AvailableMSAAQualities[DX_MAX_MSAA_COUNT + 1];
// set by UpdateConstantBufferPageProperties, get by GetConstantBufferPageProperties
D3D12_HEAP_PROPERTIES ConstantBufferPageProperties;
FD3D12DefaultBufferAllocator DefaultBufferAllocator;
FD3D12FastAllocator DefaultFastAllocator;
FD3D12TextureAllocatorPool TextureAllocator;
#if D3D12_RHI_RAYTRACING
FD3D12RayTracingPipelineCache* RayTracingPipelineCache = nullptr;
FD3D12RayTracingCompactionRequestHandler* RayTracingCompactionRequestHandler = nullptr;
// #dxr_todo UE-72158: unify RT descriptor cache with main FD3D12DescriptorCache
#endif
FD3D12ExplicitDescriptorHeapCache* ExplicitDescriptorHeapCache = nullptr;
void DestroyExplicitDescriptorCache();
};