839 lines
30 KiB
C++
839 lines
30 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "D3D12Device.h"
|
|
#include "D3D12RHIPrivate.h"
|
|
#include "D3D12IntelExtensions.h"
|
|
#include "D3D12RayTracing.h"
|
|
#include "D3D12ExplicitDescriptorCache.h"
|
|
|
|
// Read-only console variable that selects whether command queues are created with the
// GPU timeout enabled. A value of 0 makes the FD3D12Queue constructor request
// D3D12_COMMAND_QUEUE_FLAG_DISABLE_GPU_TIMEOUT.
static TAutoConsoleVariable<int32> CVarD3D12GPUTimeout(
	TEXT("r.D3D12.GPUTimeout"),
	1,
	TEXT("0: Disable GPU Timeout; use with care as it could freeze your PC!\n")
	TEXT("1: Enable GPU Timeout; operation taking long on the GPU will fail(default)\n"),
	ECVF_ReadOnly);
|
|
|
|
// Read-only console variable (default 0) describing extra memory for the diagnostic buffer.
// Declared without `static`, so it has external linkage and can be referenced from other
// translation units.
TAutoConsoleVariable<int32> CVarD3D12ExtraDiagnosticBufferMemory(
	TEXT("r.D3D12.DiagnosticBufferExtraMemory"),
	0,
	TEXT("Extra allocated memory for diagnostic buffer"),
	ECVF_ReadOnly);
|
|
|
|
// Maps a D3D12 query heap type to the index of the matching pool in FD3D12Device::QueryHeapPool.
// Unknown heap types assert and then share the occlusion pool index (0).
static uint32 GetQueryHeapPoolIndex(D3D12_QUERY_HEAP_TYPE HeapType)
{
	switch (HeapType)
	{
	case D3D12_QUERY_HEAP_TYPE_OCCLUSION:            return 0;
	case D3D12_QUERY_HEAP_TYPE_TIMESTAMP:            return 1;
	case D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP: return 2;
	case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS:  return 3;
	default:
		break;
	}

	// Unhandled heap type: flag it, then fall back to the occlusion index.
	checkNoEntry();
	return 0;
}
|
|
|
|
// Binds the timing object to its queue. With the new GPU profiler enabled, the event stream
// is additionally initialized from the queue's profiler identity.
FD3D12Timing::FD3D12Timing(FD3D12Queue& Queue)
	: Queue(Queue)
#if RHI_NEW_GPU_PROFILER
	, EventStream(Queue.GetProfilerQueue())
#endif
{
}
|
|
|
|
// Creates the D3D12 command queue for the given device/queue type, plus the two fences
// owned by the queue (the main queue fence and the ExecuteCommandLists fence).
// All three D3D objects are given debug names that include the queue type and GPU index.
FD3D12Queue::FD3D12Queue(FD3D12Device* Device, ED3D12QueueType QueueType)
	: Device(Device)
	, QueueType(QueueType)
	, Fence(this)
	, BarrierTimestamps(Device, QueueType, D3D12_QUERY_TYPE_TIMESTAMP)
	, bSupportsTileMapping(FD3D12DynamicRHI::GetD3DRHI()->QueueSupportsTileMapping(QueueType))
	, ExecuteCommandListsFence(this)
{
	// Values shared by the three debug names below.
	const auto QueueTypeName = GetD3DCommandQueueTypeName(QueueType);
	const auto GPUIndex = Device->GetGPUIndex();

	D3D12_COMMAND_QUEUE_DESC CommandQueueDesc = {};
	CommandQueueDesc.Type = GetD3DCommandListType(QueueType);
	CommandQueueDesc.Priority = 0;
	CommandQueueDesc.NodeMask = Device->GetGPUMask().GetNative();
	// r.D3D12.GPUTimeout == 0 opts out of the GPU timeout for this queue.
	CommandQueueDesc.Flags = (CVarD3D12GPUTimeout.GetValueOnAnyThread() == 0)
		? D3D12_COMMAND_QUEUE_FLAG_DISABLE_GPU_TIMEOUT
		: D3D12_COMMAND_QUEUE_FLAG_NONE;

	FD3D12DynamicRHI::GetD3DRHI()->CreateCommandQueue(Device, CommandQueueDesc, D3DCommandQueue);
	D3DCommandQueue->SetName(*FString::Printf(TEXT("%s Queue (GPU %d)"), QueueTypeName, GPUIndex));

	// Main queue fence.
	VERIFYD3D12RESULT(Device->GetDevice()->CreateFence(
		0,
		D3D12_FENCE_FLAG_NONE,
		IID_PPV_ARGS(Fence.D3DFence.GetInitReference())
	));
	Fence.D3DFence->SetName(*FString::Printf(TEXT("%s Queue Fence (GPU %d)"), QueueTypeName, GPUIndex));

	// Fence associated with ExecuteCommandLists submissions.
	VERIFYD3D12RESULT(Device->GetDevice()->CreateFence(
		0,
		D3D12_FENCE_FLAG_NONE,
		IID_PPV_ARGS(ExecuteCommandListsFence.D3DFence.GetInitReference())
	));
	ExecuteCommandListsFence.D3DFence->SetName(*FString::Printf(TEXT("%s ExecuteCommandListsFence (GPU %d)"), QueueTypeName, GPUIndex));
}
|
|
|
|
// A queue must be fully drained before destruction: nothing may still be waiting
// for submission or for an interrupt.
FD3D12Queue::~FD3D12Queue()
{
	check(PendingSubmission.IsEmpty());
	check(PendingInterrupt.IsEmpty());
}
|
|
|
|
#if RHI_NEW_GPU_PROFILER
|
|
// Builds the GPU profiler's description of this queue: owning GPU index, queue index 0,
// and the profiler queue type matching the D3D12 queue type.
UE::RHI::GPUProfiler::FQueue FD3D12Queue::GetProfilerQueue() const
{
	using FProfilerQueue = UE::RHI::GPUProfiler::FQueue;

	FProfilerQueue Result;
	Result.GPU = Device->GetGPUIndex();
	Result.Index = 0;

	switch (QueueType)
	{
	case ED3D12QueueType::Async:
		Result.Type = FProfilerQueue::EType::Compute;
		break;

	case ED3D12QueueType::Copy:
		Result.Type = FProfilerQueue::EType::Copy;
		break;

	default:
		// Unknown queue types assert and are then reported as graphics.
		checkNoEntry();
		[[fallthrough]];

	case ED3D12QueueType::Direct:
		Result.Type = FProfilerQueue::EType::Graphics;
		break;
	}

	return Result;
}
|
|
#endif // RHI_NEW_GPU_PROFILER
|
|
|
|
// Constructs the per-GPU device object: initializes the sub-managers/allocators, creates one
// offline descriptor manager per descriptor heap type, one FD3D12Queue per queue type, and the
// fence used for tile mapping. Must be called on the game thread.
FD3D12Device::FD3D12Device(FRHIGPUMask InGPUMask, FD3D12Adapter* InAdapter)
	: FD3D12SingleNodeGPUObject(InGPUMask)
	, FD3D12AdapterChild(InAdapter)
	, TileMappingFence(nullptr)
#if (RHI_NEW_GPU_PROFILER == 0)
	, GPUProfilingData(this)
#endif
	, ResidencyManager(*this)
	, DescriptorHeapManager(this)
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
	, BindlessDescriptorAllocator(InAdapter->GetBindlessDescriptorAllocator())
	, BindlessDescriptorManager(this, InAdapter->GetBindlessDescriptorAllocator())
#endif
	, GlobalSamplerHeap(this)
	, OnlineDescriptorManager(this)
	, SamplerCache(D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE)
	, DefaultBufferAllocator(this, FRHIGPUMask::All()) // Note: cross node buffers are possible
	, DefaultFastAllocator(this, FRHIGPUMask::All(), D3D12_HEAP_TYPE_UPLOAD, 1024 * 1024 * 4)
	, TextureAllocator(this, FRHIGPUMask::All())
{
	check(IsInGameThread());

	// One offline descriptor manager for every descriptor heap type.
	const uint32 NumDescriptorHeapTypes = static_cast<uint32>(ERHIDescriptorHeapType::Count);
	for (uint32 HeapTypeIndex = 0; HeapTypeIndex < NumDescriptorHeapTypes; ++HeapTypeIndex)
	{
		OfflineDescriptorManagers.Emplace(this, static_cast<ERHIDescriptorHeapType>(HeapTypeIndex));
	}

	// One queue object for every queue type.
	const uint32 NumQueueTypes = static_cast<uint32>(ED3D12QueueType::Count);
	for (uint32 QueueTypeIndex = 0; QueueTypeIndex < NumQueueTypes; ++QueueTypeIndex)
	{
		Queues.Emplace(this, static_cast<ED3D12QueueType>(QueueTypeIndex));
	}

	// Not every queue type can run tile mapping operations on all hardware, so the direct
	// queue serves as the fallback whenever tile updates are requested on an unsupported queue.
	TileMappingQueue = Queues[size_t(ED3D12QueueType::Direct)].D3DCommandQueue;
	VERIFYD3D12RESULT(GetDevice()->CreateFence(
		0,
		D3D12_FENCE_FLAG_NONE,
		IID_PPV_ARGS(TileMappingFence.D3DFence.GetInitReference())
	));
#if NAME_OBJECTS
	TileMappingFence.D3DFence->SetName(TEXT("TileMappingFence"));
#endif
}
|
|
|
|
// Tears down device-owned helpers in a fixed order: ray tracing compaction handler,
// explicit descriptor cache, then the buffer/fast/texture allocators and the sampler cache.
FD3D12Device::~FD3D12Device()
{
#if D3D12_RHI_RAYTRACING
	delete RayTracingCompactionRequestHandler;
	RayTracingCompactionRequestHandler = nullptr;
#endif

	// #dxr_todo UE-72158: unify RT descriptor cache with main FD3D12DescriptorCache
	DestroyExplicitDescriptorCache();

	// The allocators are cleaned up near the end because resources may still be handed back
	// to them, and some references are shared by multiple GPUs.
	DefaultBufferAllocator.FreeDefaultBufferPools();

	DefaultFastAllocator.Destroy();

	TextureAllocator.CleanUpAllocations();
	TextureAllocator.Destroy();

	SamplerCache.Empty();
}
|
|
|
|
// Initializes the residency manager for the parent device when residency management is
// compiled in; otherwise this constructor does nothing.
FD3D12Device::FResidencyManager::FResidencyManager(FD3D12Device& Parent)
{
#if ENABLE_RESIDENCY_MANAGEMENT
	IDXGIAdapter3* Adapter3 = nullptr;
	VERIFYD3D12RESULT(Parent.GetParentAdapter()->GetAdapter()->QueryInterface(IID_PPV_ARGS(&Adapter3)));

	// The residency manager queries the memory budget by GPU node index.
	// With virtual multi-GPU every device uses node 0.
	uint32 BudgetNodeIndex = 0;
	if (!GVirtualMGPU)
	{
		BudgetNodeIndex = Parent.GetGPUIndex();
	}

	D3DX12Residency::InitializeResidencyManager(*this, Parent.GetDevice(), BudgetNodeIndex, Adapter3, RESIDENCY_PIPELINE_DEPTH);
#endif // ENABLE_RESIDENCY_MANAGEMENT
}
|
|
|
|
// Shuts the residency manager down again; a no-op when residency management is compiled out.
FD3D12Device::FResidencyManager::~FResidencyManager()
{
#if ENABLE_RESIDENCY_MANAGEMENT
	D3DX12Residency::DestroyResidencyManager(*this);
#endif // ENABLE_RESIDENCY_MANAGEMENT
}
|
|
|
|
// Returns the ID3D12Device owned by the parent adapter.
ID3D12Device* FD3D12Device::GetDevice()
{
	FD3D12Adapter* const ParentAdapter = GetParentAdapter();
	return ParentAdapter->GetD3DDevice();
}
|
|
|
|
#if D3D12_RHI_RAYTRACING
|
|
// Returns the parent adapter's ID3D12Device5 interface.
ID3D12Device5* FD3D12Device::GetDevice5()
{
	FD3D12Adapter* const ParentAdapter = GetParentAdapter();
	return ParentAdapter->GetD3DDevice5();
}
|
|
|
|
// Returns the parent adapter's ID3D12Device7 interface.
ID3D12Device7* FD3D12Device::GetDevice7()
{
	FD3D12Adapter* const ParentAdapter = GetParentAdapter();
	return ParentAdapter->GetD3DDevice7();
}
|
|
|
|
// Returns the parent adapter's ID3D12Device9 interface.
ID3D12Device9* FD3D12Device::GetDevice9()
{
	FD3D12Adapter* const ParentAdapter = GetParentAdapter();
	return ParentAdapter->GetD3DDevice9();
}
|
|
#endif // D3D12_RHI_RAYTRACING
|
|
|
|
#if D3D12_SUPPORTS_DXGI_DEBUG
|
|
typedef HRESULT(WINAPI *FDXGIGetDebugInterface1)(UINT, REFIID, void **);
|
|
#endif
|
|
|
|
// Queries D3D12_FEATURE_FORMAT_SUPPORT for a single DXGI format.
// The query structure is value-initialized before the call and the HRESULT of
// CheckFeatureSupport is not inspected; the structure is returned as-is.
static D3D12_FEATURE_DATA_FORMAT_SUPPORT GetFormatSupport(ID3D12Device* InDevice, DXGI_FORMAT InFormat)
{
	D3D12_FEATURE_DATA_FORMAT_SUPPORT Query{};
	Query.Format = InFormat;

	InDevice->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &Query, sizeof(Query));
	return Query;
}
|
|
|
|
void FD3D12Device::SetupAfterDeviceCreation()
|
|
{
|
|
ID3D12Device* Direct3DDevice = GetParentAdapter()->GetD3DDevice();
|
|
|
|
for (uint32 FormatIndex = PF_Unknown; FormatIndex < PF_MAX; FormatIndex++)
|
|
{
|
|
FPixelFormatInfo& PixelFormatInfo = GPixelFormats[FormatIndex];
|
|
const DXGI_FORMAT PlatformFormat = static_cast<DXGI_FORMAT>(PixelFormatInfo.PlatformFormat);
|
|
|
|
EPixelFormatCapabilities Capabilities = EPixelFormatCapabilities::None;
|
|
|
|
if (PlatformFormat != DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
const D3D12_FEATURE_DATA_FORMAT_SUPPORT FormatSupport = GetFormatSupport(Direct3DDevice, PlatformFormat);
|
|
const D3D12_FEATURE_DATA_FORMAT_SUPPORT SRVFormatSupport = GetFormatSupport(Direct3DDevice, UE::DXGIUtilities::FindShaderResourceFormat(PlatformFormat, false));
|
|
const D3D12_FEATURE_DATA_FORMAT_SUPPORT UAVFormatSupport = GetFormatSupport(Direct3DDevice, UE::DXGIUtilities::FindUnorderedAccessFormat(PlatformFormat));
|
|
const D3D12_FEATURE_DATA_FORMAT_SUPPORT RTVFormatSupport = GetFormatSupport(Direct3DDevice, UE::DXGIUtilities::FindShaderResourceFormat(PlatformFormat, false));
|
|
const D3D12_FEATURE_DATA_FORMAT_SUPPORT DSVFormatSupport = GetFormatSupport(Direct3DDevice, UE::DXGIUtilities::FindDepthStencilFormat(PlatformFormat));
|
|
|
|
auto ConvertCap1 = [&Capabilities](const D3D12_FEATURE_DATA_FORMAT_SUPPORT& InSupport, EPixelFormatCapabilities UnrealCap, D3D12_FORMAT_SUPPORT1 InFlags)
|
|
{
|
|
if (EnumHasAnyFlags(InSupport.Support1, InFlags))
|
|
{
|
|
EnumAddFlags(Capabilities, UnrealCap);
|
|
}
|
|
};
|
|
auto ConvertCap2 = [&Capabilities](const D3D12_FEATURE_DATA_FORMAT_SUPPORT& InSupport, EPixelFormatCapabilities UnrealCap, D3D12_FORMAT_SUPPORT2 InFlags)
|
|
{
|
|
if (EnumHasAnyFlags(InSupport.Support2, InFlags))
|
|
{
|
|
EnumAddFlags(Capabilities, UnrealCap);
|
|
}
|
|
};
|
|
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::Texture1D, D3D12_FORMAT_SUPPORT1_TEXTURE1D);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::Texture2D, D3D12_FORMAT_SUPPORT1_TEXTURE2D);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::Texture3D, D3D12_FORMAT_SUPPORT1_TEXTURE3D);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::TextureCube, D3D12_FORMAT_SUPPORT1_TEXTURECUBE);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::Buffer, D3D12_FORMAT_SUPPORT1_BUFFER);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::VertexBuffer, D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::IndexBuffer, D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER);
|
|
|
|
if (EnumHasAnyFlags(Capabilities, EPixelFormatCapabilities::AnyTexture))
|
|
{
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::RenderTarget, D3D12_FORMAT_SUPPORT1_RENDER_TARGET);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::DepthStencil, D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL);
|
|
ConvertCap1(FormatSupport, EPixelFormatCapabilities::TextureMipmaps, D3D12_FORMAT_SUPPORT1_MIP);
|
|
ConvertCap1(SRVFormatSupport, EPixelFormatCapabilities::TextureLoad, D3D12_FORMAT_SUPPORT1_SHADER_LOAD);
|
|
ConvertCap1(SRVFormatSupport, EPixelFormatCapabilities::TextureSample | EPixelFormatCapabilities::TextureFilterable, D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE);
|
|
ConvertCap1(SRVFormatSupport, EPixelFormatCapabilities::TextureGather, D3D12_FORMAT_SUPPORT1_SHADER_GATHER);
|
|
ConvertCap2(UAVFormatSupport, EPixelFormatCapabilities::TextureAtomics, D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE);
|
|
ConvertCap1(SRVFormatSupport, EPixelFormatCapabilities::TextureBlendable, D3D12_FORMAT_SUPPORT1_BLENDABLE);
|
|
ConvertCap2(UAVFormatSupport, EPixelFormatCapabilities::TextureStore, D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE);
|
|
}
|
|
|
|
if (EnumHasAnyFlags(Capabilities, EPixelFormatCapabilities::Buffer))
|
|
{
|
|
ConvertCap1(SRVFormatSupport, EPixelFormatCapabilities::BufferLoad, D3D12_FORMAT_SUPPORT1_BUFFER);
|
|
ConvertCap2(UAVFormatSupport, EPixelFormatCapabilities::BufferStore, D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE);
|
|
ConvertCap2(UAVFormatSupport, EPixelFormatCapabilities::BufferAtomics, D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE);
|
|
}
|
|
|
|
ConvertCap1(UAVFormatSupport, EPixelFormatCapabilities::UAV, D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW);
|
|
ConvertCap2(UAVFormatSupport, EPixelFormatCapabilities::TypedUAVLoad, D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD);
|
|
ConvertCap2(UAVFormatSupport, EPixelFormatCapabilities::TypedUAVStore, D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE);
|
|
}
|
|
|
|
PixelFormatInfo.Capabilities = Capabilities;
|
|
}
|
|
|
|
GRHISupportsArrayIndexFromAnyShader = true;
|
|
GRHISupportsStencilRefFromPixelShader = false; // TODO: Sort out DXC shader database SM6.0 usage. DX12 supports this feature, but need to improve DXC support.
|
|
|
|
#if PLATFORM_WINDOWS
|
|
// Check if we're running under GPU capture
|
|
bool bUnderGPUCapture = false;
|
|
|
|
// RenderDoc
|
|
if (D3D12RHI_IsRenderDocPresent(Direct3DDevice))
|
|
{
|
|
// Running under RenderDoc, so enable capturing mode
|
|
bUnderGPUCapture = true;
|
|
}
|
|
|
|
// Intel GPA
|
|
{
|
|
TRefCountPtr<IUnknown> IntelGPA;
|
|
static const IID IntelGPAID = { 0xCCFFEF16, 0x7B69, 0x468F, {0xBC, 0xE3, 0xCD, 0x95, 0x33, 0x69, 0xA3, 0x9A} };
|
|
|
|
if (SUCCEEDED(Direct3DDevice->QueryInterface(IntelGPAID, (void**)(IntelGPA.GetInitReference()))))
|
|
{
|
|
// Running under Intel GPA, so enable capturing mode
|
|
bUnderGPUCapture = true;
|
|
}
|
|
}
|
|
|
|
// AMD RGP profiler
|
|
if (GEmitRgpFrameMarkers && FD3D12DynamicRHI::GetD3DRHI()->GetAmdAgsContext())
|
|
{
|
|
// Running on AMD with RGP profiling enabled, so enable capturing mode
|
|
bUnderGPUCapture = true;
|
|
}
|
|
|
|
#if USE_PIX
|
|
// PIX (note that DXGIGetDebugInterface1 requires Windows 8.1 and up)
|
|
if (FPlatformMisc::VerifyWindowsVersion(6, 3))
|
|
{
|
|
FDXGIGetDebugInterface1 DXGIGetDebugInterface1FnPtr = nullptr;
|
|
|
|
// CreateDXGIFactory2 is only available on Win8.1+, find it if it exists
|
|
HMODULE DxgiDLL = LoadLibraryA("dxgi.dll");
|
|
if (DxgiDLL)
|
|
{
|
|
#pragma warning(push)
|
|
#pragma warning(disable: 4191) // disable the "unsafe conversion from 'FARPROC' to 'blah'" warning
|
|
DXGIGetDebugInterface1FnPtr = (FDXGIGetDebugInterface1)(GetProcAddress(DxgiDLL, "DXGIGetDebugInterface1"));
|
|
#pragma warning(pop)
|
|
FreeLibrary(DxgiDLL);
|
|
}
|
|
|
|
if (DXGIGetDebugInterface1FnPtr)
|
|
{
|
|
IID GraphicsAnalysisID;
|
|
if (SUCCEEDED(IIDFromString(L"{9F251514-9D4D-4902-9D60-18988AB7D4B5}", &GraphicsAnalysisID)))
|
|
{
|
|
TRefCountPtr<IUnknown> GraphicsAnalysis;
|
|
if (SUCCEEDED(DXGIGetDebugInterface1FnPtr(0, GraphicsAnalysisID, (void**)GraphicsAnalysis.GetInitReference())))
|
|
{
|
|
// Running under PIX, so enable capturing mode
|
|
bUnderGPUCapture = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif // USE_PIX
|
|
|
|
if (bUnderGPUCapture)
|
|
{
|
|
FDynamicRHI::EnableIdealGPUCaptureOptions(true);
|
|
}
|
|
#endif // PLATFORM_WINDOWS
|
|
|
|
|
|
const int32 MaximumResourceHeapSize = GetParentAdapter()->GetMaxDescriptorsForHeapType(ERHIDescriptorHeapType::Standard);
|
|
const int32 MaximumSamplerHeapSize = GetParentAdapter()->GetMaxDescriptorsForHeapType(ERHIDescriptorHeapType::Sampler);
|
|
|
|
// This value can be tuned on a per app basis. I.e. most apps will never run into descriptor heap pressure so
|
|
// can make this global heap smaller
|
|
check(GGlobalResourceDescriptorHeapSize <= MaximumResourceHeapSize || MaximumResourceHeapSize < 0);
|
|
check(GGlobalSamplerDescriptorHeapSize <= MaximumSamplerHeapSize);
|
|
|
|
check(GGlobalSamplerHeapSize <= MaximumSamplerHeapSize);
|
|
|
|
check(GOnlineDescriptorHeapSize <= GGlobalResourceDescriptorHeapSize);
|
|
|
|
bool bFullyBindless = false;
|
|
|
|
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
BindlessDescriptorManager.Init();
|
|
|
|
bFullyBindless = IsBindlessFullyEnabled(BindlessDescriptorManager.GetConfiguration());
|
|
#endif
|
|
|
|
DescriptorHeapManager.Init(bFullyBindless ? 0 : GGlobalResourceDescriptorHeapSize, bFullyBindless ? 0 : GGlobalSamplerDescriptorHeapSize);
|
|
|
|
if (!bFullyBindless)
|
|
{
|
|
GlobalSamplerHeap.Init(GGlobalSamplerHeapSize);
|
|
}
|
|
|
|
{
|
|
const uint32 HeapSize = bFullyBindless ? GBindlessOnlineDescriptorHeapSize : GOnlineDescriptorHeapSize;
|
|
const uint32 BlockSize = bFullyBindless ? GBindlessOnlineDescriptorHeapBlockSize : GOnlineDescriptorHeapBlockSize;
|
|
|
|
OnlineDescriptorManager.Init(HeapSize, BlockSize, bFullyBindless);
|
|
}
|
|
|
|
// Make sure we create the default views before the first command context
|
|
CreateDefaultViews();
|
|
|
|
// Needs to be called before creating command contexts
|
|
UpdateConstantBufferPageProperties();
|
|
|
|
UpdateMSAASettings();
|
|
|
|
#if D3D12_RHI_RAYTRACING
|
|
check(RayTracingCompactionRequestHandler == nullptr);
|
|
RayTracingCompactionRequestHandler = new FD3D12RayTracingCompactionRequestHandler(this);
|
|
for (FD3D12Queue& Queue : Queues)
|
|
{
|
|
const size_t DispatchRaysSize = sizeof(D3D12_DISPATCH_RAYS_DESC);
|
|
|
|
const D3D12_RESOURCE_DESC DispatchRaysBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(DispatchRaysSize, D3D12RHI_RESOURCE_FLAG_ALLOW_INDIRECT_BUFFER | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
|
const FRHIBufferCreateDesc CreateDesc =
|
|
FRHIBufferCreateDesc::Create(TEXT("DispatchRaysDescBuffer"), DispatchRaysSize, 0, EBufferUsageFlags::DrawIndirect)
|
|
.SetGPUMask(GetGPUMask())
|
|
.SetInitialState(ERHIAccess::IndirectArgs);
|
|
|
|
Queue.RayTracingDispatchRaysDescBuffer = GetParentAdapter()->CreateRHIBuffer(
|
|
DispatchRaysBufferDesc,
|
|
256,
|
|
CreateDesc,
|
|
ED3D12ResourceStateMode::MultiState,
|
|
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT
|
|
);
|
|
}
|
|
#endif // D3D12_RHI_RAYTRACING
|
|
|
|
check(!ImmediateCommandContext);
|
|
ImmediateCommandContext = FD3D12DynamicRHI::GetD3DRHI()->CreateCommandContext(this, ED3D12QueueType::Direct, true);
|
|
|
|
// Setup diagnostic buffer that contains GPU messages as well as breadcrumb data to to track GPU progress on this command queue (when GPU crash debugging is enabled).
|
|
// The buffer is always allocated and bound to shaders that require it, but breadcrumbs are controlled by UE::RHI::UseGPUCrashBreadcrumbs() and WITH_RHI_BREADCRUMBS.
|
|
for (FD3D12Queue& Queue : Queues)
|
|
{
|
|
Queue.DiagnosticBuffer = MakeUnique<FD3D12DiagnosticBuffer>(Queue);
|
|
}
|
|
}
|
|
|
|
void FD3D12Device::CleanupResources()
|
|
{
|
|
for (FD3D12OfflineDescriptorManager& Manager : OfflineDescriptorManagers)
|
|
{
|
|
Manager.CleanupResources();
|
|
}
|
|
OnlineDescriptorManager.CleanupResources();
|
|
|
|
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
BindlessDescriptorManager.CleanupResources();
|
|
#endif
|
|
|
|
#if D3D12_RHI_RAYTRACING
|
|
CleanupRayTracing();
|
|
#endif
|
|
}
|
|
|
|
void FD3D12Device::CreateDefaultViews()
|
|
{
|
|
{
|
|
D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc{};
|
|
SRVDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
|
|
SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
|
SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
|
SRVDesc.Texture2D.MipLevels = 1;
|
|
SRVDesc.Texture2D.MostDetailedMip = 0;
|
|
SRVDesc.Texture2D.ResourceMinLODClamp = 0.0f;
|
|
|
|
DefaultViews.NullSRV = GetOfflineDescriptorManager(ERHIDescriptorHeapType::Standard).AllocateHeapSlot();
|
|
GetDevice()->CreateShaderResourceView(nullptr, &SRVDesc, DefaultViews.NullSRV);
|
|
}
|
|
|
|
{
|
|
D3D12_RENDER_TARGET_VIEW_DESC RTVDesc{};
|
|
RTVDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
|
|
RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
|
|
RTVDesc.Texture2D.MipSlice = 0;
|
|
|
|
DefaultViews.NullRTV = GetOfflineDescriptorManager(ERHIDescriptorHeapType::RenderTarget).AllocateHeapSlot();
|
|
GetDevice()->CreateRenderTargetView(nullptr, &RTVDesc, DefaultViews.NullRTV);
|
|
}
|
|
|
|
{
|
|
D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc{};
|
|
UAVDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
|
|
UAVDesc.Texture2D.MipSlice = 0;
|
|
|
|
DefaultViews.NullUAV = GetOfflineDescriptorManager(ERHIDescriptorHeapType::Standard).AllocateHeapSlot();
|
|
GetDevice()->CreateUnorderedAccessView(nullptr, nullptr, &UAVDesc, DefaultViews.NullUAV);
|
|
}
|
|
|
|
{
|
|
D3D12_DEPTH_STENCIL_VIEW_DESC DSVDesc{};
|
|
DSVDesc.Format = DXGI_FORMAT_D32_FLOAT;
|
|
DSVDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
|
|
DSVDesc.Texture2D.MipSlice = 0;
|
|
|
|
DefaultViews.NullDSV = GetOfflineDescriptorManager(ERHIDescriptorHeapType::DepthStencil).AllocateHeapSlot();
|
|
GetDevice()->CreateDepthStencilView(nullptr, &DSVDesc, DefaultViews.NullDSV);
|
|
}
|
|
|
|
{
|
|
D3D12_CONSTANT_BUFFER_VIEW_DESC CBVDesc{};
|
|
|
|
DefaultViews.NullCBV = GetOfflineDescriptorManager(ERHIDescriptorHeapType::Standard).AllocateHeapSlot();
|
|
GetDevice()->CreateConstantBufferView(&CBVDesc, DefaultViews.NullCBV);
|
|
}
|
|
|
|
{
|
|
const FSamplerStateInitializerRHI SamplerDesc(SF_Trilinear, AM_Clamp, AM_Clamp, AM_Clamp);
|
|
DefaultViews.DefaultSampler = CreateSampler(SamplerDesc, GetGPUIndex() > 0 ? GetParentAdapter()->GetDevice(0)->DefaultViews.DefaultSampler : nullptr);
|
|
|
|
// The default sampler must have ID=0
|
|
// FD3D12DescriptorCache::SetSamplers relies on this
|
|
check(DefaultViews.DefaultSampler->ID == 0);
|
|
}
|
|
}
|
|
|
|
void FD3D12Device::UpdateConstantBufferPageProperties()
|
|
{
|
|
//In genera, constant buffers should use write-combine memory (i.e. upload heaps) for optimal performance
|
|
bool bForceWriteBackConstantBuffers = false;
|
|
|
|
if (bForceWriteBackConstantBuffers)
|
|
{
|
|
ConstantBufferPageProperties = GetDevice()->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_UPLOAD);
|
|
ConstantBufferPageProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
|
|
}
|
|
else
|
|
{
|
|
ConstantBufferPageProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
|
|
}
|
|
}
|
|
|
|
void FD3D12Device::UpdateMSAASettings()
|
|
{
|
|
check(DX_MAX_MSAA_COUNT == 8);
|
|
|
|
// quality levels are only needed for CSAA which we cannot use with custom resolves
|
|
|
|
// 0xffffffff means not available
|
|
AvailableMSAAQualities[0] = 0xffffffff;
|
|
AvailableMSAAQualities[1] = 0xffffffff;
|
|
AvailableMSAAQualities[2] = 0;
|
|
AvailableMSAAQualities[3] = 0xffffffff;
|
|
AvailableMSAAQualities[4] = 0;
|
|
AvailableMSAAQualities[5] = 0xffffffff;
|
|
AvailableMSAAQualities[6] = 0xffffffff;
|
|
AvailableMSAAQualities[7] = 0xffffffff;
|
|
AvailableMSAAQualities[8] = 0;
|
|
}
|
|
|
|
void FD3D12Device::BlockUntilIdle()
|
|
{
|
|
// Submit a new sync point to each queue
|
|
TArray<FD3D12Payload*> Payloads;
|
|
Payloads.Reserve(int32(ED3D12QueueType::Count));
|
|
|
|
TArray<FD3D12SyncPointRef, TInlineAllocator<(uint32)ED3D12QueueType::Count>> SyncPoints;
|
|
|
|
for (uint32 QueueTypeIndex = 0; QueueTypeIndex < (uint32)ED3D12QueueType::Count; ++QueueTypeIndex)
|
|
{
|
|
FD3D12SyncPointRef SyncPoint = FD3D12SyncPoint::Create(ED3D12SyncPointType::GPUAndCPU);
|
|
|
|
FD3D12Payload* Payload = new FD3D12Payload(GetQueue((ED3D12QueueType)QueueTypeIndex));
|
|
Payload->SyncPointsToSignal.Add(SyncPoint);
|
|
Payload->bAlwaysSignal = true;
|
|
|
|
Payloads.Add(Payload);
|
|
SyncPoints.Add(SyncPoint);
|
|
}
|
|
|
|
FD3D12DynamicRHI::GetD3DRHI()->SubmitPayloads(MoveTemp(Payloads));
|
|
|
|
// Block this thread until the sync points have signaled.
|
|
for (FD3D12SyncPointRef& SyncPoint : SyncPoints)
|
|
{
|
|
SyncPoint->Wait();
|
|
}
|
|
}
|
|
|
|
// Queries the allocation size/alignment for a resource description directly from the API,
// bypassing the cache. Depending on build configuration and the description, the query goes
// through the Intel extension, GetResourceAllocationInfo3 (uncompressed UAV support) or the
// plain ID3D12Device::GetResourceAllocationInfo; only the last path logs on failure.
D3D12_RESOURCE_ALLOCATION_INFO FD3D12Device::GetResourceAllocationInfoUncached(const FD3D12ResourceDesc& InDesc)
{
#if INTEL_EXTENSIONS
	if (InDesc.bRequires64BitAtomicSupport && IsRHIDeviceIntel() && GDX12INTCAtomicUInt64Emulation)
	{
		// The Intel description points at a mutable copy of the resource description.
		FD3D12ResourceDesc DescCopy = InDesc;

		INTC_D3D12_RESOURCE_DESC_0001 IntelDesc{};
		IntelDesc.pD3D12Desc = &DescCopy;
		IntelDesc.EmulatedTyped64bitAtomics = true;

		return INTC_D3D12_GetResourceAllocationInfo(FD3D12DynamicRHI::GetD3DRHI()->GetIntelExtensionContext(), 0, 1, &IntelDesc);
	}
#endif // INTEL_EXTENSIONS

#if D3D12RHI_SUPPORTS_UNCOMPRESSED_UAV
	if (InDesc.SupportsUncompressedUAV())
	{
		// Convert the desc to the version required by GetResourceAllocationInfo3
		const CD3DX12_RESOURCE_DESC1 Desc1(InDesc);

		const TArray<DXGI_FORMAT, TInlineAllocator<4>> CastableFormats = InDesc.GetCastableFormats();
		const UINT32 NumCastableFormats = CastableFormats.Num();

		// One castable-format list for the single resource description being queried.
		const DXGI_FORMAT* const FormatLists[] = { CastableFormats.GetData() };

		D3D12_RESOURCE_ALLOCATION_INFO1* NoExtraAllocationInfo = nullptr;
		return GetParentAdapter()->GetD3DDevice12()->GetResourceAllocationInfo3(0, 1, &Desc1, &NumCastableFormats, FormatLists, NoExtraAllocationInfo);
	}
#endif // D3D12RHI_SUPPORTS_UNCOMPRESSED_UAV

	const D3D12_RESOURCE_ALLOCATION_INFO AllocationInfo = GetDevice()->GetResourceAllocationInfo(0, 1, &InDesc);
	if (AllocationInfo.SizeInBytes == UINT64_MAX)
	{
		// The description provided caused an error per the docs. This will almost certainly crash outside this fn.
		UE_LOG(LogD3D12RHI, Error, TEXT("D3D12 GetResourceAllocationInfo failed - likely a resource was requested that has invalid allocation info (e.g. is an invalid texture size)"));
		UE_LOG(LogD3D12RHI, Error, TEXT(" W %llu H %d depth %d mips %d pf %d"), InDesc.Width, InDesc.Height, InDesc.DepthOrArraySize, InDesc.MipLevels, InDesc.PixelFormat);
	}

	return AllocationInfo;
}
|
|
|
|
// Cached variant of GetResourceAllocationInfoUncached. Results are stored in
// ResourceAllocationInfoMap, keyed by a CityHash64 of the raw bytes of the description,
// and guarded by ResourceAllocationInfoMapMutex.
D3D12_RESOURCE_ALLOCATION_INFO FD3D12Device::GetResourceAllocationInfo(const FD3D12ResourceDesc& InDesc)
{
	const uint64 DescHash = CityHash64((const char*)&InDesc, sizeof(FD3D12ResourceDesc));

	D3D12_RESOURCE_ALLOCATION_INFO AllocationInfo;

	// Fast path: by default there'll be more threads trying to read this than to write it.
	ResourceAllocationInfoMapMutex.ReadLock();
	const D3D12_RESOURCE_ALLOCATION_INFO* const CachedEntry = ResourceAllocationInfoMap.Find(DescHash);
	const bool bCacheHit = (CachedEntry != nullptr);
	if (bCacheHit)
	{
		// The copy must happen while the lock is still held: CachedEntry points into the map
		// storage, which can be reallocated by a concurrent insertion.
		AllocationInfo = *CachedEntry;
	}
	ResourceAllocationInfoMapMutex.ReadUnlock();

	if (bCacheHit)
	{
		return AllocationInfo;
	}

	// Slow path: query without holding any lock, then publish the result.
	AllocationInfo = GetResourceAllocationInfoUncached(InDesc);

	ResourceAllocationInfoMapMutex.WriteLock();
	// Look again under the write lock; another thread could have added the entry already.
	if (ResourceAllocationInfoMap.Find(DescHash) == nullptr)
	{
		ResourceAllocationInfoMap.Add(DescHash, AllocationInfo);
	}
	ResourceAllocationInfoMapMutex.WriteUnlock();

	return AllocationInfo;
}
|
|
|
|
// Returns a context for the given queue type, reusing one from the queue's object pool
// when available and creating a new one otherwise. Never returns null.
FD3D12ContextCommon* FD3D12Device::ObtainContext(ED3D12QueueType QueueType)
{
	FD3D12ContextCommon* Context = Queues[(uint32)QueueType].ObjectPool.Contexts.Pop();
	if (!Context)
	{
		// Pool is empty: create a context of the kind appropriate for the queue.
		switch (QueueType)
		{
		default:
			// Unknown queue types assert and are then treated like the direct queue.
			checkNoEntry();
			[[fallthrough]];

		case ED3D12QueueType::Direct:
		case ED3D12QueueType::Async:
			Context = FD3D12DynamicRHI::GetD3DRHI()->CreateCommandContext(this, QueueType, false);
			break;

		case ED3D12QueueType::Copy:
			Context = new FD3D12ContextCopy(this);
			break;
		}
	}

	check(Context);
	return Context;
}
|
|
|
|
// Hands a context back to the object pool of the queue it belongs to.
// The context must be non-null and must not be open.
void FD3D12Device::ReleaseContext(FD3D12ContextCommon* Context)
{
	check(Context && !Context->IsOpen());

	FD3D12Queue& OwningQueue = Queues[(uint32)Context->QueueType];
	OwningQueue.ObjectPool.Contexts.Push(Context);
}
|
|
|
|
// Returns a command allocator for the given queue type: a pooled one if the queue's object
// pool has any, otherwise a newly created one.
FD3D12CommandAllocator* FD3D12Device::ObtainCommandAllocator(ED3D12QueueType QueueType)
{
	if (FD3D12CommandAllocator* PooledAllocator = Queues[(uint32)QueueType].ObjectPool.Allocators.Pop())
	{
		return PooledAllocator;
	}

	FD3D12CommandAllocator* NewAllocator = new FD3D12CommandAllocator(this, QueueType);
	check(NewAllocator);
	return NewAllocator;
}
|
|
|
|
// Resets a command allocator and returns it to the object pool of its queue type.
void FD3D12Device::ReleaseCommandAllocator(FD3D12CommandAllocator* Allocator)
{
	check(Allocator);

	// Reset before pooling so the allocator is handed out in a reset state.
	Allocator->Reset();

	FD3D12Queue& OwningQueue = Queues[(uint32)Allocator->QueueType];
	OwningQueue.ObjectPool.Allocators.Push(Allocator);
}
|
|
|
|
// Returns a command list bound to the given allocator and query allocators. A pooled list
// (from the pool matching the allocator's queue type) is reset with the new arguments;
// if none is pooled, a new list is created. The allocator must belong to this device.
FD3D12CommandList* FD3D12Device::ObtainCommandList(FD3D12CommandAllocator* CommandAllocator, FD3D12QueryAllocator* TimestampAllocator, FD3D12QueryAllocator* PipelineStatsAllocator)
{
	check(CommandAllocator->Device == this);

	FD3D12CommandList* PooledList = Queues[(uint32)CommandAllocator->QueueType].ObjectPool.Lists.Pop();
	if (PooledList)
	{
		PooledList->Reset(CommandAllocator, TimestampAllocator, PipelineStatsAllocator);
		return PooledList;
	}

	FD3D12CommandList* NewList = new FD3D12CommandList(CommandAllocator, TimestampAllocator, PipelineStatsAllocator);
	check(NewList);
	return NewList;
}
|
|
|
|
// Returns a command list to the object pool of its queue type.
void FD3D12Device::ReleaseCommandList(FD3D12CommandList* CommandList)
{
	check(CommandList);

	FD3D12Queue& OwningQueue = Queues[(uint32)CommandList->QueueType];
	OwningQueue.ObjectPool.Lists.Push(CommandList);
}
|
|
|
|
// Returns a query heap suitable for the given queue and query type, reusing a pooled heap
// when possible. Returns null when the combination is unsupported:
//  - timestamp queries on the copy queue without adapter support,
//  - pipeline statistics on a non-direct queue or when compiled out,
//  - any unknown query type (which also asserts).
TRefCountPtr<FD3D12QueryHeap> FD3D12Device::ObtainQueryHeap(ED3D12QueueType QueueType, D3D12_QUERY_TYPE QueryType)
{
	D3D12_QUERY_HEAP_TYPE HeapType;

	switch (QueryType)
	{
	case D3D12_QUERY_TYPE_OCCLUSION:
		HeapType = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
		break;

	case D3D12_QUERY_TYPE_TIMESTAMP:
		if (QueueType != ED3D12QueueType::Copy)
		{
			HeapType = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
			break;
		}

		// Support for copy queue timestamps is driver dependent.
		if (!GetParentAdapter()->AreCopyQueueTimestampQueriesSupported())
		{
			return nullptr; // Not supported
		}

		HeapType = D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP;
		break;

	case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
#if D3D12RHI_ENABLE_PIPELINE_STATISTICS
		// Only graphics/direct queues support pipeline statistics
		if (QueueType != ED3D12QueueType::Direct)
		{
			return nullptr;
		}

		HeapType = D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
		break;
#else
		return nullptr;
#endif

	default:
		checkNoEntry();
		return nullptr;
	}

	// Prefer a pooled heap of the right heap type; allocate a new one only if the pool is empty.
	FD3D12QueryHeap* Heap = QueryHeapPool[GetQueryHeapPoolIndex(HeapType)].Pop();
	if (Heap == nullptr)
	{
		Heap = new FD3D12QueryHeap(this, QueryType, HeapType);
	}

	check(Heap->QueryType == QueryType);
	check(Heap->HeapType == HeapType);

	return Heap;
}
|
|
|
|
// Returns a query heap to the pool that matches its heap type.
void FD3D12Device::ReleaseQueryHeap(FD3D12QueryHeap* QueryHeap)
{
	check(QueryHeap);

	const uint32 PoolIndex = GetQueryHeapPoolIndex(QueryHeap->HeapType);
	QueryHeapPool[PoolIndex].Push(QueryHeap);
}
|
|
|
|
// Returns the GPU timestamp frequency reported by the command queue of the given type.
// Must not be called for the copy queue unless the adapter supports copy queue timestamp queries.
uint64 FD3D12Device::GetTimestampFrequency(ED3D12QueueType QueueType)
{
	check(QueueType != ED3D12QueueType::Copy || GetParentAdapter()->AreCopyQueueTimestampQueriesSupported());

	FD3D12Queue& TargetQueue = Queues[(uint32)QueueType];

	uint64 TimestampFrequency;
	VERIFYD3D12RESULT(TargetQueue.D3DCommandQueue->GetTimestampFrequency(&TimestampFrequency));
	return TimestampFrequency;
}
|
|
|
|
#if (RHI_NEW_GPU_PROFILER == 0)
|
|
// Samples a correlated GPU/CPU timestamp pair from the given queue and converts both values
// to microseconds using the queue's timestamp frequency and the performance counter frequency.
// Must not be called for the copy queue unless the adapter supports copy queue timestamp queries.
FGPUTimingCalibrationTimestamp FD3D12Device::GetCalibrationTimestamp(ED3D12QueueType QueueType)
{
	check(QueueType != ED3D12QueueType::Copy || GetParentAdapter()->AreCopyQueueTimestampQueriesSupported());

	TRACE_CPUPROFILER_EVENT_SCOPE(D3D12GetCalibrationTimestamp);

	// Ticks-per-second for each clock.
	const uint64 GPUTicksPerSecond = GetTimestampFrequency(QueueType);

	LARGE_INTEGER CPUTicksPerSecond;
	QueryPerformanceFrequency(&CPUTicksPerSecond);

	// Correlated raw tick values.
	uint64 GPUTicks, CPUTicks;
	VERIFYD3D12RESULT(Queues[(uint32)QueueType].D3DCommandQueue->GetClockCalibration(&GPUTicks, &CPUTicks));

	// Microseconds per tick, computed in double precision.
	const double GPUMicrosecondsPerTick = 1e6 / GPUTicksPerSecond;
	const double CPUMicrosecondsPerTick = 1e6 / CPUTicksPerSecond.QuadPart;

	FGPUTimingCalibrationTimestamp Calibration = {};
	Calibration.GPUMicroseconds = uint64(GPUTicks * GPUMicrosecondsPerTick);
	Calibration.CPUMicroseconds = uint64(CPUTicks * CPUMicrosecondsPerTick);
	return Calibration;
}
|
|
#endif // (RHI_NEW_GPU_PROFILER == 0)
|
|
|
|
void FD3D12Device::InitExplicitDescriptorHeap()
|
|
{
|
|
check(ExplicitDescriptorHeapCache == nullptr);
|
|
ExplicitDescriptorHeapCache = new FD3D12ExplicitDescriptorHeapCache(this);
|
|
|
|
// Note: ExplicitDescriptorHeapCache is destroyed in ~FD3D12Device, after all deferred deletion is processed
|
|
}
|
|
|
|
void FD3D12Device::DestroyExplicitDescriptorCache()
|
|
{
|
|
delete ExplicitDescriptorHeapCache;
|
|
ExplicitDescriptorHeapCache = nullptr;
|
|
}
|
|
|