Files
UnrealEngine/Engine/Source/Runtime/D3D12RHI/Private/D3D12Submission.h
2025-05-18 13:04:45 +08:00

290 lines
7.2 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "Async/TaskGraphInterfaces.h"
#include "D3D12BindlessDescriptors.h"
#include "D3D12Query.h"
#include "D3D12Queue.h"
#include "D3D12RHICommon.h"
#include "Templates/Function.h"
#include "Templates/RefCounting.h"
#include "RHIBreadcrumbs.h"
#include "GPUProfiler.h"
enum class ED3D12QueueType;
class FD3D12CommandAllocator;
class FD3D12CommandList;
class FD3D12DynamicRHI;
class FD3D12QueryHeap;
class FD3D12Queue;
class FD3D12Timing;
class FD3D12Buffer;
class FD3D12Resource;
class FD3D12SyncPoint;
using FD3D12SyncPointRef = TRefCountPtr<FD3D12SyncPoint>;
// Selects whether a sync point can be observed from the CPU as well as the GPU.
enum class ED3D12SyncPointType
{
	// GPU-only: no FGraphEvent is attached to the sync point, so completion cannot be
	// reported to the CPU (neither IsComplete() nor Wait() may be used).
	GPUOnly,

	// GPU and CPU: an FGraphEvent is attached to the sync point. The CPU can poll for
	// completion with IsComplete(), or block until completion with Wait().
	GPUAndCPU,
};
// Fence state used by the device queues to manage GPU completion.
struct FD3D12Fence
{
	// Queue that owns this fence. The pointer itself is immutable after construction.
	FD3D12Queue* const OwnerQueue;

	// The underlying D3D12 fence object (ref-counted).
	TRefCountPtr<ID3D12Fence> D3DFence;

	// Starts at 1.
	// NOTE(review): presumably the value the next completion signal will use - confirm against the queue code.
	uint64 NextCompletionValue { 1 };

	// Starts at 0 and is atomic.
	// NOTE(review): presumably the most recent value signaled on D3DFence - confirm against the submission code.
	std::atomic<uint64> LastSignaledValue { 0 };

	// NOTE(review): looks like a flag tracking whether an interrupt wait is pending on this fence - confirm.
	bool bInterruptAwaited { false };

	// Binds the fence to its owning queue. D3DFence is left null here.
	FD3D12Fence(FD3D12Queue* InOwnerQueue)
		: OwnerQueue(InOwnerQueue)
	{}
};
// Pairs a fence with a concrete value on that fence. Used by FD3D12SyncPoint and the
// submission thread to fix up signaled fence values at the end-of-pipe.
struct FD3D12ResolvedFence
{
	// The fence the value refers to. Held by reference, so the fence must outlive this object.
	FD3D12Fence& Fence;

	// The fence value; defaults to 0 but is always overwritten by the constructor.
	uint64 Value { 0 };

	FD3D12ResolvedFence(FD3D12Fence& InFence, uint64 InValue)
		: Fence(InFence)
		, Value(InValue)
	{}
};
//
// A sync point is a logical point on a GPU queue's timeline which other queues, or the CPU, can wait on.
// The RHI uses sync points to abstract the underlying D3D12 fences: the submission thread owns the real
// fences and their signaled values, and reports completion to the relevant sync points via an FGraphEvent.
//
// Sync points are one-shot. Each one represents a single timeline point and is released after use through
// ref-counting. Call FD3D12SyncPoint::Create() to make one, and keep it alive with an FD3D12SyncPointRef.
//
class FD3D12SyncPoint final : public FThreadSafeRefCountedObject
{
	friend FD3D12DynamicRHI;
	friend FD3D12Queue;

	// Free-list of recycled allocations, shared by all sync points (see operator new / operator delete below).
	static TLockFreePointerListUnordered<void, PLATFORM_CACHE_LINE_SIZE> MemoryPool;

	// Sync points are neither copyable nor movable.
	FD3D12SyncPoint(FD3D12SyncPoint const&) = delete;
	FD3D12SyncPoint(FD3D12SyncPoint&&) = delete;

	// Unset on construction. Accessed by the friend classes declared above.
	TOptional<FD3D12ResolvedFence> ResolvedFence;

	// Only created for GPUAndCPU sync points; null for GPUOnly ones.
	FGraphEventRef GraphEvent;

	// Private: instances are made through Create().
	FD3D12SyncPoint(ED3D12SyncPointType Type)
	{
		const bool bNeedsCPUEvent = (Type == ED3D12SyncPointType::GPUAndCPU);
		if (bNeedsCPUEvent)
		{
			GraphEvent = FGraphEvent::CreateGraphEvent();
		}
	}

public:
	// Allocates a new sync point of the requested type and returns a counted reference to it.
	static FD3D12SyncPointRef Create(ED3D12SyncPointType Type)
	{
		LLM_SCOPE_BYNAME(TEXT("RHIMisc/CreateSyncPoint"));
		return new FD3D12SyncPoint(Type);
	}

	// Polls the CPU event for completion. Asserts if the sync point has no CPU event (i.e. it is GPUOnly).
	bool IsComplete() const
	{
		checkf(GraphEvent, TEXT("This sync point was not created with a CPU event. Cannot check completion on the CPU."));
		return GraphEvent->IsComplete();
	}

	// Declared here, defined out of line.
	void Wait() const;

	// Returns the CPU event. Asserts if the sync point has no CPU event (i.e. it is GPUOnly).
	FGraphEvent* GetGraphEvent() const
	{
		checkf(GraphEvent, TEXT("This sync point was not created with a CPU event."));
		return GraphEvent;
	}

	// The type is not stored; it is derived from whether a CPU event was created.
	ED3D12SyncPointType GetType() const
	{
		if (GraphEvent != nullptr)
		{
			return ED3D12SyncPointType::GPUAndCPU;
		}

		return ED3D12SyncPointType::GPUOnly;
	}

	// Reuses a block from MemoryPool when one is available, otherwise allocates a fresh one.
	void* operator new(size_t Size)
	{
		// Pool blocks are all sizeof(FD3D12SyncPoint), so only exact-size requests are valid.
		check(Size == sizeof(FD3D12SyncPoint));

		if (void* Recycled = MemoryPool.Pop())
		{
			return Recycled;
		}

		return FMemory::Malloc(sizeof(FD3D12SyncPoint), alignof(FD3D12SyncPoint));
	}

	// Hands the block back to MemoryPool for reuse instead of freeing it.
	void operator delete(void* Pointer)
	{
		MemoryPool.Push(Pointer);
	}
};
// Pairs a resource with a commit size in bytes.
// NOTE(review): presumably describes a pending commit operation on a reserved resource - confirm against the consumer.
struct FD3D12CommitReservedResourceDesc
{
	// Target resource; null by default.
	FD3D12Resource* Resource { nullptr };

	// Size to commit, in bytes; zero by default.
	uint64 CommitSizeInBytes { 0 };
};
// Groups query locations (by kind) and per-heap query ranges into a single object.
struct FD3D12BatchedPayloadObjects
{
	TArray<FD3D12QueryLocation> TimestampQueries;
	TArray<FD3D12QueryLocation> OcclusionQueries;
	TArray<FD3D12QueryLocation> PipelineStatsQueries;

	// Query ranges, keyed by the query heap they belong to.
	TMap<TRefCountPtr<FD3D12QueryHeap>, TArray<FD3D12QueryRange>> QueryRanges;

	// Returns true only when every container in this struct holds no elements.
	bool IsEmpty() const
	{
		const bool bHasAnyQueries =
			   TimestampQueries.Num() != 0
			|| OcclusionQueries.Num() != 0
			|| PipelineStatsQueries.Num() != 0;

		const bool bHasAnyRanges = QueryRanges.Num() != 0;

		return !(bHasAnyQueries || bHasAnyRanges);
	}
};
// Layout workaround: a base class containing nothing but the virtual destructor (and therefore
// the vtable pointer). Deriving from it avoids 8 bytes of padding after the vtable.
struct FD3D12PayloadBaseFixLayout
{
	virtual ~FD3D12PayloadBaseFixLayout() = default;
};
// A single unit of work (specific to a single GPU node and queue type) to be processed by the submission thread.
// The members are grouped below by the phase they belong to: wait, update reserved resources, execute, signal, cleanup.
struct FD3D12PayloadBase : public FD3D12PayloadBaseFixLayout
{
	// Used to signal FD3D12ManualFence instances on the submission thread.
	struct FManualFence
	{
		// The D3D fence to signal
		TRefCountPtr<ID3D12Fence> Fence;

		// The value to signal the fence with.
		// Zero-initialized so that a default-constructed FManualFence never carries an indeterminate value.
		uint64 Value = 0;

		FManualFence() = default;

		FManualFence(TRefCountPtr<ID3D12Fence>&& Fence, uint64 Value)
			: Fence(MoveTemp(Fence))
			, Value(Value)
		{}
	};

	// Constants
	FD3D12Queue& Queue;

	// Wait
	struct : public TArray<FD3D12SyncPointRef>
	{
		// Used to pause / resume iteration of the sync point array on the
		// submission thread when we find a sync point that is unresolved.
		int32 Index = 0;
	} SyncPointsToWait;

	// A fence / value pair to wait on.
	struct FQueueFence
	{
		FD3D12Fence& Fence;
		uint64 Value;
	};

	TArray<FQueueFence, TInlineAllocator<GD3D12MaxNumQueues>> QueueFencesToWait;
	TArray<FManualFence> ManualFencesToWait;

	// Declared here, defined out of line.
	void AddQueueFenceWait(FD3D12Fence& Fence, uint64 Value);

	// UpdateReservedResources
	TArray<FD3D12CommitReservedResourceDesc> ReservedResourcesToCommit;

	// Flags.
	bool bAlwaysSignal = false;
	std::atomic<bool> bSubmitted { false };

	// Used by RHIRunOnQueue
	TFunction<void(ID3D12CommandQueue*)> PreExecuteCallback;

	// Execute
	TArray<FD3D12CommandList*> CommandListsToExecute;

	// Signal
	TArray<FManualFence> ManualFencesToSignal;
	TArray<FD3D12SyncPointRef> SyncPointsToSignal;
	uint64 CompletionFenceValue = 0;
	FGraphEventRef SubmissionEvent;
	TOptional<uint64> SubmissionTime;
	TOptional<FD3D12Timing*> Timing;

	// Cleanup
	TArray<FD3D12CommandAllocator*> AllocatorsToRelease;
	FD3D12BatchedPayloadObjects BatchedObjects;

#if WITH_RHI_BREADCRUMBS
	FRHIBreadcrumbRange BreadcrumbRange {};
	TSharedPtr<FRHIBreadcrumbAllocatorArray> BreadcrumbAllocators {};
#endif

#if RHI_NEW_GPU_PROFILER
	UE::RHI::GPUProfiler::FEventStream EventStream;
	TOptional<UE::RHI::GPUProfiler::FEvent::FFrameBoundary> EndFrameEvent;
#endif

	virtual ~FD3D12PayloadBase();

	// Declared here, defined out of line.
	virtual void PreExecute();

	// True when a pre-execute callback has been bound.
	virtual bool HasPreExecuteWork() const
	{
		return PreExecuteCallback != nullptr;
	}

	// True when the payload is flagged to always signal, has sync points to signal, or has pre-execute work.
	virtual bool RequiresQueueFenceSignal() const
	{
		return bAlwaysSignal || SyncPointsToSignal.Num() > 0 || HasPreExecuteWork();
	}

	// True when there is at least one manual fence or queue fence to wait on.
	// Note that SyncPointsToWait is not consulted here.
	virtual bool HasWaitWork() const
	{
		return ManualFencesToWait.Num() > 0 || QueueFencesToWait.Num() > 0;
	}

	// True when there is at least one reserved resource commit pending.
	virtual bool HasUpdateReservedResourcesWork() const
	{
		return ReservedResourcesToCommit.Num() > 0;
	}

	// True when a queue fence signal is required, there are manual fences to signal, a submission
	// event is set, or (with the new GPU profiler enabled) an end-of-frame event is set.
	virtual bool HasSignalWork() const
	{
		return RequiresQueueFenceSignal() || ManualFencesToSignal.Num() > 0 || SubmissionEvent != nullptr
#if RHI_NEW_GPU_PROFILER
			|| EndFrameEvent.IsSet()
#endif
			;
	}

protected:
	// Protected: only derived payload types construct this base. Defined out of line.
	FD3D12PayloadBase(FD3D12Queue& Queue);
};
#include COMPILED_PLATFORM_HEADER(D3D12Submission.h)