// Copyright Epic Games, Inc. All Rights Reserved. #pragma once #include "Async/TaskGraphInterfaces.h" #include "D3D12BindlessDescriptors.h" #include "D3D12Query.h" #include "D3D12Queue.h" #include "D3D12RHICommon.h" #include "Templates/Function.h" #include "Templates/RefCounting.h" #include "RHIBreadcrumbs.h" #include "GPUProfiler.h" enum class ED3D12QueueType; class FD3D12CommandAllocator; class FD3D12CommandList; class FD3D12DynamicRHI; class FD3D12QueryHeap; class FD3D12Queue; class FD3D12Timing; class FD3D12Buffer; class FD3D12Resource; class FD3D12SyncPoint; using FD3D12SyncPointRef = TRefCountPtr; enum class ED3D12SyncPointType { // Sync points of this type do not include an FGraphEvent, so cannot // report completion to the CPU (via either IsComplete() or Wait()) GPUOnly, // Sync points of this type include an FGraphEvent. The IsComplete() and Wait() functions // can be used to poll for completion from the CPU, or block the CPU, respectively. GPUAndCPU, }; // Fence type used by the device queues to manage GPU completion struct FD3D12Fence { FD3D12Queue* const OwnerQueue; TRefCountPtr D3DFence; uint64 NextCompletionValue = 1; std::atomic LastSignaledValue = 0; bool bInterruptAwaited = false; FD3D12Fence(FD3D12Queue* OwnerQueue) : OwnerQueue(OwnerQueue) {} }; // Used by FD3D12SyncPoint and the submission thread to fix up signaled fence values at the end-of-pipe struct FD3D12ResolvedFence { FD3D12Fence& Fence; uint64 Value = 0; FD3D12ResolvedFence(FD3D12Fence& Fence, uint64 Value) : Fence(Fence) , Value(Value) {} }; // // A sync point is a logical point on a GPU queue's timeline that can be awaited by other queues, or the CPU. // These are used throughout the RHI as a way to abstract the underlying D3D12 fences. The submission thread // manages the underlying fences and signaled values, and reports completion to the relevant sync points via // an FGraphEvent. // // Sync points are one-shot, meaning they represent a single timeline point, and are released after use, via ref-counting. // Use FD3D12SyncPoint::Create() to make a new sync point and hold a reference to it via a FD3D12SyncPointRef object. // class FD3D12SyncPoint final : public FThreadSafeRefCountedObject { friend FD3D12DynamicRHI; friend FD3D12Queue; static TLockFreePointerListUnordered MemoryPool; // No copying or moving FD3D12SyncPoint(FD3D12SyncPoint const&) = delete; FD3D12SyncPoint(FD3D12SyncPoint&&) = delete; TOptional ResolvedFence; FGraphEventRef GraphEvent; FD3D12SyncPoint(ED3D12SyncPointType Type) { if (Type == ED3D12SyncPointType::GPUAndCPU) { GraphEvent = FGraphEvent::CreateGraphEvent(); } } public: static FD3D12SyncPointRef Create(ED3D12SyncPointType Type) { LLM_SCOPE_BYNAME(TEXT("RHIMisc/CreateSyncPoint")); return new FD3D12SyncPoint(Type); } bool IsComplete() const { checkf(GraphEvent, TEXT("This sync point was not created with a CPU event. Cannot check completion on the CPU.")); return GraphEvent->IsComplete(); } void Wait() const; FGraphEvent* GetGraphEvent() const { checkf(GraphEvent, TEXT("This sync point was not created with a CPU event.")); return GraphEvent; } ED3D12SyncPointType GetType() const { return GraphEvent != nullptr ? ED3D12SyncPointType::GPUAndCPU : ED3D12SyncPointType::GPUOnly; } void* operator new(size_t Size) { check(Size == sizeof(FD3D12SyncPoint)); void* Memory = MemoryPool.Pop(); if (!Memory) { Memory = FMemory::Malloc(sizeof(FD3D12SyncPoint), alignof(FD3D12SyncPoint)); } return Memory; } void operator delete(void* Pointer) { MemoryPool.Push(Pointer); } }; struct FD3D12CommitReservedResourceDesc { FD3D12Resource* Resource = nullptr; uint64 CommitSizeInBytes = 0; }; struct FD3D12BatchedPayloadObjects { TArray TimestampQueries; TArray OcclusionQueries; TArray PipelineStatsQueries; TMap, TArray> QueryRanges; bool IsEmpty() const { return TimestampQueries .Num() == 0 && OcclusionQueries .Num() == 0 && PipelineStatsQueries.Num() == 0 && QueryRanges .Num() == 0 ; } }; // Hacky base class to avoid 8 bytes of padding after the vtable struct FD3D12PayloadBaseFixLayout { virtual ~FD3D12PayloadBaseFixLayout() = default; }; // A single unit of work (specific to a single GPU node and queue type) to be processed by the submission thread. struct FD3D12PayloadBase : public FD3D12PayloadBaseFixLayout { // Used to signal FD3D12ManualFence instances on the submission thread. struct FManualFence { // The D3D fence to signal TRefCountPtr Fence; // The value to signal the fence with. uint64 Value; FManualFence() = default; FManualFence(TRefCountPtr&& Fence, uint64 Value) : Fence(MoveTemp(Fence)) , Value(Value) {} }; // Constants FD3D12Queue& Queue; // Wait struct : public TArray { // Used to pause / resume iteration of the sync point array on the // submission thread when we find a sync point that is unresolved. int32 Index = 0; } SyncPointsToWait; struct FQueueFence { FD3D12Fence& Fence; uint64 Value; }; TArray> QueueFencesToWait; TArray ManualFencesToWait; void AddQueueFenceWait(FD3D12Fence& Fence, uint64 Value); // UpdateReservedResources TArray ReservedResourcesToCommit; // Flags. bool bAlwaysSignal = false; std::atomic bSubmitted { false }; // Used by RHIRunOnQueue TFunction PreExecuteCallback; // Execute TArray CommandListsToExecute; // Signal TArray ManualFencesToSignal; TArray SyncPointsToSignal; uint64 CompletionFenceValue = 0; FGraphEventRef SubmissionEvent; TOptional SubmissionTime; TOptional Timing; // Cleanup TArray AllocatorsToRelease; FD3D12BatchedPayloadObjects BatchedObjects; #if WITH_RHI_BREADCRUMBS FRHIBreadcrumbRange BreadcrumbRange {}; TSharedPtr BreadcrumbAllocators {}; #endif #if RHI_NEW_GPU_PROFILER UE::RHI::GPUProfiler::FEventStream EventStream; TOptional EndFrameEvent; #endif virtual ~FD3D12PayloadBase(); virtual void PreExecute(); virtual bool HasPreExecuteWork() const { return PreExecuteCallback != nullptr; } virtual bool RequiresQueueFenceSignal() const { return bAlwaysSignal || SyncPointsToSignal.Num() > 0 || HasPreExecuteWork(); } virtual bool HasWaitWork() const { return ManualFencesToWait.Num() > 0 || QueueFencesToWait.Num() > 0; } virtual bool HasUpdateReservedResourcesWork() const { return ReservedResourcesToCommit.Num() > 0; } virtual bool HasSignalWork() const { return RequiresQueueFenceSignal() || ManualFencesToSignal.Num() > 0 || SubmissionEvent != nullptr #if RHI_NEW_GPU_PROFILER || EndFrameEvent.IsSet() #endif ; } protected: FD3D12PayloadBase(FD3D12Queue& Queue); }; #include COMPILED_PLATFORM_HEADER(D3D12Submission.h)