Files
UnrealEngine/Engine/Source/Runtime/RHI/Public/GPUProfiler.h
2025-05-18 13:04:45 +08:00

972 lines
23 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
GPUProfiler.h: Hierarchical GPU Profiler.
=============================================================================*/
#pragma once
#include "CoreMinimal.h"
#include "Misc/TVariant.h"
#include "ProfilingDebugging/CsvProfiler.h"
#include "RHIBreadcrumbs.h"
#include "RHIStats.h"
#include "Containers/AnsiString.h"
#include "Containers/SpscQueue.h"
#include "Containers/StaticArray.h"
#if RHI_NEW_GPU_PROFILER
namespace UE::RHI::GPUProfiler
{
// Parameterless multicast delegate type, plus the single exported instance of it.
// The instance is only declared here (extern); its definition lives in another translation unit.
// NOTE(review): presumably broadcast when a GPU profile capture is requested - confirm against the defining .cpp.
DECLARE_MULTICAST_DELEGATE(FRHIOnProfileGPU);
extern RHI_API FRHIOnProfileGPU OnProfileGPU;
// Identifies one GPU queue as a (type, GPU index, queue index) triple packed into 32 bits.
struct FQueue
{
	enum class EType : uint8
	{
		Graphics,
		Compute,
		Copy,
		SwapChain
	};

	// The four byte-sized fields share storage with Value, so a whole queue
	// identifier can be compared and hashed as a single 32-bit integer.
	union
	{
		struct
		{
			EType Type;
			uint8 GPU;
			uint8 Index;
			uint8 Padding;
		};
		uint32 Value = 0;
	};

	FQueue() = default;

	// Padding is explicitly zeroed so it never contributes a stray bit pattern to Value.
	constexpr FQueue(EType Type, uint8 GPU, uint8 Index)
		: Type(Type), GPU(GPU), Index(Index), Padding(0)
	{}

	// Equality / inequality compare the packed 32-bit representation.
	constexpr bool operator == (FQueue const& RHS) const { return Value == RHS.Value; }
	constexpr bool operator != (FQueue const& RHS) const { return Value != RHS.Value; }

	// Hashes the packed 32-bit representation.
	friend uint32 GetTypeHash(FQueue const& Queue)
	{
		return GetTypeHash(Queue.Value);
	}

	// Returns a static, human readable name for the queue type.
	TCHAR const* GetTypeString() const
	{
		if (Type == EType::Graphics)  { return TEXT("Graphics");  }
		if (Type == EType::Compute)   { return TEXT("Compute");   }
		if (Type == EType::Copy)      { return TEXT("Copy");      }
		if (Type == EType::SwapChain) { return TEXT("Swapchain"); }
		return TEXT("<unknown>");
	}
};
// A single GPU profiler event. The payload is one of the nested structs below, held in a TVariant (FStorage).
// The order of the types in FStorage defines the numeric values of EType, so the two lists must stay in sync.
struct FEvent
{
//
// All timestamps are relative to FPlatformTime::Cycles64().
// TOP = Top of Pipe. Timestamps written by the GPU's command processor before work begins.
// BOP = Bottom of Pipe. Timestamps written after the GPU completes work.
//
// Inserted on each call to RHIEndFrame. Marks the end of a profiler frame.
struct FFrameBoundary
{
// CPU timestamp from the platform RHI's submission thread where the frame boundary occurred.
uint64 CPUTimestamp;
// The index of the frame that just ended.
// Very first frame of the engine is frame 0 (from boot to first call to RHIEndFrame).
uint32 FrameNumber;
#if STATS
// Should be TOptional<int64> but it is not trivially destructible
bool bStatsFrameSet;
int64 StatsFrame;
#endif
#if WITH_RHI_BREADCRUMBS
// The RHI breadcrumb currently at the top of the stack at the frame boundary.
FRHIBreadcrumbNode* Breadcrumb;
#endif
// Note the parameter list and the member list are conditionally compiled in different orders;
// the initializer list below follows the member declaration order.
// The optional stats frame is flattened into (bStatsFrameSet, StatsFrame), with StatsFrame = 0 when unset.
FFrameBoundary(
uint64 CPUTimestamp
, uint32 FrameNumber
#if WITH_RHI_BREADCRUMBS
, FRHIBreadcrumbNode* Breadcrumb
#endif
#if STATS
, TOptional<int64> StatsFrame
#endif
)
: CPUTimestamp(CPUTimestamp)
, FrameNumber(FrameNumber)
#if STATS
, bStatsFrameSet(StatsFrame.IsSet())
, StatsFrame(StatsFrame.IsSet() ? *StatsFrame : 0)
#endif
#if WITH_RHI_BREADCRUMBS
, Breadcrumb(Breadcrumb)
#endif
{}
};
// When present in the stream, overrides the total GPU time stat with the value it contains.
// Used for platform RHIs which don't support accurate GPU timing.
struct FFrameTime
{
// Same frequency as FPlatformTime::Cycles64()
uint64 TotalGPUTime;
FFrameTime(uint64 InTotalGPUTime)
: TotalGPUTime(InTotalGPUTime)
{}
};
#if WITH_RHI_BREADCRUMBS
// Marks the start of a breadcrumb scope. The TOP timestamp defaults to 0 when not supplied.
struct FBeginBreadcrumb
{
FRHIBreadcrumbNode* const Breadcrumb;
uint64 GPUTimestampTOP;
FBeginBreadcrumb(FRHIBreadcrumbNode* Breadcrumb, uint64 GPUTimestampTOP = 0)
: Breadcrumb(Breadcrumb)
, GPUTimestampTOP(GPUTimestampTOP)
{}
};
// Marks the end of a breadcrumb scope. The BOP timestamp defaults to 0 when not supplied.
struct FEndBreadcrumb
{
FRHIBreadcrumbNode* const Breadcrumb;
uint64 GPUTimestampBOP = 0;
FEndBreadcrumb(FRHIBreadcrumbNode* Breadcrumb, uint64 GPUTimestampBOP = 0)
: Breadcrumb(Breadcrumb)
, GPUTimestampBOP(GPUTimestampBOP)
{}
};
#endif
// Inserted when the GPU starts work on a queue.
struct FBeginWork
{
// CPU timestamp of when the work was submitted to the driver for execution on the GPU.
uint64 CPUTimestamp;
// TOP timestamp of when the work actually started on the GPU.
uint64 GPUTimestampTOP;
FBeginWork(uint64 CPUTimestamp, uint64 GPUTimestampTOP = 0)
: CPUTimestamp(CPUTimestamp)
, GPUTimestampTOP(GPUTimestampTOP)
{}
};
// Inserted when the GPU completes work on a queue and goes idle.
struct FEndWork
{
uint64 GPUTimestampBOP;
FEndWork(uint64 GPUTimestampBOP = 0)
: GPUTimestampBOP(GPUTimestampBOP)
{}
};
// Draw / dispatch / primitive / vertex counters.
struct FStats
{
uint32 NumDraws;
uint32 NumDispatches;
uint32 NumPrimitives;
uint32 NumVertices;
// True if at least one of the counters is non-zero.
// Note: this conversion is implicit (not marked explicit).
operator bool() const
{
return NumDraws > 0
|| NumDispatches > 0
|| NumPrimitives > 0
|| NumVertices > 0;
}
};
// Can only be inserted when the GPU is marked "idle", i.e. after an FEndWork event.
struct FSignalFence
{
//
// Timestamp when the fence signal was enqueued to the GPU/driver.
//
// The signal on the GPU doesn't happen until after the previous FEndWork
// event's BOP timestamp, or this CPU timestamp, whichever is later.
//
uint64 CPUTimestamp;
// The fence value signaled.
uint64 Value;
FSignalFence(uint64 CPUTimestamp, uint64 Value)
: CPUTimestamp(CPUTimestamp)
, Value(Value)
{}
};
// Can only be inserted when the GPU is marked "idle", i.e. after an FEndWork event.
struct FWaitFence
{
// Timestamp when the fence wait was enqueued to the GPU/driver.
uint64 CPUTimestamp;
// The fence value awaited.
uint64 Value;
// The queue the GPU is waiting for a fence signal from.
FQueue Queue;
FWaitFence(uint64 CPUTimestamp, uint64 Value, FQueue Queue)
: CPUTimestamp(CPUTimestamp)
, Value(Value)
, Queue(Queue)
{}
};
// Carries a single GPU timestamp.
// NOTE(review): presumably the time of a swap chain flip - confirm with the platform RHI that emits it.
struct FFlip
{
uint64 GPUTimestamp;
};
// Carries a single GPU timestamp.
// NOTE(review): presumably the time of a display vsync - confirm with the platform RHI that emits it.
struct FVsync
{
uint64 GPUTimestamp;
};
// Variant over every payload type. Reordering this list changes the EType values below.
using FStorage = TVariant<
FFrameBoundary
, FFrameTime
#if WITH_RHI_BREADCRUMBS
, FBeginBreadcrumb
, FEndBreadcrumb
#endif
, FBeginWork
, FEndWork
, FStats
, FSignalFence
, FWaitFence
, FFlip
, FVsync
>;
// One enumerator per payload type; each value is that type's index within FStorage.
enum class EType
{
FrameBoundary = FStorage::IndexOfType<FFrameBoundary >(),
FrameTime = FStorage::IndexOfType<FFrameTime >(),
#if WITH_RHI_BREADCRUMBS
BeginBreadcrumb = FStorage::IndexOfType<FBeginBreadcrumb>(),
EndBreadcrumb = FStorage::IndexOfType<FEndBreadcrumb >(),
#endif
BeginWork = FStorage::IndexOfType<FBeginWork >(),
EndWork = FStorage::IndexOfType<FEndWork >(),
Stats = FStorage::IndexOfType<FStats >(),
SignalFence = FStorage::IndexOfType<FSignalFence >(),
WaitFence = FStorage::IndexOfType<FWaitFence >(),
Flip = FStorage::IndexOfType<FFlip >(),
VSync = FStorage::IndexOfType<FVsync >()
};
// The event payload.
FStorage Value;
// Returns which payload type is currently stored, derived from the variant's index.
EType GetType() const
{
return static_cast<EType>(Value.GetIndex());
}
// Constructs the event holding a copy of the given payload, built in place inside the variant.
template <typename T>
FEvent(T const& Value)
: Value(TInPlaceType<T>(), Value)
{}
// Events are neither copyable nor movable.
FEvent(FEvent const&) = delete;
FEvent(FEvent&&) = delete;
};
class FEventStream
{
friend struct FEventSink;
private:
struct FChunk
{
struct FHeader
{
FChunk* Next = nullptr;
uint32 Num = 0;
#if WITH_RHI_BREADCRUMBS
FRHIBreadcrumbAllocatorArray BreadcrumbAllocators;
#endif
} Header;
static constexpr uint32 ChunkSizeInBytes = 16 * 1024;
static constexpr uint32 RemainingBytes = ChunkSizeInBytes - Align<uint32>(sizeof(FHeader), alignof(FHeader));
static constexpr uint32 MaxEventsPerChunk = RemainingBytes / Align<uint32>(sizeof(FEvent), alignof(FEvent));
TStaticArray<TTypeCompatibleBytes<FEvent>, MaxEventsPerChunk> Elements;
static RHI_API TLockFreePointerListUnordered<void, PLATFORM_CACHE_LINE_SIZE> MemoryPool;
void* operator new(size_t Size)
{
check(Size == sizeof(FChunk));
void* Memory = MemoryPool.Pop();
if (!Memory)
{
Memory = FMemory::Malloc(sizeof(FChunk), alignof(FChunk));
}
return Memory;
}
void operator delete(void* Pointer)
{
MemoryPool.Push(Pointer);
}
FEvent* GetElement(uint32 Index)
{
return Elements[Index].GetTypedPtr();
}
};
static_assert(sizeof(FChunk) <= FChunk::ChunkSizeInBytes, "Incorrect FChunk size.");
FChunk* First = nullptr;
FChunk* Current = nullptr;
public:
FQueue const Queue;
FEventStream(FQueue const Queue)
: Queue(Queue)
{}
FEventStream(FEventStream const&) = delete;
FEventStream(FEventStream&& Other)
: First (Other.First)
, Current(Other.Current)
, Queue (Other.Queue)
{
Other.First = nullptr;
Other.Current = nullptr;
}
~FEventStream()
{
while (First)
{
FChunk* Next = First->Header.Next;
delete First;
First = Next;
}
}
template <typename TEventType, typename... TArgs>
TEventType& Emplace(TArgs&&... Args)
{
static_assert(std::is_trivially_destructible_v<TEventType>, "Destructors are not called on GPU profiler events, so the types must be trivially destructible.");
if (!Current)
{
Current = new FChunk;
if (!First)
{
First = Current;
}
}
if (Current->Header.Num >= FChunk::MaxEventsPerChunk)
{
FChunk* NewChunk = new FChunk;
Current->Header.Next = NewChunk;
Current = NewChunk;
}
FEvent* Event = Current->GetElement(Current->Header.Num++);
new (Event) FEvent(TEventType(Forward<TArgs>(Args)...));
TEventType& Data = Event->Value.Get<TEventType>();
#if WITH_RHI_BREADCRUMBS
if constexpr (
std::is_same_v<UE::RHI::GPUProfiler::FEvent::FBeginBreadcrumb, TEventType> ||
std::is_same_v<UE::RHI::GPUProfiler::FEvent::FEndBreadcrumb , TEventType> ||
std::is_same_v<UE::RHI::GPUProfiler::FEvent::FFrameBoundary , TEventType>
)
{
if (Data.Breadcrumb)
{
// Attach the breadcrumb allocator for begin/end breadcrumb events.
// This keeps the breadcrumbs alive until the events have been consumed by the profilers.
Current->Header.BreadcrumbAllocators.AddUnique(Data.Breadcrumb->Allocator);
}
}
#endif
return Data;
}
bool IsEmpty() const
{
return First == nullptr;
}
void Append(FEventStream&& Other)
{
check(Queue == Other.Queue);
if (IsEmpty())
{
Current = Other.Current;
First = Other.First;
}
else if (!Other.IsEmpty())
{
Current->Header.Next = Other.First;
Current = Other.Current;
}
Other.Current = nullptr;
Other.First = nullptr;
}
};
// Abstract interface for consumers of event streams. Subclasses implement ProcessStreams() and InitializeQueues().
// Construction and destruction are restricted to derived types, and sinks cannot be copied or moved.
struct FEventSink
{
protected:
	// Forward cursor over the events of one stream. Holds a shared reference to the stream it walks.
	struct FIterator
	{
		friend FEventSink;

	private:
		TSharedRef<FEventStream> Stream;
		FEventStream::FChunk* Current;
		uint32 Index = 0;

	public:
		// Starts at the first event of the first chunk (if any).
		FIterator(TSharedRef<FEventStream> const& InStream)
			: Stream(InStream)
			, Current(InStream->First)
		{}

		// Returns the event under the cursor without advancing, or nullptr once the stream is exhausted.
		FEvent const* Peek() const
		{
			if (!Current)
			{
				return nullptr;
			}
			return Current->GetElement(Index);
		}

		// Returns the event under the cursor and advances past it, or nullptr once the stream is exhausted.
		FEvent const* Pop()
		{
			FEvent const* const Result = Peek();
			if (!Result)
			{
				return nullptr;
			}

			++Index;

			// Hop to the following chunk(s) once the current one has been fully consumed.
			while (Current && Index >= Current->Header.Num)
			{
				Current = Current->Header.Next;
				Index = 0;
			}

			return Result;
		}
	};

	RHI_API FEventSink();
	RHI_API ~FEventSink();

	FEventSink(FEventSink const&) = delete;
	FEventSink(FEventSink&&) = delete;

public:
	virtual void ProcessStreams(TConstArrayView<TSharedRef<FEventStream>> EventStreams) = 0;
	virtual void InitializeQueues(TConstArrayView<FQueue> Queues) = 0;
};
// Exported free-function entry points; declared here and defined in another translation unit.
// NOTE(review): presumably these hand the streams / queue list to the registered FEventSink instances - confirm in the implementation.
RHI_API void ProcessEvents(TArrayView<FEventStream> EventStreams);
RHI_API void InitializeQueues(TConstArrayView<FQueue> Queues);
// Describes one named GPU stat. When STATS is enabled it also owns per-queue stat instances,
// one for each of the Busy / Wait / Idle categories.
struct FGPUStat
{
// Which of the three per-queue counters a lookup refers to.
enum class EType
{
Busy,
Wait,
Idle
};
// Name strings are stored as raw pointers and are not copied, so they must outlive this object.
TCHAR const* const StatName;
TCHAR const* const DisplayName;
#if CSV_PROFILER_STATS
TOptional<FCsvDeclaredStat> CsvStat;
#endif
private:
#if STATS
static FString GetIDString(FQueue Queue, bool bFriendly);
static TCHAR const* GetTypeString(EType Type);
// Stat group name / description for a queue, cached in a static map keyed by queue.
struct FStatCategory
{
FAnsiString const GroupName;
FString const GroupDesc;
FStatCategory(FQueue Queue);
static TMap<FQueue, TUniquePtr<FStatCategory>> Categories;
static FStatCategory& GetCategory(FQueue Queue);
};
// The three stat entries (Busy / Wait / Idle) that exist for a single queue.
struct FStatInstance
{
struct FInner
{
// Note: this inner STATS guard is redundant, the enclosing private section is already inside #if STATS.
#if STATS
FName StatName;
TUniquePtr<FDynamicStat> Stat;
#endif
};
FInner Busy, Wait, Idle;
};
// Per-queue stat instances.
TMap<FQueue, FStatInstance> Instances;
FStatInstance::FInner& GetStatInstance(FQueue Queue, EType Type);
#endif
public:
FGPUStat(TCHAR const* StatName, TCHAR const* DisplayName)
: StatName (StatName)
, DisplayName(DisplayName)
{}
#if STATS
TStatId GetStatId(FQueue Queue, EType Type);
#endif
};
// FGPUStat whose names come from a provider type.
// TNameProvider must expose static GetStatName() and GetDisplayName() functions returning the name strings.
template <typename TNameProvider>
struct TGPUStat : public FGPUStat
{
TGPUStat()
: FGPUStat(TNameProvider::GetStatName(), TNameProvider::GetDisplayName())
{}
};
// TGPUStat that additionally carries a draw call stats category when HAS_GPU_STATS is enabled.
// Without HAS_GPU_STATS it adds no members over TGPUStat.
template <typename TNameProvider>
struct TGPUStatWithDrawcallCategory : public TGPUStat<TNameProvider>
{
#if HAS_GPU_STATS
FRHIDrawStatsCategory DrawcallCategory;
#endif
};
}
#else
/** Stats for a single perf event node. */
class FGPUProfilerEventNodeStats : public FRefCountedObject
{
public:
FGPUProfilerEventNodeStats() :
NumDraws(0),
NumPrimitives(0),
NumVertices(0),
NumDispatches(0),
GroupCount(FIntVector(0, 0, 0)),
NumTotalDispatches(0),
NumTotalDraws(0),
NumTotalPrimitives(0),
NumTotalVertices(0),
TimingResult(0),
NumEvents(0)
{
}
FGPUProfilerEventNodeStats(const FGPUProfilerEventNodeStats& rhs)
{
NumDraws = rhs.NumDraws;
NumPrimitives = rhs.NumPrimitives;
NumVertices = rhs.NumVertices;
NumDispatches = rhs.NumDispatches;
NumTotalDispatches = rhs.NumTotalDispatches;
NumTotalDraws = rhs.NumDraws;
NumTotalPrimitives = rhs.NumPrimitives;
NumTotalVertices = rhs.NumVertices;
TimingResult = rhs.TimingResult;
NumEvents = rhs.NumEvents;
}
/** Exclusive number of draw calls rendered in this event. */
uint32 NumDraws;
/** Exclusive number of primitives rendered in this event. */
uint32 NumPrimitives;
/** Exclusive number of vertices rendered in this event. */
uint32 NumVertices;
/** Compute stats */
uint32 NumDispatches;
FIntVector GroupCount;
uint32 NumTotalDispatches;
/** Inclusive number of draw calls rendered in this event and children. */
uint32 NumTotalDraws;
/** Inclusive number of primitives rendered in this event and children. */
uint32 NumTotalPrimitives;
/** Inclusive number of vertices rendered in this event and children. */
uint32 NumTotalVertices;
/** GPU time spent inside the perf event's begin and end, in ms. */
float TimingResult;
/** Inclusive number of other perf events that this is the parent of. */
uint32 NumEvents;
const FGPUProfilerEventNodeStats operator+=(const FGPUProfilerEventNodeStats& rhs)
{
NumDraws += rhs.NumDraws;
NumPrimitives += rhs.NumPrimitives;
NumVertices += rhs.NumVertices;
NumDispatches += rhs.NumDispatches;
NumTotalDispatches += rhs.NumTotalDispatches;
NumTotalDraws += rhs.NumDraws;
NumTotalPrimitives += rhs.NumPrimitives;
NumTotalVertices += rhs.NumVertices;
TimingResult += rhs.TimingResult;
NumEvents += rhs.NumEvents;
return *this;
}
};
/**
 * A single perf event node: the stats of the event plus its name and its position in the event tree.
 * The timing hooks have no-op defaults here and can be overridden by derived types.
 */
class FGPUProfilerEventNode : public FGPUProfilerEventNodeStats
{
public:
FGPUProfilerEventNode(const TCHAR* InName, FGPUProfilerEventNode* InParent) :
FGPUProfilerEventNodeStats(),
Name(InName),
Parent(InParent)
{
}
~FGPUProfilerEventNode() {}
/** Name of the event (copied from the string passed to the constructor). */
FString Name;
/** Pointer to parent node so we can walk up the tree on appEndDrawEvent. */
FGPUProfilerEventNode* Parent;
/** Children perf event nodes. */
TArray<TRefCountPtr<FGPUProfilerEventNode> > Children;
/** Returns the timing of this event; the base implementation always returns 0. */
virtual float GetTiming() { return 0.0f; }
/** Timing start hook; the base implementation does nothing. */
virtual void StartTiming() {}
/** Timing stop hook; the base implementation does nothing. */
virtual void StopTiming() {}
};
/** An entire frame of perf event nodes, including ancillary timers. */
struct FGPUProfilerEventNodeFrame
{
virtual ~FGPUProfilerEventNodeFrame() {}
/** Root nodes of the perf event tree. */
TArray<TRefCountPtr<FGPUProfilerEventNode> > EventTree;
/** Start this frame of perf tracking. The base implementation does nothing. */
virtual void StartFrame() {}
/** End this frame of perf tracking, but do not block yet. The base implementation does nothing. */
virtual void EndFrame() {}
/** Dumps perf event information, blocking on GPU. */
RHI_API void DumpEventTree();
/** Calculates root timing base frequency (if needed by this RHI). The base implementation returns 0. */
virtual float GetRootTimingResults() { return 0.0f; }
/** D3D11 Hack */
virtual void LogDisjointQuery() {}
/** Whether the platform disables VSync; the base implementation returns false. */
virtual bool PlatformDisablesVSync() const { return false; }
};
/**
 * Two timestamps performed on GPU and CPU at nearly the same time.
 * This can be used to visualize GPU and CPU timing events on the same timeline.
 * Both values default to 0.
 */
struct FGPUTimingCalibrationTimestamp
{
/** GPU-side timestamp, in microseconds. */
uint64 GPUMicroseconds = 0;
/** CPU-side timestamp, in microseconds. */
uint64 CPUMicroseconds = 0;
};
/**
 * Holds information if this platform's GPU allows timing
 */
struct FGPUTiming
{
public:
	/**
	 * Whether GPU timing measurements are supported by the driver.
	 *
	 * @return true if GPU timing measurements are supported by the driver.
	 */
	static bool IsSupported()
	{
		return GIsSupported;
	}

	/**
	 * Returns the frequency for the timing values, in number of ticks per seconds.
	 *
	 * @param GPUIndex	Index of the GPU to query.
	 * @return Frequency for the timing values, in number of ticks per seconds, or 0 if the feature isn't supported.
	 */
	static uint64 GetTimingFrequency(uint32 GPUIndex = 0)
	{
		return GTimingFrequency[GPUIndex];
	}

	/**
	 * Returns a pair of timestamps performed on GPU and CPU at nearly the same time, in microseconds.
	 *
	 * @param GPUIndex	Index of the GPU to query.
	 * @return CPU and GPU timestamps, in microseconds. Both are 0 if feature isn't supported.
	 */
	static FGPUTimingCalibrationTimestamp GetCalibrationTimestamp(uint32 GPUIndex = 0)
	{
		return GCalibrationTimestamp[GPUIndex];
	}

	/** Signature of the platform-specific initialization callback. */
	using PlatformStaticInitialize = void(void*);

	/**
	 * Runs the platform initialization callback once and latches the result.
	 * Does nothing if the globals are already initialized or no callback is supplied.
	 * Support is derived from the timing frequency of GPU 0 being non-zero after the callback has run.
	 *
	 * @param UserData			Opaque pointer forwarded to the callback.
	 * @param PlatformFunction	Platform-specific initialization callback; may be null.
	 */
	static void StaticInitialize(void* UserData, PlatformStaticInitialize* PlatformFunction)
	{
		if (GAreGlobalsInitialized || !PlatformFunction)
		{
			return;
		}

		(*PlatformFunction)(UserData);

		GIsSupported = (GetTimingFrequency() != 0);
		GAreGlobalsInitialized = true;
	}

protected:
	/** Whether the static variables have been initialized. */
	RHI_API static bool GAreGlobalsInitialized;

	/** Whether GPU timing measurements are supported by the driver. */
	RHI_API static bool GIsSupported;

	/** Stores the timing frequency for the given GPU. */
	static void SetTimingFrequency(uint64 TimingFrequency, uint32 GPUIndex = 0)
	{
		GTimingFrequency[GPUIndex] = TimingFrequency;
	}

	/** Stores the calibration timestamp pair for the given GPU. */
	static void SetCalibrationTimestamp(FGPUTimingCalibrationTimestamp CalibrationTimestamp, uint32 GPUIndex = 0)
	{
		GCalibrationTimestamp[GPUIndex] = CalibrationTimestamp;
	}

private:
	/** Frequency for the timing values, in number of ticks per seconds, or 0 if the feature isn't supported. */
	RHI_API static TStaticArray<uint64, MAX_NUM_GPUS> GTimingFrequency;

	/**
	 * Two timestamps performed on GPU and CPU at nearly the same time.
	 * This can be used to visualize GPU and CPU timing events on the same timeline.
	 * Both values may be 0 if timer calibration is not available on current platform.
	 */
	RHI_API static TStaticArray<FGPUTimingCalibrationTimestamp, MAX_NUM_GPUS> GCalibrationTimestamp;
};
/**
 * Encapsulates GPU profiling logic and data.
 * There's only one global instance of this struct so it should only contain global data, nothing specific to a frame.
 */
struct FGPUProfiler
{
	/** Whether we are currently tracking perf events or not. */
	bool bTrackingEvents = false;

	/** Whether we are currently tracking data for gpucrash debugging or not */
	bool bTrackingGPUCrashData = false;

	/** A latched version of GTriggerGPUProfile. This is a form of pseudo-thread safety. We read the value once a frame only. */
	bool bLatchedGProfilingGPU = false;

	/** A latched version of GTriggerGPUHitchProfile. This is a form of pseudo-thread safety. We read the value once a frame only. */
	bool bLatchedGProfilingGPUHitches = false;

	/** The previous latched version of GTriggerGPUHitchProfile.*/
	bool bPreviousLatchedGProfilingGPUHitches = false;

	/** Original state of GEmitDrawEvents before it was overridden for profiling. */
	bool bOriginalGEmitDrawEvents = false;

	/** GPU hitch profile history debounce...after a hitch, we just ignore frames for a while */
	int32 GPUHitchDebounce = 0;

	/** scope depth to record crash data depth. to limit perf/mem requirements */
	int32 GPUCrashDataDepth = -1;

	/** Current perf event node frame. */
	FGPUProfilerEventNodeFrame* CurrentEventNodeFrame = nullptr;

	/** Current perf event node. */
	FGPUProfilerEventNode* CurrentEventNode = nullptr;

	int32 StackDepth = 0;

	/** All state starts from the default member initializers above. */
	FGPUProfiler() = default;

	virtual ~FGPUProfiler()
	{
	}

	/** Adds draw / primitive / vertex counts to the current event node. No-op unless events are being tracked and a node is active. */
	void RegisterGPUWork(uint32 NumDraws, uint32 NumPrimitives, uint32 NumVertices)
	{
		if (!bTrackingEvents || !CurrentEventNode)
		{
			return;
		}

		check(IsInRenderingThread() || IsInRHIThread());
		CurrentEventNode->NumDraws += NumDraws;
		CurrentEventNode->NumPrimitives += NumPrimitives;
		CurrentEventNode->NumVertices += NumVertices;
	}

	/** Convenience overload that registers exactly one draw. */
	void RegisterGPUWork(uint32 NumPrimitives = 0, uint32 NumVertices = 0)
	{
		RegisterGPUWork(1, NumPrimitives, NumVertices);
	}

	/** Counts one dispatch on the current event node and records its group count (overwriting any previous value). */
	void RegisterGPUDispatch(FIntVector GroupCount)
	{
		if (!bTrackingEvents || !CurrentEventNode)
		{
			return;
		}

		check(IsInRenderingThread() || IsInRHIThread());
		++CurrentEventNode->NumDispatches;
		CurrentEventNode->GroupCount = GroupCount;
	}

	/** Allocates a new event node; overridable so derived profilers can create their own node type. */
	virtual FGPUProfilerEventNode* CreateEventNode(const TCHAR* InName, FGPUProfilerEventNode* InParent)
	{
		return new FGPUProfilerEventNode(InName, InParent);
	}

	RHI_API virtual void PushEvent(const TCHAR* Name, FColor Color);
	RHI_API virtual void PopEvent();

	/** True while perf events are being tracked. */
	bool IsProfilingGPU() const { return bTrackingEvents; }
};
#endif
//
// Type used to pipe GPU frame timings from the end-of-pipe / RHI threads up to the game / render threads.
// Stores a history of GPU frame timings, which can be retrieved by engine code via:
//
// static FRHIGPUFrameTimeHistory::FState GPUFrameTimeState;
// uint64 GPUFrameTimeCycles64;
// while (GPUFrameTimeState.PopFrameCycles(GPUFrameTimeCycles64) != FRHIGPUFrameTimeHistory::EResult::Empty)
// {
// ...
// }
//
class FRHIGPUFrameTimeHistory
{
public:
// Outcome of a single PopFrameCycles() call.
enum class EResult
{
// The next frame timing has been retrieved
Ok,
// The next frame timing has been retrieved, but the client has also missed some frames.
Disjoint,
// No new frame timing data available.
Empty
};
// Per-client read cursor. Each consumer keeps its own FState and pops through it.
class FState
{
friend FRHIGPUFrameTimeHistory;
// Index of the next history entry this client will read.
uint64 NextIndex = 0;
public:
// Retrieves the next frame timing for this client into OutCycles64; see EResult for the outcomes.
RHI_API EResult PopFrameCycles(uint64& OutCycles64);
};
private:
// Total number of GPU frame timings to store
static constexpr uint32 MaxLength = 16;
// Index of the next history entry to be written.
uint64 NextIndex = 0;
// Stored frame timings; every slot starts at 0.
TStaticArray<uint64, MaxLength> History { InPlace, 0 };
// NOTE(review): presumably guards NextIndex / History against concurrent push and pop - confirm in the .cpp.
FCriticalSection CS;
// Internal pop helper taking the client's cursor explicitly.
EResult PopFrameCycles(FState& State, uint64& OutCycles64);
public:
// Called by platform RHIs to submit new GPU timing data
RHI_API void PushFrameCycles(double GPUFrequency, uint64 GPUCycles);
};
// The global GPU frame time history instance (declared here, defined in another translation unit).
extern RHI_API FRHIGPUFrameTimeHistory GRHIGPUFrameTimeHistory;