327 lines
10 KiB
C++
327 lines
10 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "D3D12GPUProfiler.h"
|
|
#include "D3D12Adapter.h"
|
|
#include "D3D12Device.h"
|
|
#include "Engine/Engine.h"
|
|
#include "Engine/GameViewportClient.h"
|
|
|
|
int32 GD3D12RHIStablePowerState = 0;
|
|
static FAutoConsoleVariableRef CVarD3D12RHIStablePowerState(
|
|
TEXT("D3D12.StablePowerState"),
|
|
GD3D12RHIStablePowerState,
|
|
TEXT("Enable stable power state. This increases GPU timing measurement accuracy but may decrease overall GPU clock rate.\n")
|
|
TEXT(" 0 (default): off\n")
|
|
TEXT(" 1 : set during profiling\n")
|
|
TEXT(" 2 : set on startup\n"),
|
|
ECVF_Default
|
|
);
|
|
|
|
#if (RHI_NEW_GPU_PROFILER == 0)
|
|
|
|
FD3D12BufferedGPUTiming::FD3D12BufferedGPUTiming(FD3D12Device* InParent)
|
|
: FD3D12DeviceChild(InParent)
|
|
{
|
|
}
|
|
|
|
FD3D12BufferedGPUTiming::~FD3D12BufferedGPUTiming() = default;
|
|
|
|
void FD3D12BufferedGPUTiming::Initialize(FD3D12Adapter* ParentAdapter)
|
|
{
|
|
StaticInitialize(ParentAdapter, [](void* UserData)
|
|
{
|
|
// Are the static variables initialized?
|
|
check(!GAreGlobalsInitialized);
|
|
|
|
FD3D12Adapter* ParentAdapter = (FD3D12Adapter*)UserData;
|
|
CalibrateTimers(ParentAdapter);
|
|
});
|
|
}
|
|
|
|
void FD3D12BufferedGPUTiming::CalibrateTimers(FD3D12Adapter* ParentAdapter)
|
|
{
|
|
for (uint32 GPUIndex : FRHIGPUMask::All())
|
|
{
|
|
FD3D12Device* Device = ParentAdapter->GetDevice(GPUIndex);
|
|
|
|
uint64 TimingFrequency = Device->GetTimestampFrequency(ED3D12QueueType::Direct);
|
|
SetTimingFrequency(TimingFrequency, GPUIndex);
|
|
|
|
FGPUTimingCalibrationTimestamp CalibrationTimestamp = Device->GetCalibrationTimestamp(ED3D12QueueType::Direct);
|
|
SetCalibrationTimestamp(CalibrationTimestamp, GPUIndex);
|
|
}
|
|
}
|
|
|
|
void FD3D12BufferedGPUTiming::StartTiming()
|
|
{
|
|
FD3D12Device* Device = GetParentDevice();
|
|
ID3D12Device* D3DDevice = Device->GetDevice();
|
|
|
|
// Issue a timestamp query for the 'start' time.
|
|
if (GIsSupported && !bIsTiming)
|
|
{
|
|
// Check to see if stable power state cvar has changed
|
|
const bool bStablePowerStateCVar = GD3D12RHIStablePowerState != 0;
|
|
if (bStablePowerState != bStablePowerStateCVar)
|
|
{
|
|
if (SUCCEEDED(D3DDevice->SetStablePowerState(bStablePowerStateCVar)))
|
|
{
|
|
// SetStablePowerState succeeded. Update timing frequency.
|
|
uint64 TimingFrequency = Device->GetTimestampFrequency(ED3D12QueueType::Direct);
|
|
SetTimingFrequency(TimingFrequency, Device->GetGPUIndex());
|
|
bStablePowerState = bStablePowerStateCVar;
|
|
}
|
|
else
|
|
{
|
|
// SetStablePowerState failed. This can occur if SDKLayers is not present on the system.
|
|
CVarD3D12RHIStablePowerState->Set(0, ECVF_SetByConsole);
|
|
}
|
|
}
|
|
|
|
FD3D12CommandContext& CmdContext = Device->GetDefaultCommandContext();
|
|
CmdContext.InsertTimestamp(ED3D12Units::Raw, &Begin.Result);
|
|
|
|
Begin.SyncPoint = CmdContext.GetContextSyncPoint();
|
|
|
|
bIsTiming = true;
|
|
}
|
|
}
|
|
|
|
void FD3D12BufferedGPUTiming::EndTiming()
|
|
{
|
|
// Issue a timestamp query for the 'end' time.
|
|
if (GIsSupported && bIsTiming)
|
|
{
|
|
FD3D12CommandContext& CmdContext = GetParentDevice()->GetDefaultCommandContext();
|
|
CmdContext.InsertTimestamp(ED3D12Units::Raw, &End.Result);
|
|
|
|
End.SyncPoint = CmdContext.GetContextSyncPoint();
|
|
|
|
bIsTiming = false;
|
|
}
|
|
}
|
|
|
|
uint64 FD3D12BufferedGPUTiming::GetTiming()
|
|
{
|
|
if (End.SyncPoint)
|
|
End.SyncPoint->Wait();
|
|
|
|
if (Begin.SyncPoint)
|
|
Begin.SyncPoint->Wait();
|
|
|
|
return End.Result >= Begin.Result
|
|
? End.Result - Begin.Result
|
|
: 0;
|
|
}
|
|
|
|
FD3D12EventNode::FD3D12EventNode(const TCHAR* InName, FGPUProfilerEventNode* InParent, FD3D12Device* InParentDevice)
|
|
: FGPUProfilerEventNode(InName, InParent)
|
|
, FD3D12DeviceChild(InParentDevice)
|
|
, Timing(InParentDevice)
|
|
{
|
|
}
|
|
|
|
FD3D12EventNode::~FD3D12EventNode() = default;
|
|
|
|
float FD3D12EventNode::GetTiming()
|
|
{
|
|
float Result = 0;
|
|
|
|
if (Timing.IsSupported())
|
|
{
|
|
// Get the timing result and block the CPU until it is ready
|
|
const uint64 GPUTiming = Timing.GetTiming();
|
|
const uint64 GPUFreq = Timing.GetTimingFrequency(GetParentDevice()->GetGPUIndex());
|
|
|
|
Result = double(GPUTiming) / double(GPUFreq);
|
|
}
|
|
|
|
return Result;
|
|
}
|
|
|
|
FD3D12EventNodeFrame::FD3D12EventNodeFrame(FD3D12Device* InParent)
|
|
: FGPUProfilerEventNodeFrame()
|
|
, FD3D12DeviceChild(InParent)
|
|
, RootEventTiming(InParent)
|
|
{
|
|
}
|
|
|
|
FD3D12EventNodeFrame::~FD3D12EventNodeFrame() = default;
|
|
|
|
void FD3D12EventNodeFrame::StartFrame()
|
|
{
|
|
EventTree.Reset();
|
|
RootEventTiming.StartTiming();
|
|
}
|
|
|
|
void FD3D12EventNodeFrame::EndFrame()
|
|
{
|
|
RootEventTiming.EndTiming();
|
|
}
|
|
|
|
float FD3D12EventNodeFrame::GetRootTimingResults()
|
|
{
|
|
double RootResult = 0.0f;
|
|
if (RootEventTiming.IsSupported())
|
|
{
|
|
const uint64 GPUTiming = RootEventTiming.GetTiming();
|
|
const uint64 GPUFreq = RootEventTiming.GetTimingFrequency(GetParentDevice()->GetGPUIndex());
|
|
|
|
RootResult = double(GPUTiming) / double(GPUFreq);
|
|
}
|
|
|
|
return (float)RootResult;
|
|
}
|
|
|
|
void FD3D12GPUProfiler::BeginFrame()
|
|
{
|
|
CurrentEventNode = NULL;
|
|
check(!bTrackingEvents);
|
|
check(!CurrentEventNodeFrame); // this should have already been cleaned up and the end of the previous frame
|
|
|
|
// latch the bools from the game thread into our private copy
|
|
bLatchedGProfilingGPU = GTriggerGPUProfile;
|
|
bLatchedGProfilingGPUHitches = GTriggerGPUHitchProfile;
|
|
if (bLatchedGProfilingGPUHitches)
|
|
{
|
|
bLatchedGProfilingGPU = false; // we do NOT permit an ordinary GPU profile during hitch profiles
|
|
}
|
|
|
|
// if we are starting a hitch profile or this frame is a gpu profile, then save off the state of the draw events
|
|
if (bLatchedGProfilingGPU || (!bPreviousLatchedGProfilingGPUHitches && bLatchedGProfilingGPUHitches))
|
|
{
|
|
bOriginalGEmitDrawEvents = GetEmitDrawEvents();
|
|
}
|
|
|
|
if (bLatchedGProfilingGPU || bLatchedGProfilingGPUHitches)
|
|
{
|
|
if (bLatchedGProfilingGPUHitches && GPUHitchDebounce)
|
|
{
|
|
// if we are doing hitches and we had a recent hitch, wait to recover
|
|
// the reasoning is that collecting the hitch report may itself hitch the GPU
|
|
GPUHitchDebounce--;
|
|
}
|
|
else
|
|
{
|
|
SetEmitDrawEvents(true); // thwart an attempt to turn this off on the game side
|
|
bTrackingEvents = true;
|
|
|
|
CurrentEventNodeFrame = new FD3D12EventNodeFrame(GetParentDevice());
|
|
CurrentEventNodeFrame->StartFrame();
|
|
}
|
|
}
|
|
else if (bPreviousLatchedGProfilingGPUHitches)
|
|
{
|
|
// hitch profiler is turning off, clear history and restore draw events
|
|
GPUHitchEventNodeFrames.Empty();
|
|
SetEmitDrawEvents(bOriginalGEmitDrawEvents);
|
|
}
|
|
bPreviousLatchedGProfilingGPUHitches = bLatchedGProfilingGPUHitches;
|
|
}
|
|
|
|
void FD3D12GPUProfiler::EndFrame()
|
|
{
|
|
const uint32 GPUIndex = GetParentDevice()->GetGPUIndex();
|
|
|
|
// if we have a frame open, close it now.
|
|
if (CurrentEventNodeFrame)
|
|
{
|
|
CurrentEventNodeFrame->EndFrame();
|
|
Parent->GetDefaultCommandContext().FlushCommands();
|
|
}
|
|
|
|
check(!bTrackingEvents || bLatchedGProfilingGPU || bLatchedGProfilingGPUHitches);
|
|
check(!bTrackingEvents || CurrentEventNodeFrame);
|
|
if (bLatchedGProfilingGPU)
|
|
{
|
|
if (bTrackingEvents)
|
|
{
|
|
SetEmitDrawEvents(bOriginalGEmitDrawEvents);
|
|
|
|
UE_LOG(LogD3D12RHI, Log, TEXT(""));
|
|
UE_LOG(LogD3D12RHI, Log, TEXT(""));
|
|
GTriggerGPUProfile = false;
|
|
bLatchedGProfilingGPU = false;
|
|
|
|
// Only dump the event tree and generate the screenshot for the first GPU. Eventually, we may want to collate
|
|
// profiling data for all GPUs into a single tree, but the short term goal is to make profiling in the editor
|
|
// functional at all with "-MaxGPUCount=2" (required to enable multiple GPUs for GPU Lightmass). In the editor,
|
|
// we don't actually render anything on the additional GPUs, but the editor's profile visualizer will pick up
|
|
// whatever event tree we dumped last, which will be the empty one from the last GPU, making the results
|
|
// useless without this code fix. Unreal Insights would be preferred for multi-GPU profiling outside the editor.
|
|
if (GPUIndex == 0)
|
|
{
|
|
CurrentEventNodeFrame->DumpEventTree();
|
|
|
|
if (RHIConfig::ShouldSaveScreenshotAfterProfilingGPU()
|
|
&& GEngine->GameViewport)
|
|
{
|
|
GEngine->GameViewport->Exec(NULL, TEXT("SCREENSHOT"), *GLog);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (bLatchedGProfilingGPUHitches)
|
|
{
|
|
//@todo this really detects any hitch, even one on the game thread.
|
|
// it would be nice to restrict the test to stalls on D3D, but for now...
|
|
// this needs to be out here because bTrackingEvents is false during the hitch debounce
|
|
static double LastTime = -1.0;
|
|
double Now = FPlatformTime::Seconds();
|
|
if (bTrackingEvents)
|
|
{
|
|
/** How long, in seconds a frame much be to be considered a hitch **/
|
|
const float HitchThreshold = RHIConfig::GetGPUHitchThreshold();
|
|
float ThisTime = Now - LastTime;
|
|
bool bHitched = (ThisTime > HitchThreshold) && LastTime > 0.0 && CurrentEventNodeFrame;
|
|
if (bHitched)
|
|
{
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("*******************************************************************************"));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("********** Hitch detected on CPU, frametime = %6.1fms"), ThisTime * 1000.0f);
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("*******************************************************************************"));
|
|
|
|
for (int32 Frame = 0; Frame < GPUHitchEventNodeFrames.Num(); Frame++)
|
|
{
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT(""));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT(""));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("********** GPU Frame: Current - %d"), GPUHitchEventNodeFrames.Num() - Frame);
|
|
GPUHitchEventNodeFrames[Frame].DumpEventTree();
|
|
}
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT(""));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT(""));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("********** GPU Frame: Current"));
|
|
CurrentEventNodeFrame->DumpEventTree();
|
|
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("*******************************************************************************"));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("********** End Hitch GPU Profile"));
|
|
UE_LOG(LogD3D12RHI, Warning, TEXT("*******************************************************************************"));
|
|
if (GEngine->GameViewport)
|
|
{
|
|
GEngine->GameViewport->Exec(NULL, TEXT("SCREENSHOT"), *GLog);
|
|
}
|
|
|
|
GPUHitchDebounce = 5; // don't trigger this again for a while
|
|
GPUHitchEventNodeFrames.Empty(); // clear history
|
|
}
|
|
else if (CurrentEventNodeFrame) // this will be null for discarded frames while recovering from a recent hitch
|
|
{
|
|
/** How many old frames to buffer for hitch reports **/
|
|
static const int32 HitchHistorySize = 4;
|
|
|
|
if (GPUHitchEventNodeFrames.Num() >= HitchHistorySize)
|
|
{
|
|
GPUHitchEventNodeFrames.RemoveAt(0);
|
|
}
|
|
GPUHitchEventNodeFrames.Add((FD3D12EventNodeFrame*)CurrentEventNodeFrame);
|
|
CurrentEventNodeFrame = NULL; // prevent deletion of this below; ke kept it in the history
|
|
}
|
|
}
|
|
LastTime = Now;
|
|
}
|
|
bTrackingEvents = false;
|
|
delete CurrentEventNodeFrame;
|
|
CurrentEventNodeFrame = NULL;
|
|
}
|
|
|
|
#endif // (RHI_NEW_GPU_PROFILER == 0)
|