Files
UnrealEngine/Engine/Source/Runtime/VulkanRHI/Private/VulkanUtil.cpp
2025-05-18 13:04:45 +08:00

1091 lines
33 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VulkanUtil.cpp: Vulkan Utility implementation.
=============================================================================*/
#include "VulkanRHIPrivate.h"
#include "VulkanUtil.h"
#include "VulkanPendingState.h"
#include "VulkanContext.h"
#include "VulkanMemory.h"
#include "Engine/GameEngine.h"
#include "Misc/App.h"
#include "Misc/OutputDeviceRedirector.h"
#include "RHIValidationContext.h"
#include "PipelineStateCache.h"
#include "Async/ParallelFor.h"
#include "HAL/FileManager.h"
#include "RenderCore.h"
#include "RHICoreNvidiaAftermath.h"
#include "VulkanRayTracing.h"
#include "VulkanQuery.h"
#if PLATFORM_ANDROID
#include "Android/AndroidStats.h"
#endif
// Global pointer to the active Vulkan RHI instance (assigned elsewhere; nullptr until then).
FVulkanDynamicRHI* GVulkanRHI = nullptr;

// Set to true on VK_ERROR_DEVICE_LOST (see VerifyVulkanResult below).
extern CORE_API bool GIsGPUCrashed;
// Propagates a debug name for a texture's VkImage to whichever debugging facilities are
// compiled in: the image tracking layer, the API dump layer, and VK_EXT_debug_utils naming.
void SetVulkanResourceName(FVulkanDevice* Device, FVulkanTexture* Texture, const TCHAR* Name)
{
#if VULKAN_ENABLE_IMAGE_TRACKING_LAYER
	VulkanRHI::BindDebugLabelName(Texture->Image, Name);
#endif

#if VULKAN_ENABLE_DUMP_LAYER
	// TODO: this dies in the printf on android. Needs investigation.
#if !PLATFORM_ANDROID
	VulkanRHI::PrintfBegin(*FString::Printf(TEXT("vkDebugMarkerSetObjectNameEXT(0x%p=%s)\n"), Texture->Image, Name));
#endif
#endif

#if VULKAN_ENABLE_DRAW_MARKERS
	// Only available when the device exposes vkSetDebugUtilsObjectNameEXT.
	if (PFN_vkSetDebugUtilsObjectNameEXT SetDebugName = Device->GetSetDebugName())
	{
		// Vulkan wants an ANSI string; convert once and pass the raw buffer.
		FAnsiString AnsiName(Name);
		VulkanRHI::SetDebugName(SetDebugName, Device->GetInstanceHandle(), Texture->Image, GetData(AnsiName));
	}
#endif
}
#if (RHI_NEW_GPU_PROFILER == 0)
// Sentinel marker recorded when the crash-marker stack exceeds the configured depth
// (see FVulkanGPUProfiler::PushMarkerForCrash); its CRC is precomputed once.
static FString EventDeepString(TEXT("EventTooDeep"));
static const uint32 EventDeepCRC = FCrc::StrCrc32<TCHAR>(*EventDeepString);
/**
* Initializes the static variables, if necessary.
*/
void FVulkanGPUTiming::PlatformStaticInitialize(void* UserData)
{
GIsSupported = false;
// Are the static variables initialized?
check( !GAreGlobalsInitialized );
FVulkanGPUTiming* Caller = (FVulkanGPUTiming*)UserData;
if (Caller && Caller->Device && FVulkanPlatform::SupportsTimestampRenderQueries())
{
const VkPhysicalDeviceLimits& Limits = Caller->Device->GetDeviceProperties().limits;
bool bSupportsTimestamps = (Limits.timestampComputeAndGraphics == VK_TRUE);
if (!bSupportsTimestamps)
{
UE_LOG(LogVulkanRHI, Warning, TEXT("Timestamps not supported on Device"));
return;
}
SetTimingFrequency((uint64)((1000.0 * 1000.0 * 1000.0) / Limits.timestampPeriod));
CalibrateTimers(*Caller->Device);
GIsSupported = true;
}
}
// Records a paired GPU/CPU timestamp so GPU ticks can be correlated with CPU time.
// Requires VK_EXT_calibrated_timestamps; silently does nothing without it.
void FVulkanGPUTiming::CalibrateTimers(FVulkanDevice& Device)
{
	if (!Device.GetOptionalExtensions().HasEXTCalibratedTimestamps)
	{
		return;
	}
	SetCalibrationTimestamp(Device.GetCalibrationTimestamp());
}
// Out-of-line destructor; nothing to release here.
FVulkanGPUTiming::~FVulkanGPUTiming() = default;
/**
 * Initializes all Vulkan resources and if necessary, the static variables.
 * @param PoolSize  Unused by this implementation.
 */
void FVulkanGPUTiming::Initialize(uint32 PoolSize)
{
	// Runs PlatformStaticInitialize() exactly once globally to detect timestamp support.
	StaticInitialize(this, PlatformStaticInitialize);
	bIsTiming = false;
}
/**
 * Releases all Vulkan resources.
 * Intentionally empty in the current implementation — there is nothing to release here.
 */
void FVulkanGPUTiming::Release()
{
}
/**
 * Start a GPU timing measurement.
 * Writes a bottom-of-pipe timestamp into the context's current query pool and remembers
 * the submission sync point so the result can be read back once the GPU has finished.
 * @param InContext  Context to record into; falls back to the member Context when null.
 */
void FVulkanGPUTiming::StartTiming(FVulkanContextCommon* InContext)
{
	// Issue a timestamp query for the 'start' time.
	if (GIsSupported && !bIsTiming)
	{
		if (InContext == nullptr)
		{
			// Fall back to the context this timer was created with.
			InContext = Context;
		}

		// In case we aren't reading queries, remove oldest
		if (NumPendingQueries >= MaxPendingQueries)
		{
			DiscardOldestQuery();
			if (NumPendingQueries >= MaxPendingQueries)
			{
				// Oldest query is still in flight on the GPU: skip this measurement entirely
				// rather than grow the backlog (bIsTiming stays false, so EndTiming is a no-op).
				return;
			}
		}

		check(!ActiveQuery);
		ActiveQuery = new FPendingQuery;

		// Reserve a slot whose result will be written into StartResult at readback time.
		FVulkanQueryPool* CurrentPool = InContext->GetCurrentTimestampQueryPool();
		const uint32 IndexInPool = CurrentPool->ReserveQuery(&ActiveQuery->StartResult);
		VulkanRHI::vkCmdWriteTimestamp(InContext->GetCommandBuffer().GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, CurrentPool->GetHandle(), IndexInPool);
		ActiveQuery->StartSyncPoint = InContext->GetContextSyncPoint();

		bIsTiming = true;
	}
}
/**
 * End a GPU timing measurement.
 * The timing for this particular measurement will be resolved at a later time by the GPU.
 * Writes the 'end' bottom-of-pipe timestamp and queues the start/end pair for readback.
 * @param InContext  Context to record into; falls back to the member Context when null.
 */
void FVulkanGPUTiming::EndTiming(FVulkanContextCommon* InContext)
{
	// Issue a timestamp query for the 'end' time.
	if (GIsSupported && bIsTiming)
	{
		if (InContext == nullptr)
		{
			// Fall back to the context this timer was created with.
			InContext = Context;
		}

		check(ActiveQuery);
		FVulkanQueryPool* CurrentPool = InContext->GetCurrentTimestampQueryPool();
		const uint32 IndexInPool = CurrentPool->ReserveQuery(&ActiveQuery->EndResult);
		VulkanRHI::vkCmdWriteTimestamp(InContext->GetCommandBuffer().GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, CurrentPool->GetHandle(), IndexInPool);
		ActiveQuery->EndSyncPoint = InContext->GetContextSyncPoint();

		// Hand the completed pair over for readback in GetTiming()/DiscardOldestQuery().
		PendingQueries.Enqueue(ActiveQuery);
		NumPendingQueries++;
		ActiveQuery = nullptr;

		bIsTiming = false;
		bEndTimestampIssued = true;
	}
}
void FVulkanGPUTiming::DiscardOldestQuery()
{
FPendingQuery* PendingQuery = nullptr;
if (PendingQueries.Peek(PendingQuery))
{
if (!PendingQuery->StartSyncPoint->IsComplete() || !PendingQuery->EndSyncPoint->IsComplete())
{
FVulkanDynamicRHI::Get().ProcessInterruptQueueUntil(nullptr); // leave null, we don't want to force a wait on the SyncPoint
}
if (PendingQuery->StartSyncPoint->IsComplete() && PendingQuery->EndSyncPoint->IsComplete())
{
PendingQueries.Pop();
delete PendingQuery;
NumPendingQueries--;
}
}
}
/**
 * Retrieves the most recently resolved timing measurement.
 * The value is in raw GPU timestamp ticks; convert using GetTimingFrequency()
 * (set from the device's timestampPeriod). Returns 0 if there are no resolved measurements.
 *
 * @param bGetCurrentResultsAndBlock  When true, CPU-waits until the pending queries' sync
 *                                    points complete instead of returning the last value.
 * @return Value of the most recently resolved timing, or 0 if no measurements have been resolved by the GPU yet.
 */
uint64 FVulkanGPUTiming::GetTiming(bool bGetCurrentResultsAndBlock)
{
	if (GIsSupported)
	{
		FPendingQuery* PendingQuery = nullptr;
		// Drain every pending query whose GPU work has completed; the newest result wins.
		while (PendingQueries.Peek(PendingQuery))
		{
			if (bGetCurrentResultsAndBlock && PendingQuery->StartSyncPoint.IsValid() && PendingQuery->EndSyncPoint.IsValid())
			{
				SCOPE_CYCLE_COUNTER(STAT_RenderQueryResultTime);
				{
					FRenderThreadIdleScope IdleScope(ERenderThreadIdleTypes::WaitingForGPUQuery);

					// CPU wait for query results to be ready.
					if (!PendingQuery->StartSyncPoint->IsComplete())
					{
						FVulkanDynamicRHI::Get().ProcessInterruptQueueUntil(PendingQuery->StartSyncPoint);
					}
					if (!PendingQuery->EndSyncPoint->IsComplete())
					{
						FVulkanDynamicRHI::Get().ProcessInterruptQueueUntil(PendingQuery->EndSyncPoint);
					}
				}
			}

			if (PendingQuery->EndSyncPoint->IsComplete() && PendingQuery->StartSyncPoint->IsComplete())
			{
				// Guard against non-monotonic timestamps before computing the delta.
				if (PendingQuery->EndResult > PendingQuery->StartResult)
				{
					// Only keep the most recent result
					LastTime = PendingQuery->EndResult - PendingQuery->StartResult;
				}
				PendingQueries.Pop();
				delete PendingQuery;
				NumPendingQueries--;
			}
			else
			{
				// Oldest query still in flight (non-blocking call): stop scanning.
				break;
			}
		}
		return LastTime;
	}

	return 0;
}
/** Start this frame of perf tracking: clear last frame's event tree and open the root timing query. */
void FVulkanEventNodeFrame::StartFrame()
{
	EventTree.Reset();
	RootEventTiming.StartTiming();
}
/** End this frame of perf tracking, but do not block yet; results are resolved later in GetRootTimingResults(). */
void FVulkanEventNodeFrame::EndFrame()
{
	RootEventTiming.EndTiming();
}
float FVulkanEventNodeFrame::GetRootTimingResults()
{
double RootResult = 0.0f;
if (RootEventTiming.IsSupported())
{
const uint64 GPUTiming = RootEventTiming.GetTiming(true);
// In milliseconds
RootResult = (double)GPUTiming / (double)RootEventTiming.GetTimingFrequency();
}
return (float)RootResult;
}
float FVulkanEventNode::GetTiming()
{
float Result = 0;
if (Timing.IsSupported())
{
const uint64 GPUTiming = Timing.GetTiming(true);
// In milliseconds
Result = (double)GPUTiming / (double)Timing.GetTimingFrequency();
}
return Result;
}
FVulkanGPUProfiler::FVulkanGPUProfiler(FVulkanContextCommon* InContext, FVulkanDevice* InDevice)
	: bCommandlistSubmitted(false)
	, Device(InDevice)
	, CmdContext(InContext)
	, bBeginFrame(false)
{
	// Open the first frame immediately so profiling state (cvars, latched flags) is valid
	// before the first explicit BeginFrame() call.
	BeginFrame();
}
// Out-of-line destructor; nothing to release here.
FVulkanGPUProfiler::~FVulkanGPUProfiler() = default;
// Begins a profiler frame: refreshes crash-marker settings, latches the game-thread profile
// triggers into private copies, and (when profiling is active) opens a new event node frame.
void FVulkanGPUProfiler::BeginFrame()
{
#if VULKAN_SUPPORTS_GPU_CRASH_DUMPS
	if (UE::RHI::UseGPUCrashDebugging())
	{
		// Cvar pointers are looked up once and cached in function statics.
		static auto* CrashCollectionEnableCvar = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.gpucrash.collectionenable"));
		static auto* CrashCollectionDataDepth = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.gpucrash.datadepth"));
		bTrackingGPUCrashData = CrashCollectionEnableCvar ? CrashCollectionEnableCvar->GetValueOnRenderThread() != 0 : false;
		GPUCrashDataDepth = CrashCollectionDataDepth ? CrashCollectionDataDepth->GetValueOnRenderThread() : -1;
		if (GPUCrashDataDepth == -1 || GPUCrashDataDepth > GMaxCrashBufferEntries)
		{
			// Warn only once, but apply the clamp every frame.
			static bool bChecked = false;
			if (!bChecked)
			{
				bChecked = true;
				UE_LOG(LogVulkanRHI, Warning, TEXT("Clamping r.gpucrash.datadepth to %d"), GMaxCrashBufferEntries);
			}
			GPUCrashDataDepth = GMaxCrashBufferEntries;
		}
	}
#endif

	bCommandlistSubmitted = false;
	CurrentEventNode = NULL;
	check(!bTrackingEvents);
	check(!CurrentEventNodeFrame); // this should have already been cleaned up and the end of the previous frame

	bBeginFrame = true;

	// latch the bools from the game thread into our private copy
	bLatchedGProfilingGPU = GTriggerGPUProfile;
	bLatchedGProfilingGPUHitches = GTriggerGPUHitchProfile;
	if (bLatchedGProfilingGPUHitches)
	{
		bLatchedGProfilingGPU = false; // we do NOT permit an ordinary GPU profile during hitch profiles
	}

	// if we are starting a hitch profile or this frame is a gpu profile, then save off the state of the draw events
	if (bLatchedGProfilingGPU || (!bPreviousLatchedGProfilingGPUHitches && bLatchedGProfilingGPUHitches))
	{
		bOriginalGEmitDrawEvents = GetEmitDrawEvents();
	}

	if (bLatchedGProfilingGPU || bLatchedGProfilingGPUHitches)
	{
		if (bLatchedGProfilingGPUHitches && GPUHitchDebounce)
		{
			// if we are doing hitches and we had a recent hitch, wait to recover
			// the reasoning is that collecting the hitch report may itself hitch the GPU
			GPUHitchDebounce--;
		}
		else
		{
			SetEmitDrawEvents(true); // thwart an attempt to turn this off on the game side
			bTrackingEvents = true;
			CurrentEventNodeFrame = new FVulkanEventNodeFrame(CmdContext, Device);
			CurrentEventNodeFrame->StartFrame();
		}
	}
	else if (bPreviousLatchedGProfilingGPUHitches)
	{
		// hitch profiler is turning off, clear history and restore draw events
		GPUHitchEventNodeFrames.Empty();
		SetEmitDrawEvents(bOriginalGEmitDrawEvents);
	}

	bPreviousLatchedGProfilingGPUHitches = bLatchedGProfilingGPUHitches;
}
// Closes any event nodes still open and ends the current frame's root timing.
// Must run before the command list is submitted so all timestamps land in this submission.
void FVulkanGPUProfiler::EndFrameBeforeSubmit()
{
	if (GetEmitDrawEvents())
	{
		// Finish all open nodes
		// This is necessary because timestamps must be issued before SubmitDone(), and SubmitDone() happens in RHIEndDrawingViewport instead of RHIEndFrame
		while (CurrentEventNode)
		{
			UE_LOG(LogRHI, Warning, TEXT("POPPING BEFORE SUB"));
			PopEvent();
		}

		bCommandlistSubmitted = true;
	}

	// if we have a frame open, close it now.
	if (CurrentEventNodeFrame)
	{
		CurrentEventNodeFrame->EndFrame();
	}
}
// Ends the profiler frame: when a one-shot GPU profile was latched, flushes the command
// stream, dumps the event tree, and resets the trigger; always tears down the frame state.
void FVulkanGPUProfiler::EndFrame()
{
	EndFrameBeforeSubmit();

	check(!bTrackingEvents || bLatchedGProfilingGPU || bLatchedGProfilingGPUHitches);
	if (bLatchedGProfilingGPU)
	{
		if (bTrackingEvents)
		{
			// Flush so the profiled frame's timestamps are executed before the blocking dump below.
			CmdContext->FlushCommands(EVulkanFlushFlags::None);
			SetEmitDrawEvents(bOriginalGEmitDrawEvents);
			UE_LOG(LogRHI, Warning, TEXT(""));
			UE_LOG(LogRHI, Warning, TEXT(""));
			check(CurrentEventNodeFrame);
			CurrentEventNodeFrame->DumpEventTree();

			// One-shot: reset the global trigger so only this frame is profiled.
			GTriggerGPUProfile = false;
			bLatchedGProfilingGPU = false;
		}
	}
	else if (bLatchedGProfilingGPUHitches)
	{
		UE_LOG(LogRHI, Warning, TEXT("GPU hitch tracking not implemented on Vulkan"));
	}

	bTrackingEvents = false;
	if (CurrentEventNodeFrame)
	{
		delete CurrentEventNodeFrame;
		CurrentEventNodeFrame = nullptr;
	}

	bBeginFrame = false;
}
#if VULKAN_SUPPORTS_GPU_CRASH_DUMPS
// Pushes a breadcrumb marker for GPU crash diagnostics: the event name's CRC goes onto the
// stack (and into the CRC->name table for later resolution), then the whole stack is written
// to the crash-marker buffer.
void FVulkanGPUProfiler::PushMarkerForCrash(FVulkanCommandBuffer* CmdBuffer, VkBuffer DestBuffer, const TCHAR* Name)
{
	uint32 MarkerCRC = 0;
	const bool bWithinDepthLimit = (GPUCrashDataDepth < 0) || (PushPopStack.Num() < GPUCrashDataDepth);
	if (bWithinDepthLimit)
	{
		MarkerCRC = FCrc::StrCrc32<TCHAR>(Name);

		// Bound the CRC->string table; when trimming, re-seed it with the sentinel entry.
		if (CachedStrings.Num() > 10000)
		{
			CachedStrings.Empty(10000);
			CachedStrings.Emplace(EventDeepCRC, EventDeepString);
		}

		if (!CachedStrings.Contains(MarkerCRC))
		{
			CachedStrings.Emplace(MarkerCRC, FString(Name));
		}
	}
	else
	{
		// Too deep: collapse everything past the limit into the shared "EventTooDeep" marker.
		MarkerCRC = EventDeepCRC;
	}

	PushPopStack.Push(MarkerCRC);
	FVulkanPlatform::WriteCrashMarker(Device->GetOptionalExtensions(), CmdBuffer, DestBuffer, TArrayView<uint32>(PushPopStack), true);
}
// Pops the most recent breadcrumb marker and rewrites the (now shorter) stack to the
// crash-marker buffer. Safe to call when the stack is already empty.
void FVulkanGPUProfiler::PopMarkerForCrash(FVulkanCommandBuffer* CmdBuffer, VkBuffer DestBuffer)
{
	if (PushPopStack.Num() == 0)
	{
		return;
	}

	PushPopStack.Pop(EAllowShrinking::No);
	FVulkanPlatform::WriteCrashMarker(Device->GetOptionalExtensions(), CmdBuffer, DestBuffer, TArrayView<uint32>(PushPopStack), false);
}
// Logs the GPU breadcrumb trail after a device loss, resolving marker CRCs back to event
// names via CachedStrings. BufferData points at the mapped crash-marker buffer written by
// Push/PopMarkerForCrash; its first uint32 is the entry count, followed by the CRCs.
void FVulkanGPUProfiler::DumpCrashMarkers(void* BufferData)
{
	{
		if (Device->GetOptionalExtensions().HasAMDBufferMarker)
		{
			UE_LOG(LogVulkanRHI, Error, TEXT("Breadcrumbs using VK_AMD_buffer_marker extension"));
		}
		else
		{
			UE_LOG(LogVulkanRHI, Error, TEXT("Breadcrumbs without extensions"));
		}

		uint32* Entries = (uint32*)BufferData;
		uint32 NumCRCs = *Entries++;
		for (uint32 Index = 0; Index < NumCRCs; ++Index)
		{
			const FString* Frame = CachedStrings.Find(*Entries);
			UE_LOG(LogVulkanRHI, Error, TEXT("[GPU Breadcrumb] %i: %s (CRC 0x%x)"), Index, Frame ? *(*Frame) : TEXT("<undefined>"), *Entries);
			++Entries;
		}
	}

	// With VK_NV_device_diagnostic_checkpoints, additionally dump the per-queue checkpoints.
	if (Device->GetOptionalExtensions().HasNVDiagnosticCheckpoints)
	{
		// Local wrapper only exists to zero-init the sType before the query fills the array.
		struct FCheckpointDataNV : public VkCheckpointDataNV
		{
			FCheckpointDataNV()
			{
				ZeroVulkanStruct(*this, VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV);
			}
		};
		TArray<FCheckpointDataNV> Data;

		// Vulkan two-call pattern: query the count first, then fetch the data.
		uint32 Num = 0;
		VkQueue QueueHandle = Device->GetGraphicsQueue()->GetHandle();
		VulkanDynamicAPI::vkGetQueueCheckpointDataNV(QueueHandle, &Num, nullptr);
		if (Num > 0)
		{
			Data.AddDefaulted(Num);
			VulkanDynamicAPI::vkGetQueueCheckpointDataNV(QueueHandle, &Num, &Data[0]);
			check(Num == Data.Num());
			for (uint32 Index = 0; Index < Num; ++Index)
			{
				check(Data[Index].sType == VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV);
				// pCheckpointMarker carries the CRC that was pushed as the checkpoint payload.
				uint32 Value = (uint32)(size_t)Data[Index].pCheckpointMarker;
				const FString* Frame = CachedStrings.Find(Value);
				UE_LOG(LogVulkanRHI, Error, TEXT("[VK_NV_device_diagnostic_checkpoints] %i: Stage %s (0x%08x), %s (CRC 0x%x)"),
					Index, VK_TYPE_TO_STRING(VkPipelineStageFlagBits, Data[Index].stage), Data[Index].stage, Frame ? *(*Frame) : TEXT("<undefined>"), Value);
			}
			// Flush and halt logging: the process is about to go down with the lost device.
			GLog->Panic();
		}
	}
}
#endif // VULKAN_SUPPORTS_GPU_CRASH_DUMPS
#if NV_AFTERMATH
// Aftermath callback: resolves an opaque marker value (the CRC pushed as checkpoint payload)
// to the cached event-name string so crash dumps contain readable marker names.
// On success, *ResolvedMarkerData points into the cached FString's character buffer
// (assumed to remain alive while Aftermath consumes it — it lives in CachedStrings).
void AftermathResolveMarkerCallback(const void* Marker, void** ResolvedMarkerData, uint32_t* MarkerSize)
{
#if VULKAN_SUPPORTS_NV_DIAGNOSTICS
	FVulkanDevice* VulkanDevice = FVulkanDynamicRHI::Get().GetDevice();
	if (VulkanDevice->GetOptionalExtensions().HasNVDiagnosticCheckpoints)
	{
		const uint32 Value = (uint32)(size_t)Marker;
		const FString* MarkerName = VulkanDevice->GetImmediateContext().GetGPUProfiler().CachedStrings.Find(Value);
		UE_LOG(LogVulkanRHI, Display, TEXT("[AftermathResolveMarkerCallback] Requested %u [%s]"), Value, MarkerName ? *(*MarkerName) : TEXT("<undefined>"));
		if (MarkerName && !MarkerName->IsEmpty() && ResolvedMarkerData && MarkerSize)
		{
			// Report the raw TCHAR buffer and its size in bytes (includes the terminator).
			const TArray<TCHAR, FString::AllocatorType>& CharArray = MarkerName->GetCharArray();
			(*ResolvedMarkerData) = (void*)CharArray.GetData();
			(*MarkerSize) = CharArray.Num() * CharArray.GetTypeSize();
		}
	}
#endif // VULKAN_SUPPORTS_NV_DIAGNOSTICS
}
// Appends a shader to the flat list exactly once, using the companion set for deduplication.
// Null shaders (unused pipeline stages) are ignored.
// @param Value      Shader to add; may be null.
// @param Shaders    Deduplicated output list.
// @param ShaderSet  Membership set mirroring Shaders.
void ConditionallyAddShader(const FVulkanShader* Value, TArray<const FVulkanShader*>& Shaders, TSet<const FVulkanShader*>& ShaderSet)
{
	if (Value)
	{
		// Single hash lookup: TSet::Add reports whether the element was already present,
		// replacing the previous Contains() + Add() double lookup.
		bool bAlreadyInSet = false;
		ShaderSet.Add(Value, &bAlreadyInSet);
		if (!bAlreadyInSet)
		{
			Shaders.Add(Value);
		}
	}
}
bool IsPipelineUnused(const VulkanResourceFrameCounter& Counter, uint32_t Threshold)
{
// Check in case that it's ahead of the adapter, just in case it entered a strange state
const uint32 PipelineFrameIndex = Counter.Get();
return (PipelineFrameIndex <= GFrameNumberRenderThread) && (GFrameNumberRenderThread - PipelineFrameIndex) > Threshold;
}
// Registers the SPIR-V of recently-used pipelines' shaders with Aftermath so shader addresses
// in a GPU crash dump can be resolved back to their binaries.
// @param TimeLimitSeconds  Wall-clock budget; registration stops once exceeded.
// @param FrameLimit        Pipelines unused for more than this many frames are skipped.
void AftermathLateAssociate(float TimeLimitSeconds, uint32 FrameLimit)
{
	uint64 CycleStart = FPlatformTime::Cycles64();

	UE_LOG(LogVulkanRHI, Log, TEXT("Starting late shader associations..."));

	TArray<const FVulkanShader*> Shaders;
	TSet<const FVulkanShader*> ShaderSet;
	uint32_t IgnoredPipelines = 0;

	// Get active pipelines, allow one second for consolidation to finish
	TArray<TRefCountPtr<FRHIResource>> PipelineResources;
	PipelineStateCache::GetPipelineStates(PipelineResources, true, UE::FTimeout(FTimespan::FromSeconds(1)));

	// Deduplicate shaders, Aftermath hashes are not local to the parent pipeline
	for (FRHIResource* Resource : PipelineResources)
	{
		if (!Resource)
		{
			continue;
		}

		switch (Resource->GetType())
		{
		default:
			// Unknown resource type: assert in development builds and skip the resource.
			// (Previously this fell through into the graphics-pipeline case in builds where
			// checkNoEntry() compiles out, misinterpreting the resource.)
			checkNoEntry();
			continue;

		case RRT_GraphicsPipelineState:
		{
			auto* Pipeline = static_cast<FVulkanRHIGraphicsPipelineState*>(Resource);
			if (IsPipelineUnused(Pipeline->FrameCounter, FrameLimit))
			{
				IgnoredPipelines++;
				continue;
			}
			ConditionallyAddShader(static_cast<FVulkanVertexShader*>(Pipeline->GetShader(SF_Vertex)), Shaders, ShaderSet);
			ConditionallyAddShader(static_cast<FVulkanGeometryShader*>(Pipeline->GetShader(SF_Geometry)), Shaders, ShaderSet);
			ConditionallyAddShader(static_cast<FVulkanGeometryShader*>(Pipeline->GetShader(SF_Amplification)), Shaders, ShaderSet);
			ConditionallyAddShader(static_cast<FVulkanGeometryShader*>(Pipeline->GetShader(SF_Mesh)), Shaders, ShaderSet);
			ConditionallyAddShader(static_cast<FVulkanPixelShader*>(Pipeline->GetShader(SF_Pixel)), Shaders, ShaderSet);
			break;
		}

		case RRT_ComputePipelineState:
		{
			auto* Pipeline = static_cast<FVulkanComputePipeline*>(Resource);
			if (IsPipelineUnused(Pipeline->FrameCounter, FrameLimit))
			{
				IgnoredPipelines++;
				continue;
			}
			ConditionallyAddShader(static_cast<FVulkanComputeShader*>(Pipeline->GetComputeShader()), Shaders, ShaderSet);
			break;
		}

		case RRT_RayTracingPipelineState:
		{
			auto* Pipeline = static_cast<FVulkanRayTracingPipelineState*>(Resource);
			if (IsPipelineUnused(Pipeline->FrameCounter, FrameLimit))
			{
				IgnoredPipelines++;
				continue;
			}
			for (int32 i = 0; i < Pipeline->GetVulkanShaderNum(SF_RayGen); i++)
			{
				ConditionallyAddShader(Pipeline->GetVulkanShader(SF_RayGen, i), Shaders, ShaderSet);
			}
			for (int32 i = 0; i < Pipeline->GetVulkanShaderNum(SF_RayCallable); i++)
			{
				ConditionallyAddShader(Pipeline->GetVulkanShader(SF_RayCallable, i), Shaders, ShaderSet);
			}
			for (int32 i = 0; i < Pipeline->GetVulkanShaderNum(SF_RayHitGroup); i++)
			{
				ConditionallyAddShader(Pipeline->GetVulkanShader(SF_RayHitGroup, i), Shaders, ShaderSet);
			}
			for (int32 i = 0; i < Pipeline->GetVulkanShaderNum(SF_RayMiss); i++)
			{
				ConditionallyAddShader(Pipeline->GetVulkanShader(SF_RayMiss, i), Shaders, ShaderSet);
			}
			break;
		}
		}
	}

	UE_LOG(LogVulkanRHI, Log, TEXT("Late shader associations ignored %u pipelines based on frame fences"), IgnoredPipelines);

	// Parallelize as much as possible to avoid timeouts
	ParallelFor(Shaders.Num(), [CycleStart, TimeLimitSeconds, &Shaders](int32 Index)
	{
		// Aftermath handling is time constrained, if we hit the limit just stop
		float Elapsed = FPlatformTime::ToSeconds64(FPlatformTime::Cycles64() - CycleStart);
		if (Elapsed >= TimeLimitSeconds)
		{
			UE_CALL_ONCE([Elapsed]
			{
				// Format fixed from "%f.5ms" (which printed a literal ".5ms"); Elapsed is in seconds.
				UE_LOG(LogVulkanRHI, Warning, TEXT("Late shader associations timed out at %.5fs"), Elapsed);
			});
			return;
		}

		const FVulkanShader* Shader = Shaders[Index];
		FVulkanShader::FSpirvCode Code = Shader->GetSpirvCode();

		ANSICHAR EntryPointName[1024];
		Shader->GetEntryPoint(EntryPointName, 1024);

		UE::RHICore::Nvidia::Aftermath::RegisterShaderBinary(
			Code.GetCodeView().GetData(),
			Code.GetCodeView().Num() * sizeof(uint32_t),
			FString(EntryPointName)
		);
	});

	double TimeMS = FPlatformTime::ToMilliseconds64(FPlatformTime::Cycles64() - CycleStart);
	// Format fixed from "%f.5ms" to a valid precision specifier.
	UE_LOG(LogVulkanRHI, Log, TEXT("Created late shader associations, took %.5fms"), TimeMS);
}
#endif
#endif // (RHI_NEW_GPU_PROFILER == 0)
FVulkanStagingBuffer::~FVulkanStagingBuffer()
{
	// Return the underlying allocation to the staging manager rather than freeing it directly.
	if (StagingBuffer)
	{
		check(Device);
		Device->GetStagingManager().ReleaseBuffer(nullptr, StagingBuffer);
	}
}
// Maps a sub-range of the staging buffer for CPU reads.
// @param Offset    Byte offset into the queued copy region.
// @param NumBytes  Number of bytes the caller intends to read.
// @return Pointer to the mapped memory at Offset.
void* FVulkanStagingBuffer::Lock(uint32 Offset, uint32 NumBytes)
{
	check(!bIsLocked);
	bIsLocked = true;
	// Widen to 64-bit so Offset + NumBytes cannot wrap around in uint32 arithmetic and
	// silently pass the bounds check below.
	const uint64 EndOffset = (uint64)Offset + (uint64)NumBytes;
	checkf(EndOffset <= QueuedNumBytes, TEXT("Lock at Offset (%u) and NumBytes (%u) reads beyond the allocated size of the staging buffer (%u)"), Offset, NumBytes, QueuedNumBytes);

	// make sure cached memory is invalidated
	StagingBuffer->InvalidateMappedMemory();

	return (void*)((uint8*)StagingBuffer->GetMappedPointer() + Offset);
}
// Releases the CPU lock taken by Lock(); only the bookkeeping flag changes here.
void FVulkanStagingBuffer::Unlock()
{
	check(bIsLocked);
	bIsLocked = false;
}
// Creates the RHI-side staging buffer wrapper; no Vulkan allocation happens here.
FStagingBufferRHIRef FVulkanDynamicRHI::RHICreateStagingBuffer()
{
	return new FVulkanStagingBuffer();
}
// Maps a staging buffer range for CPU reads.
// Caller contract (checked in non-shipping builds): the fence, if provided, must already
// have signaled, i.e. the GPU copy into the staging buffer must be complete.
void* FVulkanDynamicRHI::RHILockStagingBuffer(FRHIStagingBuffer* StagingBufferRHI, FRHIGPUFence* FenceRHI, uint32 Offset, uint32 NumBytes)
{
	FVulkanStagingBuffer* StagingBuffer = ResourceCast(StagingBufferRHI);
	checkSlow(!FenceRHI || FenceRHI->Poll());
	return StagingBuffer->Lock(Offset, NumBytes);
}
// Releases the CPU lock on a staging buffer previously taken by RHILockStagingBuffer.
void FVulkanDynamicRHI::RHIUnlockStagingBuffer(FRHIStagingBuffer* StagingBufferRHI)
{
	FVulkanStagingBuffer* StagingBuffer = ResourceCast(StagingBufferRHI);
	StagingBuffer->Unlock();
}
extern int32 GAllowTimelineSemaphores;
FVulkanGPUFence::FVulkanGPUFence(FVulkanDevice& InDevice, FName InName)
	: FRHIGPUFence(InName)
	, Device(InDevice)
{
	// Without timeline semaphores, fall back to a VkEvent that the GPU sets at bottom-of-pipe
	// (see RHIWriteGPUFence_TopOfPipe). Created and immediately reset so Poll() starts false.
	if (!GAllowTimelineSemaphores)
	{
		VkEventCreateInfo EventCreateInfo;
		ZeroVulkanStruct(EventCreateInfo, VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
		VERIFYVULKANRESULT(VulkanRHI::vkCreateEvent(InDevice.GetInstanceHandle(), &EventCreateInfo, VULKAN_CPU_ALLOCATOR, &Event));
		VERIFYVULKANRESULT(VulkanRHI::vkResetEvent(InDevice.GetInstanceHandle(), Event));
	}
}
// Hands the VkEvent (legacy, non-timeline-semaphore path) to the deferred deletion queue
// so it is destroyed only after the GPU can no longer reference it.
FVulkanGPUFence::~FVulkanGPUFence()
{
	if (Event == VK_NULL_HANDLE)
	{
		return;
	}

	Device.GetDeferredDeletionQueue().EnqueueResource(VulkanRHI::FDeferredDeletionQueue2::EType::Event, Event);
	Event = VK_NULL_HANDLE;
}
// Re-arms the fence for reuse: drops the completion sync point (so Poll() returns false
// again) and, on the VkEvent path, resets the event and its submission sync point.
void FVulkanGPUFence::Clear()
{
	CompletedSyncPoint = nullptr;
	if (Event)
	{
		VERIFYVULKANRESULT(VulkanRHI::vkResetEvent(Device.GetInstanceHandle(), Event));
		SubmittedSyncPoint = nullptr;
	}
}
// Non-blocking check of whether the GPU has passed the fence write.
bool FVulkanGPUFence::Poll() const
{
	// Fast path: the submission containing the fence write has fully completed.
	const bool bCompleted = CompletedSyncPoint.IsValid() && CompletedSyncPoint->IsComplete();
	if (bCompleted)
	{
		return true;
	}

	// Legacy VkEvent path: only query the event once the submit is known to be in flight.
	if (Event && SubmittedSyncPoint.IsValid() && SubmittedSyncPoint->IsComplete())
	{
		return VulkanRHI::vkGetEventStatus(Device.GetInstanceHandle(), Event) == VK_EVENT_SET;
	}

	return false;
}
// Blocks the calling thread until the fence has been passed by the GPU, pumping the
// interrupt queue until the completion sync point signals.
void FVulkanGPUFence::Wait(FRHICommandListImmediate& RHICmdList, FRHIGPUMask GPUMask) const
{
	if (!Poll())
	{
		SCOPED_NAMED_EVENT_TEXT("FVulkanGPUFence_Wait", FColor::Turquoise);
		FVulkanDynamicRHI::Get().ProcessInterruptQueueUntil(CompletedSyncPoint);
	}
}
// Creates a GPU fence on the RHI's device.
FGPUFenceRHIRef FVulkanDynamicRHI::RHICreateGPUFence(const FName& Name)
{
	return new FVulkanGPUFence(*Device, Name);
}
// Arms the fence at command-list record time; the actual GPU-visible write happens when the
// enqueued lambda executes during command translation. A fence may only be written once per
// Clear() cycle (enforced by the checkf below).
void FVulkanDynamicRHI::RHIWriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIGPUFence* FenceRHI)
{
	FVulkanGPUFence* Fence = ResourceCast(FenceRHI);
	check(Fence);
	checkf(!Fence->SubmittedSyncPoint.IsValid() && !Fence->CompletedSyncPoint.IsValid(), TEXT("The fence for the current GPU node has already been issued."));

	Fence->CompletedSyncPoint = CreateVulkanSyncPoint();
	if (Fence->Event)
	{
		// Legacy VkEvent path: also track when the submission goes out, so Poll() only
		// queries the event once it is legally observable by the host.
		Fence->SubmittedSyncPoint = FGraphEvent::CreateGraphEvent();
	}

	Fence->NumPendingWriteCommands.Increment();
	RHICmdList.EnqueueLambda([Fence](FRHICommandListBase& CmdList)
	{
		FVulkanCommandListContext& Context = FVulkanCommandListContext::Get(CmdList);
		if (Fence->Event)
		{
			// GPU sets the event at bottom-of-pipe; the sync points complete at
			// submission/completion of this context's work respectively.
			VulkanRHI::vkCmdSetEvent(Context.GetCommandBuffer().GetHandle(), Fence->Event, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
			Context.AddSubmissionEvent(Fence->SubmittedSyncPoint);
			Context.AddPendingSyncPoint(Fence->CompletedSyncPoint);
		}
		else
		{
			Context.SignalSyncPoint(Fence->CompletedSyncPoint);
		}
		Fence->NumPendingWriteCommands.Decrement();
	});
}
namespace VulkanRHI
{
// Creates a VkBuffer of the given size/usage and reports its memory requirements so the
// caller can bind suitable device memory. Fails hard (VERIFYVULKANRESULT) on error.
VkBuffer CreateBuffer(FVulkanDevice* InDevice, VkDeviceSize Size, VkBufferUsageFlags BufferUsageFlags, VkMemoryRequirements& OutMemoryRequirements)
{
	const VkDevice DeviceHandle = InDevice->GetInstanceHandle();

	VkBufferCreateInfo CreateInfo;
	ZeroVulkanStruct(CreateInfo, VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
	CreateInfo.size = Size;
	CreateInfo.usage = BufferUsageFlags;

	VkBuffer NewBuffer = VK_NULL_HANDLE;
	VERIFYVULKANRESULT_EXPANDED(VulkanRHI::vkCreateBuffer(DeviceHandle, &CreateInfo, VULKAN_CPU_ALLOCATOR, &NewBuffer));

	// Tell the caller what alignment/size/memory types a backing allocation must satisfy.
	VulkanRHI::vkGetBufferMemoryRequirements(DeviceHandle, NewBuffer, &OutMemoryRequirements);
	return NewBuffer;
}
// After a device loss, queries VK_EXT_device_fault for details and logs a report with
// faulting addresses, vendor fault codes, and the size of any vendor binary blob.
void CheckDeviceFault(FVulkanDevice* InDevice)
{
	if (InDevice->GetOptionalExtensions().HasEXTDeviceFault)
	{
		const VkDevice DeviceHandle = InDevice->GetInstanceHandle();
		VkResult Result;

		// Two-call idiom: first fetch the counts...
		VkDeviceFaultCountsEXT FaultCounts;
		ZeroVulkanStruct(FaultCounts, VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT);
		Result = VulkanRHI::vkGetDeviceFaultInfoEXT(DeviceHandle, &FaultCounts, nullptr);
		if (Result == VK_SUCCESS)
		{
			// ...then size the output arrays accordingly and fetch the actual fault info.
			VkDeviceFaultInfoEXT FaultInfo;
			ZeroVulkanStruct(FaultInfo, VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT);

			TArray<VkDeviceFaultAddressInfoEXT> AddressInfos;
			AddressInfos.SetNumZeroed(FaultCounts.addressInfoCount);
			FaultInfo.pAddressInfos = AddressInfos.GetData();

			TArray<VkDeviceFaultVendorInfoEXT> VendorInfos;
			VendorInfos.SetNumZeroed(FaultCounts.vendorInfoCount);
			FaultInfo.pVendorInfos = VendorInfos.GetData();

			TArray<uint8> VendorBinaryData;
			VendorBinaryData.SetNumZeroed(FaultCounts.vendorBinarySize);
			FaultInfo.pVendorBinaryData = VendorBinaryData.GetData();

			Result = VulkanRHI::vkGetDeviceFaultInfoEXT(DeviceHandle, &FaultCounts, &FaultInfo);
			if (Result == VK_SUCCESS)
			{
				// :todo-jn: match these up to resources
				auto ReportAddrToStr = [&AddressInfos]() {
					FString AddrStr;
					for (const VkDeviceFaultAddressInfoEXT& AddrInfo : AddressInfos)
					{
						// Mask by addressPrecision (per spec, a power of two) to derive the
						// possible address range around the reported address.
						const uint64_t LowerAddress = (AddrInfo.reportedAddress & ~(AddrInfo.addressPrecision - 1));
						const uint64_t UpperAddress = (AddrInfo.reportedAddress | (AddrInfo.addressPrecision - 1));
						AddrStr += FString::Printf(TEXT("\n - %s : 0x%016llX (range:0x%016llX-0x%016llX)"),
							VK_TYPE_TO_STRING(VkDeviceFaultAddressTypeEXT, AddrInfo.addressType),
							AddrInfo.reportedAddress, LowerAddress, UpperAddress);
					}
					return AddrStr;
				};

				auto ReportVendorToStr = [&VendorInfos]() {
					FString VendorStr;
					for (const VkDeviceFaultVendorInfoEXT& VendorInfo : VendorInfos)
					{
						VendorStr += FString::Printf(TEXT("\n - %s (code:0x%016llX data:0x%016llX)"),
							StringCast<TCHAR>((UTF8CHAR*)VendorInfo.description).Get(), VendorInfo.vendorFaultCode, VendorInfo.vendorFaultData);
					}
					return VendorStr;
				};

				UE_LOG(LogVulkanRHI, Error,
					TEXT("\nDEVICE FAULT REPORT:\n")
					TEXT("* Description: %s\n")
					TEXT("* Address Info: %s\n")
					TEXT("* Vendor Info: %s\n")
					TEXT("* Vendor Binary Size: %llu\n"),
					StringCast<TCHAR>((UTF8CHAR*)FaultInfo.description).Get(), *ReportAddrToStr(), *ReportVendorToStr(), FaultCounts.vendorBinarySize
				);
			}
		}
	}
}
/**
 * Checks that the given result isn't a failure. If it is, the application exits with an appropriate error message.
 * @param Result - The result code to check
 * @param VkFunction - Tested function name.
 * @param Filename - The filename of the source file containing the failing call.
 * @param Line - The line number of the failing call within Filename.
 */
void VerifyVulkanResult(VkResult Result, const ANSICHAR* VkFunction, const ANSICHAR* Filename, uint32 Line)
{
	bool bDumpMemory = false;
	FString ErrorString;
	switch (Result)
	{
	// Maps each VkResult to its enum name for logging; some cases add extra handling.
#define VKERRORCASE(x) case x: ErrorString = TEXT(#x)
	VKERRORCASE(VK_NOT_READY); break;
	VKERRORCASE(VK_TIMEOUT); break;
	VKERRORCASE(VK_EVENT_SET); break;
	VKERRORCASE(VK_EVENT_RESET); break;
	VKERRORCASE(VK_INCOMPLETE); break;
	VKERRORCASE(VK_ERROR_OUT_OF_HOST_MEMORY); bDumpMemory = true; break;
	VKERRORCASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); bDumpMemory = true; break;
	VKERRORCASE(VK_ERROR_INITIALIZATION_FAILED); break;
	VKERRORCASE(VK_ERROR_DEVICE_LOST);
		// Device loss marks the GPU as crashed so the block below dumps diagnostics.
		GIsGPUCrashed = true;
#if PLATFORM_ANDROID
		FAndroidStats::LogGPUStats();
#endif
		break;
	VKERRORCASE(VK_ERROR_MEMORY_MAP_FAILED); break;
	VKERRORCASE(VK_ERROR_LAYER_NOT_PRESENT); break;
	VKERRORCASE(VK_ERROR_EXTENSION_NOT_PRESENT); break;
	VKERRORCASE(VK_ERROR_FEATURE_NOT_PRESENT); break;
	VKERRORCASE(VK_ERROR_INCOMPATIBLE_DRIVER); break;
	VKERRORCASE(VK_ERROR_TOO_MANY_OBJECTS); break;
	VKERRORCASE(VK_ERROR_FORMAT_NOT_SUPPORTED); break;
	VKERRORCASE(VK_ERROR_SURFACE_LOST_KHR); break;
	VKERRORCASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); break;
	VKERRORCASE(VK_SUBOPTIMAL_KHR); break;
	VKERRORCASE(VK_ERROR_OUT_OF_DATE_KHR); break;
	VKERRORCASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); break;
	VKERRORCASE(VK_ERROR_VALIDATION_FAILED_EXT); break;
	VKERRORCASE(VK_ERROR_INVALID_SHADER_NV); break;
	VKERRORCASE(VK_ERROR_FRAGMENTED_POOL); break;
	VKERRORCASE(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); break;
	VKERRORCASE(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); break;
	VKERRORCASE(VK_ERROR_NOT_PERMITTED_EXT); break;
#undef VKERRORCASE
	default:
		break;
	}

#if VULKAN_HAS_DEBUGGING_ENABLED
	if (Result == VK_ERROR_VALIDATION_FAILED_EXT)
	{
		if (GValidationCvar.GetValueOnRenderThread() == 0)
		{
			UE_LOG(LogVulkanRHI, Fatal, TEXT("Failed with Validation error. Try running with r.Vulkan.EnableValidation=1 or -vulkandebug to get information from the validation layers."));
		}
	}
#endif

	// Log once at Error severity first so the message reaches the log even if the crash
	// handling below terminates the process before the Fatal log at the end.
	UE_LOG(LogVulkanRHI, Error, TEXT("%s failed, VkResult=%d\n at %s:%u \n with error %s"),
		ANSI_TO_TCHAR(VkFunction), (int32)Result, ANSI_TO_TCHAR(Filename), Line, *ErrorString);

	if (GIsGPUCrashed)
	{
		FVulkanDevice* Device = GVulkanRHI->GetDevice();
#if RHI_NEW_GPU_PROFILER
		checkNoEntry(); // @todo - new gpu profiler
#else
#if VULKAN_SUPPORTS_GPU_CRASH_DUMPS
		if (UE::RHI::UseGPUCrashDebugging())
		{
			// Dump breadcrumb markers recorded via Push/PopMarkerForCrash.
			Device->GetImmediateContext().GetGPUProfiler().DumpCrashMarkers(Device->GetCrashMarkerMappedPointer());
		}
#endif
#endif
		// Query VK_EXT_device_fault (when available) for fault addresses/vendor codes.
		CheckDeviceFault(Device);

		// Make sure we wait on the Aftermath crash dump before we crash.
#if NV_AFTERMATH
		TArray<UE::RHICore::Nvidia::Aftermath::FCrashResult> AftermathResults;
		UE::RHICore::Nvidia::Aftermath::OnGPUCrash(AftermathResults);
#endif

		UGameEngine* GameEngine = Cast<UGameEngine>(GEngine);
		if (GameEngine)
		{
			GameEngine->OnGPUCrash();
		}
	}

#if UE_BUILD_DEBUG || UE_BUILD_DEVELOPMENT
	if (bDumpMemory)
	{
		// Out-of-memory results: dump the RHI's memory statistics to aid diagnosis.
		GVulkanRHI->DumpMemory();
	}
#endif

	// Fatal log terminates the application.
	UE_LOG(LogVulkanRHI, Fatal, TEXT("%s failed, VkResult=%d\n at %s:%u \n with error %s"),
		ANSI_TO_TCHAR(VkFunction), (int32)Result, ANSI_TO_TCHAR(Filename), Line, *ErrorString);

	// Force shutdown, we can't do anything useful anymore.
	// FPlatformMisc::RequestExit(true, TEXT("VerifyVulkanResult"));
}
}
// Stat definitions for the Vulkan RHI (declared via DECLARE_*_STAT macros elsewhere).

// Pipeline state objects / PSO cache.
DEFINE_STAT(STAT_VulkanNumPSOs);
DEFINE_STAT(STAT_VulkanNumGraphicsPSOs);
DEFINE_STAT(STAT_VulkanNumPSOLRU);
DEFINE_STAT(STAT_VulkanNumPSOLRUSize);
DEFINE_STAT(STAT_VulkanPSOLookupTime);
DEFINE_STAT(STAT_VulkanPSOCreationTime);
DEFINE_STAT(STAT_VulkanPSOHeaderInitTime);
DEFINE_STAT(STAT_VulkanPSOVulkanCreationTime);
DEFINE_STAT(STAT_VulkanNumComputePSOs);
DEFINE_STAT(STAT_VulkanPSOKeyMemory);

// Draw/dispatch and pipeline binding timings.
DEFINE_STAT(STAT_VulkanDrawCallTime);
DEFINE_STAT(STAT_VulkanDispatchCallTime);
DEFINE_STAT(STAT_VulkanDrawCallPrepareTime);
DEFINE_STAT(STAT_VulkanCustomPresentTime);
DEFINE_STAT(STAT_VulkanDispatchCallPrepareTime);
DEFINE_STAT(STAT_VulkanGetOrCreatePipeline);
DEFINE_STAT(STAT_VulkanGetDescriptorSet);
DEFINE_STAT(STAT_VulkanPipelineBind);

// Object counts.
DEFINE_STAT(STAT_VulkanNumCmdBuffers);
DEFINE_STAT(STAT_VulkanNumRenderPasses);
DEFINE_STAT(STAT_VulkanNumFrameBuffers);
DEFINE_STAT(STAT_VulkanNumBufferViews);
DEFINE_STAT(STAT_VulkanNumImageViews);
DEFINE_STAT(STAT_VulkanNumPhysicalMemAllocations);

// Dynamic/uniform buffer usage.
DEFINE_STAT(STAT_VulkanTempFrameAllocationBuffer);
DEFINE_STAT(STAT_VulkanDynamicVBSize);
DEFINE_STAT(STAT_VulkanDynamicIBSize);
DEFINE_STAT(STAT_VulkanDynamicVBLockTime);
DEFINE_STAT(STAT_VulkanDynamicIBLockTime);
DEFINE_STAT(STAT_VulkanUPPrepTime);
DEFINE_STAT(STAT_VulkanUniformBufferCreateTime);
DEFINE_STAT(STAT_VulkanApplyDSUniformBuffers);
DEFINE_STAT(STAT_VulkanApplyPackedUniformBuffers);

// Synchronization, submission, queries, and presentation.
DEFINE_STAT(STAT_VulkanBarrierTime);
DEFINE_STAT(STAT_VulkanSRVUpdateTime);
DEFINE_STAT(STAT_VulkanUAVUpdateTime);
DEFINE_STAT(STAT_VulkanDeletionQueue);
DEFINE_STAT(STAT_VulkanQueueSubmit);
DEFINE_STAT(STAT_VulkanQueuePresent);
DEFINE_STAT(STAT_VulkanNumQueries);
DEFINE_STAT(STAT_VulkanNumQueryPools);
DEFINE_STAT(STAT_VulkanWaitQuery);
DEFINE_STAT(STAT_VulkanWaitFence);
DEFINE_STAT(STAT_VulkanResetQuery);
DEFINE_STAT(STAT_VulkanWaitSwapchain);
DEFINE_STAT(STAT_VulkanAcquireBackBuffer);
DEFINE_STAT(STAT_VulkanStagingBuffer);

// Descriptor pools / sets and uniform buffer updates.
DEFINE_STAT(STAT_VulkanVkCreateDescriptorPool);
DEFINE_STAT(STAT_VulkanNumDescPools);
DEFINE_STAT(STAT_VulkanUpdateUniformBuffers);
DEFINE_STAT(STAT_VulkanUpdateUniformBuffersRename);
#if VULKAN_ENABLE_AGGRESSIVE_STATS
DEFINE_STAT(STAT_VulkanUpdateDescriptorSets);
DEFINE_STAT(STAT_VulkanNumUpdateDescriptors);
DEFINE_STAT(STAT_VulkanNumDescSets);
DEFINE_STAT(STAT_VulkanSetUniformBufferTime);
DEFINE_STAT(STAT_VulkanVkUpdateDS);
DEFINE_STAT(STAT_VulkanBindVertexStreamsTime);
#endif
DEFINE_STAT(STAT_VulkanNumDescSetsTotal);