Files
UnrealEngine/Engine/Source/Runtime/VulkanRHI/Private/VulkanQuery.cpp
2025-05-18 13:04:45 +08:00

386 lines
13 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VulkanQuery.cpp: Vulkan query RHI implementation.
=============================================================================*/
#include "VulkanRHIPrivate.h"
#include "VulkanDevice.h"
#include "VulkanResources.h"
#include "VulkanContext.h"
#include "VulkanCommandBuffer.h"
#include "VulkanQuery.h"
#include "EngineGlobals.h"
#include "RenderCore.h"
#if VULKAN_QUERY_CALLSTACK
#include "HAL/PlatformStackwalk.h"
#endif
// Backing storage for the r.Vulkan.TimestampQueryStage console variable.
// FVulkanCommandListContext::RHIEndRenderQuery treats any non-zero value as "bottom of pipe"
// and zero as "top of pipe" when it records a timestamp.
static int32 GTimestampQueryStage = 0;
FAutoConsoleVariableRef CVarTimestampQueryStage(
TEXT("r.Vulkan.TimestampQueryStage"),
GTimestampQueryStage,
TEXT("Defines which pipeline stage is used for timestamp queries.\n")
TEXT(" 0: Use VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, less precise measures but less likely to alter performance (default)\n")
TEXT(" 1: Use VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, more precise measures but may alter performance on some platforms"),
ECVF_RenderThreadSafe
);
// Backing storage for the r.Vulkan.TimingQueryPoolSize console variable.
// FVulkanDevice::AcquireTimingQueryPool() reads this value to size every timestamp pool it creates.
static int32 GTimingQueryPoolSize = 128;
FAutoConsoleVariableRef CVarTimingQueryPoolSize(
TEXT("r.Vulkan.TimingQueryPoolSize"),
GTimingQueryPoolSize,
TEXT("Amount of timing queries per pool in reusable query pools. (Default: 128)\n"),
ECVF_ReadOnly
);
// Backing storage for the r.Vulkan.QueryPoolDeletionDelay console variable.
// FVulkanQueryPool::IsStale() compares a pool's unused-frame counter against this value.
static int32 GQueryPoolDeletionDelay = 10;
FAutoConsoleVariableRef CVarQueryPoolDeletionDelay(
TEXT("r.Vulkan.QueryPoolDeletionDelay"),
GQueryPoolDeletionDelay,
TEXT("Amount of frames to wait before deleting an unused query pools. (Default: 10)\n"),
ECVF_ReadOnly
);
/**
 * Creates the underlying VkQueryPool and bumps the query-pool stat counter.
 *
 * @param InDevice      Device whose handle is used to create (and, in the destructor, destroy) the pool.
 * @param InMaxQueries  Number of queries the Vulkan pool is created with.
 * @param InQueryType   Pool type; translated to a VkQueryType through GetVkQueryType().
 */
FVulkanQueryPool::FVulkanQueryPool(FVulkanDevice& InDevice, uint32 InMaxQueries, EVulkanQueryPoolType InQueryType)
	: Device(InDevice)
	, QueryPool(VK_NULL_HANDLE)
	, MaxQueries(InMaxQueries)
	, QueryType(InQueryType)
{
	INC_DWORD_STAT(STAT_VulkanNumQueryPools);

	// Only the query type and count are filled in; every other field keeps its zeroed value.
	VkQueryPoolCreateInfo PoolCreateInfo;
	ZeroVulkanStruct(PoolCreateInfo, VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
	PoolCreateInfo.queryCount = InMaxQueries;
	PoolCreateInfo.queryType = GetVkQueryType(InQueryType);

	VERIFYVULKANRESULT(VulkanRHI::vkCreateQueryPool(Device.GetInstanceHandle(), &PoolCreateInfo, VULKAN_CPU_ALLOCATOR, &QueryPool));
}
/** Destroys the VkQueryPool, clears the handle and decrements the query-pool stat counter. */
FVulkanQueryPool::~FVulkanQueryPool()
{
	VulkanRHI::vkDestroyQueryPool(Device.GetInstanceHandle(), QueryPool, VULKAN_CPU_ALLOCATOR);
	QueryPool = VK_NULL_HANDLE;

	DEC_DWORD_STAT(STAT_VulkanNumQueryPools);
}
/**
 * Assigns the next unused slot of this pool to a render query.
 *
 * The slot index is stored in Query->IndexInPool, and the pool records both the query itself
 * and the address of its Result member for that slot.
 *
 * @param Query  Query to reserve a slot for; its pool type must match this pool's type.
 */
void FVulkanQueryPool::ReserveQuery(FVulkanRenderQuery* Query)
{
	checkSlow(QueryType == Query->GetQueryPoolType());

	const uint32 Slot = CurrentQueryCount++;
	Query->IndexInPool = Slot;

	QueryResults[Slot] = &Query->Result;
	QueryRefs[Slot] = Query;
}
/**
 * Assigns the next unused slot of a timestamp pool to a raw result location.
 * No render query object is associated with the slot (its QueryRefs entry is set to null).
 *
 * @param ResultPtr  Pointer recorded as the result destination for the reserved slot.
 * @return Index of the reserved slot inside this pool.
 */
uint32 FVulkanQueryPool::ReserveQuery(uint64* ResultPtr)
{
	checkSlow(QueryType == EVulkanQueryPoolType::Timestamp);

	const uint32 Slot = CurrentQueryCount;
	++CurrentQueryCount;

	QueryResults[Slot] = ResultPtr;
	QueryRefs[Slot] = nullptr;

	return Slot;
}
/**
 * Returns the pool to an empty state and resets all of its Vulkan queries.
 *
 * Both per-slot tables are rebuilt with MaxQueries default/zeroed entries, and the usage and
 * unused-frame counters are cleared.
 *
 * @param InCmdBuffer  Command buffer the reset is recorded on when the host query reset
 *                     extension is not available; unused otherwise.
 */
void FVulkanQueryPool::Reset(FVulkanCommandBuffer& InCmdBuffer)
{
	UnusedFrameCount = 0;
	CurrentQueryCount = 0;

	// Drop all previous slot bookkeeping and size both tables to the full pool capacity.
	QueryRefs.Empty(MaxQueries);
	QueryRefs.AddDefaulted(MaxQueries);

	QueryResults.Empty(MaxQueries);
	QueryResults.SetNumZeroed(MaxQueries);

	if (!Device.GetOptionalExtensions().HasEXTHostQueryReset)
	{
		// No host reset available: record the reset on the supplied command buffer.
		VulkanRHI::vkCmdResetQueryPool(InCmdBuffer.GetHandle(), QueryPool, 0, MaxQueries);
	}
	else
	{
		// Host reset: issued directly through the device handle, no command buffer involved.
		VulkanRHI::vkResetQueryPoolEXT(GetDevice().GetInstanceHandle(), QueryPool, 0, MaxQueries);
	}
}
/** @return true once the unused-frame counter has reached r.Vulkan.QueryPoolDeletionDelay. */
bool FVulkanQueryPool::IsStale() const
{
	const bool bDeletionDelayReached = (UnusedFrameCount >= GQueryPoolDeletionDelay);
	return bDeletionDelayReached;
}
/**
 * Starts a new occlusion query batch on this context.
 *
 * Acquires an occlusion pool from the device, resets it on the current command buffer and
 * appends it to the context's occlusion pool array, where GetCurrentOcclusionQueryPool()
 * will find it as the last entry.
 *
 * @param NumQueriesInBatch  Number of queries requested for the batch; forwarded to the device.
 */
void FVulkanCommandListContext::BeginOcclusionQueryBatch(uint32 NumQueriesInBatch)
{
	TArray<FVulkanQueryPool*>& ActiveOcclusionPools = GetQueryPoolArray(EVulkanQueryPoolType::Occlusion);

	FVulkanQueryPool* BatchPool = Device.AcquireOcclusionQueryPool(NumQueriesInBatch);
	BatchPool->Reset(GetCommandBuffer());

	ActiveOcclusionPools.Add(BatchPool);
}
/**
 * Hands out an occlusion query pool large enough for the requested number of queries.
 *
 * The device-wide occlusion pool size only ever grows. Free pools smaller than the current
 * size are deleted; if a suitable free pool remains it is reused, otherwise a new one is created.
 * The free list is accessed under QueryPoolLock.
 *
 * @param NumQueries  Requested query count; aligned to 256 through AlignArbitrary and clamped to at least 1.
 * @return A pool of type Occlusion. Callers hand it back through ReleaseQueryPool().
 */
FVulkanQueryPool* FVulkanDevice::AcquireOcclusionQueryPool(uint32 NumQueries)
{
	FScopeLock Lock(&QueryPoolLock);

	// Never size a pool for fewer than one query.
	NumQueries = FMath::Max(1u, AlignArbitrary(NumQueries, 256));
	OcclusionQueryPoolSize = FMath::Max(OcclusionQueryPoolSize, NumQueries);

	TArray<FVulkanQueryPool*>& FreeOcclusionPools = FreeQueryPools[(int32)EVulkanQueryPoolType::Occlusion];

	// Destroy pools that can't accommodate our new minimum size.
	// Iterating from the back keeps RemoveAtSwap from moving an unvisited entry into a visited index.
	int32 Index = FreeOcclusionPools.Num();
	while (--Index >= 0)
	{
		FVulkanQueryPool* Pool = FreeOcclusionPools[Index];
		checkSlow(Pool && (Pool->GetPoolType() == EVulkanQueryPoolType::Occlusion));

		if (Pool->GetMaxQueries() < OcclusionQueryPoolSize)
		{
			delete Pool;
			FreeOcclusionPools.RemoveAtSwap(Index, EAllowShrinking::No);
		}
	}

	// Nothing reusable is left: create a pool at the current device-wide size.
	if (FreeOcclusionPools.Num() == 0)
	{
		return new FVulkanQueryPool(*this, OcclusionQueryPoolSize, EVulkanQueryPoolType::Occlusion);
	}

	FVulkanQueryPool* Pool = FreeOcclusionPools.Pop(EAllowShrinking::No);
	checkSlow(Pool && (Pool->GetPoolType() == EVulkanQueryPoolType::Occlusion));
	return Pool;
}
/**
 * Hands out a timestamp query pool, reusing a free one when available.
 * The free list is accessed under QueryPoolLock.
 *
 * @return A pool of type Timestamp. Callers hand it back through ReleaseQueryPool().
 */
FVulkanQueryPool* FVulkanDevice::AcquireTimingQueryPool()
{
	FScopeLock Lock(&QueryPoolLock);

	TArray<FVulkanQueryPool*>& FreeTimingPools = FreeQueryPools[(int32)EVulkanQueryPoolType::Timestamp];
	if (FreeTimingPools.Num())
	{
		FVulkanQueryPool* Pool = FreeTimingPools.Pop(EAllowShrinking::No);
		checkSlow(Pool && (Pool->GetPoolType() == EVulkanQueryPoolType::Timestamp));
		return Pool;
	}

	// r.Vulkan.TimingQueryPoolSize is a signed, user-configurable value. Passing it straight to the
	// uint32 pool size would turn a negative setting into a huge query count, and zero would request
	// an empty VkQueryPool (queryCount must be greater than 0). Clamp to at least one query.
	const uint32 NumTimingQueries = static_cast<uint32>(FMath::Max(GTimingQueryPoolSize, 1));
	return new FVulkanQueryPool(*this, NumTimingQueries, EVulkanQueryPoolType::Timestamp);
}
/**
 * Returns a pool to the device's free list for its pool type, under QueryPoolLock.
 *
 * @param Pool  Pool to return; dereferenced unconditionally, so it must not be null.
 */
void FVulkanDevice::ReleaseQueryPool(FVulkanQueryPool* Pool)
{
	FScopeLock Lock(&QueryPoolLock);

	TArray<FVulkanQueryPool*>& FreeListForType = FreeQueryPools[(int32)Pool->GetPoolType()];
	FreeListForType.Add(Pool);
}
/**
 * Ages every pool in the free lists and deletes the ones that have become stale.
 *
 * Each call either deletes a free pool (when IsStale() reports true) or increments its
 * unused-frame counter. The free lists are accessed under QueryPoolLock.
 */
void FVulkanDevice::RemoveStaleQueryPools()
{
	FScopeLock Lock(&QueryPoolLock);

	for (TArray<FVulkanQueryPool*>& PoolArray : FreeQueryPools)
	{
		// Back-to-front so RemoveAtSwap only ever moves entries that were already visited.
		int32 Index = PoolArray.Num();
		while (Index-- > 0)
		{
			FVulkanQueryPool* Pool = PoolArray[Index];
			checkSlow(Pool);

			if (!Pool->IsStale())
			{
				Pool->IncrementUnusedFrameCount();
				continue;
			}

			delete Pool;
			PoolArray.RemoveAtSwap(Index, EAllowShrinking::No);
		}
	}
}
// Stores the requested query type and increments the render-query stat counter.
FVulkanRenderQuery::FVulkanRenderQuery(ERenderQueryType InType)
: QueryType(InType)
{
INC_DWORD_STAT(STAT_VulkanNumQueries);
}
// Decrements the render-query stat counter.
FVulkanRenderQuery::~FVulkanRenderQuery()
{
// A query must not be destroyed while it still has a sync point that has not completed.
check(!SyncPoint.IsValid() || SyncPoint->IsComplete());
DEC_DWORD_STAT(STAT_VulkanNumQueries);
}
/**
 * Creates a Vulkan render query object.
 *
 * @param QueryType  Expected to be RQT_Occlusion or RQT_AbsoluteTime; any other value raises an
 *                   ensure, but the query is still created.
 * @return Reference to the newly allocated query.
 */
FRenderQueryRHIRef FVulkanDynamicRHI::RHICreateRenderQuery(ERenderQueryType QueryType)
{
	ensureMsgf((QueryType == RQT_Occlusion) || (QueryType == RQT_AbsoluteTime), TEXT("Unknown QueryType %d"), QueryType);

	FVulkanRenderQuery* NewQuery = new FVulkanRenderQuery(QueryType);
	return NewQuery;
}
/**
 * Fetches the result of a render query once its sync point has completed.
 *
 * @param QueryRHI        Query to read.
 * @param OutQueryResult  Receives the result. Occlusion queries return the stored value as-is;
 *                        absolute-time queries are rescaled using the device's timestampPeriod limit
 *                        (ticks to microseconds, assuming timestampPeriod is nanoseconds per tick as
 *                        Vulkan defines it). Set to 0 when the query was never used in a command list.
 * @param bWait           When true, processes the interrupt queue until the sync point completes;
 *                        when false, returns immediately if the result is not ready.
 * @param GPUIndex        Not used by this implementation.
 * @return true if OutQueryResult holds a result; false if the query was never issued, is not ready
 *         (and bWait is false), or has an unhandled query type.
 */
bool FVulkanDynamicRHI::RHIGetRenderQueryResult(FRHIRenderQuery* QueryRHI, uint64& OutQueryResult, bool bWait, uint32 GPUIndex)
{
	FVulkanRenderQuery* Query = ResourceCast(QueryRHI);

	if (!ensureMsgf(Query->SyncPoint, TEXT("Attempt to get result data for an FRHIRenderQuery that was never used in a command list.")))
	{
		OutQueryResult = 0;
		return false;
	}

	if (!Query->SyncPoint->IsComplete())
	{
		if (!bWait)
		{
			return false;
		}

		// Mark the render thread as idle for the duration of the wait.
		FRenderThreadIdleScope IdleScope(ERenderThreadIdleTypes::WaitingForGPUQuery);
		ProcessInterruptQueueUntil(Query->SyncPoint);
	}

	checkSlow(Query->SyncPoint->IsComplete());

	switch (Query->QueryType)
	{
	case RQT_Occlusion:
	{
		OutQueryResult = Query->Result;
		return true;
	}

	case RQT_AbsoluteTime:
	{
		const VkPhysicalDeviceLimits& Limits = Device->GetDeviceProperties().limits;

		// Ticks per second, truncated to a whole number before being used as a double.
		const double TimingFrequency = (double)((uint64)((1000.0 * 1000.0 * 1000.0) / Limits.timestampPeriod));
		const double ElapsedSeconds = double(Query->Result) / TimingFrequency;

		OutQueryResult = (uint64)(ElapsedSeconds * 1000.0 * 1000.0);
		return true;
	}

	default:
		break;
	}

	return false;
}
/**
 * Top-of-pipe part of ending a render query: decides which sync point signals the query's result.
 *
 * Inside an open batch the query shares the batch's sync point. Outside a batch a dedicated sync
 * point is created and a lambda is enqueued that registers it as pending on the executing context.
 * Either way, any sync point previously stored on the query is overwritten. Afterwards the base
 * implementation is called to enqueue the actual end-query command.
 *
 * @param RHICmdList   Command list the query is being ended on.
 * @param RenderQuery  Query being ended.
 */
void FVulkanDynamicRHI::RHIEndRenderQuery_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIRenderQuery* RenderQuery)
{
	const uint32 GPUIndex = 0;

	FVulkanRenderQuery* Query = ResourceCast(RenderQuery);
	auto& QueryBatchData = RHICmdList.GetQueryBatchData(Query->QueryType);

	if (!QueryBatchData[GPUIndex])
	{
		// Not part of a batch: this query gets a sync point of its own.
		Query->SyncPoint = CreateVulkanSyncPoint();

		RHICmdList.EnqueueLambda([PendingSyncPoint = Query->SyncPoint](FRHICommandListBase& ExecutingCmdList)
		{
			FVulkanCommandListContext& Context = FVulkanCommandListContext::Get(ExecutingCmdList);
			Context.AddPendingSyncPoint(PendingSyncPoint);
		});
	}
	else
	{
		// Part of a batch: reuse the sync point stored on the command list when the batch began.
		Query->SyncPoint = static_cast<FVulkanSyncPoint*>(QueryBatchData[GPUIndex]);
	}

	// Let the base class enqueue the RHI command that records EndQuery() on the context.
	FDynamicRHI::RHIEndRenderQuery_TopOfPipe(RHICmdList, RenderQuery);
}
/**
 * Opens a query batch on the command list by creating the single sync point shared by all
 * queries of the batch and parking it in the command list's query batch data.
 *
 * @param RHICmdList  Command list the batch is opened on; must not already have an open batch of this type.
 * @param QueryType   Type of queries in the batch.
 */
void FVulkanDynamicRHI::RHIBeginRenderQueryBatch_TopOfPipe(FRHICommandListBase& RHICmdList, ERenderQueryType QueryType)
{
	const uint32 GPUIndex = 0;

	auto& QueryBatchData = RHICmdList.GetQueryBatchData(QueryType);
	checkf(QueryBatchData[GPUIndex] == nullptr, TEXT("A query batch for this type has already begun on this command list."));

	FVulkanSyncPointRef BatchSyncPoint = CreateVulkanSyncPoint();

	// The command list slot only stores the pointer obtained from GetReference(), so take a manual
	// reference for it here; RHIEndRenderQueryBatch_TopOfPipe releases it again.
	BatchSyncPoint->AddRef();
	QueryBatchData[GPUIndex] = BatchSyncPoint.GetReference();
}
/**
 * Closes the query batch opened by RHIBeginRenderQueryBatch_TopOfPipe.
 *
 * Takes the batch sync point back from the command list, drops the manual reference that was
 * added when the batch began, clears the slot, and enqueues a lambda that registers the sync
 * point as pending on the executing context.
 *
 * @param RHICmdList  Command list the batch was opened on; a batch of this type must be open.
 * @param QueryType   Type of queries in the batch.
 */
void FVulkanDynamicRHI::RHIEndRenderQueryBatch_TopOfPipe(FRHICommandListBase& RHICmdList, ERenderQueryType QueryType)
{
	const uint32 GPUIndex = 0;

	auto& QueryBatchData = RHICmdList.GetQueryBatchData(QueryType);
	checkf(QueryBatchData[GPUIndex], TEXT("A query batch for this type is not open on this command list."));

	// The local ref-counted pointer takes its own reference before the manual one is released.
	FVulkanSyncPointRef BatchSyncPoint = static_cast<FVulkanSyncPoint*>(QueryBatchData[GPUIndex]);
	BatchSyncPoint->Release();
	QueryBatchData[GPUIndex] = nullptr;

	RHICmdList.EnqueueLambda([BatchSyncPoint = MoveTemp(BatchSyncPoint)](FRHICommandListBase& ExecutingCmdList)
	{
		FVulkanCommandListContext& Context = FVulkanCommandListContext::Get(ExecutingCmdList);
		Context.AddPendingSyncPoint(BatchSyncPoint);
	});
}
/**
 * Records the start of a render query on the current command buffer.
 *
 * Occlusion queries reserve a slot in the current occlusion pool and begin a precise query there.
 * Absolute-time queries are not expected here and raise an ensure; other types are ignored.
 *
 * @param QueryRHI  Query to begin.
 */
void FVulkanCommandListContext::RHIBeginRenderQuery(FRHIRenderQuery* QueryRHI)
{
	FVulkanRenderQuery* Query = ResourceCast(QueryRHI);

	switch (Query->QueryType)
	{
	case RQT_Occlusion:
	{
		FVulkanQueryPool* OcclusionPool = GetCurrentOcclusionQueryPool();
		OcclusionPool->ReserveQuery(Query);
		VulkanRHI::vkCmdBeginQuery(GetCommandBuffer().GetHandle(), OcclusionPool->GetHandle(), Query->IndexInPool, VK_QUERY_CONTROL_PRECISE_BIT);
		break;
	}

	case RQT_AbsoluteTime:
	{
		ensureMsgf(0, TEXT("Timing queries should NOT call RHIBeginRenderQuery()!"));
		break;
	}

	default:
		break;
	}
}
/**
 * Records the end of a render query on the current command buffer.
 *
 * Occlusion queries end the query at the slot reserved when the query began. Absolute-time queries
 * reserve a slot in the current timestamp pool and write a timestamp at the pipeline stage selected
 * by r.Vulkan.TimestampQueryStage. The query must already have a valid sync point.
 *
 * @param QueryRHI  Query to end.
 */
void FVulkanCommandListContext::RHIEndRenderQuery(FRHIRenderQuery* QueryRHI)
{
	FVulkanRenderQuery* Query = ResourceCast(QueryRHI);
	check(Query->SyncPoint.IsValid());

	switch (Query->QueryType)
	{
	case RQT_Occlusion:
	{
		FVulkanQueryPool* OcclusionPool = GetCurrentOcclusionQueryPool();
		VulkanRHI::vkCmdEndQuery(GetCommandBuffer().GetHandle(), OcclusionPool->GetHandle(), Query->IndexInPool);
		break;
	}

	case RQT_AbsoluteTime:
	{
		FVulkanQueryPool* TimestampPool = GetCurrentTimestampQueryPool();
		TimestampPool->ReserveQuery(Query);

		// Zero selects top-of-pipe; any non-zero console variable value selects bottom-of-pipe.
		VkPipelineStageFlagBits TimestampStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
		if (GTimestampQueryStage)
		{
			TimestampStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
		}

		VulkanRHI::vkCmdWriteTimestamp(GetCommandBuffer().GetHandle(), TimestampStage, TimestampPool->GetHandle(), Query->IndexInPool);
		break;
	}

	default:
		break;
	}
}
#if (RHI_NEW_GPU_PROFILER == 0)
/**
 * Fills the first CPU/GPU microsecond entries of a calibration query from the device's
 * calibration timestamp. Leaves the query untouched when the calibrated timestamps
 * extension is not available.
 *
 * @param CalibrationQuery  Query whose CPUMicroseconds[0] and GPUMicroseconds[0] are written.
 */
void FVulkanCommandListContext::RHICalibrateTimers(FRHITimestampCalibrationQuery* CalibrationQuery)
{
	if (!Device.GetOptionalExtensions().HasEXTCalibratedTimestamps)
	{
		return;
	}

	const FGPUTimingCalibrationTimestamp Calibration = Device.GetCalibrationTimestamp();
	CalibrationQuery->CPUMicroseconds[0] = Calibration.CPUMicroseconds;
	CalibrationQuery->GPUMicroseconds[0] = Calibration.GPUMicroseconds;
}
#endif
/**
 * @return The most recently added occlusion pool of this context. At least one pool must have
 *         been added, and it must not be full (verified with checkSlow only).
 */
FVulkanQueryPool* FVulkanCommandListContext::GetCurrentOcclusionQueryPool()
{
	TArray<FVulkanQueryPool*>& OcclusionPoolArray = GetQueryPoolArray(EVulkanQueryPoolType::Occlusion);
	checkSlow(OcclusionPoolArray.Num() && !OcclusionPoolArray.Last()->IsFull());

	FVulkanQueryPool* CurrentPool = OcclusionPoolArray.Last();
	return CurrentPool;
}
/**
 * Returns a timestamp pool of the payload that still has room for another query.
 *
 * If the payload has no timestamp pool yet, or its last one is full, a pool is acquired from the
 * device, reset on the payload's last command buffer (preparing one first when the payload has
 * none) and appended to the payload's timestamp pool array.
 *
 * @param Payload  Payload whose timestamp pools and command buffers are used.
 * @return The last timestamp pool of the payload after the steps above.
 */
FVulkanQueryPool* FVulkanContextCommon::GetCurrentTimestampQueryPool(FVulkanPayload& Payload)
{
	TArray<FVulkanQueryPool*>& TimestampPoolArray = Payload.QueryPools[(int32)EVulkanQueryPoolType::Timestamp];

	const bool bNeedsNewPool = (TimestampPoolArray.Num() == 0) || TimestampPoolArray.Last()->IsFull();
	if (!bNeedsNewPool)
	{
		return TimestampPoolArray.Last();
	}

	FVulkanQueryPool* NewPool = Device.AcquireTimingQueryPool();

	// The reset needs a command buffer to be recorded on; make sure the payload has one.
	if (Payload.CommandBuffers.Num() == 0)
	{
		PrepareNewCommandBuffer(Payload);
	}
	NewPool->Reset(*Payload.CommandBuffers.Last());

	TimestampPoolArray.Add(NewPool);
	return NewPool;
}