// Copyright Epic Games, Inc. All Rights Reserved. /*============================================================================= VulkanQuery.cpp: Vulkan query RHI implementation. =============================================================================*/ #include "VulkanRHIPrivate.h" #include "VulkanDevice.h" #include "VulkanResources.h" #include "VulkanContext.h" #include "VulkanCommandBuffer.h" #include "VulkanQuery.h" #include "EngineGlobals.h" #include "RenderCore.h" #if VULKAN_QUERY_CALLSTACK #include "HAL/PlatformStackwalk.h" #endif static int32 GTimestampQueryStage = 0; FAutoConsoleVariableRef CVarTimestampQueryStage( TEXT("r.Vulkan.TimestampQueryStage"), GTimestampQueryStage, TEXT("Defines which pipeline stage is used for timestamp queries.\n") TEXT(" 0: Use VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, less precise measures but less likely to alter performance (default)\n") TEXT(" 1: Use VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, more precise measures but may alter performance on some platforms"), ECVF_RenderThreadSafe ); static int32 GTimingQueryPoolSize = 128; FAutoConsoleVariableRef CVarTimingQueryPoolSize( TEXT("r.Vulkan.TimingQueryPoolSize"), GTimingQueryPoolSize, TEXT("Amount of timing queries per pool in reusable query pools. (Default: 128)\n"), ECVF_ReadOnly ); static int32 GQueryPoolDeletionDelay = 10; FAutoConsoleVariableRef CVarQueryPoolDeletionDelay( TEXT("r.Vulkan.QueryPoolDeletionDelay"), GQueryPoolDeletionDelay, TEXT("Amount of frames to wait before deleting an unused query pools. (Default: 10)\n"), ECVF_ReadOnly ); FVulkanQueryPool::FVulkanQueryPool(FVulkanDevice& InDevice, uint32 InMaxQueries, EVulkanQueryPoolType InQueryType) : Device(InDevice) , QueryPool(VK_NULL_HANDLE) , MaxQueries(InMaxQueries) , QueryType(InQueryType) { INC_DWORD_STAT(STAT_VulkanNumQueryPools); VkQueryPoolCreateInfo PoolCreateInfo; ZeroVulkanStruct(PoolCreateInfo, VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); PoolCreateInfo.queryType = GetVkQueryType(InQueryType); PoolCreateInfo.queryCount = MaxQueries; VERIFYVULKANRESULT(VulkanRHI::vkCreateQueryPool(Device.GetInstanceHandle(), &PoolCreateInfo, VULKAN_CPU_ALLOCATOR, &QueryPool)); } FVulkanQueryPool::~FVulkanQueryPool() { DEC_DWORD_STAT(STAT_VulkanNumQueryPools); VulkanRHI::vkDestroyQueryPool(Device.GetInstanceHandle(), QueryPool, VULKAN_CPU_ALLOCATOR); QueryPool = VK_NULL_HANDLE; } void FVulkanQueryPool::ReserveQuery(FVulkanRenderQuery* Query) { checkSlow(QueryType == Query->GetQueryPoolType()); Query->IndexInPool = CurrentQueryCount++; QueryRefs[Query->IndexInPool] = Query; QueryResults[Query->IndexInPool] = &Query->Result; } uint32 FVulkanQueryPool::ReserveQuery(uint64* ResultPtr) { checkSlow(QueryType == EVulkanQueryPoolType::Timestamp); const uint32 IndexInPool = CurrentQueryCount++; QueryRefs[IndexInPool] = nullptr; QueryResults[IndexInPool] = ResultPtr; return IndexInPool; } void FVulkanQueryPool::Reset(FVulkanCommandBuffer& InCmdBuffer) { CurrentQueryCount = 0; UnusedFrameCount = 0; QueryRefs.Empty(MaxQueries); QueryResults.Empty(MaxQueries); QueryRefs.AddDefaulted(MaxQueries); QueryResults.SetNumZeroed(MaxQueries); if (Device.GetOptionalExtensions().HasEXTHostQueryReset) { VulkanRHI::vkResetQueryPoolEXT(GetDevice().GetInstanceHandle(), QueryPool, 0, MaxQueries); } else { VulkanRHI::vkCmdResetQueryPool(InCmdBuffer.GetHandle(), QueryPool, 0, MaxQueries); } } bool FVulkanQueryPool::IsStale() const { return UnusedFrameCount >= GQueryPoolDeletionDelay; } void FVulkanCommandListContext::BeginOcclusionQueryBatch(uint32 NumQueriesInBatch) { TArray& OcclusionPoolArray = GetQueryPoolArray(EVulkanQueryPoolType::Occlusion); FVulkanQueryPool* NewOcclusionQueryPool = Device.AcquireOcclusionQueryPool(NumQueriesInBatch); NewOcclusionQueryPool->Reset(GetCommandBuffer()); OcclusionPoolArray.Add(NewOcclusionQueryPool); } FVulkanQueryPool* FVulkanDevice::AcquireOcclusionQueryPool(uint32 NumQueries) { FScopeLock Lock(&QueryPoolLock); // At least add one query NumQueries = FMath::Max(1u, AlignArbitrary(NumQueries, 256)); OcclusionQueryPoolSize = FMath::Max(OcclusionQueryPoolSize, NumQueries); TArray& FreeOcclusionPools = FreeQueryPools[(int32)EVulkanQueryPoolType::Occlusion]; // Destroy pools that can't accomodate our new minimum size for (int32 Index = FreeOcclusionPools.Num() - 1; Index >= 0; --Index) { FVulkanQueryPool* Pool = FreeOcclusionPools[Index]; checkSlow(Pool && (Pool->GetPoolType() == EVulkanQueryPoolType::Occlusion)); if (Pool->GetMaxQueries() < OcclusionQueryPoolSize) { delete Pool; FreeOcclusionPools.RemoveAtSwap(Index, EAllowShrinking::No); } } if (FreeOcclusionPools.Num()) { FVulkanQueryPool* Pool = FreeOcclusionPools.Pop(EAllowShrinking::No); checkSlow(Pool && (Pool->GetPoolType() == EVulkanQueryPoolType::Occlusion)); return Pool; } FVulkanQueryPool* Pool = new FVulkanQueryPool(*this, OcclusionQueryPoolSize, EVulkanQueryPoolType::Occlusion); return Pool; } FVulkanQueryPool* FVulkanDevice::AcquireTimingQueryPool() { FScopeLock Lock(&QueryPoolLock); TArray& FreeTimingPools = FreeQueryPools[(int32)EVulkanQueryPoolType::Timestamp]; if (FreeTimingPools.Num()) { FVulkanQueryPool* Pool = FreeTimingPools.Pop(EAllowShrinking::No); checkSlow(Pool && (Pool->GetPoolType() == EVulkanQueryPoolType::Timestamp)); return Pool; } return new FVulkanQueryPool(*this, GTimingQueryPoolSize, EVulkanQueryPoolType::Timestamp); } void FVulkanDevice::ReleaseQueryPool(FVulkanQueryPool* Pool) { FScopeLock Lock(&QueryPoolLock); FreeQueryPools[(int32)Pool->GetPoolType()].Add(Pool); } void FVulkanDevice::RemoveStaleQueryPools() { FScopeLock Lock(&QueryPoolLock); for (TArray& PoolArray : FreeQueryPools) { for (int32 Index = PoolArray.Num() - 1; Index >= 0; --Index) { FVulkanQueryPool* Pool = PoolArray[Index]; checkSlow(Pool); if (Pool->IsStale()) { delete Pool; PoolArray.RemoveAtSwap(Index, EAllowShrinking::No); } else { Pool->IncrementUnusedFrameCount(); } } } } FVulkanRenderQuery::FVulkanRenderQuery(ERenderQueryType InType) : QueryType(InType) { INC_DWORD_STAT(STAT_VulkanNumQueries); } FVulkanRenderQuery::~FVulkanRenderQuery() { check(!SyncPoint.IsValid() || SyncPoint->IsComplete()); DEC_DWORD_STAT(STAT_VulkanNumQueries); } FRenderQueryRHIRef FVulkanDynamicRHI::RHICreateRenderQuery(ERenderQueryType QueryType) { ensureMsgf((QueryType == RQT_Occlusion) || (QueryType == RQT_AbsoluteTime), TEXT("Unknown QueryType %d"), QueryType); return new FVulkanRenderQuery(QueryType); } bool FVulkanDynamicRHI::RHIGetRenderQueryResult(FRHIRenderQuery* QueryRHI, uint64& OutQueryResult, bool bWait, uint32 GPUIndex) { FVulkanRenderQuery* Query = ResourceCast(QueryRHI); if (!ensureMsgf(Query->SyncPoint, TEXT("Attempt to get result data for an FRHIRenderQuery that was never used in a command list."))) { OutQueryResult = 0; return false; } if (!Query->SyncPoint->IsComplete()) { if (bWait) { FRenderThreadIdleScope IdleScope(ERenderThreadIdleTypes::WaitingForGPUQuery); ProcessInterruptQueueUntil(Query->SyncPoint); } else { return false; } } checkSlow(Query->SyncPoint->IsComplete()); if (Query->QueryType == RQT_Occlusion) { OutQueryResult = Query->Result; return true; } else if (Query->QueryType == RQT_AbsoluteTime) { const VkPhysicalDeviceLimits& Limits = Device->GetDeviceProperties().limits; const double TimingFrequency = (double)((uint64)((1000.0 * 1000.0 * 1000.0) / Limits.timestampPeriod)); OutQueryResult = (uint64)((double(Query->Result) / TimingFrequency) * 1000.0 * 1000.0); return true; } return false; } void FVulkanDynamicRHI::RHIEndRenderQuery_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIRenderQuery* RenderQuery) { const uint32 GPUIndex = 0; FVulkanRenderQuery* Query = ResourceCast(RenderQuery); auto& QueryBatchData = RHICmdList.GetQueryBatchData(Query->QueryType); if (QueryBatchData[GPUIndex]) { // This query belongs to a batch. Use the sync point we created earlier Query->SyncPoint = static_cast(QueryBatchData[GPUIndex]); } else { // Queries issued outside of a batch use one sync point per query. // check(Query->SyncPoint == nullptr); Query->SyncPoint = CreateVulkanSyncPoint(); RHICmdList.EnqueueLambda([SyncPoint = Query->SyncPoint](FRHICommandListBase& ExecutingCmdList) { FVulkanCommandListContext& Context = FVulkanCommandListContext::Get(ExecutingCmdList); Context.AddPendingSyncPoint(SyncPoint); }); } // Enqueue the RHI command to record the EndQuery() call on the context. FDynamicRHI::RHIEndRenderQuery_TopOfPipe(RHICmdList, RenderQuery); } void FVulkanDynamicRHI::RHIBeginRenderQueryBatch_TopOfPipe(FRHICommandListBase& RHICmdList, ERenderQueryType QueryType) { // Each query batch uses a single sync point to signal when the results are ready. const uint32 GPUIndex = 0; auto& QueryBatchData = RHICmdList.GetQueryBatchData(QueryType); checkf(QueryBatchData[GPUIndex] == nullptr, TEXT("A query batch for this type has already begun on this command list.")); FVulkanSyncPointRef SyncPoint = CreateVulkanSyncPoint(); // Keep a reference in the RHI command list, so we can retrieve it later in BeginQuery/EndQuery/EndBatch. QueryBatchData[GPUIndex] = SyncPoint.GetReference(); SyncPoint->AddRef(); } void FVulkanDynamicRHI::RHIEndRenderQueryBatch_TopOfPipe(FRHICommandListBase& RHICmdList, ERenderQueryType QueryType) { const uint32 GPUIndex = 0; auto& QueryBatchData = RHICmdList.GetQueryBatchData(QueryType); checkf(QueryBatchData[GPUIndex], TEXT("A query batch for this type is not open on this command list.")); FVulkanSyncPointRef SyncPoint = static_cast(QueryBatchData[GPUIndex]); // Clear the sync point reference on the RHI command list SyncPoint->Release(); QueryBatchData[GPUIndex] = nullptr; RHICmdList.EnqueueLambda([SyncPoint = MoveTemp(SyncPoint)](FRHICommandListBase& ExecutingCmdList) { FVulkanCommandListContext& Context = FVulkanCommandListContext::Get(ExecutingCmdList); Context.AddPendingSyncPoint(SyncPoint); }); } void FVulkanCommandListContext::RHIBeginRenderQuery(FRHIRenderQuery* QueryRHI) { FVulkanRenderQuery* Query = ResourceCast(QueryRHI); if (Query->QueryType == RQT_Occlusion) { FVulkanQueryPool* CurrentOcclusionQueryPool = GetCurrentOcclusionQueryPool(); CurrentOcclusionQueryPool->ReserveQuery(Query); VulkanRHI::vkCmdBeginQuery(GetCommandBuffer().GetHandle(), CurrentOcclusionQueryPool->GetHandle(), Query->IndexInPool, VK_QUERY_CONTROL_PRECISE_BIT); } else if (Query->QueryType == RQT_AbsoluteTime) { ensureMsgf(0, TEXT("Timing queries should NOT call RHIBeginRenderQuery()!")); } } void FVulkanCommandListContext::RHIEndRenderQuery(FRHIRenderQuery* QueryRHI) { FVulkanRenderQuery* Query = ResourceCast(QueryRHI); check(Query->SyncPoint.IsValid()); if (Query->QueryType == RQT_Occlusion) { FVulkanQueryPool* CurrentPool = GetCurrentOcclusionQueryPool(); VulkanRHI::vkCmdEndQuery(GetCommandBuffer().GetHandle(), CurrentPool->GetHandle(), Query->IndexInPool); } else if (Query->QueryType == RQT_AbsoluteTime) { FVulkanQueryPool* CurrentPool = GetCurrentTimestampQueryPool(); CurrentPool->ReserveQuery(Query); const VkPipelineStageFlagBits QueryPipelineStage = GTimestampQueryStage ? VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; VulkanRHI::vkCmdWriteTimestamp(GetCommandBuffer().GetHandle(), QueryPipelineStage, CurrentPool->GetHandle(), Query->IndexInPool); } } #if (RHI_NEW_GPU_PROFILER == 0) void FVulkanCommandListContext::RHICalibrateTimers(FRHITimestampCalibrationQuery* CalibrationQuery) { if (Device.GetOptionalExtensions().HasEXTCalibratedTimestamps) { FGPUTimingCalibrationTimestamp CalibrationTimestamp = Device.GetCalibrationTimestamp(); CalibrationQuery->CPUMicroseconds[0] = CalibrationTimestamp.CPUMicroseconds; CalibrationQuery->GPUMicroseconds[0] = CalibrationTimestamp.GPUMicroseconds; } } #endif FVulkanQueryPool* FVulkanCommandListContext::GetCurrentOcclusionQueryPool() { TArray& OcclusionPoolArray = GetQueryPoolArray(EVulkanQueryPoolType::Occlusion); checkSlow(OcclusionPoolArray.Num() && !OcclusionPoolArray.Last()->IsFull()); return OcclusionPoolArray.Last(); } FVulkanQueryPool* FVulkanContextCommon::GetCurrentTimestampQueryPool(FVulkanPayload& Payload) { TArray& TimestampPoolArray = Payload.QueryPools[(int32)EVulkanQueryPoolType::Timestamp]; if ((TimestampPoolArray.Num() == 0) || TimestampPoolArray.Last()->IsFull()) { FVulkanQueryPool* NewPool = Device.AcquireTimingQueryPool(); if (Payload.CommandBuffers.Num() == 0) { PrepareNewCommandBuffer(Payload); } NewPool->Reset(*Payload.CommandBuffers.Last()); TimestampPoolArray.Add(NewPool); } return TimestampPoolArray.Last(); }