// Copyright Epic Games, Inc. All Rights Reserved. /*============================================================================= OpenGLQuery.cpp: OpenGL query RHI implementation. =============================================================================*/ #include "CoreMinimal.h" #include "Stats/Stats.h" #include "OpenGLDrv.h" #include "OpenGLDrvPrivate.h" #include "RenderCore.h" FOpenGLRenderQuery::FActiveQueries FOpenGLRenderQuery::ActiveQueries; FOpenGLRenderQuery::FQueryPool FOpenGLRenderQuery::PooledQueries; FOpenGLRenderQuery::~FOpenGLRenderQuery() { VERIFY_GL_SCOPE(); ReleaseGlQuery(); } void FOpenGLRenderQuery::Link() { // The renderer might re-use a query without reading its results back first. // Ensure this query is unlinked, so it can be re-linked at the end of the list. Unlink(); if (!ActiveQueries.First) { check(!ActiveQueries.Last); check(Next == nullptr); ActiveQueries.First = this; Prev = &ActiveQueries.First; } else { check(ActiveQueries.Last); check(ActiveQueries.Last->Next == nullptr); ActiveQueries.Last->Next = this; Prev = &ActiveQueries.Last->Next; } ActiveQueries.Last = this; } void FOpenGLRenderQuery::Unlink() { if (!IsLinked()) return; if (ActiveQueries.Last == this) { // This is the last node in the list, so the "ActiveQueries.Last" pointer needs fixing up. if (Prev == &ActiveQueries.First) { // This is also the first node in the list, meaning there's only 1 node total. // Just clear the "ActiveQueries.Last" pointer. ActiveQueries.Last = nullptr; } else { // // There's at least one real node before us. // // "Prev" points to the "Next" member field of the previous node. // Subtract the "Next" field offset to get the actual previous node address. // ActiveQueries.Last = reinterpret_cast(reinterpret_cast(Prev) - offsetof(FOpenGLRenderQuery, Next)); } } if (Next) { Next->Prev = Prev; } if (Prev) { *Prev = Next; } Next = nullptr; Prev = nullptr; } void FOpenGLRenderQuery::AcquireGlQuery() { VERIFY_GL_SCOPE(); if (Resource != 0) { // Already acquired return; } while (ActiveQueries.First && ActiveQueries.Count >= GRHIMaximumInFlightQueries) { // We can't start another query until more become available, due to the query count limit. // Block for results on the oldest in-flight queries. ActiveQueries.First->CacheResult(true); } ActiveQueries.Count++; if (PooledQueries[Type].Num()) { Resource = PooledQueries[Type].Pop(); } else { FOpenGL::GenQueries(1, &Resource); } } void FOpenGLRenderQuery::ReleaseGlQuery() { VERIFY_GL_SCOPE(); if (Resource == 0) { // Already released check(!IsLinked()); return; } check(ActiveQueries.Count > 0); ActiveQueries.Count--; PooledQueries[Type].Add(Resource); Resource = 0; Unlink(); } void FOpenGLRenderQuery::Begin() { VERIFY_GL_SCOPE(); check(!IsLinked()); AcquireGlQuery(); check(Resource); switch(Type) { default: checkNoEntry(); break; case EType::Occlusion: FOpenGL::BeginQuery( FOpenGL::SupportsExactOcclusionQueries() ? UGL_SAMPLES_PASSED : UGL_ANY_SAMPLES_PASSED , Resource ); break; #if RHI_NEW_GPU_PROFILER == 0 case EType::Disjoint: FOpenGL::BeginQuery(UGL_TIME_ELAPSED, Resource); break; #endif }; } void FOpenGLRenderQuery::End(uint64* InTarget) { VERIFY_GL_SCOPE(); AcquireGlQuery(); check(Resource); switch (Type) { case EType::Occlusion: check(Resource); FOpenGL::EndQuery(FOpenGL::SupportsExactOcclusionQueries() ? UGL_SAMPLES_PASSED : UGL_ANY_SAMPLES_PASSED ); break; case EType::Timestamp: FOpenGL::QueryTimestampCounter(Resource); break; #if RHI_NEW_GPU_PROFILER case EType::Profiler: FOpenGL::QueryTimestampCounter(Resource); break; #else case EType::Disjoint: FOpenGL::EndQuery(UGL_TIME_ELAPSED); break; #endif } BOPCounter++; Target = InTarget; Link(); } bool FOpenGLRenderQuery::IsCached() { return BOPCounter == LastCachedBOPCounter.load(std::memory_order_relaxed); } bool FOpenGLRenderQuery::CacheResult(bool bWait) { VERIFY_GL_SCOPE(); if (IsCached()) { // Value has been cached and no newer query operation has started. check(!IsLinked()); return true; } check(Resource); if (!bWait) { // If we don't want to wait, we need to check if the result is available first. GLuint IsAvailable = GL_FALSE; FOpenGL::GetQueryObject(Resource, FOpenGL::QM_ResultAvailable, &IsAvailable); if (IsAvailable == GL_FALSE) { // Not ready yet. return false; } } // Read the result back (and block if its not ready) switch (Type) { default: checkNoEntry(); break; case EType::Occlusion: { GLuint Result32 = 0; FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Result32); SetResult(Result32 * (FOpenGL::SupportsExactOcclusionQueries() ? 1 : 500000)); // half a mega pixel display } break; case EType::Timestamp: { GLuint64 Value = 0; FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Value); // Convert to microseconds (GL queries are in nanoseconds) SetResult(Value / 1000); } break; #if RHI_NEW_GPU_PROFILER case EType::Profiler: { FOpenGLDynamicRHI& RHI = FOpenGLDynamicRHI::Get(); // TimerQueryDisjoint is a one-shot state in the driver, it is not pipelined. // If it returns true, all timers we've submitted after this timer but haven't // yet resolved should be discarded for having invalid data. if (FOpenGL::TimerQueryDisjoint()) { for (FOpenGLRenderQuery* Other = this; Other; Other = Other->Next) { if (Other->Type == EType::Profiler) { Other->SetResult(RHI.Profiler.ResolveQuery(0, Other->Target, true)); // Return the query to the pool RHI.Profiler.QueryPool.Push(Other); } } } else { GLuint64 Value; FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Value); SetResult(RHI.Profiler.ResolveQuery(Value, Target, false)); // Return the query to the pool RHI.Profiler.QueryPool.Push(this); } } break; #else case EType::Disjoint: { // TimerQueryDisjoint is a one-shot state in the driver, it is not pipelined. // If it returns true, all timers we've submitted after this timer but haven't // yet resolved should be discarded for having invalid data. if (FOpenGL::TimerQueryDisjoint()) { for (FOpenGLRenderQuery* Other = this; Other; Other = Other->Next) { if (Other->Type == EType::Disjoint) { Other->SetResult(InvalidDisjointMask); } } } else { GLuint64 Value; FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Value); // Convert to microseconds (GL queries are in nanoseconds) SetResult(Value / 1000); } } break; #endif } return true; } void FOpenGLRenderQuery::SetResult(uint64 Value) { if (Target) { *Target = Value; Target = nullptr; } Result = Value; ReleaseGlQuery(); LastCachedBOPCounter.store(BOPCounter, std::memory_order_release); } bool FOpenGLRenderQuery_RHI::GetResult(bool bWait, uint64& OutResult) { if (TOPCounter == LastCachedBOPCounter.load(std::memory_order_acquire)) { // Early return for queries we already have the result for. check(!IsLinked()); OutResult = FOpenGLRenderQuery::GetResult(); return true; } if (!bWait) { // // The query has not yet completed, and we don't want to wait for the query result. // Return. The RHI thread will poll for results later. // OutResult = 0; return false; } FRHICommandListImmediate& RHICmdList = FRHICommandListImmediate::Get(); // // The query has not yet completed, and we want to wait for results. // Append an RHI thread command that will force a readback of the GL query, then flush the RHI thread. // RHICmdList.EnqueueLambda([this](FRHICommandListImmediate&) { // Fetch the query result if it hasn't arrived yet... CacheResult(true); }); // Wait for the above lambda to execute RHICmdList.ImmediateFlush(EImmediateFlushType::FlushRHIThread); checkf(TOPCounter == LastCachedBOPCounter, TEXT("Attempting to get data from an RHI render query which was never issued.")); check(!IsLinked()); OutResult = FOpenGLRenderQuery::GetResult(); return true; } bool FOpenGLRenderQuery::PollQueryResults(FOpenGLRenderQuery* TargetQuery) { if (!PlatformOpenGLThreadHasRenderingContext()) { // Don't poll queries if this thread doesn't own the GL context. return false; } if (TargetQuery && TargetQuery->IsCached()) { return true; } if (ActiveQueries.First) { TRACE_CPUPROFILER_EVENT_SCOPE(PollQueryResults); do { FOpenGLRenderQuery* Current = ActiveQueries.First; if (!Current->CacheResult(/*bWait = */ false)) { // Not complete yet return false; } if (Current == TargetQuery) { return true; } } while (ActiveQueries.First); } return TargetQuery == nullptr; } void FOpenGLRenderQuery::Cleanup() { VERIFY_GL_SCOPE(); check(ActiveQueries.Count == 0); for (auto& Array : PooledQueries) { for (GLuint Resource : Array) { FOpenGL::DeleteQueries(1, &Resource); } Array.Reset(); } } FRenderQueryRHIRef FOpenGLDynamicRHI::RHICreateRenderQuery(ERenderQueryType QueryType) { check(QueryType == RQT_Occlusion || QueryType == RQT_AbsoluteTime); if (QueryType == RQT_AbsoluteTime && FOpenGL::SupportsTimestampQueries() == false) { return nullptr; } return new FOpenGLRenderQuery_RHI(QueryType); } void FOpenGLDynamicRHI::RHIBeginRenderQuery_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIRenderQuery* RenderQuery) { if (!RenderQuery) return; FDynamicRHI::RHIBeginRenderQuery_TopOfPipe(RHICmdList, RenderQuery); } void FOpenGLDynamicRHI::RHIEndRenderQuery_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIRenderQuery* RenderQuery) { if (!RenderQuery) return; ResourceCast(RenderQuery)->End_TopOfPipe(); FDynamicRHI::RHIEndRenderQuery_TopOfPipe(RHICmdList, RenderQuery); } void FOpenGLDynamicRHI::RHIBeginRenderQuery(FRHIRenderQuery* RenderQuery) { ResourceCast(RenderQuery)->Begin(); } void FOpenGLDynamicRHI::RHIEndRenderQuery(FRHIRenderQuery* RenderQuery) { FOpenGLRenderQuery_RHI* Query = ResourceCast(RenderQuery); Query->End(); } bool FOpenGLDynamicRHI::RHIGetRenderQueryResult(FRHIRenderQuery* QueryRHI, uint64& OutResult, bool bWait, uint32 GPUIndex) { if (!QueryRHI) { OutResult = 0; return true; } FOpenGLRenderQuery_RHI* Query = ResourceCast(QueryRHI); return Query->GetResult(bWait, OutResult); } void FOpenGLEventQuery::IssueEvent() { VERIFY_GL_SCOPE(); if(Sync) { FOpenGL::DeleteSync(Sync); Sync = UGLsync(); } Sync = FOpenGL::FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); FOpenGL::Flush(); checkSlow(FOpenGL::IsSync(Sync)); } void FOpenGLEventQuery::WaitForCompletion() { VERIFY_GL_SCOPE(); QUICK_SCOPE_CYCLE_COUNTER(STAT_FOpenGLEventQuery_WaitForCompletion); checkSlow(FOpenGL::IsSync(Sync)); // Wait up to 1/2 second for sync execution FOpenGL::EFenceResult Status = FOpenGL::ClientWaitSync( Sync, 0, 500*1000*1000); switch (Status) { case FOpenGL::FR_AlreadySignaled: case FOpenGL::FR_ConditionSatisfied: break; case FOpenGL::FR_TimeoutExpired: UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms)")); break; case FOpenGL::FR_WaitFailed: UE_LOG(LogRHI, Log, TEXT("Wait on GPU failed in driver")); break; default: UE_LOG(LogRHI, Log, TEXT("Unknown error while waiting on GPU")); check(0); break; } } FOpenGLEventQuery::FOpenGLEventQuery() { VERIFY_GL_SCOPE(); // Initialize the query by issuing an initial event. IssueEvent(); check(FOpenGL::IsSync(Sync)); } FOpenGLEventQuery::~FOpenGLEventQuery() { VERIFY_GL_SCOPE(); FOpenGL::DeleteSync(Sync); } /*============================================================================= * class FOpenGLBufferedGPUTiming *=============================================================================*/ #if (RHI_NEW_GPU_PROFILER == 0) /** * Constructor. * * @param InOpenGLRHI RHI interface * @param InBufferSize Number of buffered measurements */ FOpenGLBufferedGPUTiming::FOpenGLBufferedGPUTiming(int32 InBufferSize) : BufferSize(InBufferSize) { } /** * Initializes the static variables, if necessary. */ void FOpenGLBufferedGPUTiming::PlatformStaticInitialize(void* UserData) { // Are the static variables initialized? if ( !GAreGlobalsInitialized ) { GIsSupported = FOpenGL::SupportsTimestampQueries(); SetTimingFrequency(1000 * 1000 * 1000); GAreGlobalsInitialized = true; } } /** * Initializes all OpenGL resources and if necessary, the static variables. */ static TArray TimerQueryPool; static FOpenGLRenderQuery* GetTimeQuery() { if (TimerQueryPool.Num()) { return TimerQueryPool.Pop(); } return new FOpenGLRenderQuery(FOpenGLRenderQuery::EType::Timestamp); } void FOpenGLBufferedGPUTiming::InitResources() { StaticInitialize(nullptr, PlatformStaticInitialize); CurrentTimestamp = 0; NumIssuedTimestamps = 0; bIsTiming = false; GIsSupported = FOpenGL::SupportsTimestampQueries(); if ( GIsSupported ) { StartTimestamps.Reserve(BufferSize); EndTimestamps.Reserve(BufferSize); for(int32 BufferIndex = 0; BufferIndex < BufferSize; ++BufferIndex) { StartTimestamps.Add(GetTimeQuery()); EndTimestamps.Add(GetTimeQuery()); } } } /** * Releases all OpenGL resources. */ void FOpenGLBufferedGPUTiming::ReleaseResources() { VERIFY_GL_SCOPE(); for (FOpenGLRenderQuery* Query : StartTimestamps) { TimerQueryPool.Add(Query); } for (FOpenGLRenderQuery* Query : EndTimestamps) { TimerQueryPool.Add(Query); } StartTimestamps.Reset(); EndTimestamps.Reset(); } /** * Start a GPU timing measurement. */ void FOpenGLBufferedGPUTiming::StartTiming() { VERIFY_GL_SCOPE(); // Issue a timestamp query for the 'start' time. if ( GIsSupported && !bIsTiming ) { int32 NewTimestampIndex = (CurrentTimestamp + 1) % BufferSize; StartTimestamps[NewTimestampIndex]->End(); CurrentTimestamp = NewTimestampIndex; bIsTiming = true; } } /** * End a GPU timing measurement. * The timing for this particular measurement will be resolved at a later time by the GPU. */ void FOpenGLBufferedGPUTiming::EndTiming() { VERIFY_GL_SCOPE(); // Issue a timestamp query for the 'end' time. if ( GIsSupported && bIsTiming ) { checkSlow( CurrentTimestamp >= 0 && CurrentTimestamp < BufferSize ); EndTimestamps[CurrentTimestamp]->End(); NumIssuedTimestamps = FMath::Min(NumIssuedTimestamps + 1, BufferSize); bIsTiming = false; } } /** * Retrieves the most recently resolved timing measurement. * The unit is the same as for FPlatformTime::Cycles(). Returns 0 if there are no resolved measurements. * * @return Value of the most recently resolved timing, or 0 if no measurements have been resolved by the GPU yet. */ uint64 FOpenGLBufferedGPUTiming::GetTiming(bool bGetCurrentResultsAndBlock) { VERIFY_GL_SCOPE(); if (GIsSupported) { checkSlow(CurrentTimestamp >= 0 && CurrentTimestamp < BufferSize); int32 TimestampIndex = CurrentTimestamp; if (!bGetCurrentResultsAndBlock) { // Quickly check the most recent measurements to see if any of them has been resolved. Do not flush these queries. for (int32 IssueIndex = 1; IssueIndex < NumIssuedTimestamps; ++IssueIndex) { FOpenGLRenderQuery* StartQuery = StartTimestamps[TimestampIndex]; FOpenGLRenderQuery* EndQuery = EndTimestamps[TimestampIndex]; if (StartQuery->CacheResult(false) && EndQuery->CacheResult(false)) { uint64 StartTime = StartQuery->GetResult(); uint64 EndTime = EndQuery->GetResult(); if (EndTime > StartTime) { return EndTime - StartTime; } } TimestampIndex = (TimestampIndex + BufferSize - 1) % BufferSize; } } if (NumIssuedTimestamps > 0 || bGetCurrentResultsAndBlock) { // None of the (NumIssuedTimestamps - 1) measurements were ready yet, // so check the oldest measurement more thoroughly. // This really only happens if occlusion and frame sync event queries are disabled, otherwise those will block until the GPU catches up to 1 frame behind const bool bBlocking = ( NumIssuedTimestamps == BufferSize ) || bGetCurrentResultsAndBlock; FOpenGLRenderQuery* StartQuery = StartTimestamps[TimestampIndex]; FOpenGLRenderQuery* EndQuery = EndTimestamps[TimestampIndex]; bool bHasStart = false, bHasEnd = false; { FRenderThreadIdleScope IdleScope(ERenderThreadIdleTypes::WaitingForGPUQuery); SCOPE_CYCLE_COUNTER(STAT_RenderQueryResultTime); double StartTimeoutTime = FPlatformTime::Seconds(); // If we are blocking, retry until the GPU processes the time stamp command while (true) { bHasStart = StartQuery->CacheResult(false); bHasEnd = EndQuery->CacheResult(false); if (bBlocking && !(bHasStart && bHasEnd)) { if ((FPlatformTime::Seconds() - StartTimeoutTime) > 0.5) { UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms)")); return 0; } } else { break; } } } if (bHasStart && bHasEnd) { uint64 StartTime = StartQuery->GetResult(); uint64 EndTime = EndQuery->GetResult(); if (EndTime > StartTime) { return EndTime - StartTime; } } } } return 0; } void FOpenGLDisjointTimeStampQuery::StartTracking() { VERIFY_GL_SCOPE(); if (IsSupported()) { DisjointQuery->Begin(); } } void FOpenGLDisjointTimeStampQuery::EndTracking() { VERIFY_GL_SCOPE(); if (IsSupported()) { DisjointQuery->End(); } } bool FOpenGLDisjointTimeStampQuery::IsResultValid() { checkSlow(IsSupported()); return bIsResultValid; } bool FOpenGLDisjointTimeStampQuery::GetResult(uint64* OutResult) { VERIFY_GL_SCOPE(); if (IsSupported()) { DisjointQuery->CacheResult(true); uint64 Result = DisjointQuery->GetResult(); bIsResultValid = (Result & FOpenGLRenderQuery::InvalidDisjointMask) == 0; *OutResult = Result & (~FOpenGLRenderQuery::InvalidDisjointMask); } return bIsResultValid; } #endif // (RHI_NEW_GPU_PROFILER == 0) TQueue FOpenGLGPUFence::ActiveSyncs; FOpenGLGPUFence::FOpenGLGPUFence(FName InName) : FRHIGPUFence(InName) { } void FOpenGLGPUFence::Clear() { Event = nullptr; } bool FOpenGLGPUFence::Poll() const { return Event && Event->IsComplete(); } void FOpenGLGPUFence::Wait(FRHICommandListImmediate& RHICmdList, FRHIGPUMask GPUMask) const { if (Event && !Event->IsComplete()) { // // The fence might get signalled by an earlier RHI command polling them, but we can't be sure that will happen. // The GPU might finish work after the RHI thread has gone idle, and then we'll never see the fence complete. // // Enqueue a command here that will block and wait for the fence if it still hasn't signalled by the time // the RHI thread is done with all prior commands. // RHICmdList.EnqueueLambda([Event = Event](FRHICommandListImmediate&) { if (!Event->IsComplete()) { PollFencesUntil(Event); } }); RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); Event->Wait(); } } void FOpenGLGPUFence::PollFencesUntil(FGraphEvent* Target) { if (!PlatformOpenGLThreadHasRenderingContext() && !Target) { // Don't poll fences if this thread doesn't own the GL context. return; } VERIFY_GL_SCOPE(); while (FGLSync* Sync = ActiveSyncs.Peek()) { bool const bTarget = Sync->Event == Target; // Wait forever if this fence is the one we're looking for specifically, otherwise just poll. uint64 const Timeout = bTarget ? 0xffffffff'ffffffff : 0; switch (FOpenGL::ClientWaitSync(Sync->GLSync, 0, Timeout)) { case FOpenGL::FR_AlreadySignaled: case FOpenGL::FR_ConditionSatisfied: break; // Fence completed case FOpenGL::FR_TimeoutExpired: return; // Fence is not done default: checkNoEntry(); [[fallthrough]]; case FOpenGL::FR_WaitFailed: // Some error state UE_LOG(LogOpenGL, Fatal, TEXT("Waiting on FGLSync fence 0x%p failed."), Sync); return; } // The fence has completed. Signal the graph event and remove the node. Sync->Event->DispatchSubsequents(); FOpenGL::DeleteSync(Sync->GLSync); ActiveSyncs.Pop(); if (bTarget) { // We found the fence we wanted. Stop polling. return; } } checkf(!Target, TEXT("Attempt to poll for a specific fence, but it was not found in the queue.")); } void FOpenGLGPUFence::WriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList) { Event = FGraphEvent::CreateGraphEvent(); RHICmdList.EnqueueLambda([Event = Event](FRHICommandListBase&) mutable { VERIFY_GL_SCOPE(); UGLsync Fence = FOpenGL::FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); ActiveSyncs.Enqueue(FGLSync(MoveTemp(Event), Fence)); }); } void FOpenGLDynamicRHI::RHIWriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIGPUFence* FenceRHI) { ResourceCast(FenceRHI)->WriteGPUFence_TopOfPipe(RHICmdList); } void FOpenGLDynamicRHI::RHIWriteGPUFence(FRHIGPUFence* FenceRHI) { checkNoEntry(); // Should never be called } FGPUFenceRHIRef FOpenGLDynamicRHI::RHICreateGPUFence(const FName& Name) { return new FOpenGLGPUFence(Name); }