UnrealEngine/Engine/Source/Runtime/OpenGLDrv/Private/OpenGLQuery.cpp

// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	OpenGLQuery.cpp: OpenGL query RHI implementation.
=============================================================================*/

#include "CoreMinimal.h"
#include "Stats/Stats.h"
#include "OpenGLDrv.h"
#include "OpenGLDrvPrivate.h"
#include "RenderCore.h"

// List of in-flight queries (First/Last pointers) plus a Count of GL query objects currently acquired.
// Nodes are appended by Link() and removed by Unlink(); Count is maintained by AcquireGlQuery()/ReleaseGlQuery().
FOpenGLRenderQuery::FActiveQueries FOpenGLRenderQuery::ActiveQueries;
// Pools of GL query object names, indexed by query type. ReleaseGlQuery() adds to them, AcquireGlQuery() pops from them.
FOpenGLRenderQuery::FQueryPool FOpenGLRenderQuery::PooledQueries;

// Destructor: hands any GL query object still held back to the pool and removes
// this query from the active list (both done by ReleaseGlQuery()).
FOpenGLRenderQuery::~FOpenGLRenderQuery()
{
	VERIFY_GL_SCOPE();
	ReleaseGlQuery();
}

void FOpenGLRenderQuery::Link()
{
	// The renderer might re-use a query without reading its results back first.
	// Ensure this query is unlinked, so it can be re-linked at the end of the list.
	Unlink();

	if (!ActiveQueries.First)
	{
		check(!ActiveQueries.Last);
		check(Next == nullptr);

		ActiveQueries.First = this;
		Prev = &ActiveQueries.First;
	}
	else
	{
		check(ActiveQueries.Last);
		check(ActiveQueries.Last->Next == nullptr);

		ActiveQueries.Last->Next = this;
		Prev = &ActiveQueries.Last->Next;
	}

	ActiveQueries.Last = this;
}

// Removes this query from the active query list. Does nothing if it is not linked.
void FOpenGLRenderQuery::Unlink()
{
	if (!IsLinked())
	{
		return;
	}

	const bool bIsTail = (ActiveQueries.Last == this);
	if (bIsTail)
	{
		// Removing the tail means "ActiveQueries.Last" has to be repointed.
		const bool bIsAlsoHead = (Prev == &ActiveQueries.First);

		// - Sole node in the list: the list becomes empty, so clear the tail pointer.
		// - Otherwise "Prev" is the address of the previous node's "Next" field;
		//   subtracting that field's offset recovers the previous node itself.
		ActiveQueries.Last = bIsAlsoHead
			? nullptr
			: reinterpret_cast<FOpenGLRenderQuery*>(reinterpret_cast<uintptr_t>(Prev) - offsetof(FOpenGLRenderQuery, Next));
	}

	// Splice the neighbours together.
	if (Next)
	{
		Next->Prev = Prev;
	}
	if (Prev)
	{
		*Prev = Next;
	}

	// Mark this node as no longer being part of any list.
	Next = nullptr;
	Prev = nullptr;
}

// Ensures this query owns a GL query object, taking one from the pool or generating a new one.
// Blocks on the oldest in-flight queries while the in-flight limit is reached.
void FOpenGLRenderQuery::AcquireGlQuery()
{
	VERIFY_GL_SCOPE();

	// Only do work when no GL query object is attached yet.
	if (Resource == 0)
	{
		// The number of outstanding queries is capped. While at the cap, force a blocking
		// readback of the oldest active query, which releases its GL object.
		while (ActiveQueries.First && ActiveQueries.Count >= GRHIMaximumInFlightQueries)
		{
			ActiveQueries.First->CacheResult(true);
		}

		++ActiveQueries.Count;

		// Prefer recycling a pooled GL query object of the same type over creating a new one.
		if (PooledQueries[Type].Num() != 0)
		{
			Resource = PooledQueries[Type].Pop();
		}
		else
		{
			FOpenGL::GenQueries(1, &Resource);
		}
	}
}

// Returns this query's GL query object (if any) to the pool and removes the query from the active list.
void FOpenGLRenderQuery::ReleaseGlQuery()
{
	VERIFY_GL_SCOPE();

	if (Resource != 0)
	{
		check(ActiveQueries.Count > 0);
		--ActiveQueries.Count;

		// Keep the GL object around for reuse by a later query of the same type.
		PooledQueries[Type].Add(Resource);
		Resource = 0;

		Unlink();
	}
	else
	{
		// Nothing held: a query without a GL object must not be in the active list.
		check(!IsLinked());
	}
}

// Begins a scoped GL query for this object. Handles occlusion queries and, when
// RHI_NEW_GPU_PROFILER is 0, disjoint (elapsed time) queries. Any other type asserts.
void FOpenGLRenderQuery::Begin()
{
	VERIFY_GL_SCOPE();

	check(!IsLinked());
	AcquireGlQuery();

	check(Resource);

	switch (Type)
	{
	case EType::Occlusion:
		// Use an exact sample count when supported, otherwise a boolean "any samples passed" query.
		if (FOpenGL::SupportsExactOcclusionQueries())
		{
			FOpenGL::BeginQuery(UGL_SAMPLES_PASSED, Resource);
		}
		else
		{
			FOpenGL::BeginQuery(UGL_ANY_SAMPLES_PASSED, Resource);
		}
		break;

#if RHI_NEW_GPU_PROFILER == 0
	case EType::Disjoint:
		FOpenGL::BeginQuery(UGL_TIME_ELAPSED, Resource);
		break;
#endif

	default:
		checkNoEntry();
		break;
	}
}

// Ends (or, for timestamp-style queries, issues) the GL query and appends it to the active list.
//
// @param InTarget  Stored in Target; SetResult() writes the value through it when it is non-null.
void FOpenGLRenderQuery::End(uint64* InTarget)
{
	VERIFY_GL_SCOPE();
	AcquireGlQuery();

	check(Resource);

	switch (Type)
	{
	case EType::Occlusion:
		// Must match the target chosen in Begin().
		if (FOpenGL::SupportsExactOcclusionQueries())
		{
			FOpenGL::EndQuery(UGL_SAMPLES_PASSED);
		}
		else
		{
			FOpenGL::EndQuery(UGL_ANY_SAMPLES_PASSED);
		}
		break;

	// Timestamp-style queries have no begin/end pair; they are recorded with a single call.
	case EType::Timestamp:
#if RHI_NEW_GPU_PROFILER
	case EType::Profiler:
#endif
		FOpenGL::QueryTimestampCounter(Resource);
		break;

#if RHI_NEW_GPU_PROFILER == 0
	case EType::Disjoint:
		FOpenGL::EndQuery(UGL_TIME_ELAPSED);
		break;
#endif
	}

	// A new operation has been issued, so any previously cached result is now stale (see IsCached()).
	++BOPCounter;

	Target = InTarget;

	Link();
}

bool FOpenGLRenderQuery::IsCached()
{
	return BOPCounter == LastCachedBOPCounter.load(std::memory_order_relaxed);
}

bool FOpenGLRenderQuery::CacheResult(bool bWait)
{
	VERIFY_GL_SCOPE();

	if (IsCached())
	{
		// Value has been cached and no newer query operation has started.
		check(!IsLinked());
		return true;
	}

	check(Resource);

	if (!bWait)
	{
		// If we don't want to wait, we need to check if the result is available first.
		GLuint IsAvailable = GL_FALSE;
		FOpenGL::GetQueryObject(Resource, FOpenGL::QM_ResultAvailable, &IsAvailable);

		if (IsAvailable == GL_FALSE)
		{
			// Not ready yet.
			return false;
		}
	}

	// Read the result back (and block if its not ready)
	switch (Type)
	{
	default:
		checkNoEntry();
		break;

	case EType::Occlusion:
		{
			GLuint Result32 = 0;
			FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Result32);
			SetResult(Result32 * (FOpenGL::SupportsExactOcclusionQueries() ? 1 : 500000)); // half a mega pixel display
		}
		break;

	case EType::Timestamp:
		{
			GLuint64 Value = 0;
			FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Value);

			// Convert to microseconds (GL queries are in nanoseconds)
			SetResult(Value / 1000);
		}
		break;

#if RHI_NEW_GPU_PROFILER
	case EType::Profiler:
		{
			FOpenGLDynamicRHI& RHI = FOpenGLDynamicRHI::Get();

			// TimerQueryDisjoint is a one-shot state in the driver, it is not pipelined.
			// If it returns true, all timers we've submitted after this timer but haven't
			// yet resolved should be discarded for having invalid data.
			if (FOpenGL::TimerQueryDisjoint())
			{
				for (FOpenGLRenderQuery* Other = this; Other; Other = Other->Next)
				{
					if (Other->Type == EType::Profiler)
					{
						Other->SetResult(RHI.Profiler.ResolveQuery(0, Other->Target, true));

						// Return the query to the pool
						RHI.Profiler.QueryPool.Push(Other);
					}
				}
			}
			else
			{
				GLuint64 Value;
				FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Value);
				SetResult(RHI.Profiler.ResolveQuery(Value, Target, false));

				// Return the query to the pool
				RHI.Profiler.QueryPool.Push(this);
			}
		}
		break;
#else
	case EType::Disjoint:
		{
			// TimerQueryDisjoint is a one-shot state in the driver, it is not pipelined.
			// If it returns true, all timers we've submitted after this timer but haven't
			// yet resolved should be discarded for having invalid data.
			if (FOpenGL::TimerQueryDisjoint())
			{
				for (FOpenGLRenderQuery* Other = this; Other; Other = Other->Next)
				{
					if (Other->Type == EType::Disjoint)
					{
						Other->SetResult(InvalidDisjointMask);
					}
				}
			}
			else
			{
				GLuint64 Value;
				FOpenGL::GetQueryObject(Resource, FOpenGL::QM_Result, &Value);

				// Convert to microseconds (GL queries are in nanoseconds)
				SetResult(Value / 1000);
			}
		}
		break;
#endif
	}

	return true;
}

/**
 * Stores a query result, retires the GL query object and marks the query as cached.
 *
 * @param Value  Result to store. It is also written through Target when a target pointer was
 *               supplied to End(); the pointer is cleared afterwards so it is written only once.
 */
void FOpenGLRenderQuery::SetResult(uint64 Value)
{
	if (Target)
	{
		*Target = Value;
		Target = nullptr;
	}

	Result = Value;
	// Returns the GL query object to the pool and removes this query from the active list.
	ReleaseGlQuery();

	// Done last, with release ordering, so the acquire load in FOpenGLRenderQuery_RHI::GetResult()
	// sees the stored Result once it observes the updated counter.
	LastCachedBOPCounter.store(BOPCounter, std::memory_order_release);
}

bool FOpenGLRenderQuery_RHI::GetResult(bool bWait, uint64& OutResult)
{
	if (TOPCounter == LastCachedBOPCounter.load(std::memory_order_acquire))
	{
		// Early return for queries we already have the result for.
		check(!IsLinked());
		OutResult = FOpenGLRenderQuery::GetResult();
		return true;
	}

	if (!bWait)
	{
		//
		// The query has not yet completed, and we don't want to wait for the query result.
		// Return. The RHI thread will poll for results later.
		//
		OutResult = 0;
		return false;
	}

	FRHICommandListImmediate& RHICmdList = FRHICommandListImmediate::Get();

	//
	// The query has not yet completed, and we want to wait for results.
	// Append an RHI thread command that will force a readback of the GL query, then flush the RHI thread.
	//
	RHICmdList.EnqueueLambda([this](FRHICommandListImmediate&)
	{
		// Fetch the query result if it hasn't arrived yet...
		CacheResult(true);
	});

	// Wait for the above lambda to execute
	RHICmdList.ImmediateFlush(EImmediateFlushType::FlushRHIThread);

	checkf(TOPCounter == LastCachedBOPCounter, TEXT("Attempting to get data from an RHI render query which was never issued."));
	check(!IsLinked());

	OutResult = FOpenGLRenderQuery::GetResult();
	return true;
}

/**
 * Polls the active queries in submission order, without blocking, and caches each result that is ready.
 *
 * @param TargetQuery  Optional query of interest; polling stops as soon as it has been resolved.
 * @return With a target: true if the target's result is cached, false otherwise.
 *         Without a target: true if every active query was resolved, false otherwise.
 *         Always false when the calling thread does not own the GL context.
 */
bool FOpenGLRenderQuery::PollQueryResults(FOpenGLRenderQuery* TargetQuery)
{
	// Queries can only be polled from the thread that owns the GL context.
	if (!PlatformOpenGLThreadHasRenderingContext())
	{
		return false;
	}

	if (TargetQuery && TargetQuery->IsCached())
	{
		return true;
	}

	if (ActiveQueries.First)
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(PollQueryResults);

		// A successful CacheResult() removes the query from the list, so the head advances each iteration.
		while (FOpenGLRenderQuery* Oldest = ActiveQueries.First)
		{
			if (!Oldest->CacheResult(/*bWait = */ false))
			{
				// The oldest query is still pending; stop here.
				return false;
			}

			if (Oldest == TargetQuery)
			{
				return true;
			}
		}
	}

	// The list is drained. That only counts as success if no specific query was requested.
	return TargetQuery == nullptr;
}

// Deletes every pooled GL query object. No GL query objects may still be acquired when this is called.
void FOpenGLRenderQuery::Cleanup()
{
	VERIFY_GL_SCOPE();
	check(ActiveQueries.Count == 0);

	for (auto& Pool : PooledQueries)
	{
		for (GLuint QueryName : Pool)
		{
			FOpenGL::DeleteQueries(1, &QueryName);
		}

		Pool.Reset();
	}
}

// Creates an RHI render query of the given type (occlusion or absolute time only).
// Returns null for absolute-time queries when timestamp queries are unsupported.
FRenderQueryRHIRef FOpenGLDynamicRHI::RHICreateRenderQuery(ERenderQueryType QueryType)
{
	check(QueryType == RQT_Occlusion || QueryType == RQT_AbsoluteTime);

	const bool bTimestampUnavailable = (QueryType == RQT_AbsoluteTime) && !FOpenGL::SupportsTimestampQueries();
	if (bTimestampUnavailable)
	{
		return nullptr;
	}

	return new FOpenGLRenderQuery_RHI(QueryType);
}

// Top-of-pipe begin: ignores null queries, otherwise defers to the base implementation.
void FOpenGLDynamicRHI::RHIBeginRenderQuery_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIRenderQuery* RenderQuery)
{
	if (RenderQuery)
	{
		FDynamicRHI::RHIBeginRenderQuery_TopOfPipe(RHICmdList, RenderQuery);
	}
}

// Top-of-pipe end: ignores null queries, otherwise notifies the query object
// and then defers to the base implementation.
void FOpenGLDynamicRHI::RHIEndRenderQuery_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIRenderQuery* RenderQuery)
{
	if (RenderQuery)
	{
		ResourceCast(RenderQuery)->End_TopOfPipe();
		FDynamicRHI::RHIEndRenderQuery_TopOfPipe(RHICmdList, RenderQuery);
	}
}

// Begins the GL query backing the given RHI render query.
void FOpenGLDynamicRHI::RHIBeginRenderQuery(FRHIRenderQuery* RenderQuery)
{
	FOpenGLRenderQuery_RHI* Query = ResourceCast(RenderQuery);
	Query->Begin();
}

// Ends the GL query backing the given RHI render query.
void FOpenGLDynamicRHI::RHIEndRenderQuery(FRHIRenderQuery* RenderQuery)
{
	ResourceCast(RenderQuery)->End();
}

// Fetches the result of a render query. A null query is reported as complete with a result of 0.
// GPUIndex is not used by this implementation.
bool FOpenGLDynamicRHI::RHIGetRenderQueryResult(FRHIRenderQuery* QueryRHI, uint64& OutResult, bool bWait, uint32 GPUIndex)
{
	if (QueryRHI)
	{
		return ResourceCast(QueryRHI)->GetResult(bWait, OutResult);
	}

	OutResult = 0;
	return true;
}

// Inserts a new GPU fence into the command stream, replacing the fence from any previous issue.
void FOpenGLEventQuery::IssueEvent()
{
	VERIFY_GL_SCOPE();
	// Delete the previous fence (if any) before creating a new one.
	if(Sync)
	{
		FOpenGL::DeleteSync(Sync);
		Sync = UGLsync();
	}
	Sync = FOpenGL::FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
	// Flush so the fence command is actually submitted; WaitForCompletion() waits on it with a timeout.
	FOpenGL::Flush();

	checkSlow(FOpenGL::IsSync(Sync));
}

void FOpenGLEventQuery::WaitForCompletion()
{
	VERIFY_GL_SCOPE();

	QUICK_SCOPE_CYCLE_COUNTER(STAT_FOpenGLEventQuery_WaitForCompletion);

	checkSlow(FOpenGL::IsSync(Sync));

	// Wait up to 1/2 second for sync execution
	FOpenGL::EFenceResult Status = FOpenGL::ClientWaitSync( Sync, 0, 500*1000*1000);

	switch (Status)
	{
	case FOpenGL::FR_AlreadySignaled:
	case FOpenGL::FR_ConditionSatisfied:
		break;

	case FOpenGL::FR_TimeoutExpired:
		UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms)"));
		break;

	case FOpenGL::FR_WaitFailed:
		UE_LOG(LogRHI, Log, TEXT("Wait on GPU failed in driver"));
		break;

	default:
	    UE_LOG(LogRHI, Log, TEXT("Unknown error while waiting on GPU"));
	    check(0);
		break;
	}
}

// Constructs the event query with a fence already issued, so Sync is a sync object from the start.
FOpenGLEventQuery::FOpenGLEventQuery()
{
	VERIFY_GL_SCOPE();

	// Initialize the query by issuing an initial event.
	IssueEvent();

	check(FOpenGL::IsSync(Sync));
}

// Destroys the fence created by the last IssueEvent() call.
FOpenGLEventQuery::~FOpenGLEventQuery()
{
	VERIFY_GL_SCOPE();
	FOpenGL::DeleteSync(Sync);
}

/*=============================================================================
 * class FOpenGLBufferedGPUTiming
 *=============================================================================*/

#if (RHI_NEW_GPU_PROFILER == 0)

/**
 * Constructor. Only records the buffer size; the query objects are created in InitResources().
 *
 * @param InBufferSize		Number of buffered measurements
 */
FOpenGLBufferedGPUTiming::FOpenGLBufferedGPUTiming(int32 InBufferSize)
	: BufferSize(InBufferSize)
{
}

/**
 * Initializes the static variables, if necessary. Subsequent calls are no-ops.
 *
 * @param UserData	Not used by this implementation.
 */
void FOpenGLBufferedGPUTiming::PlatformStaticInitialize(void* UserData)
{
	if (GAreGlobalsInitialized)
	{
		// Already set up by an earlier call.
		return;
	}

	GIsSupported = FOpenGL::SupportsTimestampQueries();
	SetTimingFrequency(1000 * 1000 * 1000);
	GAreGlobalsInitialized = true;
}

/**
 * Timestamp query objects handed back by FOpenGLBufferedGPUTiming::ReleaseResources(),
 * kept for reuse by later InitResources() calls.
 */
static TArray<FOpenGLRenderQuery*> TimerQueryPool;

/** Returns a timestamp query, taken from the pool when one is available and newly allocated otherwise. */
static FOpenGLRenderQuery* GetTimeQuery()
{
	return (TimerQueryPool.Num() != 0)
		? TimerQueryPool.Pop()
		: new FOpenGLRenderQuery(FOpenGLRenderQuery::EType::Timestamp);
}

void FOpenGLBufferedGPUTiming::InitResources()
{
	StaticInitialize(nullptr, PlatformStaticInitialize);

	CurrentTimestamp = 0;
	NumIssuedTimestamps = 0;
	bIsTiming = false;
	GIsSupported = FOpenGL::SupportsTimestampQueries();

	if ( GIsSupported )
	{
		StartTimestamps.Reserve(BufferSize);
		EndTimestamps.Reserve(BufferSize);

		for(int32 BufferIndex = 0; BufferIndex < BufferSize; ++BufferIndex)
		{
			StartTimestamps.Add(GetTimeQuery());
			EndTimestamps.Add(GetTimeQuery());
		}
	}
}

/**
 * Releases all OpenGL resources.
 */
void FOpenGLBufferedGPUTiming::ReleaseResources()
{
	VERIFY_GL_SCOPE();

	for (FOpenGLRenderQuery* Query : StartTimestamps)
	{
		TimerQueryPool.Add(Query);
	}

	for (FOpenGLRenderQuery* Query : EndTimestamps)
	{
		TimerQueryPool.Add(Query);
	}

	StartTimestamps.Reset();
	EndTimestamps.Reset();

}

/**
 * Start a GPU timing measurement.
 */
void FOpenGLBufferedGPUTiming::StartTiming()
{
	VERIFY_GL_SCOPE();
	// Issue a timestamp query for the 'start' time.
	if ( GIsSupported && !bIsTiming )
	{
		int32 NewTimestampIndex = (CurrentTimestamp + 1) % BufferSize;
		StartTimestamps[NewTimestampIndex]->End();

		CurrentTimestamp = NewTimestampIndex;
		bIsTiming = true;
	}
}

/**
 * End a GPU timing measurement.
 * The timing for this particular measurement will be resolved at a later time by the GPU.
 * Does nothing if timing is unsupported or no measurement is in progress.
 */
void FOpenGLBufferedGPUTiming::EndTiming()
{
	VERIFY_GL_SCOPE();

	if (!GIsSupported || !bIsTiming)
	{
		return;
	}

	// Issue the timestamp query for the 'end' time in the slot chosen by StartTiming().
	checkSlow( CurrentTimestamp >= 0 && CurrentTimestamp < BufferSize );
	EndTimestamps[CurrentTimestamp]->End();

	// The number of usable measurements saturates at the ring size.
	NumIssuedTimestamps = FMath::Min<int32>(NumIssuedTimestamps + 1, BufferSize);
	bIsTiming = false;
}

/**
 * Retrieves the most recently resolved timing measurement.
 * The unit is the same as for FPlatformTime::Cycles(). Returns 0 if there are no resolved measurements.
 * NOTE(review): the timestamp queries used here are resolved by FOpenGLRenderQuery::CacheResult(), which
 * divides the GL value by 1000 ("convert to microseconds"), while PlatformStaticInitialize() sets a timing
 * frequency of 1e9. Confirm which unit callers actually expect.
 *
 * @param bGetCurrentResultsAndBlock	If true, skip the quick scan over recent measurements and wait
 *										(up to 500 ms) for the current measurement to resolve.
 * @return	Value of the most recently resolved timing, or 0 if no measurements have been resolved by the GPU yet
 *			(also 0 on timeout, when timing is unsupported, or when the end time is not greater than the start time).
 */
uint64 FOpenGLBufferedGPUTiming::GetTiming(bool bGetCurrentResultsAndBlock)
{
	VERIFY_GL_SCOPE();

	if (GIsSupported)
	{
		checkSlow(CurrentTimestamp >= 0 && CurrentTimestamp < BufferSize);
		int32 TimestampIndex = CurrentTimestamp;

		if (!bGetCurrentResultsAndBlock)
		{
			// Quickly check the most recent measurements to see if any of them has been resolved.  Do not flush these queries.
			// Starting at 1 means at most (NumIssuedTimestamps - 1) slots are scanned here, newest first;
			// the remaining (oldest) slot is handled by the more thorough check below.
			for (int32 IssueIndex = 1; IssueIndex < NumIssuedTimestamps; ++IssueIndex)
			{
				FOpenGLRenderQuery* StartQuery = StartTimestamps[TimestampIndex];
				FOpenGLRenderQuery* EndQuery = EndTimestamps[TimestampIndex];

				if (StartQuery->CacheResult(false) && EndQuery->CacheResult(false))
				{
					uint64 StartTime = StartQuery->GetResult();
					uint64 EndTime = EndQuery->GetResult();

					if (EndTime > StartTime)
					{
						return EndTime - StartTime;
					}
				}

				// Step backwards through the ring buffer (adding BufferSize keeps the operand non-negative).
				TimestampIndex = (TimestampIndex + BufferSize - 1) % BufferSize;
			}
		}

		if (NumIssuedTimestamps > 0 || bGetCurrentResultsAndBlock)
		{
			// None of the (NumIssuedTimestamps - 1) measurements were ready yet,
			// so check the oldest measurement more thoroughly.
			// This really only happens if occlusion and frame sync event queries are disabled, otherwise those will block until the GPU catches up to 1 frame behind
			// Block when the ring is full or when the caller explicitly asked for it.
			const bool bBlocking = ( NumIssuedTimestamps == BufferSize ) || bGetCurrentResultsAndBlock;

			FOpenGLRenderQuery* StartQuery = StartTimestamps[TimestampIndex];
			FOpenGLRenderQuery* EndQuery = EndTimestamps[TimestampIndex];

			bool bHasStart = false, bHasEnd = false;

			{
				FRenderThreadIdleScope IdleScope(ERenderThreadIdleTypes::WaitingForGPUQuery);
				SCOPE_CYCLE_COUNTER(STAT_RenderQueryResultTime);

				double StartTimeoutTime = FPlatformTime::Seconds();

				// If we are blocking, retry until the GPU processes the time stamp command
				// The loop re-polls both queries without blocking in GL, so the 0.5 s timeout below stays effective.
				while (true)
				{
					bHasStart = StartQuery->CacheResult(false);
					bHasEnd = EndQuery->CacheResult(false);

					if (bBlocking && !(bHasStart && bHasEnd))
					{
						if ((FPlatformTime::Seconds() - StartTimeoutTime) > 0.5)
						{
							UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms)"));
							return 0;
						}
					}
					else
					{
						// Either both results are in, or we are not blocking and take whatever is available.
						break;
					}
				}
			}

			if (bHasStart && bHasEnd)
			{
				uint64 StartTime = StartQuery->GetResult();
				uint64 EndTime = EndQuery->GetResult();

				if (EndTime > StartTime)
				{
					return EndTime - StartTime;
				}
			}
		}
	}
	return 0;
}

// Begins the disjoint query. No-op when disjoint timestamp queries are unsupported.
void FOpenGLDisjointTimeStampQuery::StartTracking()
{
	VERIFY_GL_SCOPE();

	if (!IsSupported())
	{
		return;
	}

	DisjointQuery->Begin();
}

// Ends the disjoint query. No-op when disjoint timestamp queries are unsupported.
void FOpenGLDisjointTimeStampQuery::EndTracking()
{
	VERIFY_GL_SCOPE();

	if (!IsSupported())
	{
		return;
	}

	DisjointQuery->End();
}

// Returns the validity flag computed by the last GetResult() call.
// Does not query GL itself; only expected to be called when disjoint queries are supported.
bool FOpenGLDisjointTimeStampQuery::IsResultValid()
{
	checkSlow(IsSupported());
	return bIsResultValid;
}

/**
 * Blocks until the disjoint query has resolved and reports its value.
 *
 * @param OutResult  Receives the query result with the invalid-disjoint bits masked off.
 *                   Left untouched when disjoint queries are unsupported.
 * @return true if the result is valid, i.e. no disjoint event was flagged for the measurement.
 */
bool FOpenGLDisjointTimeStampQuery::GetResult(uint64* OutResult)
{
	VERIFY_GL_SCOPE();

	if (!IsSupported())
	{
		return bIsResultValid;
	}

	DisjointQuery->CacheResult(true);

	// The invalid marker is carried in the result value itself; separate it from the payload.
	const uint64 RawResult = DisjointQuery->GetResult();
	bIsResultValid = (RawResult & FOpenGLRenderQuery::InvalidDisjointMask) == 0;

	*OutResult = RawResult & (~FOpenGLRenderQuery::InvalidDisjointMask);

	return bIsResultValid;
}

#endif // (RHI_NEW_GPU_PROFILER == 0)

// FIFO of GL sync objects paired with their graph events. Entries are enqueued by
// WriteGPUFence_TopOfPipe() and consumed in order by PollFencesUntil().
TQueue<FOpenGLGPUFence::FGLSync, EQueueMode::SingleThreaded> FOpenGLGPUFence::ActiveSyncs;

// Constructs a fence with the given name. No GL sync object is created here;
// that happens when the fence is written (see WriteGPUFence_TopOfPipe()).
FOpenGLGPUFence::FOpenGLGPUFence(FName InName)
	: FRHIGPUFence(InName)
{
}

// Resets the fence by dropping its graph event; Poll() returns false until the fence is written again.
void FOpenGLGPUFence::Clear()
{
	Event = nullptr;
}

// Returns true once the fence has been written and its graph event has completed.
bool FOpenGLGPUFence::Poll() const
{
	// A fence that was never written (or was cleared) has no event and is reported as not signalled.
	return Event ? Event->IsComplete() : false;
}

// Blocks the calling thread until the fence has signalled. Returns immediately when the fence
// was never written or has already completed. GPUMask is not used by this implementation.
void FOpenGLGPUFence::Wait(FRHICommandListImmediate& RHICmdList, FRHIGPUMask GPUMask) const
{
	if (!Event || Event->IsComplete())
	{
		return;
	}

	//
	// An earlier RHI command may end up signalling the fence while polling, but that isn't guaranteed:
	// the GPU could finish after the RHI thread has gone idle, and nobody would notice the completion.
	//
	// So enqueue a command that, once all prior commands have run, blocks on the fence
	// if it still hasn't signalled by then.
	//
	RHICmdList.EnqueueLambda([PendingEvent = Event](FRHICommandListImmediate&)
	{
		if (!PendingEvent->IsComplete())
		{
			PollFencesUntil(PendingEvent);
		}
	});
	RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);

	Event->Wait();
}

/**
 * Walks the queue of active GL syncs in submission order, signalling and removing each one that has completed.
 *
 * @param Target  Optional graph event to wait for. Its sync is waited on without a timeout; all other
 *                syncs are only polled. With no target, the walk stops at the first sync that is not done.
 */
void FOpenGLGPUFence::PollFencesUntil(FGraphEvent* Target)
{
	if (!PlatformOpenGLThreadHasRenderingContext() && !Target)
	{
		// Plain polling (no specific target) is skipped on threads that don't own the GL context.
		return;
	}

	VERIFY_GL_SCOPE();

	while (FGLSync* Oldest = ActiveSyncs.Peek())
	{
		bool const bIsTarget = (Oldest->Event == Target);

		// Block indefinitely on the fence we were asked for; everything else gets a zero-timeout poll.
		uint64 const WaitTimeout = bIsTarget ? 0xffffffff'ffffffff : 0;

		switch (FOpenGL::ClientWaitSync(Oldest->GLSync, 0, WaitTimeout))
		{
		case FOpenGL::FR_AlreadySignaled:
		case FOpenGL::FR_ConditionSatisfied:
			// Completed; handled after the switch.
			break;

		case FOpenGL::FR_TimeoutExpired:
			// Still pending. Later fences can't be done before this one is removed, so stop here.
			return;

		default:
			checkNoEntry();
			[[fallthrough]];
		case FOpenGL::FR_WaitFailed:
			// Some error state
			UE_LOG(LogOpenGL, Fatal, TEXT("Waiting on FGLSync fence 0x%p failed."), Oldest);
			return;
		}

		// Signal whoever is waiting on the graph event, then retire the GL sync and its queue entry.
		Oldest->Event->DispatchSubsequents();
		FOpenGL::DeleteSync(Oldest->GLSync);

		ActiveSyncs.Pop();

		if (bIsTarget)
		{
			// That was the fence the caller wanted; no need to look further.
			return;
		}
	}

	checkf(!Target, TEXT("Attempt to poll for a specific fence, but it was not found in the queue."));
}

// Arms the fence: creates a fresh graph event now, and enqueues a command that inserts the GL sync
// object and registers it (together with the event) in the active sync queue.
void FOpenGLGPUFence::WriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList)
{
	Event = FGraphEvent::CreateGraphEvent();

	// The lambda owns its own reference to the event and moves it into the queue entry.
	RHICmdList.EnqueueLambda([SignalEvent = Event](FRHICommandListBase&) mutable
	{
		VERIFY_GL_SCOPE();

		UGLsync GLFence = FOpenGL::FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		ActiveSyncs.Enqueue(FGLSync(MoveTemp(SignalEvent), GLFence));
	});
}

// Forwards the top-of-pipe fence write to the OpenGL fence object.
void FOpenGLDynamicRHI::RHIWriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIGPUFence* FenceRHI)
{
	auto* Fence = ResourceCast(FenceRHI);
	Fence->WriteGPUFence_TopOfPipe(RHICmdList);
}

// Not used by the OpenGL RHI: fence writes are handled entirely by RHIWriteGPUFence_TopOfPipe(),
// so reaching this function is treated as a programming error.
void FOpenGLDynamicRHI::RHIWriteGPUFence(FRHIGPUFence* FenceRHI)
{
	checkNoEntry(); // Should never be called
}

// Creates a new OpenGL GPU fence with the given name.
FGPUFenceRHIRef FOpenGLDynamicRHI::RHICreateGPUFence(const FName& Name)
{
	return new FOpenGLGPUFence(Name);
}