Files
UnrealEngine/Engine/Source/Runtime/OpenGLDrv/Private/OpenGLShaders.cpp
2025-05-18 13:04:45 +08:00

3486 lines
114 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
OpenGLShaders.cpp: OpenGL shader RHI implementation.
=============================================================================*/
#include "OpenGLShaders.h"
#include "HAL/PlatformFileManager.h"
#include "HAL/FileManager.h"
#include "Misc/Paths.h"
#include "Misc/Compression.h"
#include "Serialization/MemoryWriter.h"
#include "Serialization/MemoryReader.h"
#include "OpenGLDrvPrivate.h"
#include "Shader.h"
#include "GlobalShader.h"
#include "SceneUtils.h"
#include "PsoLruCache.h"
#include "RHICoreShader.h"
#include "OpenGLProgramBinaryFileCache.h"
#include "OpenGLBinaryProgramUtils.h"
#include "HAL/PlatformFramePacer.h"
#if PLATFORM_WINDOWS && PLATFORM_CPU_X86_FAMILY
#include <mmintrin.h>
#endif
#include "SceneUtils.h"
// r.OpenGL.EnableProgramLRUCache: read-only switch for the GL program LRU cache; the registered default is 0 (disabled).
static TAutoConsoleVariable<int32> CVarEnableLRU(
TEXT("r.OpenGL.EnableProgramLRUCache"),
0,
TEXT("OpenGL program LRU cache.\n")
TEXT("For use only when driver only supports a limited number of active GL programs.\n")
TEXT("0: disable LRU. (default)\n")
TEXT("1: When the LRU cache limits are reached, the least recently used GL program(s) will be deleted to make space for new/more recent programs. Expect hitching if requested shader is not in LRU cache."),
ECVF_RenderThreadSafe | ECVF_ReadOnly);
// r.OpenGL.ProgramLRUCount: upper bound on the number of live GL programs (registered default 700).
static TAutoConsoleVariable<int32> CVarLRUMaxProgramCount(
	TEXT("r.OpenGL.ProgramLRUCount"),
	700,
	TEXT("OpenGL LRU maximum occupancy.\n")
	TEXT("Limit the maximum number of active shader programs at any one time.\n")
	TEXT("0: disable LRU.\n")
	TEXT("Non-Zero: Maximum number of active shader programs. If reached, least recently used shader programs will be deleted."),
	ECVF_RenderThreadSafe);
// r.OpenGL.ProgramLRUBinarySize: upper bound, in bytes, on the binary size of live GL programs
// (registered default 35 MiB). The help text previously repeated the program-count wording and
// labelled 0 as the default, which contradicted the value registered here.
static TAutoConsoleVariable<int32> CVarLRUMaxProgramBinarySize(
	TEXT("r.OpenGL.ProgramLRUBinarySize"),
	35*1024*1024,
	TEXT("OpenGL LRU maximum binary shader size.\n")
	TEXT("Limit the maximum number of bytes active shader programs may use at any one time.\n")
	TEXT("0: disable LRU.\n")
	TEXT("Non-Zero: Maximum number of bytes active shader programs may use. If reached, least recently used shader programs will be deleted."),
	ECVF_RenderThreadSafe);
// r.OpenGL.ProgramLRUKeepBinaryResident: read-only; registered default 0 (binary discarded after program creation).
static TAutoConsoleVariable<int32> CVarLRUKeepProgramBinaryResident(
TEXT("r.OpenGL.ProgramLRUKeepBinaryResident"),
0,
TEXT("OpenGL LRU should keep program binary in memory.\n")
TEXT("Do not discard the program binary after creation of the GL program.\n")
TEXT("0: Program binary is discarded after GL program creation and recreated on program eviction. (default)\n")
TEXT("1: Program binary is retained, this improves eviction and re-creation performance but uses more memory."),
ECVF_ReadOnly |ECVF_RenderThreadSafe);
// r.OpenGL.IgnoreLinkFailure: when set to 1, VerifyLinkedProgram() reports a failed link as success.
static TAutoConsoleVariable<int32> CVarIgnoreLinkFailure(
TEXT("r.OpenGL.IgnoreLinkFailure"),
0,
TEXT("Ignore OpenGL program link failures.\n")
TEXT("0: Program link failure generates a fatal error when encountered. (default)\n")
TEXT("1: Ignore link failures. this may allow a program to continue but could lead to undefined rendering behaviour."),
ECVF_RenderThreadSafe);
// r.OpenGL.IgnoreShaderCompileFailure: when set to 1, VerifyShaderCompilation() reports a failed compile as success.
static TAutoConsoleVariable<int32> CVarIgnoreShaderCompileFailure(
TEXT("r.OpenGL.IgnoreShaderCompileFailure"),
0,
TEXT("Ignore OpenGL shader compile failures.\n")
TEXT("0: Shader compile failure return an error when encountered. (default)\n")
TEXT("1: Ignore Shader compile failures."),
ECVF_RenderThreadSafe);
// Backing storage for r.OpenGL.ProgramLRUEvictTimeSeconds; starts at 0.
int32 GUntouchedProgramEvictTimeSeconds = 0;
static FAutoConsoleVariableRef CVarEvictUntouchedProgramSeconds(
TEXT("r.OpenGL.ProgramLRUEvictTimeSeconds"),
GUntouchedProgramEvictTimeSeconds,
TEXT("OpenGL Program LRU, unused program eviction time.\n")
TEXT("Time in seconds before an unused program is eligible for eviction from the GL driver.\n")
TEXT("0 to disable unused program eviction. (default)")
,
ECVF_RenderThreadSafe
);
// Backing storage for r.OpenGL.ProgramLRUResidentCountBeforeEviction; starts at 300.
int32 GProgramLRUResidentCountBeforeEviction = 300;
static FAutoConsoleVariableRef CVarProgramLRUResidentCountBeforeEviction(
TEXT("r.OpenGL.ProgramLRUResidentCountBeforeEviction"),
GProgramLRUResidentCountBeforeEviction,
TEXT("The number of permanently resident programs allowed before the LRU begins evicting unused programs.\n")
TEXT("Can help reduce hitching after a static scene is presented for an extended period of time.\n")
TEXT("0 to remove all eligible programs as soon as possible. (max memory savings with higher hitching potential.)\n")
TEXT("300: (default) ensures the last 300 programs are retained as GL objects.")
,
ECVF_RenderThreadSafe
);
// Backing storage for r.OpenGL.CacheAllProgramBinaries; starts enabled. Read by ShouldCacheAllProgramBinaries().
static bool GCacheAllProgramBinaries = true;
static FAutoConsoleVariableRef CVarGCacheAllProgramBinaries(
TEXT("r.OpenGL.CacheAllProgramBinaries"),
GCacheAllProgramBinaries,
TEXT("Place all encountered program in the binary cache.\n")
TEXT("requires r.PSOPrecaching.")
,
ECVF_RenderThreadSafe
);
#if PLATFORM_ANDROID
// Android-only flag with external linkage; it is not read or written in the portion of the file reviewed here.
bool GOpenGLShaderHackLastCompileSuccess = false;
#endif
// Compile-time switches: when 0, ReportProgramLinkFailures() / ReportShaderCompileFailures() always return false.
// VERIFY_GL_SHADER_LINK additionally gates the driver binary-size tracking in the program stats helpers.
#define VERIFY_GL_SHADER_LINK 1
#define VERIFY_GL_SHADER_COMPILE 1
/**
 * Tells callers whether a failed shader compile should be logged in detail.
 * On Android the "ReportGLShaderCompileFailures" config-rules variable may veto reporting
 * (any value other than a case-insensitive "true" disables it; an absent variable leaves it on).
 * Always false when VERIFY_GL_SHADER_COMPILE is 0.
 */
static bool ReportShaderCompileFailures()
{
#if PLATFORM_ANDROID
	const FString* RuleValue = FAndroidMisc::GetConfigRulesVariable(TEXT("ReportGLShaderCompileFailures"));
	const bool bWantReport = (RuleValue == nullptr) || RuleValue->Equals("true", ESearchCase::IgnoreCase);
#else
	const bool bWantReport = true;
#endif

#if VERIFY_GL_SHADER_COMPILE
	return bWantReport;
#else
	return false;
#endif
}
/**
 * Tells callers whether a failed program link should be logged in detail.
 * On Android the "ReportGLProgramLinkFailures" config-rules variable may veto reporting
 * (any value other than a case-insensitive "true" disables it; an absent variable leaves it on).
 * Always false when VERIFY_GL_SHADER_LINK is 0.
 */
static bool ReportProgramLinkFailures()
{
#if PLATFORM_ANDROID
	const FString* RuleValue = FAndroidMisc::GetConfigRulesVariable(TEXT("ReportGLProgramLinkFailures"));
	const bool bWantReport = (RuleValue == nullptr) || RuleValue->Equals("true", ESearchCase::IgnoreCase);
#else
	const bool bWantReport = true;
#endif

#if VERIFY_GL_SHADER_LINK
	return bWantReport;
#else
	return false;
#endif
}
// Binary cache usage statistics are compiled into every build that is not a shipping build.
#define OGL_BINARYCACHE_STATS !UE_BUILD_SHIPPING
#if OGL_BINARYCACHE_STATS
// Each helper forwards to the FOpenGLBinaryCacheStats singleton, but only when IsEnabled() returns true.
// NOTE(review): these expand to a bare `if (...) { ... }`; used as the body of an un-braced if/else they
// would capture the following `else`. Call sites are outside this view - confirm before restructuring.
#define OGL_BINARYCACHE_STATS_MARKBEGINCOMPILE(x) if(FOpenGLBinaryCacheStats::IsEnabled()) { FOpenGLBinaryCacheStats::Get().MarkStartTime(x);}
#define OGL_BINARYCACHE_STATS_MARKCOMPILED(x) if(FOpenGLBinaryCacheStats::IsEnabled()) { FOpenGLBinaryCacheStats::Get().MarkCompileFinishTime(x);}
#define OGL_BINARYCACHE_STATS_MARKBINARYCACHEMISS(x,y) if(FOpenGLBinaryCacheStats::IsEnabled()) { FOpenGLBinaryCacheStats::Get().MarkCacheMissedTime(x,y);}
#define OGL_BINARYCACHE_STATS_MARKBINARYCACHEUSE(x) if(FOpenGLBinaryCacheStats::IsEnabled()) { FOpenGLBinaryCacheStats::Get().MarkCacheUse(x);}
#define OGL_BINARYCACHE_STATS_LOG() if(FOpenGLBinaryCacheStats::IsEnabled()) { FOpenGLBinaryCacheStats::Get().LogStats();}
/**
 * Singleton that records, per program key, when a compile started, when it finished and when the
 * program was first used, and derives hit / early-use / miss counters from those timestamps.
 * A timestamp of 0 means "not recorded yet". Every public method takes CacheStatsCS.
 */
class FOpenGLBinaryCacheStats
{
public:
// True when the process command line contains -openglprecachestats; evaluated once and cached.
inline static bool IsEnabled()
{
static bool bEnabled = FParse::Param(FCommandLine::Get(), TEXT("openglprecachestats"));
return bEnabled;
}
// Accessor for the single function-local instance.
inline static FOpenGLBinaryCacheStats& Get() { static FOpenGLBinaryCacheStats CacheStats; return CacheStats; }
// Records (or overwrites) the compile start time for ProgramKey.
void MarkStartTime(const FOpenGLProgramKey& ProgramKey)
{
FScopeLock Lock(&CacheStatsCS);
KeyToTimes.FindOrAdd(ProgramKey).StartTime = FPlatformTime::Seconds();
}
// Records the compile finish time, but only for a program that has neither been used nor already marked compiled.
void MarkCompileFinishTime(const FOpenGLProgramKey& ProgramKey)
{
FScopeLock Lock(&CacheStatsCS);
FProgramUseTimes& ProgramTime = KeyToTimes.FindOrAdd(ProgramKey);
if (!ProgramTime.UsedTime && !ProgramTime.CompileTime)
{
double CurrentTime = FPlatformTime::Seconds();
if (!ProgramTime.StartTime)
{
// No start was ever recorded: count the program as preloaded; its compile span below is then zero.
ProgramTime.StartTime = CurrentTime;
PreloadedBinaries++;
}
ProgramTime.CompileTime = CurrentTime;
CombinedCompileTime += ProgramTime.CompileTime - ProgramTime.StartTime;
}
}
// Records a first use that was not served by the binary cache. Only the first use of a key is classified.
void MarkCacheMissedTime(const FOpenGLProgramKey& ProgramKey, bool bLogOnFirstUse)
{
FScopeLock Lock(&CacheStatsCS);
FProgramUseTimes& ProgramTimes = KeyToTimes.FindOrAdd(ProgramKey);
double CurrentTime = FPlatformTime::Seconds();
if (!ProgramTimes.UsedTime)
{
// Something changed, so the next LogStats() call should emit a line.
bLogMe = true;
ProgramTimes.UsedTime = CurrentTime;
if (!ProgramTimes.StartTime)
{
// Never started compiling: a plain miss.
TotalMisses++;
UE_CLOG(bLogOnFirstUse, LogRHI, Log, TEXT("BinaryCacheUsage: Program %s was not in the binary cache when first used."), *ProgramKey.ToString());
}
else if (!ProgramTimes.CompileTime)
{
// Compile started but had not finished: an early use.
TotalEarlyUses++;
double TimeToUse = CurrentTime - ProgramTimes.StartTime;
CombinedEarlyTimeToUse += TimeToUse;
UE_CLOG(bLogOnFirstUse, LogRHI, Log, TEXT("BinaryCacheUsage: Program %s was used too early, binary compile was not ready when first used. Span between compile and use: %f"), *ProgramKey.ToString(), (float)TimeToUse);
}
}
}
// Records a first use served from the cache. The key must already be tracked (FindChecked) and,
// on first use, must have both start and compile times recorded (check).
void MarkCacheUse(const FOpenGLProgramKey& ProgramKey)
{
FScopeLock Lock(&CacheStatsCS);
FProgramUseTimes& ProgramTime = KeyToTimes.FindChecked(ProgramKey);
if (!ProgramTime.UsedTime)
{
check(ProgramTime.StartTime && ProgramTime.CompileTime);
double CurrentTime = FPlatformTime::Seconds();
ProgramTime.UsedTime = CurrentTime;
TotalHits++;
}
}
// Emits a summary line if bLogMe is set, then clears bLogMe.
void LogStats()
{
FScopeLock Lock(&CacheStatsCS);
const float AvgEarlyTimeToUse = TotalEarlyUses ? (float)(CombinedEarlyTimeToUse / (double)TotalEarlyUses) : 0.0f;
// Every tracked key that is not an early use, a miss or a preloaded binary is treated as a compiled binary.
const uint32 CompiledBinaries = KeyToTimes.Num() - (TotalEarlyUses + TotalMisses + PreloadedBinaries);
const float AvgCompileTime = CompiledBinaries ? (float)(CombinedCompileTime / (double)(CompiledBinaries)) : 0.0f;
UE_CLOG(bLogMe, LogRHI, Log, TEXT("BinaryCacheUsage: %d programs seen, %d preloaded, %d used in time, %d used before compile finished (avg early miss time span %f), %d programs used were not in the cache. %f avg compile time"),
KeyToTimes.Num(),
PreloadedBinaries,
TotalHits,
TotalEarlyUses,
AvgEarlyTimeToUse,
TotalMisses,
AvgCompileTime
);
bLogMe = false;
}
private:
FCriticalSection CacheStatsCS;
// Timestamps in seconds as returned by FPlatformTime::Seconds(); 0 means not recorded.
struct FProgramUseTimes
{
double StartTime = 0;
double CompileTime = 0;
double UsedTime = 0;
};
TMap< FOpenGLProgramKey, FProgramUseTimes> KeyToTimes;
uint32 TotalMisses = 0; // Num programs marked as used but were not in the cache.
uint32 TotalEarlyUses = 0; // Num program marked as used before their compile had finished.
uint32 TotalHits = 0; // Num programs that had compiled in time for their first used.
uint32 PreloadedBinaries = 0; // Num programs that came pre-loaded from the binary cache.
double CombinedEarlyTimeToUse = 0;
double CombinedCompileTime = 0;
// Set by MarkCacheMissedTime on a first use, cleared by LogStats.
mutable bool bLogMe = false;
};
#else
// Statistics compiled out (OGL_BINARYCACHE_STATS is 0): every helper expands to nothing.
#define OGL_BINARYCACHE_STATS_MARKBEGINCOMPILE(x)
#define OGL_BINARYCACHE_STATS_MARKCOMPILED(x)
#define OGL_BINARYCACHE_STATS_MARKBINARYCACHEMISS(x,y)
#define OGL_BINARYCACHE_STATS_MARKBINARYCACHEUSE(x)
#define OGL_BINARYCACHE_STATS_LOG()
#endif
/**
 * Reports whether the r.PSOPrecaching console variable exists and is non-zero.
 * The variable is looked up once and the pointer is reused on later calls.
 */
bool IsPrecachingEnabled()
{
	static const auto PrecachingCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.PSOPrecaching"));
	if (!PrecachingCVar)
	{
		return false;
	}
	return PrecachingCVar->GetInt() != 0;
}
/** True only when PSO precaching is enabled and r.OpenGL.CacheAllProgramBinaries is set. */
static bool ShouldCacheAllProgramBinaries()
{
	if (!IsPrecachingEnabled())
	{
		return false;
	}
	return GCacheAllProgramBinaries;
}
static uint32 GCurrentDriverProgramBinaryAllocation = 0;
static uint32 GNumPrograms = 0;
static void PrintProgramStats()
{
FPlatformMisc::LowLevelOutputDebugStringf(TEXT(" --- Programs Num: %d, Size: %d \n"), GNumPrograms, GCurrentDriverProgramBinaryAllocation);
}
static FAutoConsoleCommand ConsoleCommandPrintProgramStats(
TEXT("r.OpenGL.PrintProgramStats"),
TEXT("Print to log current program binary stats"),
FConsoleCommandDelegate::CreateStatic(PrintProgramStats)
);
/**
 * Accounts for a newly created GL program: bumps the program counter and, when stats or link
 * verification are compiled in, adds the program's GL_PROGRAM_BINARY_LENGTH to the running totals.
 */
static void SetNewProgramStats(GLuint Program)
{
	VERIFY_GL_SCOPE();

#if STATS || VERIFY_GL_SHADER_LINK
	GLint ProgramBinaryBytes = 0;
	glGetProgramiv(Program, GL_PROGRAM_BINARY_LENGTH, &ProgramBinaryBytes);
#endif

#if STATS
	INC_MEMORY_STAT_BY(STAT_OpenGLProgramBinaryMemory, ProgramBinaryBytes);
	INC_DWORD_STAT(STAT_OpenGLProgramCount);
#endif

	++GNumPrograms;

#if VERIFY_GL_SHADER_LINK
	GCurrentDriverProgramBinaryAllocation += ProgramBinaryBytes;
#endif
}
/**
 * Accounts for a GL program that is about to go away: subtracts its GL_PROGRAM_BINARY_LENGTH from the
 * running totals (when stats or link verification are compiled in) and decrements the program counter.
 */
static void SetDeletedProgramStats(GLuint Program)
{
	VERIFY_GL_SCOPE();

#if STATS || VERIFY_GL_SHADER_LINK
	GLint ProgramBinaryBytes = 0;
	glGetProgramiv(Program, GL_PROGRAM_BINARY_LENGTH, &ProgramBinaryBytes);
#endif

#if STATS
	DEC_MEMORY_STAT_BY(STAT_OpenGLProgramBinaryMemory, ProgramBinaryBytes);
	DEC_DWORD_STAT(STAT_OpenGLProgramCount);
#endif

#if VERIFY_GL_SHADER_LINK
	GCurrentDriverProgramBinaryAllocation -= ProgramBinaryBytes;
#endif

	--GNumPrograms;
}
// Size in bytes of one float4 vector (4 floats of 4 bytes each).
const uint32 SizeOfFloat4 = 16;
// Number of float components in one float4 vector.
const uint32 NumFloatsInFloat4 = 4;
/**
 * Grows this dirty range so that it also covers [NewStartVector, NewStartVector + NewNumVectors).
 * An empty range (NumVectors == 0) simply adopts the new span.
 * NOTE(review): when merging, the stored count is one larger than (end - start) of the union;
 * whether that extra vector is intentional slack cannot be told from this file - confirm before changing.
 */
FORCEINLINE void FOpenGLShaderParameterCache::FRange::MarkDirtyRange(uint32 NewStartVector, uint32 NewNumVectors)
{
	if (NumVectors > 0)
	{
		const uint32 CurrentEnd = StartVector + NumVectors;
		const uint32 IncomingEnd = NewStartVector + NewNumVectors;
		const uint32 MergedStart = FMath::Min(StartVector, NewStartVector);
		const uint32 MergedEnd = FMath::Max(CurrentEnd, IncomingEnd);

		StartVector = MergedStart;
		NumVectors = (MergedEnd - MergedStart) + 1;
	}
	else
	{
		StartVector = NewStartVector;
		NumVectors = NewNumVectors;
	}
}
/**
 * Verify that an OpenGL program has linked successfully.
 *
 * @param Program	GL program object name to query.
 * @return true when GL_LINK_STATUS is GL_TRUE, or when the link failed but
 *         r.OpenGL.IgnoreLinkFailure is 1; false otherwise.
 *
 * On failure an error is logged; the driver info log is included when ReportProgramLinkFailures() allows it.
 */
static bool VerifyLinkedProgram(GLuint Program)
{
	SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLinkVerifyTime);
	VERIFY_GL_SCOPE();

	GLint LinkStatus = 0;
	glGetProgramiv(Program, GL_LINK_STATUS, &LinkStatus);
	if (LinkStatus == GL_TRUE)
	{
		return true;
	}

	if (ReportProgramLinkFailures())
	{
		GLenum LastGLError = glGetError();

		// Start from 0 so a failed query (which leaves the output untouched) cannot make us
		// allocate or free based on an indeterminate length.
		GLint LogLength = 0;
		ANSICHAR DefaultLog[] = "No log";
		ANSICHAR* CompileLog = DefaultLog;
		glGetProgramiv(Program, GL_INFO_LOG_LENGTH, &LogLength);
		if (LogLength > 1)
		{
			CompileLog = (ANSICHAR*)FMemory::Malloc(LogLength);
			// Keep the buffer a valid C string even if the driver writes nothing into it.
			CompileLog[0] = '\0';
			glGetProgramInfoLog(Program, LogLength, NULL, CompileLog);
		}

		UE_LOG(LogRHI, Error, TEXT("Failed to link program. Current total programs: %d program binary bytes, last gl error 0x%X, drvalloc %d\n log:\n%s"),
			GNumPrograms,
			LastGLError,
			GCurrentDriverProgramBinaryAllocation,
			ANSI_TO_TCHAR(CompileLog));

		if (CompileLog != DefaultLog)
		{
			FMemory::Free(CompileLog);
		}
	}
	else
	{
		UE_LOG(LogRHI, Error, TEXT("Failed to link program. Current total programs:%d"), GNumPrograms);
	}

	// if we're required to ignore link failure then we return true here.
	return CVarIgnoreLinkFailure.GetValueOnAnyThread() == 1;
}
// ============================================================================================================================
class FOpenGLCompiledShaderValue
{
const FName CompressionMethod = NAME_Oodle;
public:
FOpenGLCompiledShaderValue()
{
}
~FOpenGLCompiledShaderValue()
{
StatTotalStoredSize -= GlslCode.Num();
StatTotalUncompressedSize -= UncompressedSize == -1 ? GlslCode.Num() : UncompressedSize;
}
GLuint Resource = 0;
TArray<ANSICHAR> GetUncompressedShader() const
{
TArray<ANSICHAR> OutGlslCode;
QUICK_SCOPE_CYCLE_COUNTER(STAT_glUncompressShader);
if (UncompressedSize != -1)
{
OutGlslCode.Empty(UncompressedSize);
OutGlslCode.SetNum(UncompressedSize);
bool bResult = FCompression::UncompressMemory(
CompressionMethod,
(void*)OutGlslCode.GetData(),
UncompressedSize,
(void*)GlslCode.GetData(),
GlslCode.Num());
check(bResult);
}
else
{
OutGlslCode = GlslCode;
}
return OutGlslCode;
}
static TAtomic<uint32> StatTotalStoredSize;
static TAtomic<uint32> StatTotalUncompressedSize;
bool HasCode() const { return !GlslCode.IsEmpty(); };
void CompressShader(const TArray<ANSICHAR>& InGlslCode)
{
static_assert(sizeof(InGlslCode[0]) == sizeof(uint8), "expecting shader code type to be byte.");
check(GlslCode.IsEmpty());
UncompressedSize = InGlslCode.Num();
int32 CompressedSize = FCompression::CompressMemoryBound(CompressionMethod, UncompressedSize);
GlslCode.Empty(CompressedSize);
GlslCode.SetNumUninitialized(CompressedSize);
bool bCompressed = FCompression::CompressMemory(
CompressionMethod,
(void*)GlslCode.GetData(),
CompressedSize,
(void*)InGlslCode.GetData(),
UncompressedSize,
COMPRESS_BiasSpeed);
if (bCompressed)
{
// shrink buffer
GlslCode.SetNum(CompressedSize, EAllowShrinking::Yes);
}
else
{
GlslCode = InGlslCode;
UncompressedSize = -1;
}
StatTotalStoredSize += GlslCode.Num();
StatTotalUncompressedSize += UncompressedSize == -1 ? GlslCode.Num() : UncompressedSize;
//UE_LOG(LogRHI, Warning, TEXT("Shader sizes: %d %d"), StatTotalStoredSize.Load(EMemoryOrder::Relaxed), StatTotalUncompressedSize.Load(EMemoryOrder::Relaxed));
}
private:
TArray<ANSICHAR> GlslCode;
int32 UncompressedSize = -1;
};
// Definitions of the global size counters declared in FOpenGLCompiledShaderValue.
TAtomic<uint32> FOpenGLCompiledShaderValue::StatTotalStoredSize = 0;
TAtomic<uint32> FOpenGLCompiledShaderValue::StatTotalUncompressedSize = 0;

// Shader key -> cached shader entry.
using FOpenGLCompiledShaderCache = TMap<FOpenGLCompiledShaderKey, FOpenGLCompiledShaderValue>;

// Taken by the code in this file around accesses to the compiled shader cache.
static FCriticalSection GCompiledShaderCacheCS;

/** Returns the process-wide compiled shader cache, created on first call. */
static FOpenGLCompiledShaderCache& GetOpenGLCompiledShaderCache()
{
	static FOpenGLCompiledShaderCache CacheInstance;
	return CacheInstance;
}
// ============================================================================================================================
/**
 * Maps a GL shader stage enum to a lowercase display name.
 * @return the stage name, or a null pointer for any stage not listed here.
 */
static const TCHAR* ShaderNameFromShaderType(GLenum ShaderType)
{
	if (ShaderType == GL_VERTEX_SHADER)
	{
		return TEXT("vertex");
	}
	if (ShaderType == GL_FRAGMENT_SHADER)
	{
		return TEXT("fragment");
	}
	if (ShaderType == GL_GEOMETRY_SHADER)
	{
		return TEXT("geometry");
	}
	if (ShaderType == GL_COMPUTE_SHADER)
	{
		return TEXT("compute");
	}
	return NULL;
}
// ============================================================================================================================
namespace
{
/**
 * Appends the C string Source to Dest, where Dest (if non-empty) already ends with a terminator.
 * For a non-empty Dest the text is inserted just before its last element; for an empty Dest the
 * text is copied together with its terminating zero.
 */
inline void AppendCString(TArray<ANSICHAR> & Dest, const ANSICHAR * Source)
{
	if (Dest.Num() <= 0)
	{
		Dest.Append(Source, FCStringAnsi::Strlen(Source) + 1);
		return;
	}
	Dest.Insert(Source, FCStringAnsi::Strlen(Source), Dest.Num() - 1);
}
/**
 * Replaces every occurrence of Source in the zero-terminated text held by Dest with Replacement.
 *
 * @param Dest			Character array holding a zero-terminated string; edited in place.
 * @param Source		Text to look for. An empty string is treated as "nothing to replace".
 * @param Replacement	Text substituted for each match; may be empty.
 *
 * The search resumes after the inserted replacement, so inserted text is never re-scanned.
 * This guarantees termination even when Replacement itself contains Source.
 */
inline void ReplaceCString(TArray<ANSICHAR> & Dest, const ANSICHAR * Source, const ANSICHAR * Replacement)
{
	const int32 SourceLen = FCStringAnsi::Strlen(Source);
	const int32 ReplacementLen = FCStringAnsi::Strlen(Replacement);

	// An empty pattern matches everywhere and would never terminate; an empty array has no text to search.
	if (SourceLen == 0 || Dest.Num() == 0)
	{
		return;
	}

	int32 SearchFrom = 0;
	for (;;)
	{
		// Dest may reallocate on every edit, so always search from a fresh GetData().
		const ANSICHAR* FoundPointer = FCStringAnsi::Strstr(Dest.GetData() + SearchFrom, Source);
		if (FoundPointer == nullptr)
		{
			break;
		}

		const int32 FoundIndex = static_cast<int32>(FoundPointer - Dest.GetData());
		Dest.RemoveAt(FoundIndex, SourceLen);
		Dest.Insert(Replacement, ReplacementLen, FoundIndex);

		// Skip over what was just inserted.
		SearchFrom = FoundIndex + ReplacementLen;
	}
}
/**
 * Returns a pointer to the first '\n' in Text, or to the terminating zero when Text has no '\n'.
 */
inline const ANSICHAR * CStringEndOfLine(const ANSICHAR * Text)
{
	if (const ANSICHAR* NewLine = FCStringAnsi::Strchr(Text, '\n'))
	{
		return NewLine;
	}
	return Text + FCStringAnsi::Strlen(Text);
}
inline bool CStringIsBlankLine(const ANSICHAR * Text)
{
while (!FCharAnsi::IsLinebreak(*Text))
{
if (!FCharAnsi::IsWhitespace(*Text))
{
return false;
}
++Text;
}
return true;
}
/**
 * Counts the non-overlapping occurrences of TargetString in the zero-terminated text held by Source.
 * After each match the search continues directly behind it.
 */
inline int CStringCountOccurances(TArray<ANSICHAR> & Source, const ANSICHAR * TargetString)
{
	const int32 TargetLen = FCStringAnsi::Strlen(TargetString);
	int Matches = 0;
	const ANSICHAR* Cursor = FCStringAnsi::Strstr(Source.GetData(), TargetString);
	while (Cursor != nullptr)
	{
		++Matches;
		Cursor = FCStringAnsi::Strstr(Cursor + TargetLen, TargetString);
	}
	return Matches;
}
/**
 * Moves the leading run of '#' lines and blank lines from Source to the end of the text in Dest.
 *
 * The run ends at the first line that neither starts with '#' nor is blank. The moved text is
 * inserted before Dest's terminator (or becomes Dest's content plus a terminator when Dest is
 * empty), a '\n' is added if the moved text does not end with one, and the run is removed from Source.
 *
 * @return true when anything was moved, false when Source does not start with such a line.
 */
inline bool MoveHashLines(TArray<ANSICHAR> & Dest, TArray<ANSICHAR> & Source)
{
	// Advance line by line while the current line is a '#' line or blank.
	const ANSICHAR* Cursor = Source.GetData();
	while (Cursor[0] == '#' || CStringIsBlankLine(Cursor))
	{
		const ANSICHAR* LineEnd = CStringEndOfLine(Cursor);
		Cursor = (LineEnd[0] == '\n') ? LineEnd + 1 : LineEnd;
	}

	if (!(Cursor > Source.GetData()))
	{
		return false;
	}

	const int32 PrefixLength = Cursor - Source.GetData();
	if (Dest.Num() > 0)
	{
		// Keep Dest's existing terminator as the last element.
		Dest.Insert(Source.GetData(), PrefixLength, Dest.Num() - 1);
	}
	else
	{
		Dest.Append(Source.GetData(), PrefixLength);
		Dest.Append("", 1);
	}

	// Make sure the moved block ends on a line boundary.
	if (Dest.Last(1) != '\n')
	{
		Dest.Insert("\n", 1, Dest.Num() - 1);
	}

	Source.RemoveAt(0, Cursor - Source.GetData());
	return true;
}
}
// make some anon ns functions available to platform extensions
// Externally linked forwarder to the file-local AppendCString(): appends the C string Source to the text in Dest.
void PE_AppendCString(TArray<ANSICHAR> & Dest, const ANSICHAR * Source)
{
AppendCString(Dest, Source);
}
// Externally linked forwarder to the file-local ReplaceCString(): replaces occurrences of Source in Dest with Replacement.
void PE_ReplaceCString(TArray<ANSICHAR> & Dest, const ANSICHAR * Source, const ANSICHAR * Replacement)
{
ReplaceCString(Dest, Source, Replacement);
}
/** Hashes a character array by taking the CRC32 of all of its elements. */
inline uint32 GetTypeHash(FAnsiCharArray const& CharArray)
{
	const ANSICHAR* FirstChar = CharArray.GetData();
	return FCrc::MemCrc32(FirstChar, CharArray.Num() * sizeof(ANSICHAR));
}
/**
 * Helper to verify a compiled shader.
 *
 * @param Resource			GL shader object name to query.
 * @param GlslCodeString	Optional source text; logged alongside the error when non-null.
 * @return true if the shader compiled without errors, or if it failed but
 *         r.OpenGL.IgnoreShaderCompileFailure is 1; false otherwise.
 */
static bool VerifyShaderCompilation(GLuint Resource, const ANSICHAR* GlslCodeString)
{
	VERIFY_GL_SCOPE();
	// Verify that an OpenGL shader has compiled successfully.
	SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderCompileVerifyTime);

	// Initialised so that a query which fails (and leaves the output untouched) reads as "not compiled".
	GLint CompileStatus = 0;
	glGetShaderiv(Resource, GL_COMPILE_STATUS, &CompileStatus);
	if (CompileStatus == GL_TRUE)
	{
		return true;
	}

	if (ReportShaderCompileFailures())
	{
		GLint LogLength = 0;
		ANSICHAR DefaultLog[] = "No log";
		ANSICHAR* CompileLog = DefaultLog;
		glGetShaderiv(Resource, GL_INFO_LOG_LENGTH, &LogLength);
#if PLATFORM_ANDROID
		if ( LogLength == 0 )
		{
			// make it big anyway
			// there was a bug in android 2.2 where glGetShaderiv would return 0 even though there was a error message
			// https://code.google.com/p/android/issues/detail?id=9953
			LogLength = 4096;
		}
#endif
		if (LogLength > 1)
		{
			CompileLog = (ANSICHAR*)FMemory::Malloc(LogLength);
			// The buffer may have been sized by the fallback above; keep it a valid (empty)
			// C string in case the driver has nothing to write.
			CompileLog[0] = '\0';
			glGetShaderInfoLog(Resource, LogLength, NULL, CompileLog);
		}

		if (GlslCodeString)
		{
			UE_LOG(LogRHI,Error,TEXT("Shader:\n%s"), ANSI_TO_TCHAR(GlslCodeString));
		}
		UE_LOG(LogRHI,Error,TEXT("Failed to compile shader. Compile log:\n%s"), ANSI_TO_TCHAR(CompileLog));

		if (CompileLog != DefaultLog)
		{
			FMemory::Free(CompileLog);
		}
	}

	// if we're required to ignore compile failure then we return true here, it will end with link failure.
	return CVarIgnoreShaderCompileFailure.GetValueOnAnyThread() == 1;
}
/**
 * Returns the shader device capabilities, queried once on first call and cached afterwards.
 *
 * The cache is a function-local static initialised from a lambda, so the language guarantees the
 * query runs exactly once even when the first calls race; callers never observe a half-filled
 * structure and the function no longer depends on callers holding a lock.
 */
static const FOpenGLShaderDeviceCapabilities& GetOpenGLShaderDeviceCapabilities()
{
	static const FOpenGLShaderDeviceCapabilities Capabilities = []()
	{
		FOpenGLShaderDeviceCapabilities Queried;
		GetCurrentOpenGLShaderDeviceCapabilities(Queried);
		return Queried;
	}();
	return Capabilities;
}
static void GLSLToPlatform(const FOpenGLCodeHeader& Header, GLenum TypeEnum, FAnsiCharArray& GlslCodeOriginal, FAnsiCharArray& GlslPlatformCodeOUT)
{
const FOpenGLShaderDeviceCapabilities& Capabilities = GetOpenGLShaderDeviceCapabilities();
// get a modified version of the shader based on device capabilities to compile (destructive to GlslCodeOriginal copy)
GLSLToDeviceCompatibleGLSL(GlslCodeOriginal, Header.ShaderName, TypeEnum, Capabilities, GlslPlatformCodeOUT);
}
/**
 * Ensures this shader has a GL shader object, compiling one if the cache has none yet.
 *
 * Looks up (or creates) the cache entry for ShaderCodeKey while holding GCompiledShaderCacheCS.
 * If the entry already carries a shader object it is adopted; otherwise a new object is created
 * from the entry's stored source, compiled, verified and stored back into the entry.
 */
void FOpenGLShader::Compile(GLenum TypeEnum)
{
	VERIFY_GL_SCOPE();
	FScopeLock Lock(&GCompiledShaderCacheCS);

	FOpenGLCompiledShaderValue& CacheEntry = GetOpenGLCompiledShaderCache().FindOrAdd(ShaderCodeKey);
	Resource = CacheEntry.Resource;
	if (Resource != 0)
	{
		// Someone already compiled this source.
		return;
	}

	SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderCompileTime);
	Resource = FOpenGL::CreateShader(TypeEnum);

	TArray<ANSICHAR> SourceText = CacheEntry.GetUncompressedShader();
	const ANSICHAR* SourcePtr = SourceText.GetData();
	// Pass the length without the last array element.
	int32 SourceLength = SourceText.Num() - 1;
	glShaderSource(Resource, 1, (const GLchar**)&SourcePtr, &SourceLength);
	glCompileShader(Resource);

	const bool bSuccessfullyCompiled = VerifyShaderCompilation(Resource, SourcePtr);
	ensure(bSuccessfullyCompiled);

	CacheEntry.Resource = Resource;
}
/**
 * Fills Capabilities with the shader-relevant properties of the current platform/device.
 * The structure is zeroed first, so any field not assigned in the active platform branch stays zero.
 */
void OPENGLDRV_API GetCurrentOpenGLShaderDeviceCapabilities(FOpenGLShaderDeviceCapabilities& Capabilities)
{
FMemory::Memzero(Capabilities);
#if PLATFORM_DESKTOP
Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_Desktop;
// Desktop running in Android GLES compatibility mode is treated as an Android target, with the driver workarounds disabled.
if (FOpenGL::IsAndroidGLESCompatibilityModeEnabled())
{
Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_Android;
Capabilities.bSupportsShaderFramebufferFetch = FOpenGL::SupportsShaderFramebufferFetch();
Capabilities.bRequiresARMShaderFramebufferFetchDepthStencilUndef = false;
Capabilities.bRequiresReadOnlyBuffersWorkaround = false;
Capabilities.MaxVaryingVectors = FOpenGL::GetMaxVaryingVectors();
Capabilities.bRequiresPreciseQualifierWorkaround = false;
}
#elif PLATFORM_ANDROID
// Real Android device: every workaround flag is taken from the FOpenGL queries.
Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_Android;
Capabilities.bSupportsShaderFramebufferFetch = FOpenGL::SupportsShaderFramebufferFetch();
Capabilities.bRequiresReadOnlyBuffersWorkaround = FOpenGL::RequiresReadOnlyBuffersWorkaround();
Capabilities.bRequiresARMShaderFramebufferFetchDepthStencilUndef = FOpenGL::RequiresARMShaderFramebufferFetchDepthStencilUndef();
Capabilities.MaxVaryingVectors = FOpenGL::GetMaxVaryingVectors();
Capabilities.bRequiresDisabledEarlyFragmentTests = FOpenGL::RequiresDisabledEarlyFragmentTests();
Capabilities.bRequiresPreciseQualifierWorkaround = FOpenGL::RequiresPreciseQualifierWorkaround();
#elif PLATFORM_IOS
Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_iOS;
#else
FOpenGL::PE_GetCurrentOpenGLShaderDeviceCapabilities(Capabilities); // platform extension
#endif
// Set on every platform, after the platform-specific part.
Capabilities.MaxRHIShaderPlatform = GMaxRHIShaderPlatform;
}
/**
 * Builds the GLSL text that is actually handed to the driver.
 *
 * A platform extension may take over completely. Otherwise GlslCode is assembled as: version line
 * and engine defines, the '#' lines moved out of the incoming source, further defines/layouts, and
 * finally the remaining (edited) incoming source.
 *
 * @param GlslCodeOriginal	Incoming zero-terminated GLSL; modified in place (text is removed/replaced).
 * @param ShaderName		Only used when DEBUG_GL_SHADERS is set, to emit a name comment.
 * @param TypeEnum			GL shader stage enum.
 * @param Capabilities		Device capabilities steering the workarounds.
 * @param GlslCode			Output; text is appended to whatever it already holds.
 */
void OPENGLDRV_API GLSLToDeviceCompatibleGLSL(FAnsiCharArray& GlslCodeOriginal, const FString& ShaderName, GLenum TypeEnum, const FOpenGLShaderDeviceCapabilities& Capabilities, FAnsiCharArray& GlslCode)
{
if (FOpenGL::PE_GLSLToDeviceCompatibleGLSL(GlslCodeOriginal, ShaderName, TypeEnum, Capabilities, GlslCode))
{
return; // platform extension overrides
}
GlslCode.Reserve(GlslCodeOriginal.Num());
// Whether we need to emit mobile multi-view code or not.
const bool bEmitMobileMultiView = (FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), "gl_ViewID_OVR") != nullptr);
// Whether we need to emit texture external code or not.
const bool bEmitTextureExternal = (FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), "samplerExternalOES") != nullptr);
// NOTE(review): GlslCodeAfterExtensions is never filled in this function, so the placeholder
// replacement at the end of the function cannot run as written.
FAnsiCharArray GlslCodeAfterExtensions;
const ANSICHAR* GlslPlaceHolderAfterExtensions = "// end extensions";
bool bGlslCodeHasExtensions = CStringCountOccurances(GlslCodeOriginal, GlslPlaceHolderAfterExtensions) == 1;
if (Capabilities.TargetPlatform == EOpenGLShaderTargetPlatform::OGLSTP_Android)
{
// Emit the version line first: 320 es if the source asks for it, otherwise 310 es,
// and strip that same version line from the source.
const ANSICHAR* ESVersion = "#version 320 es";
bool FoundVersion = (FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), ESVersion)) != nullptr;
if (!FoundVersion)
{
ESVersion = "#version 310 es";
}
AppendCString(GlslCode, ESVersion);
AppendCString(GlslCode, "\n");
ReplaceCString(GlslCodeOriginal, ESVersion, "");
AppendCString(GlslCode, "#define fma(A, B, C) ((A) * (B) + (C))\n");
}
if (Capabilities.bRequiresPreciseQualifierWorkaround)
{
// Disable use of 'precise' qualifier
AppendCString(GlslCode, "#define precise\n");
}
if (Capabilities.bRequiresReadOnlyBuffersWorkaround)
{
ReplaceCString(GlslCodeOriginal, "readonly buffer", "buffer");
}
if (TypeEnum == GL_FRAGMENT_SHADER && Capabilities.bRequiresDisabledEarlyFragmentTests)
{
ReplaceCString(GlslCodeOriginal, "layout(early_fragment_tests) in;", "");
}
// The incoming glsl may have preprocessor code that is dependent on defines introduced via the engine.
// This is the place to insert such engine preprocessor defines, immediately after the glsl version declaration.
if (TypeEnum == GL_FRAGMENT_SHADER)
{
if (FOpenGL::SupportsPixelLocalStorage() && FOpenGL::SupportsShaderDepthStencilFetch())
{
AppendCString(GlslCode, "#define UE_MRT_PLS 1\n");
}
else if(FOpenGL::SupportsShaderMRTFramebufferFetch())
{
AppendCString(GlslCode, "#define UE_MRT_FRAMEBUFFER_FETCH 1\n");
}
}
if (bEmitTextureExternal)
{
// remove comment so MoveHashLines works as intended
ReplaceCString(GlslCodeOriginal, "// Uses samplerExternalOES", "");
MoveHashLines(GlslCode, GlslCodeOriginal);
if (GSupportsImageExternal)
{
AppendCString(GlslCode, "\n\n");
#if PLATFORM_ANDROID
FOpenGL::EImageExternalType ImageExternalType = FOpenGL::GetImageExternalType();
// The first two cases emit the same extension line; only the ESSL300 variant differs.
switch (ImageExternalType)
{
case FOpenGL::EImageExternalType::ImageExternal100:
AppendCString(GlslCode, "#extension GL_OES_EGL_image_external : require\n");
break;
case FOpenGL::EImageExternalType::ImageExternal300:
AppendCString(GlslCode, "#extension GL_OES_EGL_image_external : require\n");
break;
case FOpenGL::EImageExternalType::ImageExternalESSL300:
// GL_OES_EGL_image_external_essl3 is only compatible with ES 3.x
AppendCString(GlslCode, "#extension GL_OES_EGL_image_external_essl3 : require\n");
break;
}
#else
AppendCString(GlslCode, "#extension GL_OES_EGL_image_external : require\n");
#endif
AppendCString(GlslCode, "\n\n");
}
else
{
// Strip out texture external for devices that don't support it.
AppendCString(GlslCode, "#define samplerExternalOES sampler2D\n");
}
}
if (bEmitMobileMultiView)
{
MoveHashLines(GlslCode, GlslCodeOriginal);
if (GSupportsMobileMultiView)
{
AppendCString(GlslCode, "\n\n");
AppendCString(GlslCode, "#extension GL_OVR_multiview2 : enable\n");
AppendCString(GlslCode, "\n\n");
}
else
{
// Strip out multi-view for devices that don't support it.
AppendCString(GlslCode, "#define gl_ViewID_OVR 0\n");
}
}
// Move version tag & extensions before beginning all other operations
MoveHashLines(GlslCode, GlslCodeOriginal);
#if DEBUG_GL_SHADERS
// Debug aid: tag the generated source with the shader name.
if (ShaderName.IsEmpty() == false)
{
AppendCString(GlslCode, "// ");
AppendCString(GlslCode, TCHAR_TO_ANSI(ShaderName.GetCharArray().GetData()));
AppendCString(GlslCode, "\n");
}
#endif
if (bEmitMobileMultiView && GSupportsMobileMultiView && TypeEnum == GL_VERTEX_SHADER)
{
AppendCString(GlslCode, "\n\n");
AppendCString(GlslCode, "layout(num_views = 2) in;\n");
AppendCString(GlslCode, "\n\n");
}
// Every stage except compute gets the HLSLCC_DX11ClipSpace define: 0 with clip control support, 1 without.
if (TypeEnum != GL_COMPUTE_SHADER)
{
if (FOpenGL::SupportsClipControl())
{
AppendCString(GlslCode, "#define HLSLCC_DX11ClipSpace 0 \n");
}
else
{
AppendCString(GlslCode, "#define HLSLCC_DX11ClipSpace 1 \n");
}
}
// Append the possibly edited shader to the one we will compile.
// This is to make it easier to debug as we can see the whole
// shader source.
AppendCString(GlslCode, "\n\n");
AppendCString(GlslCode, GlslCodeOriginal.GetData());
if (bGlslCodeHasExtensions && GlslCodeAfterExtensions.Num() > 0)
{
// the initial code has an #extension chunk. replace the placeholder line
ReplaceCString(GlslCode, GlslPlaceHolderAfterExtensions, GlslCodeAfterExtensions.GetData());
}
}
/**
 * Constructs a shader from serialized shader code.
 *
 * Deserializes the code header (filling SRT), validates its markers against TypeEnum, copies the
 * bindings, derives the cache key from the GLSL text and makes sure the compiled shader cache holds
 * the device-specific source for that key. No GL shader object is created here.
 *
 * @param Code		Serialized shader: header followed by zero-terminated GLSL text.
 * @param Hash		Not used in this constructor.
 * @param TypeEnum	GL shader stage enum the code is expected to match.
 * @param SRT		Shader resource table filled by the header serialization.
 * @param RHIShader	Owning RHI shader; receives static uniform buffer slots and, optionally, the debug name.
 */
FOpenGLShader::FOpenGLShader(TArrayView<const uint8> Code, const FSHAHash& Hash, GLenum TypeEnum, FShaderResourceTable& SRT, FRHIShader* RHIShader)
{
FMemory::Memzero(&Bindings, sizeof(Bindings));
FShaderCodeReader ShaderCode(Code);
FMemoryReaderView Ar(Code, true);
Ar.SetLimitSize(ShaderCode.GetActualShaderCodeSize());
FOpenGLCodeHeader Header = { 0 };
Header.Serialize(Ar, SRT);
// 0x474c534c is ASCII "GLSL"; the frequency markers are ASCII "VS", "PS", "GS" and "CS".
if (Header.GlslMarker != 0x474c534c
|| (TypeEnum == GL_VERTEX_SHADER && Header.FrequencyMarker != 0x5653)
|| (TypeEnum == GL_FRAGMENT_SHADER && Header.FrequencyMarker != 0x5053)
|| (TypeEnum == GL_GEOMETRY_SHADER && Header.FrequencyMarker != 0x4753)
|| (TypeEnum == GL_COMPUTE_SHADER && Header.FrequencyMarker != 0x4353)
)
{
UE_LOG(LogRHI, Fatal,
TEXT("Corrupt shader bytecode. GlslMarker=0x%08x FrequencyMarker=0x%04x"),
Header.GlslMarker,
Header.FrequencyMarker
);
return;
}
Bindings = Header.Bindings;
UniformBuffersCopyInfo = Header.UniformBuffersCopyInfo;
UE::RHICore::InitStaticUniformBufferSlots(RHIShader);
// The GLSL text starts where the header serialization stopped.
int32 CodeOffset = Ar.Tell();
// The code as given to us.
// put back the 'original code crc' in to cache key
// pull back out the modified glsl.
FAnsiCharArray GlslCodeOriginal;
AppendCString(GlslCodeOriginal, (ANSICHAR*)Code.GetData() + CodeOffset);
// Cache key: stage, length and CRC of the unmodified source (terminator included in both).
uint32 CodeCRC = FCrc::MemCrc32(GlslCodeOriginal.GetData(), GlslCodeOriginal.Num());
ShaderCodeKey = FOpenGLCompiledShaderKey(TypeEnum, GlslCodeOriginal.Num(), CodeCRC);
if (TypeEnum == GL_FRAGMENT_SHADER && FOpenGL::SupportsShaderFramebufferFetch())
{
// _Globals_gl_LastFragColor should only exist when 'FramebufferFetchGLES2()' is being used, not for MRT/deferred
if (FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), "_Globals_gl_LastFragColor") != nullptr)
{
bUsesProgrammableBlending = true;
}
}
FAnsiCharArray GlslCodeFinal;
{
FScopeLock Lock(&GCompiledShaderCacheCS);
FOpenGLCompiledShaderValue& FoundShader = GetOpenGLCompiledShaderCache().FindOrAdd(ShaderCodeKey);
Resource = FoundShader.Resource;
// First time this key is seen: convert the source for this device and store it (compressed) in the cache.
if (FoundShader.Resource == 0 && !FoundShader.HasCode())
{
GLSLToPlatform(Header, TypeEnum, GlslCodeOriginal, GlslCodeFinal);
FoundShader.CompressShader(GlslCodeFinal);
}
// With debug shaders we insert a shader name into the source and that can make it unique failing CRC check
#if (UE_BUILD_DEBUG || UE_BUILD_DEVELOPMENT) && !DEBUG_GL_SHADERS
// This else belongs to the if above and exists only in debug/development builds:
// for an already cached key, regenerate the source and require it to match the cached text byte for byte.
else
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_GLCheckShaderCodeCRC);
GLSLToPlatform(Header, TypeEnum, GlslCodeOriginal, GlslCodeFinal);
TArray<ANSICHAR> FoundShaderCode = FoundShader.GetUncompressedShader();
if (FoundShaderCode.Num() != GlslCodeFinal.Num()
|| FMemory::Memcmp(FoundShaderCode.GetData(), GlslCodeFinal.GetData(), FoundShaderCode.Num())
)
{
UE_LOG(LogRHI, Fatal, TEXT("SHADER CRC CLASH!"));
}
}
#endif
}
#if DEBUG_GL_SHADERS
// Debug builds keep the final source on the shader; generate it now if the cache path above did not.
if (GlslCodeFinal.Num() == 0)
{
GLSLToPlatform(Header, TypeEnum, GlslCodeOriginal, GlslCodeFinal);
check(GlslCodeFinal.Num());
}
GlslCode = MoveTemp(GlslCodeFinal);
GlslCodeString = GlslCode.GetData();
#endif
#if RHI_INCLUDE_SHADER_DEBUG_DATA
RHIShader->Debug.ShaderName = ShaderCode.FindOptionalData(FShaderCodeName::Key);
#endif
// The shader is compiled when we link program
}
// Vertex stage: hands the bytecode to the shared FOpenGLShader constructor, tagged as GL_VERTEX_SHADER.
FOpenGLVertexShader::FOpenGLVertexShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
	: FOpenGLShader(Code, Hash, GL_VERTEX_SHADER, ShaderResourceTable, this)
{
}
// Compiles the vertex shader only while no GL resource has been assigned to it yet.
void FOpenGLVertexShader::ConditionalyCompile()
{
	if (Resource != 0)
	{
		return;
	}

	Compile(GL_VERTEX_SHADER);
}
// Pixel stage: hands the bytecode to the shared FOpenGLShader constructor, tagged as GL_FRAGMENT_SHADER.
FOpenGLPixelShader::FOpenGLPixelShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
	: FOpenGLShader(Code, Hash, GL_FRAGMENT_SHADER, ShaderResourceTable, this)
{
}
// Compiles the pixel shader only while no GL resource has been assigned to it yet.
void FOpenGLPixelShader::ConditionalyCompile()
{
	if (Resource != 0)
	{
		return;
	}

	Compile(GL_FRAGMENT_SHADER);
}
// Geometry stage: hands the bytecode to the shared FOpenGLShader constructor, tagged as GL_GEOMETRY_SHADER.
FOpenGLGeometryShader::FOpenGLGeometryShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
	: FOpenGLShader(Code, Hash, GL_GEOMETRY_SHADER, ShaderResourceTable, this)
{
}
// Compiles the geometry shader only while no GL resource has been assigned to it yet.
void FOpenGLGeometryShader::ConditionalyCompile()
{
	if (Resource != 0)
	{
		return;
	}

	Compile(GL_GEOMETRY_SHADER);
}
// Compute stage: hands the bytecode to the shared FOpenGLShader constructor, tagged as GL_COMPUTE_SHADER.
FOpenGLComputeShader::FOpenGLComputeShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
	: FOpenGLShader(Code, Hash, GL_COMPUTE_SHADER, ShaderResourceTable, this)
{
}
// Compiles the compute shader only while no GL resource has been assigned to it yet.
void FOpenGLComputeShader::ConditionalyCompile()
{
	if (Resource != 0)
	{
		return;
	}

	Compile(GL_COMPUTE_SHADER);
}
// Creates an OpenGL vertex shader object from serialized shader code and returns it as an RHI reference.
FVertexShaderRHIRef FOpenGLDynamicRHI::RHICreateVertexShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
{
	FOpenGLVertexShader* const NewShader = new FOpenGLVertexShader(Code, Hash);
	return NewShader;
}
// Creates an OpenGL pixel shader object from serialized shader code and returns it as an RHI reference.
FPixelShaderRHIRef FOpenGLDynamicRHI::RHICreatePixelShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
{
	FOpenGLPixelShader* const NewShader = new FOpenGLPixelShader(Code, Hash);
	return NewShader;
}
// Creates an OpenGL geometry shader object from serialized shader code and returns it as an RHI reference.
FGeometryShaderRHIRef FOpenGLDynamicRHI::RHICreateGeometryShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
{
	FOpenGLGeometryShader* const NewShader = new FOpenGLGeometryShader(Code, Hash);
	return NewShader;
}
// Marks every parameter cache in one stage range as dirty.
// UpdateCompute selects [SHADER_STAGE_COMPUTE, NUM_SHADER_STAGES); otherwise the range is
// [SHADER_STAGE_VERTEX, NUM_NON_COMPUTE_SHADER_STAGES). ShaderParameters is indexed by stage.
static void MarkShaderParameterCachesDirty(FOpenGLShaderParameterCache* ShaderParameters, bool UpdateCompute)
{
	VERIFY_GL_SCOPE();

	int32 FirstStage = CrossCompiler::SHADER_STAGE_VERTEX;
	int32 StageLimit = CrossCompiler::NUM_NON_COMPUTE_SHADER_STAGES;
	if (UpdateCompute)
	{
		FirstStage = CrossCompiler::SHADER_STAGE_COMPUTE;
		StageLimit = CrossCompiler::NUM_SHADER_STAGES;
	}

	for (int32 StageIndex = FirstStage; StageIndex < StageLimit; ++StageIndex)
	{
		ShaderParameters[StageIndex].MarkAllDirty();
	}
}
// Binds NumUniformBuffers uniform buffers to consecutive GL_UNIFORM_BUFFER binding points starting at
// FirstUniformBuffer. Emulated uniform buffers are skipped. A null entry is bound to a lazily created
// zero-filled dummy buffer. A binding point is only re-bound when ForceUpdate is set or the cached
// buffer/offset for that point differs from what is requested.
void FOpenGLDynamicRHI::BindUniformBufferBase(int32 NumUniformBuffers, FRHIUniformBuffer** BoundUniformBuffers, uint32* DynamicOffsets, uint32 FirstUniformBuffer, bool ForceUpdate)
{
	SCOPE_CYCLE_COUNTER_DETAILED(STAT_OpenGLUniformBindTime);
	VERIFY_GL_SCOPE();
	checkSlow(IsInRenderingThread() || IsInRHIThread());

	for (int32 SlotOffset = 0; SlotOffset < NumUniformBuffers; ++SlotOffset)
	{
		GLuint GLBuffer = 0;
		uint32 BindOffset = 0;
		uint32 BindSize = ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE;
		int32 BindingPoint = FirstUniformBuffer + SlotOffset;

		if (FRHIUniformBuffer* const RHIBuffer = BoundUniformBuffers[SlotOffset])
		{
			FOpenGLUniformBuffer* const GLUniformBuffer = ((FOpenGLUniformBuffer*)RHIBuffer);
			if (GLUniformBuffer->bIsEmulatedUniformBuffer)
			{
				// Emulated buffers are not bound through glBindBufferRange.
				continue;
			}

			GLBuffer = GLUniformBuffer->Resource;
			BindSize = GLUniformBuffer->RangeSize;
			BindOffset = GLUniformBuffer->Offset + DynamicOffsets[SlotOffset];

			// make sure range is within bounds of the buffer
			ensure(GLUniformBuffer->AllocatedSize >= (BindOffset + BindSize));
		}
		else
		{
			// Nothing bound in this slot: fall back to the shared dummy buffer, creating it on first use.
			if (PendingState.ZeroFilledDummyUniformBuffer == 0)
			{
				void* ZeroedMemory = FMemory::Malloc(ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE);
				FMemory::Memzero(ZeroedMemory, ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE);

				FOpenGL::GenBuffers(1, &PendingState.ZeroFilledDummyUniformBuffer);
				check(PendingState.ZeroFilledDummyUniformBuffer != 0);
				CachedBindUniformBuffer(PendingState.ZeroFilledDummyUniformBuffer);
				glBufferData(GL_UNIFORM_BUFFER, ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE, ZeroedMemory, GL_STATIC_DRAW);

				FMemory::Free(ZeroedMemory);
				OpenGLBufferStats::UpdateUniformBufferStats(ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE, true);
			}

			GLBuffer = PendingState.ZeroFilledDummyUniformBuffer;
		}

		const bool bBufferChanged = (GLBuffer != 0) && (ContextState.UniformBuffers[BindingPoint] != GLBuffer);
		const bool bOffsetChanged = ContextState.UniformBufferOffsets[BindingPoint] != BindOffset;
		if (ForceUpdate || bBufferChanged || bOffsetChanged)
		{
			FOpenGL::BindBufferRange(GL_UNIFORM_BUFFER, BindingPoint, GLBuffer, BindOffset, BindSize);
			ContextState.UniformBuffers[BindingPoint] = GLBuffer;
			ContextState.UniformBufferOffsets[BindingPoint] = BindOffset;
			ContextState.UniformBufferBound = GLBuffer;	// yes, calling glBindBufferRange also changes uniform buffer binding.
		}
	}
}
// ============================================================================================================================
// Small fixed-size scratch buffer used to build GLSL uniform names such as "vu_h", "ps3" or "ci12".
struct FOpenGLUniformName
{
	// Zero-initialized name storage; callers write the prefix characters directly.
	ANSICHAR Buffer[10] {};

	/**
	 * Helper for constructing strings of the form XXXXX##.
	 * Writes Index as one or two decimal digits into Buffer starting at Offset, then null-terminates.
	 * @param Offset - Offset into Buffer at which to set the number.
	 * @param Index - Number to set. Must be in the range [0,100).
	 * @return Pointer to the null terminator written after the last digit, so callers can append a suffix.
	 */
	ANSICHAR* SetIndex(int32 Offset, int32 Index)
	{
		check(Index >= 0 && Index < 100);

		// The digits and the terminator must both land inside Buffer.
		const int32 NumDigits = (Index >= 10) ? 2 : 1;
		check(Offset >= 0 && Offset + NumDigits < (int32)(sizeof(Buffer) / sizeof(Buffer[0])));

		ANSICHAR* Str = Buffer + Offset;
		if (Index >= 10)
		{
			*Str++ = '0' + (ANSICHAR)(Index / 10);
		}
		*Str++ = '0' + (ANSICHAR)(Index % 10);
		*Str = '\0';
		return Str;
	}
};
// ============================================================================================================================
// State shared by every linked GL program: its key, the GL program name and the texture/UAV units
// that the program's shader stages were found to use.
class FOpenGLLinkedProgramBase
{
public:
	FOpenGLProgramKey const ProgramKey;

	// One bit per combined texture image unit / combined UAV unit, all cleared initially.
	TBitArray<> TextureStageNeeds { false, FOpenGL::GetMaxCombinedTextureImageUnits() };
	TBitArray<> UAVStageNeeds { false, FOpenGL::GetMaxCombinedUAVUnits() };

	// Highest unit index recorded so far; -1 while none has been recorded.
	int32 MaxTextureStage = -1;
	int32 MaxUAVUnitUsed = -1;

	GLuint Program = 0;
	bool bDrawn = false;

	// Asks GL for GL_PROGRAM_BINARY_LENGTH of Program. Requires a non-zero program and a positive length.
	int32 GetProgramBinarySize() const
	{
		check(Program);

		GLint ReportedLength = -1;
		glGetProgramiv(Program, GL_PROGRAM_BINARY_LENGTH, &ReportedLength);

		check(ReportedLength > 0);
		return ReportedLength;
	}

protected:
	FOpenGLLinkedProgramBase(FOpenGLProgramKey const& ProgramKey, GLuint Program)
		: ProgramKey(ProgramKey)
		, Program(Program)
	{
	}
};
// A linked GL program together with the per-stage binding information needed to feed it.
// The stage configuration lives in a variant that is empty until UpdateShaders() supplies the shaders,
// or after DeleteGLResources() has run.
class FOpenGLLinkedProgram final : public FOpenGLLinkedProgramBase
{
public:
// Location of one packed uniform array inside the program, plus its type identifiers.
struct FPackedUniformInfo
{
GLint Location;
uint8 ArrayType; // OGL_PACKED_ARRAYINDEX_TYPE
uint8 Index; // OGL_PACKED_INDEX_TYPE
};
// Binding data for a single shader stage of the program.
struct FShaderStage
{
FOpenGLShaderBindings Bindings;
// Packed Uniform Arrays (regular globals); array elements per precision/type
TArray<FPackedUniformInfo> PackedUniformInfos;
// Packed Uniform Buffers; outer array is per Uniform Buffer; inner array is per precision/type
TArray<TArray<FPackedUniformInfo>> PackedUniformBufferInfos;
// Holds the unique ID of the last uniform buffer uploaded to the program; since we don't reuse uniform buffers
// (can't modify existing ones), we use this as a check for dirty/need to mem copy on Mobile
mutable TArray<uint32> LastEmulatedUniformBufferSet;
FShaderStage(FOpenGLLinkedProgramBase& ProgramBase, FOpenGLShader const& Shader, CrossCompiler::EShaderStage const Stage, uint32 const FirstUniformBuffer);
};
// Stage set of a graphics program: vertex and pixel are always present, geometry is optional.
struct FGraphicsProgram
{
FShaderStage Vertex;
FShaderStage Pixel;
TOptional<FShaderStage> Geometry;
bool bUsesProgrammableBlending;
// Uniform buffer binding points are assigned consecutively: vertex buffers first, then pixel, then geometry.
FGraphicsProgram(FOpenGLLinkedProgramBase& ProgramBase, FOpenGLVertexShader* VertexShader, FOpenGLPixelShader* PixelShader, FOpenGLGeometryShader* GeometryShader)
: Vertex(ProgramBase, *VertexShader, CrossCompiler::SHADER_STAGE_VERTEX, 0)
, Pixel(ProgramBase, *PixelShader, CrossCompiler::SHADER_STAGE_PIXEL, Vertex.Bindings.NumUniformBuffers)
, Geometry()
, bUsesProgrammableBlending(PixelShader->bUsesProgrammableBlending)
{
if (GeometryShader)
{
Geometry.Emplace(ProgramBase , *GeometryShader, CrossCompiler::SHADER_STAGE_GEOMETRY, Vertex.Bindings.NumUniformBuffers + Pixel.Bindings.NumUniformBuffers);
}
}
};
// Stage set of a compute program: a single compute stage whose uniform buffers start at binding 0.
struct FComputeProgram
{
FShaderStage Compute;
FComputeProgram(FOpenGLLinkedProgramBase& ProgramBase, FOpenGLComputeShader* ComputeShader)
: Compute(ProgramBase, *ComputeShader, CrossCompiler::SHADER_STAGE_COMPUTE, 0)
{}
};
private:
// Empty until the shaders are known; then exactly one of graphics or compute.
TVariant<FEmptyVariantState
, FGraphicsProgram
, FComputeProgram
> Config;
public:
// Accessors for the active configuration; use IsGraphics()/IsCompute() to find out which one is held.
FGraphicsProgram const& GetGraphicsProgram() const { return Config.Get<FGraphicsProgram>(); }
FComputeProgram const& GetComputeProgram () const { return Config.Get<FComputeProgram >(); }
bool IsGraphics() const { return Config.IsType<FGraphicsProgram>(); }
bool IsCompute () const { return Config.IsType<FComputeProgram >(); }
// TODO: This should be stored within the lru.
class FLRUInfo
{
public:
// ID to LRU (if used) allows quick access when updating LRU status.
FSetElementId LRUNode;
// cached binary used to create this program.
TUniqueObj<FOpenGLProgramBinary> CachedProgramBinary;
// Records the current GFrameNumber as the last frame this program was touched.
void Touch() { LastTouchedFrame = GFrameNumber; }
uint32 LastTouchedFrame = 0;
} LRUInfo;
// Add a program without a valid config. (partially initialized)
FOpenGLLinkedProgram(const FOpenGLProgramKey& InProgramKey, GLuint InProgram);
FOpenGLLinkedProgram(FOpenGLVertexShader* VertexShader, FOpenGLPixelShader* PixelShader, FOpenGLGeometryShader* GeometryShader);
FOpenGLLinkedProgram(FOpenGLComputeShader* ComputeShader);
~FOpenGLLinkedProgram()
{
DeleteGLResources();
}
// Drops the stage configuration and deletes the GL program if one exists. Leaves Program == 0.
void DeleteGLResources()
{
VERIFY_GL_SCOPE();
Config.Emplace<FEmptyVariantState>();
if (Program != 0)
{
SetDeletedProgramStats(Program);
FOpenGL::DeleteProgramPipelines(1, &Program);
Program = 0;
}
}
// Supplies the shaders for a program that currently has no configuration.
// TProgramType is the configuration to build (FGraphicsProgram or FComputeProgram); Args are forwarded
// to its constructor after *this. Does nothing when a configuration is already present.
template <typename TProgramType, typename... TArgs>
void UpdateShaders(TArgs&&... Args)
{
#if DO_CHECK
// The key of the provided RHI shaders should match the key this linked program was created with
FOpenGLProgramKey const LocalKey = { Forward<TArgs>(Args)... };
check(ProgramKey == LocalKey);
#endif
if (Config.IsType<FEmptyVariantState>())
{
// We now have the config for this program, we must configure the program for use.
ensure(VerifyLinkedProgram(Program));
FOpenGL::BindProgramPipeline(Program);
Config.Emplace<TProgramType>(*this, Forward<TArgs>(Args)...);
}
}
// Returns the stage data for Stage. Vertex/pixel/geometry read the graphics configuration and compute
// reads the compute configuration. An unknown stage hits checkNoEntry() and then falls into the vertex case.
FShaderStage const& GetStage(CrossCompiler::EShaderStage Stage) const
{
switch (Stage)
{
default: checkNoEntry(); [[fallthrough]];
case CrossCompiler::EShaderStage::SHADER_STAGE_VERTEX : return GetGraphicsProgram().Vertex;
case CrossCompiler::EShaderStage::SHADER_STAGE_PIXEL : return GetGraphicsProgram().Pixel;
case CrossCompiler::EShaderStage::SHADER_STAGE_GEOMETRY: return *GetGraphicsProgram().Geometry;
case CrossCompiler::EShaderStage::SHADER_STAGE_COMPUTE : return GetComputeProgram ().Compute;
}
}
};
namespace UE
{
	namespace OpenGL
	{
		// Creates a GL program from an uncompressed program binary.
		// Layout of ProgramBinary: a GLenum binary format followed by the driver's binary payload.
		// ProgramOUT always receives the generated program name; the return value is the result of
		// VerifyLinkedProgram on that program.
		static bool CreateGLProgramFromUncompressedBinary(GLuint& ProgramOUT, const TArrayView<const uint8>& ProgramBinary)
		{
			VERIFY_GL_SCOPE();
			GLuint GLProgramName = 0;
			FOpenGL::GenProgramPipelines(1, &GLProgramName);

			const int32 BinarySize = ProgramBinary.Num();
			// There must be a payload after the format header; otherwise the payload length below
			// would be zero or wrap around as an unsigned value.
			check(BinarySize > (int32)sizeof(GLenum));

			const uint8* ProgramBinaryPtr = ProgramBinary.GetData();

			// BinaryFormat is stored at the start of ProgramBinary array.
			// Copy it out rather than dereferencing a cast pointer: the byte view carries no alignment guarantee.
			GLenum BinaryFormat = 0;
			FMemory::Memcpy(&BinaryFormat, ProgramBinaryPtr, sizeof(GLenum));

			FOpenGL::ProgramBinary(GLProgramName, BinaryFormat, ProgramBinaryPtr + sizeof(GLenum), BinarySize - sizeof(GLenum));
			// UE_LOG(LogRHI, Warning, TEXT("LRU: CreateFromBinary %d, binary format: %x, BinSize: %d"), GLProgramName, BinaryFormat, BinarySize - sizeof(GLenum));

			ProgramOUT = GLProgramName;
			return VerifyLinkedProgram(GLProgramName);
		}

		// Decompresses CompressedProgramBinary and creates a GL program from the result.
		// Returns false when decompression fails (ProgramOUT is left untouched in that case).
		static bool CreateGLProgramFromCompressedBinary(GLuint& ProgramOUT, const TArrayView<const uint8>& CompressedProgramBinary)
		{
			TArray<uint8> UncompressedProgramBinary;

			bool bDecompressSuccess;
			{
				QUICK_SCOPE_CYCLE_COUNTER(STAT_DecompressProgramBinary);
				bDecompressSuccess = UE::OpenGL::UncompressCompressedBinaryProgram(CompressedProgramBinary, UncompressedProgramBinary);
			}

			if (!bDecompressSuccess)
			{
				return false;
			}

			QUICK_SCOPE_CYCLE_COUNTER(STAT_CreateProgramFromBinary);
			return CreateGLProgramFromUncompressedBinary(ProgramOUT, UncompressedProgramBinary);
		}

		// Creates a GL program from a stored program binary, choosing the compressed or uncompressed
		// path according to IsStoringCompressedBinaryPrograms(). New-program stats are recorded on success.
		bool CreateGLProgramFromBinary(GLuint& ProgramOUT, const TArrayView<const uint8>& ProgramBinary)
		{
			SCOPE_CYCLE_COUNTER(STAT_OpenGLCreateProgramFromBinaryTime);

			const bool bSuccess = UE::OpenGL::IsStoringCompressedBinaryPrograms()
				? CreateGLProgramFromCompressedBinary(ProgramOUT, ProgramBinary)
				: CreateGLProgramFromUncompressedBinary(ProgramOUT, ProgramBinary);

			if (bSuccess)
			{
				SetNewProgramStats(ProgramOUT);
			}

			return bSuccess;
		}
	}
}
// Program store that keeps a bounded set of GL programs resident (the LRU) and holds everything else
// as "evicted" entries, i.e. program binaries from which the GL program can be recreated on demand.
class FGLProgramCacheLRU
{
	// One evicted program: the binary needed to recreate it and, if one exists, the linked program
	// object whose GL resources were released.
	class FEvictedGLProgram
	{
		FOpenGLLinkedProgram* LinkedProgram = nullptr;

		// When evicted, the cached binary program is owned here.
		TUniqueObj<FOpenGLProgramBinary> CachedProgramBinary {};

		FORCEINLINE_DEBUGGABLE TArrayView<const uint8> GetProgramBinaryView() const
		{
			return CachedProgramBinary->GetDataView();
		}

	public:
		FEvictedGLProgram() = default;

		// Create an evicted program with the program binary provided.
		// No linked program object exists yet; one is created on restore. ProgramKey is not stored.
		FEvictedGLProgram(const FOpenGLProgramKey& ProgramKey, TUniqueObj<FOpenGLProgramBinary>&& ProgramBinaryIn)
		{
			check(ProgramBinaryIn->IsValid());
			CachedProgramBinary = MoveTemp(ProgramBinaryIn);
		}

		// Evict a live linked program: take (or build) its binary, then release its GL resources.
		FEvictedGLProgram(FOpenGLLinkedProgram* InLinkedProgram)
			: LinkedProgram(InLinkedProgram)
		{
			// If the binary is not available then we must create it.
			const bool bCreateProgramBinary = LinkedProgram->LRUInfo.CachedProgramBinary->GetDataView().Num() == 0;
			if (bCreateProgramBinary)
			{
				// build the binary:
				CachedProgramBinary = TUniqueObj<FOpenGLProgramBinary>(UE::OpenGL::GetProgramBinaryFromGLProgram(LinkedProgram->Program));
			}
			else
			{
				// transfer ownership from the linked program obj.
				CachedProgramBinary = MoveTemp(LinkedProgram->LRUInfo.CachedProgramBinary);
				// The binary should only be available if we're keeping resident, or is mmapped
				check(CVarLRUKeepProgramBinaryResident.GetValueOnAnyThread() == 1 || !CachedProgramBinary->IsOwned());
			}
			LinkedProgram->DeleteGLResources();
		}

		// Recreates the GL program from the stored binary, creating the linked program object first
		// if this entry never had one. Failure to recreate is fatal.
		void RestoreGLProgramFromBinary(const FOpenGLProgramKey& ProgramKey)
		{
			if (LinkedProgram == nullptr)
			{
				LinkedProgram = new FOpenGLLinkedProgram(ProgramKey, 0);
			}

			check(LinkedProgram->Program == 0);
			check(LinkedProgram->LRUInfo.CachedProgramBinary->GetDataView().Num() == 0); // ownership of the binary should have been transferred to us from the linked prog at eviction time.

			const bool bSuccess = UE::OpenGL::CreateGLProgramFromBinary(LinkedProgram->Program, GetProgramBinaryView());
			if (bSuccess)
			{
				OGL_BINARYCACHE_STATS_MARKBINARYCACHEUSE(ProgramKey);

				// Always keep any mmapped data resident.
				if (CVarLRUKeepProgramBinaryResident.GetValueOnAnyThread() || !CachedProgramBinary->IsOwned())
				{
					// avoid destruction the program binary by passing the binary back to the linked program.
					LinkedProgram->LRUInfo.CachedProgramBinary = MoveTemp(CachedProgramBinary);
				}
			}
			else
			{
				// Log enough about the binary to identify it before raising the fatal error.
				uint32 ProgramCRC = FCrc::MemCrc32(GetProgramBinaryView().GetData(), GetProgramBinaryView().Num());
				UE_LOG(LogRHI, Log, TEXT("[%s, %d, %d, crc 0x%X]"), *ProgramKey.ToString(), LinkedProgram->Program, GetProgramBinaryView().Num(), ProgramCRC );
				// dump first 32 bytes..
				if (GetProgramBinaryView().Num() >= 32)
				{
					const uint32* MemPtr = (const uint32*)GetProgramBinaryView().GetData();
					for (int32 Dump = 0; Dump < 8; Dump++)
					{
						UE_LOG(LogRHI, Log, TEXT("[%d :  0x%08X]"), Dump, *MemPtr++);
					}
				}
				RHIGetPanicDelegate().ExecuteIfBound(FName("FailedBinaryProgramCreate"));
				UE_LOG(LogRHI, Fatal, TEXT("RestoreGLProgramFromBinary : Failed to restore GL program from binary data! [%s]"), *ProgramKey.ToString());
			}
		}

		FOpenGLLinkedProgram* GetLinkedProgram() const
		{
			return LinkedProgram;
		}
	};

	const int LRUCapacity = 2048;

	// Sum of GetProgramBinarySize() over the programs currently held in the LRU.
	int32 LRUBinaryMemoryUse;

	// Find linked program within the evicted container.
	// no attempt to promote to LRU or create the GL object is made.
	FOpenGLLinkedProgram* FindEvicted(const FOpenGLProgramKey& ProgramKey)
	{
		FEvictedGLProgram* FoundEvicted = EvictedPrograms.Find(ProgramKey);
		return FoundEvicted ? FoundEvicted->GetLinkedProgram() : nullptr;
	}

	// Removes ProgramKey from the evicted set, recreates its GL program and adds it to the LRU.
	// Returns nullptr when the key is not in the evicted set.
	FOpenGLLinkedProgram* FindEvictedAndUpdateLRU(const FOpenGLProgramKey& ProgramKey)
	{
		FOpenGLLinkedProgram* LinkedProgram;
		{
			// Missed LRU cache, check evicted cache and add back to LRU
			FEvictedGLProgram FoundEvicted;
			if (!EvictedPrograms.RemoveAndCopyValue(ProgramKey, FoundEvicted))
			{
				return nullptr;
			}

			SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLRUMissTime);
			INC_DWORD_STAT(STAT_OpenGLShaderLRUMissCount);

			// UE_LOG(LogRHI, Warning, TEXT("LRU: found and recovered EVICTED program %s"), *ProgramKey.ToString());
			FoundEvicted.RestoreGLProgramFromBinary(ProgramKey);
			LinkedProgram = FoundEvicted.GetLinkedProgram();
		}

		// Add this back to the LRU
		Add(ProgramKey, LinkedProgram);

		DEC_DWORD_STAT(STAT_OpenGLShaderLRUEvictedProgramCount);
		return LinkedProgram;
	}

	// Moves a program that has already been removed from the LRU container into the evicted set.
	void EvictFromLRU(FOpenGLLinkedProgram* LinkedProgram)
	{
		SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLRUEvictTime);
		LinkedProgram->LRUInfo.LRUNode = FSetElementId();

		DEC_DWORD_STAT(STAT_OpenGLShaderLRUProgramCount);

		// Must be read before the evicted entry is constructed, because that releases the GL program.
		LRUBinaryMemoryUse -= LinkedProgram->GetProgramBinarySize();

		checkf(!EvictedPrograms.Contains(LinkedProgram->ProgramKey), TEXT("Program is already in the evicted program list: %s"), *LinkedProgram->ProgramKey.ToString());
		//UE_LOG(LogRHI, Warning, TEXT("LRU: Evicting program %d"), LinkedProgram->Program);
		EvictedPrograms.Emplace(LinkedProgram->ProgramKey, LinkedProgram);
		INC_DWORD_STAT(STAT_OpenGLShaderLRUEvictedProgramCount);
	}

public:

	bool IsEvicted(const FOpenGLProgramKey& ProgramKey)
	{
		return EvictedPrograms.Contains(ProgramKey);
	}

	void EvictLeastRecentFromLRU()
	{
		EvictFromLRU(LRU.RemoveLeastRecent());
	}

	void EvictMostRecentFromLRU()
	{
		EvictFromLRU(LRU.RemoveMostRecent());
	}

	// Evicts the program with the given key if it is currently resident; otherwise does nothing.
	void EvictProgramFromLRU(const FOpenGLProgramKey& ProgramKey)
	{
		FOpenGLLinkedProgram* RemovedLinkedProgram = nullptr;
		if (LRU.Remove(ProgramKey, RemovedLinkedProgram))
		{
			EvictFromLRU(RemovedLinkedProgram);
		}
	}

	// Evicts least-recent programs for as long as Pred returns true for the current least-recent one.
	// Stops when the LRU runs empty, so a predicate that accepts everything cannot run off the end.
	void EvictLeastRecentByPredicate(TUniqueFunction<bool(FOpenGLLinkedProgram*)> Pred)
	{
		while (LRU.Num() > 0 && Pred(LRU.GetLeastRecent()))
		{
			EvictLeastRecentFromLRU();
		}
	}

	FGLProgramCacheLRU()
		: LRUBinaryMemoryUse(0)
		, LRU(LRUCapacity)
	{
		EvictedPrograms.Reserve(10000); // |TODO: establish a reasonable default.
	}

	// True when the resident count equals the configured maximum or the container capacity, or when
	// the tracked binary size exceeds the configured budget.
	bool IsLRUAtCapacity() const
	{
		return LRU.Num() == CVarLRUMaxProgramCount.GetValueOnAnyThread() || LRU.Num() == LRU.Max() || LRUBinaryMemoryUse > CVarLRUMaxProgramBinarySize.GetValueOnAnyThread();
	}

	// returns the number of programs currently resident.
	int32 GetLRUSize() const
	{
		return LRU.Num();
	}

	// Makes LinkedProgram resident under ProgramKey, evicting least-recent programs first if needed.
	void Add(const FOpenGLProgramKey& ProgramKey, FOpenGLLinkedProgram* LinkedProgram)
	{
		// Remove least recently used programs until we reach our limit.
		// note that a single large shader could evict multiple smaller shaders.
		checkf(!LRU.Contains(ProgramKey), TEXT("Program is already in the LRU program list: %s"), *ProgramKey.ToString());
		checkf(!IsEvicted(ProgramKey), TEXT("Program is already in the evicted program list: %s"), *ProgramKey.ToString());

		// UE_LOG(LogRHI, Warning, TEXT("LRU: adding program %s (%d)"), *ProgramKey.ToString(), LinkedProgram->Program);

		while (IsLRUAtCapacity())
		{
			EvictLeastRecentFromLRU();
		}

		LinkedProgram->LRUInfo.LRUNode = LRU.Add(ProgramKey, LinkedProgram);
		LinkedProgram->LRUInfo.Touch();
		LRUBinaryMemoryUse += LinkedProgram->GetProgramBinarySize();
		INC_DWORD_STAT(STAT_OpenGLShaderLRUProgramCount);
	}

	// Stores ProgramBinaryData as the evicted entry for ProgramKey, replacing any existing entry.
	void AddOrReplaceEvicted(const FOpenGLProgramKey& ProgramKey, TUniqueObj<FOpenGLProgramBinary>&& ProgramBinaryData)
	{
		checkf(!LRU.Contains(ProgramKey), TEXT("Program is already in the LRU program list: %s"), *ProgramKey.ToString());

		// TODO: if we're replacing then check we dont replace a mmapped binary with a non-mmapped binary?
		const bool bReplacingExisting = EvictedPrograms.Contains(ProgramKey);
		EvictedPrograms.Emplace(ProgramKey, FEvictedGLProgram(ProgramKey, MoveTemp(ProgramBinaryData)));
		// UE_LOG(LogRHI, Warning, TEXT("LRU: adding EVICTED program %s"), *ProgramKey.ToString());

		// Only a genuinely new entry changes the number of evicted programs.
		if (!bReplacingExisting)
		{
			INC_DWORD_STAT(STAT_OpenGLShaderLRUEvictedProgramCount);
		}
	}

	// Looks a program up. A resident program is marked as recently used and returned.
	// Otherwise, with bFindAndCreateEvictedProgram set the evicted program is restored and made resident;
	// without it only the evicted entry's linked program pointer (possibly nullptr) is returned.
	FOpenGLLinkedProgram* Find(const FOpenGLProgramKey& ProgramKey, bool bFindAndCreateEvictedProgram)
	{
		// if it's in LRU pop to top.
		FOpenGLLinkedProgram *const * Found = LRU.FindAndTouch(ProgramKey);
		if (Found)
		{
			check((*Found)->LRUInfo.LRUNode.IsValidId());
			//UE_LOG(LogRHI, Warning, TEXT("LRU: ::Find program %d exists in LRU!"), (*Found)->Program);
			return *Found;
		}

		if (bFindAndCreateEvictedProgram)
		{
			return FindEvictedAndUpdateLRU(ProgramKey);
		}
		else
		{
			return FindEvicted(ProgramKey);
		}
	}

	bool Contains(const FOpenGLProgramKey& ProgramKey) const
	{
		return LRU.Contains(ProgramKey) || EvictedPrograms.Contains(ProgramKey);
	}

	// Marks LinkedProgram as recently used, restoring it from the evicted set first if it is not resident.
	FORCEINLINE_DEBUGGABLE void Touch(FOpenGLLinkedProgram* LinkedProgram)
	{
		if (LinkedProgram->LRUInfo.LRUNode.IsValidId())
		{
			LRU.MarkAsRecent(LinkedProgram->LRUInfo.LRUNode);
		}
		else
		{
			// This must find the program.
			ensure(FindEvictedAndUpdateLRU(LinkedProgram->ProgramKey));
		}
		LinkedProgram->LRUInfo.Touch();
	}

	// Deletes every linked program held by either container and resets the cache to empty.
	void Empty()
	{
		// delete all FOpenGLLinkedPrograms from evicted container
		for (auto const& Pair : EvictedPrograms)
		{
			FOpenGLLinkedProgram* LinkedProgram = Pair.Value.GetLinkedProgram();
			delete LinkedProgram;
		}
		EvictedPrograms.Empty();

		// delete all FOpenGLLinkedPrograms from LRU
		for (FOpenGLLinkedProgram* Value : LRU)
		{
			delete Value;
		}
		LRU.Empty(LRUCapacity);

		// Nothing is resident any more, so the tracked binary footprint starts again from zero.
		LRUBinaryMemoryUse = 0;
	}

	TPsoLruCache<FOpenGLProgramKey, FOpenGLLinkedProgram*> LRU;
	TMap<FOpenGLProgramKey, FEvictedGLProgram> EvictedPrograms;
};
// FGLProgramCache is a K/V store that holds on to all FOpenGLLinkedProgram created.
// It is implemented by either a TMap or an LRU cache that will limit the number of active GL programs at any one time.
// (LRU is used only to work around the mali driver's maximum shader heap size.)
class FGLProgramCache
{
	// Backing store when the LRU is in use.
	FGLProgramCacheLRU ProgramCacheLRU;
	// Backing store when the LRU is not in use.
	TMap<FOpenGLProgramKey, FOpenGLLinkedProgram*> ProgramCache;
	// -1 until first queried, then 0 or 1.
	inline static uint32 UseLRUCacheStatus = -1;

public:
	// Decides once (on first call) whether the LRU is used: the enable cvar must be 1 and the program
	// binary cache must be enabled. The decision is cached for all later calls.
	static bool IsUsingLRU()
	{
		if (UseLRUCacheStatus == -1)
		{
			if (CVarEnableLRU.GetValueOnAnyThread() && !FOpenGL::SupportsProgramBinary())
			{
				UE_LOG(LogRHI, Warning, TEXT("Requesting OpenGL program LRU cache, but program binary is not supported by driver. Falling back to non-lru cache."));
			}

			UseLRUCacheStatus = CVarEnableLRU.GetValueOnAnyThread() == 1 && FOpenGLProgramBinaryCache::IsEnabled();
			UE_LOG(LogRHI, Log, TEXT("OpenGL program LRU cache active = %d (%d, %d)"), UseLRUCacheStatus, CVarEnableLRU.GetValueOnAnyThread(), FOpenGLProgramBinaryCache::IsEnabled());
		}
		check(UseLRUCacheStatus != -1);
		return UseLRUCacheStatus == 1;
	}

	// Marks a program as recently used. No-op without the LRU.
	void Touch(FOpenGLLinkedProgram* LinkedProgram)
	{
		if (IsUsingLRU())
		{
			ProgramCacheLRU.Touch(LinkedProgram);
		}
	}

	// Looks up a program by key. bFindAndCreateEvictedProgram only matters with the LRU, where it
	// controls whether an evicted program is restored and made resident.
	FOpenGLLinkedProgram* Find(const FOpenGLProgramKey& ProgramKey, bool bFindAndCreateEvictedProgram)
	{
		if (IsUsingLRU())
		{
			return ProgramCacheLRU.Find(ProgramKey, bFindAndCreateEvictedProgram);
		}
		else
		{
			FOpenGLLinkedProgram** FoundProgram = ProgramCache.Find(ProgramKey);
			return FoundProgram ? *FoundProgram : nullptr;
		}
	}

	bool Contains(const FOpenGLProgramKey& ProgramKey) const
	{
		if (IsUsingLRU())
		{
			return ProgramCacheLRU.Contains(ProgramKey);
		}
		else
		{
			return ProgramCache.Contains(ProgramKey);
		}
	}

	// Registers a new linked program. The key must not already be present.
	void Add(const FOpenGLProgramKey& ProgramKey, FOpenGLLinkedProgram* LinkedProgram)
	{
		if (IsUsingLRU())
		{
			ProgramCacheLRU.Add(ProgramKey, LinkedProgram);
		}
		else
		{
			check(!ProgramCache.Contains(ProgramKey));
			ProgramCache.Add(ProgramKey, LinkedProgram);
		}
	}

	// Deletes every linked program held by the active backing store and clears it.
	void Empty()
	{
		if (IsUsingLRU())
		{
			ProgramCacheLRU.Empty();
		}
		else
		{
			// delete all FOpenGLLinkedPrograms from ProgramCache
			for (auto& Pair : ProgramCache)
			{
				delete Pair.Value;
			}
			ProgramCache.Empty();
		}
	}

	// Reports whether the LRU has reached one of its limits. Always false without the LRU.
	bool IsLRUAtCapacity() const
	{
		if (IsUsingLRU())
		{
			// Propagate the LRU's answer; previously the result was computed and then dropped.
			return ProgramCacheLRU.IsLRUAtCapacity();
		}

		return false;
	}

	// The eviction and LRU query functions below are only valid while the LRU is in use.
	void EvictMostRecent()
	{
		check(IsUsingLRU());
		if (ProgramCacheLRU.LRU.Num())
		{
			ProgramCacheLRU.EvictMostRecentFromLRU();
		}
	}

	void EvictLeastRecentByPredicate(TUniqueFunction<bool(FOpenGLLinkedProgram*)> Pred)
	{
		check(IsUsingLRU());
		if (ProgramCacheLRU.LRU.Num())
		{
			ProgramCacheLRU.EvictLeastRecentByPredicate(MoveTemp(Pred));
		}
	}

	void EvictProgram(const FOpenGLProgramKey& ProgramKey)
	{
		check(IsUsingLRU());
		ProgramCacheLRU.EvictProgramFromLRU(ProgramKey);
	}

	// Adds a program that exists only as a binary. The key must not be known to the cache yet.
	void AddEvicted(const FOpenGLProgramKey& ProgramKey, TUniqueObj<FOpenGLProgramBinary>&& ProgramBinary)
	{
		check(IsUsingLRU());
		check(!Contains(ProgramKey));
		AddOrReplaceEvicted(ProgramKey, MoveTemp(ProgramBinary));
	}

	void AddOrReplaceEvicted(const FOpenGLProgramKey& ProgramKey, TUniqueObj<FOpenGLProgramBinary>&& ProgramBinary)
	{
		check(IsUsingLRU());
		ProgramCacheLRU.AddOrReplaceEvicted(ProgramKey, MoveTemp(ProgramBinary));
	}

	bool IsEvicted(const FOpenGLProgramKey& ProgramKey)
	{
		check(IsUsingLRU());
		return ProgramCacheLRU.IsEvicted(ProgramKey);
	}

	int32 GetLRUSize() const
	{
		check(IsUsingLRU());
		return ProgramCacheLRU.GetLRUSize();
	}
};
// Returns the single program cache instance, constructed on first call.
// May only be called from the rendering thread or the RHI thread.
static FGLProgramCache& GetOpenGLProgramsCache()
{
	check(IsInRenderingThread() || IsInRHIThread());

	static FGLProgramCache SharedProgramCache;
	return SharedProgramCache;
}
// This short queue preceding released programs cache is here because usually the programs are requested again
// very shortly after they're released, so looking through recently released programs first provides tangible
// performance improvement.
// Number of slots in the recently released programs array below.
#define LAST_RELEASED_PROGRAMS_CACHE_COUNT 10
// Recently released programs; every slot starts out as nullptr.
static FOpenGLLinkedProgram* StaticLastReleasedPrograms[LAST_RELEASED_PROGRAMS_CACHE_COUNT] {};
// Index into StaticLastReleasedPrograms; presumably the next slot to overwrite — confirm at the use sites.
static int32 StaticLastReleasedProgramsIndex = 0;
// ============================================================================================================================
// Resolves the binding information of one shader stage against a linked GL program.
// Copies the shader's bindings, then queries the program by generated uniform names
// (<stage letter><kind letter><index>...) to locate packed global arrays, packed uniform buffer arrays,
// samplers, UAVs/images and uniform blocks. Texture and UAV unit usage is recorded on ProgramBase.
//  ProgramBase        - program being configured; its Program name is queried and its usage masks updated.
//  Shader             - shader whose Bindings describe what this stage expects.
//  Stage              - stage index; selects the name prefix and the per-stage unit tables.
//  FirstUniformBuffer - binding point assigned to this stage's uniform buffer 0.
FOpenGLLinkedProgram::FShaderStage::FShaderStage(FOpenGLLinkedProgramBase& ProgramBase, FOpenGLShader const& Shader, CrossCompiler::EShaderStage const Stage, uint32 const FirstUniformBuffer)
: Bindings(Shader.Bindings)
{
// Per-stage lookup tables, indexed by Stage. Entries 0, 1, 2 and 5 are filled from the vertex, pixel,
// geometry and compute queries respectively; entries 3 and 4 are placeholders
// (presumably stages not supported here — confirm against CrossCompiler::EShaderStage).
static const GLint FirstTextureUnit[CrossCompiler::NUM_SHADER_STAGES] =
{
FOpenGL::GetFirstVertexTextureUnit(),
FOpenGL::GetFirstPixelTextureUnit(),
FOpenGL::GetFirstGeometryTextureUnit(),
0,
0,
FOpenGL::GetFirstComputeTextureUnit()
};
static const GLint MaxTextureUnit[CrossCompiler::NUM_SHADER_STAGES] =
{
FOpenGL::GetMaxVertexTextureImageUnits(),
FOpenGL::GetMaxTextureImageUnits(),
FOpenGL::GetMaxGeometryTextureImageUnits(),
0,
0,
FOpenGL::GetMaxComputeTextureImageUnits()
};
static const GLint FirstUAVUnit[CrossCompiler::NUM_SHADER_STAGES] =
{
FOpenGL::GetFirstVertexUAVUnit(),
FOpenGL::GetFirstPixelUAVUnit(),
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
FOpenGL::GetFirstComputeUAVUnit()
};
SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderBindParameterTime);
VERIFY_GL_SCOPE();
// Scratch name buffer; character 0 is the stage letter for every lookup below.
FOpenGLUniformName Name;
Name.Buffer[0] = CrossCompiler::ShaderStageIndexToTypeName(Stage);
// Bind Global uniform arrays (vu_h, pu_i, etc)
{
Name.Buffer[1] = 'u';
Name.Buffer[2] = '_';
Name.Buffer[3] = 0;
Name.Buffer[4] = 0;
// First collect every packed type the program actually exposes for this stage.
TArray<FPackedUniformInfo, TInlineAllocator<CrossCompiler::PACKED_TYPEINDEX_MAX>> LocalPackedUniformInfos;
for (uint8 Index = 0; Index < CrossCompiler::PACKED_TYPEINDEX_MAX; ++Index)
{
uint8 ArrayIndexType = CrossCompiler::PackedTypeIndexToTypeName(Index);
Name.Buffer[3] = ArrayIndexType;
GLint Location = glGetUniformLocation(ProgramBase.Program, Name.Buffer);
if ((int32)Location != -1)
{
LocalPackedUniformInfos.Add({ Location, ArrayIndexType, Index });
}
}
// Then emit one entry per array declared in the bindings, in binding order.
// Arrays the program does not expose keep Location -1 and Index PACKED_TYPEINDEX_MAX.
PackedUniformInfos.Empty(Bindings.PackedGlobalArrays.Num());
for (auto const& PackedArray : Bindings.PackedGlobalArrays)
{
FPackedUniformInfo OutInfo = { -1, PackedArray.TypeName, CrossCompiler::PACKED_TYPEINDEX_MAX };
// Find this Global Array in the reflection list
for (auto const& ReflectedInfo : LocalPackedUniformInfos)
{
if (ReflectedInfo.ArrayType == PackedArray.TypeName)
{
OutInfo = ReflectedInfo;
break;
}
}
PackedUniformInfos.Add(OutInfo);
}
}
// Bind uniform buffer packed arrays (vc0_h, pc2_i, etc)
{
Name.Buffer[1] = 'c';
Name.Buffer[2] = 0;
Name.Buffer[3] = 0;
Name.Buffer[4] = 0;
Name.Buffer[5] = 0;
Name.Buffer[6] = 0;
PackedUniformBufferInfos.SetNum(Bindings.NumUniformBuffers);
check(Bindings.PackedUniformBuffers.Num() <= Bindings.NumUniformBuffers);
for (int32 UB = 0; UB < Bindings.PackedUniformBuffers.Num(); ++UB)
{
const TArray<CrossCompiler::FPackedArrayInfo>& PackedInfo = Bindings.PackedUniformBuffers[UB];
TArray<FPackedUniformInfo>& PackedBuffers = PackedUniformBufferInfos[UB];
// Build "<stage>c<UB>_" once; the type character after the underscore is swapped per array.
ANSICHAR* Str = Name.SetIndex(2, UB);
*Str++ = '_';
Str[1] = 0;
for (auto const& Info : PackedInfo)
{
Str[0] = Info.TypeName;
GLint Location = glGetUniformLocation(ProgramBase.Program, Name.Buffer); // This could be -1 if optimized out
PackedBuffers.Add({ Location, Info.TypeName, Info.TypeIndex });
}
}
}
// Reserve and setup Space for Emulated Uniform Buffers
LastEmulatedUniformBufferSet.Empty(Bindings.NumUniformBuffers);
LastEmulatedUniformBufferSet.AddZeroed(Bindings.NumUniformBuffers);
// Bind samplers.
Name.Buffer[1] = 's';
Name.Buffer[2] = 0;
Name.Buffer[3] = 0;
Name.Buffer[4] = 0;
// Index of the most recent sampler found under its plain name; used as the base of a sampler array.
int32 LastFoundIndex = -1;
for (int32 SamplerIndex = 0; SamplerIndex < Bindings.NumSamplers; ++SamplerIndex)
{
Name.SetIndex(2, SamplerIndex);
GLint Location = glGetUniformLocation(ProgramBase.Program, Name.Buffer);
if (Location == -1)
{
if (LastFoundIndex != -1)
{
// It may be an array of samplers. Get the initial element location, if available, and count from it.
Name.SetIndex(2, LastFoundIndex);
int32 OffsetOfArraySpecifier = (LastFoundIndex > 9) ? 4 : 3;
int32 ArrayIndex = SamplerIndex - LastFoundIndex;
Name.Buffer[OffsetOfArraySpecifier] = '[';
ANSICHAR* EndBracket = Name.SetIndex(OffsetOfArraySpecifier + 1, ArrayIndex);
*EndBracket++ = ']';
*EndBracket = 0;
Location = glGetUniformLocation(ProgramBase.Program, Name.Buffer);
}
}
else
{
LastFoundIndex = SamplerIndex;
}
if (Location != -1)
{
// Point the sampler at its texture unit and record that unit as used by the program.
FOpenGL::ProgramUniform1i(ProgramBase.Program, Location, FirstTextureUnit[Stage] + SamplerIndex);
ProgramBase.TextureStageNeeds[ FirstTextureUnit[Stage] + SamplerIndex ] = true;
ProgramBase.MaxTextureStage = FMath::Max(ProgramBase.MaxTextureStage, FirstTextureUnit[Stage] + SamplerIndex);
// NOTE(review): this limit check runs after TextureStageNeeds has already been indexed above — confirm that ordering is intended.
if (SamplerIndex >= MaxTextureUnit[Stage])
{
UE_LOG(LogShaders, Error, TEXT("%s has a shader using too many textures (idx %d, max allowed %d) at stage %d"), *ProgramBase.ProgramKey.ToString(), SamplerIndex, MaxTextureUnit[Stage] - 1, Stage);
checkNoEntry();
}
}
}
// Bind UAVs/images.
Name.Buffer[1] = 'i';
Name.Buffer[2] = 0;
Name.Buffer[3] = 0;
Name.Buffer[4] = 0;
// Index of the most recent UAV found under its plain or "_VAR" name; used as the base of a UAV array.
int32 LastFoundUAVIndex = -1;
for (int32 UAVIndex = 0; UAVIndex < Bindings.NumUAVs; ++UAVIndex)
{
ANSICHAR* Str = Name.SetIndex(2, UAVIndex);
GLint Location = glGetUniformLocation(ProgramBase.Program, Name.Buffer);
if (Location == -1)
{
// SSBO
// Not a uniform: retry as a shader storage block named "<stage>i<index>_VAR".
Str[0] = '_';
Str[1] = 'V';
Str[2] = 'A';
Str[3] = 'R';
Str[4] = '\0';
Location = glGetProgramResourceIndex(ProgramBase.Program, GL_SHADER_STORAGE_BLOCK, Name.Buffer);
}
if (Location == -1)
{
if (LastFoundUAVIndex != -1)
{
// It may be an array of UAVs. Get the initial element location, if available, and count from it.
Name.SetIndex(2, LastFoundUAVIndex);
int32 OffsetOfArraySpecifier = (LastFoundUAVIndex > 9) ? 4 : 3;
int32 ArrayIndex = UAVIndex-LastFoundUAVIndex;
Name.Buffer[OffsetOfArraySpecifier] = '[';
ANSICHAR* EndBracket = Name.SetIndex(OffsetOfArraySpecifier + 1, ArrayIndex);
*EndBracket++ = ']';
*EndBracket = '\0';
Location = glGetUniformLocation(ProgramBase.Program, Name.Buffer);
}
}
else
{
LastFoundUAVIndex = UAVIndex;
}
if (Location != -1)
{
// compute shaders have layout(binding) for images
// glUniform1i(Location, FirstUAVUnit[Stage] + UAVIndex);
// verify that only CS and PS uses UAVs (limitation on MALI GPUs)
checkf(Stage == CrossCompiler::SHADER_STAGE_COMPUTE || Stage == CrossCompiler::SHADER_STAGE_PIXEL, TEXT("%s uses UAV in vertex shader"), *ProgramBase.ProgramKey.ToString());
// Only usage is recorded here; no uniform is written for UAVs.
ProgramBase.UAVStageNeeds[ FirstUAVUnit[Stage] + UAVIndex ] = true;
ProgramBase.MaxUAVUnitUsed = FMath::Max(ProgramBase.MaxUAVUnitUsed, FirstUAVUnit[Stage] + UAVIndex);
}
}
// Bind uniform buffers.
if (FOpenGL::SupportsUniformBuffers())
{
Name.Buffer[1] = 'b';
Name.Buffer[2] = 0;
Name.Buffer[3] = 0;
Name.Buffer[4] = 0;
for (int32 BufferIndex = 0; BufferIndex < Bindings.NumUniformBuffers; ++BufferIndex)
{
Name.SetIndex(2, BufferIndex);
GLint Location = FOpenGL::GetUniformBlockIndex(ProgramBase.Program, Name.Buffer);
// Blocks that exist in the program get consecutive binding points starting at FirstUniformBuffer.
if (Location >= 0)
{
FOpenGL::UniformBlockBinding(ProgramBase.Program, Location, FirstUniformBuffer + BufferIndex);
}
}
}
}
#if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
#define ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097 1
/*
As of CL 1862097 uniform buffer names are mangled to avoid collisions between variables referenced
in different shaders of the same program
layout(std140) uniform _vb0
{
#define View View_vb0
anon_struct_0000 View;
};
layout(std140) uniform _vb1
{
#define Primitive Primitive_vb1
anon_struct_0001 Primitive;
};
*/
/**
 * Byte offset and array element count of a single uniform-buffer member.
 * Used as the comparison unit when checking GLSL reflection data against the engine-side metadata.
 */
struct UniformData
{
	uint32 Offset;
	uint32 ArrayElements;

	UniformData(uint32 InOffset, uint32 InArrayElements)
		: Offset(InOffset)
		, ArrayElements(InArrayElements)
	{
	}

	/** Two entries are equal only when both the offset and the element count match. */
	bool operator == (const UniformData& RHS) const
	{
		if (Offset != RHS.Offset)
		{
			return false;
		}
		return ArrayElements == RHS.ArrayElements;
	}

	bool operator != (const UniformData& RHS) const
	{
		return Offset != RHS.Offset || ArrayElements != RHS.ArrayElements;
	}
};
/**
 * Checks one uniform reported by GL against the engine's uniform buffer metadata.
 *
 * On the first call a static lookup table is built from every struct in
 * FShaderParametersMetadata::GetStructList(), keyed by "<ShaderVariableName>_<MemberName>"
 * (with a "[0]" suffix for members that report a non-zero element count).
 * Each call then looks up the (optionally unmangled) uniform name and logs a Fatal error
 * when the entry is missing or its offset / element count differs from GLSLUniform.
 *
 * @param BlockName    (name-mangling build only) name of the uniform block; every occurrence is stripped from UniformName.
 * @param UniformName  uniform name as reported by GL.
 * @param GLSLUniform  offset and array element count reported by GL for that uniform.
 */
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
static void VerifyUniformLayout(const FString& BlockName, const TCHAR* UniformName, const UniformData& GLSLUniform)
#else
static void VerifyUniformLayout(const TCHAR* UniformName, const UniformData& GLSLUniform)
#endif //#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
{
// Lazily-built table of every known uniform buffer member; populated only while it is still empty.
static TMap<FString, UniformData> Uniforms;
if(!Uniforms.Num())
{
for (TLinkedList<FShaderParametersMetadata*>::TIterator StructIt(FShaderParametersMetadata::GetStructList()); StructIt; StructIt.Next())
{
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
UE_LOG(LogRHI, Log, TEXT("UniformBufferStruct %s %s %d"),
StructIt->GetStructTypeName(),
StructIt->GetShaderVariableName(),
StructIt->GetSize()
);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
const TArray<FShaderParametersMetadata::FMember>& StructMembers = StructIt->GetMembers();
for(int32 MemberIndex = 0;MemberIndex < StructMembers.Num();++MemberIndex)
{
const FShaderParametersMetadata::FMember& Member = StructMembers[MemberIndex];
// BaseTypeName is only consumed by the optional dump log below; an unrecognized base type is fatal.
FString BaseTypeName;
switch(Member.GetBaseType())
{
case UBMT_NESTED_STRUCT: BaseTypeName = TEXT("struct"); break;
case UBMT_INT32: BaseTypeName = TEXT("int"); break;
case UBMT_UINT32: BaseTypeName = TEXT("uint"); break;
case UBMT_FLOAT32: BaseTypeName = TEXT("float"); break;
case UBMT_TEXTURE: BaseTypeName = TEXT("texture"); break;
case UBMT_SAMPLER: BaseTypeName = TEXT("sampler"); break;
default: UE_LOG(LogShaders, Fatal,TEXT("Unrecognized uniform buffer struct member base type."));
};
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
UE_LOG(LogRHI, Log, TEXT(" +%d %s%dx%d %s[%d]"),
Member.GetOffset(),
*BaseTypeName,
Member.GetNumRows(),
Member.GetNumColumns(),
Member.GetName(),
Member.GetNumElements()
);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
// Table key: shader variable name and member name joined by an underscore.
FString CompositeName = FString(StructIt->GetShaderVariableName()) + TEXT("_") + Member.GetName();
// GLSL returns array members with a "[0]" suffix
if(Member.GetNumElements())
{
CompositeName += TEXT("[0]");
}
// Each composite name is expected to be unique across all registered structs.
check(!Uniforms.Contains(CompositeName));
Uniforms.Add(CompositeName, UniformData(Member.GetOffset(), Member.GetNumElements()));
}
}
}
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
/* unmangle the uniform name by stripping the block name from it
layout(std140) uniform _vb0
{
#define View View_vb0
anon_struct_0000 View;
};
*/
FString RequestedUniformName(UniformName);
RequestedUniformName = RequestedUniformName.Replace(*BlockName, TEXT(""));
// Drop a leading '.' left over after the block name has been removed.
if(RequestedUniformName.StartsWith(TEXT("."), ESearchCase::CaseSensitive))
{
RequestedUniformName.RightChopInline(1, false);
}
#else
FString RequestedUniformName = UniformName;
#endif
const UniformData* FoundUniform = Uniforms.Find(RequestedUniformName);
// MaterialTemplate uniform buffer does not have an entry in the FShaderParametersMetadatas list, so skipping it here
if(!(RequestedUniformName.StartsWith("Material_") || RequestedUniformName.StartsWith("MaterialCollection")))
{
// A missing entry or a differing offset / element count is treated as a fatal layout mismatch.
if(!FoundUniform || (*FoundUniform != GLSLUniform))
{
UE_LOG(LogRHI, Fatal, TEXT("uniform buffer member %s in the GLSL source doesn't match it's declaration in it's FShaderParametersMetadata"), *RequestedUniformName);
}
}
}
/**
 * Walks every active uniform block of a linked GL program and verifies each active uniform
 * in it through VerifyUniformLayout().
 *
 * For each block this queries its name, active uniform count and data size, then the
 * per-uniform offset, size, type and array stride, and (optionally) dumps them to the log.
 *
 * @param Program  GL program object whose uniform blocks are inspected.
 */
static void VerifyUniformBufferLayouts(GLuint Program)
{
GLint NumBlocks = 0;
glGetProgramiv(Program, GL_ACTIVE_UNIFORM_BLOCKS, &NumBlocks);
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
UE_LOG(LogRHI, Log, TEXT("program %d has %d uniform blocks"), Program, NumBlocks);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
for(GLint BlockIndex = 0; BlockIndex < NumBlocks; ++BlockIndex)
{
// Scratch buffer reused for the block name and, further down, for each uniform name.
const GLsizei BufferSize = 256;
char Buffer[BufferSize] = {0};
GLsizei Length = 0;
GLint ActiveUniforms = 0;
GLint BlockBytes = 0;
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS, &ActiveUniforms);
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_DATA_SIZE, &BlockBytes);
glGetActiveUniformBlockName(Program, BlockIndex, BufferSize, &Length, Buffer);
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
// Keep a copy of the block name: Buffer is overwritten by the per-uniform name queries below.
FString BlockName(Buffer);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
// Build a short string with one letter per stage that references the block (V/H/D/G/P/C); it is only used by the dump log.
FString ReferencedBy;
{
GLint ReferencedByVS = 0;
GLint ReferencedByPS = 0;
GLint ReferencedByGS = 0;
GLint ReferencedByHS = 0;
GLint ReferencedByDS = 0;
GLint ReferencedByCS = 0;
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER, &ReferencedByVS);
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER, &ReferencedByPS);
// The remaining stage queries are compiled in only when the corresponding GL enum is defined.
#ifdef GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER, &ReferencedByGS);
#endif
if (GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5)
{
#ifdef GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER, &ReferencedByHS);
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER, &ReferencedByDS);
#endif
}
#ifdef GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER, &ReferencedByCS);
#endif
if(ReferencedByVS) {ReferencedBy += TEXT("V");}
if(ReferencedByHS) {ReferencedBy += TEXT("H");}
if(ReferencedByDS) {ReferencedBy += TEXT("D");}
if(ReferencedByGS) {ReferencedBy += TEXT("G");}
if(ReferencedByPS) {ReferencedBy += TEXT("P");}
if(ReferencedByCS) {ReferencedBy += TEXT("C");}
}
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
UE_LOG(LogRHI, Log, TEXT(" [%d] uniform block (%s) = %s, %d active uniforms, %d bytes {"),
BlockIndex,
*ReferencedBy,
ANSI_TO_TCHAR(Buffer),
ActiveUniforms,
BlockBytes
);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
if(ActiveUniforms)
{
// the other TArrays copy construct this to get the proper array size
// NOTE(review): Init is called with a single argument here — confirm this matches the TArray API in use.
TArray<GLint> ActiveUniformIndices;
ActiveUniformIndices.Init(ActiveUniforms);
glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES, ActiveUniformIndices.GetData());
// One parallel array per queried property; entry i of each array describes ActiveUniformIndices[i].
TArray<GLint> ActiveUniformOffsets(ActiveUniformIndices);
glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_OFFSET, ActiveUniformOffsets.GetData());
TArray<GLint> ActiveUniformSizes(ActiveUniformIndices);
glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_SIZE, ActiveUniformSizes.GetData());
TArray<GLint> ActiveUniformTypes(ActiveUniformIndices);
glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_TYPE, ActiveUniformTypes.GetData());
TArray<GLint> ActiveUniformArrayStrides(ActiveUniformIndices);
glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_ARRAY_STRIDE, ActiveUniformArrayStrides.GetData());
extern const TCHAR* GetGLUniformTypeString( GLint UniformType );
for(GLint i = 0; i < ActiveUniformIndices.Num(); ++i)
{
const GLint UniformIndex = ActiveUniformIndices[i];
GLsizei Size = 0;
GLenum Type = 0;
// Fetch the uniform's name into Buffer; Size and Type from this call are not used afterwards.
glGetActiveUniform(Program, UniformIndex , BufferSize, &Length, &Size, &Type, Buffer);
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
UE_LOG(LogRHI, Log, TEXT(" [%d] +%d %s %s %d elements %d array stride"),
UniformIndex,
ActiveUniformOffsets[i],
GetGLUniformTypeString(ActiveUniformTypes[i]),
ANSI_TO_TCHAR(Buffer),
ActiveUniformSizes[i],
ActiveUniformArrayStrides[i]
);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
const UniformData GLSLUniform
(
ActiveUniformOffsets[i],
ActiveUniformArrayStrides[i] > 0 ? ActiveUniformSizes[i] : 0 // GLSL has 1 as array size for non-array uniforms, but FShaderParametersMetadata assumes 0
);
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
VerifyUniformLayout(BlockName, ANSI_TO_TCHAR(Buffer), GLSLUniform);
#else
VerifyUniformLayout(ANSI_TO_TCHAR(Buffer), GLSLUniform);
#endif
}
}
}
}
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
#define PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257
/**
 * Constructs a linked-program wrapper from an existing key and GL program name.
 * Both arguments are forwarded to the base class; the constructor body itself does nothing.
 */
FOpenGLLinkedProgram::FOpenGLLinkedProgram(const FOpenGLProgramKey& InProgramKey, GLuint InProgram)
	: FOpenGLLinkedProgramBase(InProgramKey, InProgram)
{
}
/**
 * Builds and links a graphics program pipeline from a vertex shader, a pixel shader and an
 * optional geometry shader, then registers the result in the program cache.
 *
 * A link failure notifies the RHI panic delegate and logs a Fatal error.
 *
 * @param VertexShader    vertex stage; dereferenced unconditionally.
 * @param PixelShader     pixel stage; dereferenced unconditionally.
 * @param GeometryShader  geometry stage, may be null.
 */
FOpenGLLinkedProgram::FOpenGLLinkedProgram(FOpenGLVertexShader* VertexShader, FOpenGLPixelShader* PixelShader, FOpenGLGeometryShader* GeometryShader)
	: FOpenGLLinkedProgramBase(FOpenGLProgramKey(VertexShader, PixelShader, GeometryShader), 0)
{
	VERIFY_GL_SCOPE();
	OGL_BINARYCACHE_STATS_MARKBINARYCACHEMISS(ProgramKey, true);

	const bool bHasGeometryStage = (GeometryShader != nullptr);

	// Give every stage the chance to compile before its Resource is attached to the pipeline.
	VertexShader->ConditionalyCompile();
	PixelShader->ConditionalyCompile();
	if (bHasGeometryStage)
	{
		GeometryShader->ConditionalyCompile();
	}

	SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLinkTime);

	FOpenGL::GenProgramPipelines(1, &Program);

	// Attach the stages: vertex, pixel, then the optional geometry stage.
	check(VertexShader->Resource);
	FOpenGL::UseProgramStages(Program, GL_VERTEX_SHADER_BIT, VertexShader->Resource);

	check(PixelShader->Resource);
	FOpenGL::UseProgramStages(Program, GL_FRAGMENT_SHADER_BIT, PixelShader->Resource);

	if (bHasGeometryStage)
	{
		check(GeometryShader->Resource);
		FOpenGL::UseProgramStages(Program, GL_GEOMETRY_SHADER_BIT, GeometryShader->Resource);
	}

	// The retrievable-binary hint is set when either the binary cache or the LRU cache is in use.
	const bool bRequestRetrievableBinary = FOpenGLProgramBinaryCache::IsEnabled() || FGLProgramCache::IsUsingLRU();
	if (bRequestRetrievableBinary)
	{
		FOpenGL::ProgramParameter(Program, PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
	}

	glLinkProgram(Program);

	const bool bLinkSucceeded = VerifyLinkedProgram(Program);
	if (!bLinkSucceeded)
	{
#if DEBUG_GL_SHADERS
		// Dump the GLSL of every stage that took part in the failed link.
		UE_LOG(LogRHI, Error, TEXT("Vertex Shader:\n%s"), ANSI_TO_TCHAR(VertexShader->GlslCode.GetData()));
		UE_LOG(LogRHI, Error, TEXT("Pixel Shader:\n%s"), ANSI_TO_TCHAR(PixelShader->GlslCode.GetData()));
		if (bHasGeometryStage)
		{
			UE_LOG(LogRHI, Error, TEXT("Geometry Shader:\n%s"), ANSI_TO_TCHAR(GeometryShader->GlslCode.GetData()));
		}
#endif //DEBUG_GL_SHADERS
		const FName LinkFailurePanic("FailedProgramLink");
		RHIGetPanicDelegate().ExecuteIfBound(LinkFailurePanic);
		UE_LOG(LogRHI, Fatal, TEXT("Failed to link graphics program [%s]. Current total programs: %d"), *ProgramKey.ToString(), GNumPrograms);
	}

	SetNewProgramStats(Program);
	FOpenGL::BindProgramPipeline(Program);
	Config.Emplace<FGraphicsProgram>(*this, VertexShader, PixelShader, GeometryShader);

#if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
	VerifyUniformBufferLayouts(Program);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION

	// In precache mode any newly compiled program can be put in the binary cache.
	const bool bStoreBinary = ShouldCacheAllProgramBinaries() && FOpenGLProgramBinaryCache::RequiresCaching(ProgramKey);
	if (bStoreBinary)
	{
		FOpenGLProgramBinary RetrievedBinary = UE::OpenGL::GetProgramBinaryFromGLProgram(Program);
		FOpenGLProgramBinaryCache::CacheProgramBinary(ProgramKey, TUniqueObj<FOpenGLProgramBinary>(MoveTemp(RetrievedBinary)));
	}

	GetOpenGLProgramsCache().Add(ProgramKey, this);
}
/**
 * Builds and links a compute program pipeline for the given compute shader, stores itself in
 * ComputeShader->LinkedProgram and registers the result in the program cache.
 *
 * A link failure notifies the RHI panic delegate and then logs a Fatal error that includes the
 * program key, matching the failure handling of the graphics-program constructor.
 *
 * @param ComputeShader  compute stage; must not already have a LinkedProgram.
 */
FOpenGLLinkedProgram::FOpenGLLinkedProgram(FOpenGLComputeShader* ComputeShader)
	: FOpenGLLinkedProgramBase(FOpenGLProgramKey(ComputeShader), 0)
{
	check(!ComputeShader->LinkedProgram);
	ComputeShader->LinkedProgram = this;

	// Not in the cache. Create and add the program here.
	// We can now link the compute shader, by now the shader hash has been set.
	ComputeShader->ConditionalyCompile();

	SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLinkTime);

	FOpenGL::GenProgramPipelines(1, &Program);

	check(ComputeShader->Resource);
	FOpenGL::UseProgramStages(Program, GL_COMPUTE_SHADER_BIT, ComputeShader->Resource);

	// The retrievable-binary hint is set when either the binary cache or the LRU cache is in use.
	if (FOpenGLProgramBinaryCache::IsEnabled() || FGLProgramCache::IsUsingLRU())
	{
		FOpenGL::ProgramParameter(Program, PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
	}

	// Link.
	glLinkProgram(Program);
	if (!VerifyLinkedProgram(Program))
	{
#if DEBUG_GL_SHADERS
		UE_LOG(LogRHI, Error, TEXT("Compute Shader:\n%s"), ANSI_TO_TCHAR(ComputeShader->GlslCode.GetData()));
#endif //DEBUG_GL_SHADERS
		// No assert here: asserting first would stop execution before the panic delegate is
		// notified and before the fatal log (which carries the program key) is written.
		FName LinkFailurePanic = FName("FailedComputeProgramLink");
		RHIGetPanicDelegate().ExecuteIfBound(LinkFailurePanic);
		UE_LOG(LogRHI, Fatal, TEXT("Failed to link compute program [%s]. Current total programs: %d"), *ProgramKey.ToString(), GNumPrograms);
	}

	SetNewProgramStats(Program);
	FOpenGL::BindProgramPipeline(Program);
	Config.Emplace<FComputeProgram>(*this, ComputeShader);

#if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
	VerifyUniformBufferLayouts(Program);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION

	GetOpenGLProgramsCache().Add(ProgramKey, this);
}
/**
 * Ensures the given compute shader has a linked program.
 *
 * Does nothing when one is already attached. Otherwise the program cache is consulted (waiting
 * on a pending binary-cache creation request if there is one); a cache hit is re-initialized
 * through UpdateShaders, a miss links a brand new program.
 */
void FOpenGLDynamicRHI::LinkComputeProgram(FRHIComputeShader* ComputeShaderRHI)
{
	FOpenGLComputeShader* ComputeShader = ResourceCast(ComputeShaderRHI);
	auto& LinkedProgramSlot = ComputeShader->LinkedProgram;
	if (LinkedProgramSlot)
	{
		return;
	}

	VERIFY_GL_SCOPE();
	check(ComputeShaderRHI->GetHash() != FSHAHash());

	const FOpenGLProgramKey ProgramKey(ComputeShaderRHI);

	LinkedProgramSlot = GetOpenGLProgramsCache().Find(ProgramKey, true);

	// ensure that pending request for this program has been completed before attempting to link
	if (!LinkedProgramSlot && FOpenGLProgramBinaryCache::CheckSinglePendingGLProgramCreateRequest(ProgramKey))
	{
		LinkedProgramSlot = GetOpenGLProgramsCache().Find(ProgramKey, true);
	}

	if (LinkedProgramSlot != nullptr)
	{
		// this has been loaded via binary program cache, properly initialize it here:
		LinkedProgramSlot->UpdateShaders<FOpenGLLinkedProgram::FComputeProgram>(ComputeShader);
		return;
	}

	// Make sure we have OpenGL context set up, and invalidate the parameters cache and current program (as we'll link a new one soon)
	ContextState.Program = -1;
	MarkShaderParameterCachesDirty(PendingState.ShaderParameters, true);
	PendingState.LinkedProgramAndDirtyFlag = nullptr;

	LinkedProgramSlot = new FOpenGLLinkedProgram(ComputeShader);
}
/** Creates an OpenGL compute shader object from the supplied shader code and hash. */
FComputeShaderRHIRef FOpenGLDynamicRHI::RHICreateComputeShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
{
	FOpenGLComputeShader* NewComputeShader = new FOpenGLComputeShader(Code, Hash);
	return NewComputeShader;
}
/**
 * Returns the GLSL source of a shader stage as a string.
 *
 * With DEBUG_GL_SHADERS the stored GlslCodeString is returned. Otherwise the source of every
 * shader object attached to Shader->Resource is queried from GL and concatenated.
 *
 * @param Shader  stage whose source is requested; dereferenced unconditionally.
 * @return the collected source, or an empty string when nothing could be retrieved.
 */
template<class TOpenGLStage>
static FString GetShaderStageSource(TOpenGLStage* Shader)
{
	FString Source;
#if DEBUG_GL_SHADERS
	Source = Shader->GlslCodeString;
#else
	GLsizei NumShaders = 0;
	glGetProgramiv(Shader->Resource, GL_ATTACHED_SHADERS, (GLint*)&NumShaders);
	if(NumShaders > 0)
	{
		GLuint* Shaders = (GLuint*)alloca(sizeof(GLuint)*NumShaders);
		glGetAttachedShaders(Shader->Resource, NumShaders, &NumShaders, Shaders);

		// Reused across shaders; SetNumZeroed re-zeroes it on every pass.
		TArray<ANSICHAR> Code;
		for(int32 i = 0; i < NumShaders; i++)
		{
			GLint Len = 0;
			glGetShaderiv(Shaders[i], GL_SHADER_SOURCE_LENGTH, &Len);
			if(Len > 0)
			{
				// Zero-filled so the buffer is always a valid, terminated C string,
				// even if the GL call fails and writes nothing into it.
				Code.Reset();
				Code.SetNumZeroed(Len + 1);
				glGetShaderSource(Shaders[i], Len + 1, &Len, Code.GetData());
				Source += Code.GetData();
			}
		}
	}
#endif
	return Source;
}
// ============================================================================================================================
// Plain record pairing a varying's name with a write location and a read location.
// WriteLoc and ReadLoc have no default initializer, so they must be set by whoever creates the record.
// NOTE(review): no use of this struct is visible in this part of the file; presumably WriteLoc/ReadLoc are the
// locations in the producing and consuming shader stages — confirm against the callers.
struct FOpenGLShaderVaryingMapping
{
// Name of the varying, stored as an ANSI character array.
FAnsiCharArray Name;
int32 WriteLoc;
int32 ReadLoc;
};
// ============================================================================================================================
/**
 * Builds a graphics program key from the hashes of the vertex and pixel shaders and, when one
 * is supplied, the geometry shader. The geometry slot is left untouched for a null geometry shader.
 */
FOpenGLProgramKey::FOpenGLProgramKey(FRHIVertexShader* VertexShaderRHI, FRHIPixelShader* PixelShaderRHI, FRHIGeometryShader* GeometryShaderRHI)
{
	auto* VertexStage = FOpenGLDynamicRHI::ResourceCast(VertexShaderRHI);
	ShaderHashes[CrossCompiler::SHADER_STAGE_VERTEX] = VertexStage->GetHash();

	auto* PixelStage = FOpenGLDynamicRHI::ResourceCast(PixelShaderRHI);
	ShaderHashes[CrossCompiler::SHADER_STAGE_PIXEL] = PixelStage->GetHash();

	if (GeometryShaderRHI != nullptr)
	{
		auto* GeometryStage = FOpenGLDynamicRHI::ResourceCast(GeometryShaderRHI);
		ShaderHashes[CrossCompiler::SHADER_STAGE_GEOMETRY] = GeometryStage->GetHash();
	}
}
/** Builds a compute program key: only the compute-stage hash slot is written. */
FOpenGLProgramKey::FOpenGLProgramKey(FRHIComputeShader* ComputeShaderRHI)
{
	auto* ComputeStage = FOpenGLDynamicRHI::ResourceCast(ComputeShaderRHI);
	ShaderHashes[CrossCompiler::SHADER_STAGE_COMPUTE] = ComputeStage->GetHash();
}
/**
 * Reports whether a program may be compiled by the external (remote) compile services.
 *
 * Only possible on Android, and only for PSO-originated requests while the binary cache is
 * being built and the remote compile services are active. Always false on other platforms.
 */
static bool CanCreateExternally(bool bIsFromPSO)
{
#if PLATFORM_ANDROID
	return bIsFromPSO
		&& FOpenGLProgramBinaryCache::IsBuildingCache()
		&& FAndroidOpenGL::AreRemoteCompileServicesActive();
#else
	return false;
#endif
}
/**
 * Compiles and links a vertex + pixel shader program through the Android remote compile
 * services and returns the resulting program binary.
 *
 * The call blocks until the remote result is available. When the remote compile reports a
 * failure log, the error is logged and a default-constructed binary is returned. On platforms
 * other than Android this function must not be reached (checkNoEntry).
 *
 * @param ProgramKey       key of the program; its raw bytes are sent along with the request.
 * @param PSOCompileType   precache compile type forwarded to the remote compile services.
 * @param VertexShaderRHI  vertex shader whose uncompressed GLSL is looked up in the compiled shader cache.
 * @param PixelShaderRHI   pixel shader whose uncompressed GLSL is looked up in the compiled shader cache.
 * @return the compiled (optionally compressed) program binary, or a default-constructed one on failure.
 */
static FOpenGLProgramBinary ExternalProgramCompile(const FOpenGLProgramKey& ProgramKey, FGraphicsPipelineStateInitializer::EPSOPrecacheCompileType PSOCompileType, FRHIVertexShader* VertexShaderRHI, FRHIPixelShader* PixelShaderRHI)
{
	FOpenGLProgramBinary CompiledProgram;
#if PLATFORM_ANDROID
	// compile externally, sit and wait for the linked result
	const FOpenGLCompiledShaderKey& VSKey = FOpenGLDynamicRHI::ResourceCast(VertexShaderRHI)->ShaderCodeKey;
	const FOpenGLCompiledShaderKey& PSKey = FOpenGLDynamicRHI::ResourceCast(PixelShaderRHI)->ShaderCodeKey;
	TArray<ANSICHAR> VSCode;
	TArray<ANSICHAR> PSCode;
	// Left empty: only a vertex + pixel program is compiled here.
	TArray<ANSICHAR> ComputeGlslCode;
	{
		// The compiled shader cache is only accessed while holding its critical section.
		FScopeLock Lock(&GCompiledShaderCacheCS);
		VSCode = GetOpenGLCompiledShaderCache().FindRef(VSKey).GetUncompressedShader();
		PSCode = GetOpenGLCompiledShaderCache().FindRef(PSKey).GetUncompressedShader();
	}
	FString FailLog;
	TArray<uint8> CompiledProgramBytes = FAndroidOpenGL::DispatchAndWaitForRemoteGLProgramCompile(PSOCompileType, TArrayView<uint8>((uint8*)&ProgramKey, sizeof(ProgramKey)), VSCode, PSCode, ComputeGlslCode, FailLog);
	// An empty failure log is treated as success.
	if (FailLog.IsEmpty())
	{
		if (UE::OpenGL::IsStoringCompressedBinaryPrograms())
		{
			TArray<uint8> CompressedCompiledProgramResult;
			UE::OpenGL::CompressProgramBinary(CompiledProgramBytes, CompressedCompiledProgramResult);
			CompiledProgramBytes = MoveTemp(CompressedCompiledProgramResult);
		}
		CompiledProgram = FOpenGLProgramBinary(MoveTemp(CompiledProgramBytes));
	}
	else
	{
		UE_LOG(LogRHI, Error, TEXT("External compile of program %s failed: %s "), *ProgramKey.ToString(), *FailLog);
#if DEBUG_GL_SHADERS
		if (VSCode.Num())
		{
			UE_LOG(LogRHI, Error, TEXT("Vertex Shader:\n%s"), ANSI_TO_TCHAR(VSCode.GetData()));
		}
		if (PSCode.Num())
		{
			UE_LOG(LogRHI, Error, TEXT("Pixel Shader:\n%s"), ANSI_TO_TCHAR(PSCode.GetData()));
		}
#endif //DEBUG_GL_SHADERS
	}
#else
	checkNoEntry();
#endif
	return CompiledProgram;
}
/**
 * Handles a precache PSO by compiling its program externally and storing the binary in the
 * program binary cache.
 *
 * Nothing happens unless the initializer is a precache / PSO-file-cache request, the binary
 * cache is enabled and external compilation is available. A one-time warning is logged when a
 * precache PSO is ignored because the external compiler is not active.
 *
 * @param Initializer  pipeline state description providing the vertex and pixel shaders.
 */
void FOpenGLDynamicRHI::PrepareGFXBoundShaderState(const FGraphicsPipelineStateInitializer& Initializer)
{
	const bool bIsPreCachePSO = Initializer.bPSOPrecache || Initializer.bFromPSOFileCache;
	// if external creation is not available then ignore precache PSOs
	// precaching on the RHIT will cause severe hitching.
	const bool bCanCreateExternally = CanCreateExternally(bIsPreCachePSO);
	if (!bIsPreCachePSO || !FOpenGLProgramBinaryCache::IsEnabled() || !bCanCreateExternally)
	{
		static bool bOneTime = true;
		if(bOneTime && bIsPreCachePSO && FOpenGLProgramBinaryCache::IsEnabled())
		{
			UE_LOG(LogRHI, Warning, TEXT("Ignoring precache PSO, external compiler not active."));
			bOneTime = false;
		}
		return;
	}

	FRHIVertexShader* VertexShaderRHI = Initializer.BoundShaderState.GetVertexShader();
	FRHIPixelShader* PixelShaderRHI = Initializer.BoundShaderState.GetPixelShader();
	if (!PixelShaderRHI)
	{
		// use special null pixel shader when PixelShader was set to NULL
		PixelShaderRHI = TShaderMapRef<FNULLPS>(GetGlobalShaderMap(GMaxRHIFeatureLevel)).GetPixelShader();
	}

	// The key is built from the vertex and pixel shader hashes only.
	// NOTE(review): the geometry shader hash is not part of this key, unlike the
	// FOpenGLProgramKey(VS, PS, GS) constructor — confirm this is intended.
	FOpenGLProgramKey ProgramKey;
	ProgramKey.ShaderHashes[CrossCompiler::SHADER_STAGE_VERTEX] = VertexShaderRHI->GetHash();
	ProgramKey.ShaderHashes[CrossCompiler::SHADER_STAGE_PIXEL] = PixelShaderRHI->GetHash();

	if (FOpenGLProgramBinaryCache::RequiresCaching(ProgramKey))
	{
		if (FOpenGLProgramBinaryCache::IsBuildingCache())
		{
			OGL_BINARYCACHE_STATS_MARKBEGINCOMPILE(ProgramKey);
			FOpenGLProgramBinary CompiledProgram = ExternalProgramCompile(ProgramKey, Initializer.GetPSOPrecacheCompileType(), VertexShaderRHI, PixelShaderRHI);
			if (CompiledProgram.IsValid())
			{
				FOpenGLProgramBinaryCache::CacheProgramBinary(ProgramKey, TUniqueObj<FOpenGLProgramBinary>(MoveTemp(CompiledProgram)));
			}
			else
			{
				UE_LOG(LogRHI, Warning, TEXT("Program binary generation failed (%s), omitted from binary cache."), *ProgramKey.ToString());
			}
		}
		else
		{
			// This PSO was not present when the binary cache was created.
			// Do nothing and hitch when used for rendering.
			// Mitigation could be to compile on the RHIT or append to the existing cache?
			UE_LOG(LogRHI, Warning, TEXT("New PSO encountered during precompile %s"), *ProgramKey.ToString());
		}
	}
}
/**
 * Returns a bound shader state for the given vertex declaration and shader stages.
 *
 * An existing cached state with identical parameters is reused (its program is touched in the
 * program cache and its shaders refreshed); otherwise a new FOpenGLBoundShaderState is created.
 * A null pixel shader is replaced by the global FNULLPS shader. bFromPSOFileCache must be false.
 */
FBoundShaderStateRHIRef FOpenGLDynamicRHI::RHICreateBoundShaderState_Internal(
	FRHIVertexDeclaration* VertexDeclarationRHI,
	FRHIVertexShader* VertexShaderRHI,
	FRHIPixelShader* PixelShaderRHI,
	FRHIGeometryShader* GeometryShaderRHI,
	bool bFromPSOFileCache
	)
{
	VERIFY_GL_SCOPE();
	check(!bFromPSOFileCache);
	SCOPE_CYCLE_COUNTER(STAT_OpenGLCreateBoundShaderStateTime);

	FOpenGLVertexDeclaration* VertexDeclaration = FOpenGLDynamicRHI::ResourceCast(VertexDeclarationRHI);
	FOpenGLVertexShader* VertexShader = FOpenGLDynamicRHI::ResourceCast(VertexShaderRHI);
	FOpenGLPixelShader* PixelShader = FOpenGLDynamicRHI::ResourceCast(PixelShaderRHI);
	FOpenGLGeometryShader* GeometryShader = FOpenGLDynamicRHI::ResourceCast(GeometryShaderRHI);

	if (PixelShader == nullptr)
	{
		// use special null pixel shader when PixelShader was set to NULL
		TShaderMapRef<FNULLPS> NullPS(GetGlobalShaderMap(GMaxRHIFeatureLevel));
		PixelShader = FOpenGLDynamicRHI::ResourceCast(NullPS.GetPixelShader());
	}

	// Check for an existing bound shader state which matches the parameters
	if (FCachedBoundShaderStateLink* ExistingLink = GetCachedBoundShaderState(VertexDeclaration, VertexShader, PixelShader, GeometryShader))
	{
		// If we've already created a bound shader state with these parameters, reuse it.
		FOpenGLBoundShaderState* ReusedState = ResourceCast(ExistingLink->BoundShaderState);
		GetOpenGLProgramsCache().Touch(ReusedState->LinkedProgram);
		// touch may have unevicted the program, set it up.
		ReusedState->LinkedProgram->UpdateShaders<FOpenGLLinkedProgram::FGraphicsProgram>(VertexShader, PixelShader, GeometryShader);
		return ReusedState;
	}

	// Make sure we have OpenGL context set up, and invalidate the parameters cache and current program (as we'll link a new one soon)
	ContextState.Program = -1;
	MarkShaderParameterCachesDirty(PendingState.ShaderParameters, false);
	PendingState.LinkedProgramAndDirtyFlag = nullptr;

	return new FOpenGLBoundShaderState(VertexDeclaration, VertexShader, PixelShader, GeometryShader);
}
void DestroyShadersAndPrograms()
{
VERIFY_GL_SCOPE();
GetOpenGLProgramsCache().Empty();
StaticLastReleasedProgramsIndex = 0;
{
FScopeLock Lock(&GCompiledShaderCacheCS);
FOpenGLCompiledShaderCache& ShaderCache = GetOpenGLCompiledShaderCache();
for (FOpenGLCompiledShaderCache::TIterator It(ShaderCache); It; ++It)
{
FOpenGL::DeleteShader(It.Value().Resource);
}
ShaderCache.Empty();
}
}
void FOpenGLDynamicRHI::BindPendingShaderState()
{
SCOPE_CYCLE_COUNTER_DETAILED(STAT_OpenGLShaderBindTime);
VERIFY_GL_SCOPE();
bool ForceUniformBindingUpdate = false;
FOpenGLLinkedProgram* const PendingLinkedProgram = PendingState.BoundShaderState->LinkedProgram;
const GLuint PendingProgram = PendingLinkedProgram->Program;
if (ContextState.Program != PendingProgram)
{
FOpenGL::BindProgramPipeline(PendingProgram);
ContextState.Program = PendingProgram;
MarkShaderParameterCachesDirty(PendingState.ShaderParameters, false);
PendingState.LinkedProgramAndDirtyFlag = nullptr;
#if PLATFORM_ANDROID
// Disable non-coherent framebuffer fetch if it's being used for programmable blending to make sure that we actually fetch the last pixel value in draw order
if (ContextState.bNonCoherentFramebufferFetchEnabled)
{
if (PendingLinkedProgram->GetGraphicsProgram().bUsesProgrammableBlending)
{
FAndroidOpenGL::DisableNonCoherentFramebufferFetch();
}
}
#endif
}
if (PendingState.bAnyDirtyRealUniformBuffers[SF_Vertex] ||
PendingState.bAnyDirtyRealUniformBuffers[SF_Pixel] ||
PendingState.bAnyDirtyRealUniformBuffers[SF_Geometry])
{
int32 NextUniformBufferIndex = OGL_FIRST_UNIFORM_BUFFER;
static_assert(SF_NumGraphicsFrequencies == 5 && SF_NumFrequencies == 12, "Unexpected SF_ ordering");
static_assert(SF_RayGen > SF_NumGraphicsFrequencies, "SF_NumGraphicsFrequencies be the number of frequencies supported in OpenGL");
int32 NumUniformBuffers[SF_NumGraphicsFrequencies];
PendingState.BoundShaderState->GetNumUniformBuffers(NumUniformBuffers);
if (PendingState.bAnyDirtyRealUniformBuffers[SF_Vertex])
{
BindUniformBufferBase(
NumUniformBuffers[SF_Vertex],
PendingState.BoundUniformBuffers[SF_Vertex],
PendingState.BoundUniformBuffersDynamicOffset[SF_Vertex],
NextUniformBufferIndex,
ForceUniformBindingUpdate);
}
NextUniformBufferIndex += NumUniformBuffers[SF_Vertex];
if (PendingState.bAnyDirtyRealUniformBuffers[SF_Pixel])
{
BindUniformBufferBase(
NumUniformBuffers[SF_Pixel],
PendingState.BoundUniformBuffers[SF_Pixel],
PendingState.BoundUniformBuffersDynamicOffset[SF_Pixel],
NextUniformBufferIndex,
ForceUniformBindingUpdate);
}
NextUniformBufferIndex += NumUniformBuffers[SF_Pixel];
if (NumUniformBuffers[SF_Geometry] >= 0 && PendingState.bAnyDirtyRealUniformBuffers[SF_Geometry])
{
BindUniformBufferBase(
NumUniformBuffers[SF_Geometry],
PendingState.BoundUniformBuffers[SF_Geometry],
PendingState.BoundUniformBuffersDynamicOffset[SF_Geometry],
NextUniformBufferIndex,
ForceUniformBindingUpdate);
NextUniformBufferIndex += NumUniformBuffers[SF_Geometry];
}
PendingState.bAnyDirtyRealUniformBuffers[SF_Vertex] = false;
PendingState.bAnyDirtyRealUniformBuffers[SF_Pixel] = false;
PendingState.bAnyDirtyRealUniformBuffers[SF_Geometry] = false;
}
}
/**
 * Finds an existing linked program for the given stages, or links a new one.
 *
 * Lookup order: the ring of recently released programs, then the program cache (after letting
 * a pending binary-cache creation request for the same key complete). Any program that is
 * found gets its shaders refreshed through UpdateShaders before being returned.
 */
FOpenGLLinkedProgram* FOpenGLBoundShaderState::FindOrCreateLinkedProgram(FOpenGLVertexShader* VertexShader, FOpenGLPixelShader* PixelShader, FOpenGLGeometryShader* GeometryShader)
{
	const FOpenGLProgramKey ProgramKey(VertexShader, PixelShader, GeometryShader);

	// Check if we already have such a program in released programs cache. Use it, if we do.
	int32 RingSlot = StaticLastReleasedProgramsIndex;
	for (int32 Probe = 0; Probe < LAST_RELEASED_PROGRAMS_CACHE_COUNT; ++Probe)
	{
		FOpenGLLinkedProgram* Candidate = StaticLastReleasedPrograms[RingSlot];
		if (Candidate && Candidate->ProgramKey == ProgramKey)
		{
			// Take the program out of the released ring before handing it back.
			StaticLastReleasedPrograms[RingSlot] = nullptr;
			GetOpenGLProgramsCache().Touch(Candidate);
			Candidate->UpdateShaders<FOpenGLLinkedProgram::FGraphicsProgram>(VertexShader, PixelShader, GeometryShader);
			return Candidate;
		}
		RingSlot = (RingSlot + 1) % LAST_RELEASED_PROGRAMS_CACHE_COUNT;
	}

	FOpenGLLinkedProgram* CachedProgram = GetOpenGLProgramsCache().Find(ProgramKey, true);

	// ensure that pending request for this program has been completed before
	if (!CachedProgram && FOpenGLProgramBinaryCache::CheckSinglePendingGLProgramCreateRequest(ProgramKey))
	{
		CachedProgram = GetOpenGLProgramsCache().Find(ProgramKey, true);
	}

	if (!CachedProgram)
	{
		return new FOpenGLLinkedProgram(VertexShader, PixelShader, GeometryShader);
	}

	CachedProgram->UpdateShaders<FOpenGLLinkedProgram::FGraphicsProgram>(VertexShader, PixelShader, GeometryShader);
	return CachedProgram;
}
/**
 * Creates a bound shader state: registers the cache link, finds or links the GL program for
 * the given stages, and copies the vertex declaration's stream strides.
 *
 * A vertex declaration is expected (asserted); should it still be null, the strides are zeroed.
 */
FOpenGLBoundShaderState::FOpenGLBoundShaderState(
	FOpenGLVertexDeclaration* InVertexDeclaration,
	FOpenGLVertexShader* InVertexShader,
	FOpenGLPixelShader* InPixelShader,
	FOpenGLGeometryShader* InGeometryShader
	)
	: CacheLink (InVertexDeclaration, InVertexShader, InPixelShader, InGeometryShader, this)
	, LinkedProgram (FindOrCreateLinkedProgram(InVertexShader, InPixelShader, InGeometryShader))
	, VertexDeclaration(InVertexDeclaration)
	, VertexShader (InVertexShader)
	, PixelShader (InPixelShader)
	, GeometryShader (InGeometryShader)
{
	check(VertexDeclaration);

	if (!VertexDeclaration)
	{
		FMemory::Memzero(StreamStrides, sizeof(StreamStrides));
	}
	else
	{
		FMemory::Memcpy(StreamStrides, VertexDeclaration->StreamStrides, sizeof(StreamStrides));
	}
}
/**
 * Releases the bound shader state.
 *
 * Unless the program is currently evicted from the LRU cache, it is stored in the ring of
 * recently released programs and the RHI is notified through OnProgramDeletion.
 */
FOpenGLBoundShaderState::~FOpenGLBoundShaderState()
{
	VERIFY_GL_SCOPE();
	check(LinkedProgram);

	if (FGLProgramCache::IsUsingLRU() && GetOpenGLProgramsCache().IsEvicted(LinkedProgram->ProgramKey))
	{
		return;
	}

	// Store in the current ring slot, then advance the index with wrap-around.
	StaticLastReleasedPrograms[StaticLastReleasedProgramsIndex] = LinkedProgram;
	++StaticLastReleasedProgramsIndex;
	if (StaticLastReleasedProgramsIndex == LAST_RELEASED_PROGRAMS_CACHE_COUNT)
	{
		StaticLastReleasedProgramsIndex = 0;
	}

	FOpenGLDynamicRHI::Get().OnProgramDeletion(LinkedProgram->Program);
}
/** Returns the linked program's TextureStageNeeds entry for the given texture stage index. */
bool FOpenGLBoundShaderState::NeedsTextureStage(int32 TextureStageIndex)
{
	const bool bStageIsNeeded = LinkedProgram->TextureStageNeeds[TextureStageIndex];
	return bStageIsNeeded;
}
/** Returns the linked program's MaxTextureStage value. */
int32 FOpenGLBoundShaderState::MaxTextureStageUsed()
{
	const int32 HighestTextureStage = LinkedProgram->MaxTextureStage;
	return HighestTextureStage;
}
/**
 * Returns the linked program's texture-stage usage bits.
 * @param OutMaxTextureStageUsed  receives the linked program's MaxTextureStage.
 */
const TBitArray<>& FOpenGLBoundShaderState::GetTextureNeeds(int32& OutMaxTextureStageUsed)
{
	const auto& Linked = *LinkedProgram;
	OutMaxTextureStageUsed = Linked.MaxTextureStage;
	return Linked.TextureStageNeeds;
}
/**
 * Returns the linked program's UAV unit usage bits.
 * @param OutMaxUAVUnitUsed  receives the linked program's MaxUAVUnitUsed.
 */
const TBitArray<>& FOpenGLBoundShaderState::GetUAVNeeds(int32& OutMaxUAVUnitUsed) const
{
	const auto& Linked = *LinkedProgram;
	OutMaxUAVUnitUsed = Linked.MaxUAVUnitUsed;
	return Linked.UAVStageNeeds;
}
/**
 * Writes the uniform buffer count of the vertex, pixel and geometry stages into the output array.
 * The geometry entry is set to -1 when there is no geometry shader; other entries are not written.
 */
void FOpenGLBoundShaderState::GetNumUniformBuffers(int32 NumUniformBuffers[SF_NumGraphicsFrequencies])
{
	check(IsValidRef(VertexShader) && IsValidRef(PixelShader));

	NumUniformBuffers[SF_Vertex] = VertexShader->Bindings.NumUniformBuffers;
	NumUniformBuffers[SF_Pixel] = PixelShader->Bindings.NumUniformBuffers;

	if (GeometryShader)
	{
		NumUniformBuffers[SF_Geometry] = GeometryShader->Bindings.NumUniformBuffers;
	}
	else
	{
		NumUniformBuffers[SF_Geometry] = -1;
	}
}
/**
 * Returns true the first time it is called for a linked program whose bDrawn flag is not yet
 * set, and sets that flag; returns false on every later call.
 */
bool FOpenGLBoundShaderState::RequiresDriverInstantiation()
{
	check(LinkedProgram);

	if (LinkedProgram->bDrawn)
	{
		return false;
	}

	LinkedProgram->bDrawn = true;
	return true;
}
/** Returns the linked program's TextureStageNeeds entry for the given texture stage index. */
bool FOpenGLComputeShader::NeedsTextureStage(int32 TextureStageIndex)
{
	const bool bStageIsNeeded = LinkedProgram->TextureStageNeeds[TextureStageIndex];
	return bStageIsNeeded;
}
/** Returns the linked program's MaxTextureStage value. */
int32 FOpenGLComputeShader::MaxTextureStageUsed()
{
	const int32 HighestTextureStage = LinkedProgram->MaxTextureStage;
	return HighestTextureStage;
}
/**
 * Returns the linked program's texture-stage usage bits.
 * @param OutMaxTextureStageUsed  receives the linked program's MaxTextureStage.
 */
const TBitArray<>& FOpenGLComputeShader::GetTextureNeeds(int32& OutMaxTextureStageUsed)
{
	const auto& Linked = *LinkedProgram;
	OutMaxTextureStageUsed = Linked.MaxTextureStage;
	return Linked.TextureStageNeeds;
}
/**
 * Returns the linked program's UAV unit usage bits.
 * @param OutMaxUAVUnitUsed  receives the linked program's MaxUAVUnitUsed.
 */
const TBitArray<>& FOpenGLComputeShader::GetUAVNeeds(int32& OutMaxUAVUnitUsed) const
{
	const auto& Linked = *LinkedProgram;
	OutMaxUAVUnitUsed = Linked.MaxUAVUnitUsed;
	return Linked.UAVStageNeeds;
}
/** Returns the linked program's UAVStageNeeds entry for the given UAV unit index. */
bool FOpenGLComputeShader::NeedsUAVStage(int32 UAVStageIndex) const
{
	const bool bUnitIsNeeded = LinkedProgram->UAVStageNeeds[UAVStageIndex];
	return bUnitIsNeeded;
}
/**
 * Makes the compute shader's linked program current and re-binds its uniform buffers when
 * they are flagged dirty.
 *
 * The program is first touched in the program cache and its shaders refreshed. When the bound
 * program actually changes, the parameter caches are marked dirty and the uniform buffer
 * bindings are forced to update.
 */
void FOpenGLDynamicRHI::BindPendingComputeShaderState(FOpenGLComputeShader* ComputeShader)
{
	VERIFY_GL_SCOPE();

	GetOpenGLProgramsCache().Touch(ComputeShader->LinkedProgram);
	ComputeShader->LinkedProgram->UpdateShaders<FOpenGLLinkedProgram::FComputeProgram>(ComputeShader);

	bool bForceRebind = false;
	GLuint ProgramToBind = ComputeShader->LinkedProgram->Program;
	if (ContextState.Program != ProgramToBind)
	{
		FOpenGL::BindProgramPipeline(ProgramToBind);
		ContextState.Program = ProgramToBind;
		MarkShaderParameterCachesDirty(PendingState.ShaderParameters, true);
		PendingState.LinkedProgramAndDirtyFlag = nullptr;
		bForceRebind = true;
	}

	if (!PendingState.bAnyDirtyRealUniformBuffers[SF_Compute])
	{
		return;
	}

	BindUniformBufferBase(
		ComputeShader->Bindings.NumUniformBuffers,
		PendingState.BoundUniformBuffers[SF_Compute],
		PendingState.BoundUniformBuffersDynamicOffset[SF_Compute],
		OGL_FIRST_UNIFORM_BUFFER,
		bForceRebind);
	PendingState.bAnyDirtyRealUniformBuffers[SF_Compute] = false;
}
/**
 * Constructor. Marks the cache as not yet initialized (array size -1) and gives every packed
 * uniform array an empty dirty range. No storage is allocated here.
 */
FOpenGLShaderParameterCache::FOpenGLShaderParameterCache()
	: GlobalUniformArraySize(-1)
{
	for (int32 TypeIndex = 0; TypeIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++TypeIndex)
	{
		auto& DirtyRange = PackedGlobalUniformDirty[TypeIndex];
		DirtyRange.StartVector = 0;
		DirtyRange.NumVectors = 0;
	}
}
/**
 * Allocates and zeroes the packed global uniform storage and the scratch storage, then marks
 * every per-type array as fully dirty. Must only be called on a cache that has not been
 * initialized yet (GlobalUniformArraySize == -1).
 * @param UniformArraySize - Requested size in bytes of each per-type array; rounded up to a multiple of SizeOfFloat4.
 */
void FOpenGLShaderParameterCache::InitializeResources(int32 UniformArraySize)
{
	check(GlobalUniformArraySize == -1);

	// Uniform arrays have to be multiples of float4s.
	UniformArraySize = Align(UniformArraySize, SizeOfFloat4);

	// Each of the two stores is a single allocation that holds all per-type arrays back to back.
	const auto TotalBytes = UniformArraySize * CrossCompiler::PACKED_TYPEINDEX_MAX;
	uint8* const GlobalsBase = (uint8*)FMemory::Malloc(TotalBytes);
	uint8* const ScratchBase = (uint8*)FMemory::Malloc(TotalBytes);
	FMemory::Memzero(GlobalsBase, TotalBytes);
	FMemory::Memzero(ScratchBase, TotalBytes);

	GlobalUniformArraySize = UniformArraySize;

	// Carve the allocations into per-type slices and flag each slice as dirty in full.
	for (int32 TypeIndex = 0; TypeIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++TypeIndex)
	{
		PackedGlobalUniforms[TypeIndex] = GlobalsBase + TypeIndex * UniformArraySize;
		PackedUniformsScratch[TypeIndex] = ScratchBase + TypeIndex * UniformArraySize;

		PackedGlobalUniformDirty[TypeIndex].StartVector = 0;
		PackedGlobalUniformDirty[TypeIndex].NumVectors = UniformArraySize / SizeOfFloat4;
	}
}
/**
 * Destructor. Releases the two backing allocations made by InitializeResources (if it was
 * called), clears the per-type pointer tables and returns the cache to the uninitialized state.
 */
FOpenGLShaderParameterCache::~FOpenGLShaderParameterCache()
{
	// -1 is the "never initialized" sentinel used throughout this class. Any non-negative size,
	// including 0, means InitializeResources ran and the element-0 pointers own the allocations,
	// so they must be freed; testing for "> 0" would leak them for a zero-sized cache.
	if (GlobalUniformArraySize >= 0)
	{
		FMemory::Free(PackedUniformsScratch[0]);
		FMemory::Free(PackedGlobalUniforms[0]);
	}

	FMemory::Memzero(PackedUniformsScratch);
	FMemory::Memzero(PackedGlobalUniforms);
	GlobalUniformArraySize = -1;
}
/**
* Marks all uniform arrays as dirty.
*/
void FOpenGLShaderParameterCache::MarkAllDirty()
{
for (int32 ArrayIndex = 0; ArrayIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++ArrayIndex)
{
PackedGlobalUniformDirty[ArrayIndex].StartVector = 0;
PackedGlobalUniformDirty[ArrayIndex].NumVectors = GlobalUniformArraySize / SizeOfFloat4;
}
}
/**
 * Set parameter values.
 * Copies NumBytes from NewValues into the packed global uniform array selected by
 * BufferIndexName and marks every float4 vector touched by the write as dirty.
 * @param BufferIndexName - Packed type name; converted to a type index with CrossCompiler::PackedTypeNameToTypeIndex.
 * @param ByteOffset - Destination offset in bytes inside the selected array.
 * @param NumBytes - Number of bytes to copy.
 * @param NewValues - Source data; must provide at least NumBytes bytes.
 */
void FOpenGLShaderParameterCache::Set(uint32 BufferIndexName, uint32 ByteOffset, uint32 NumBytes, const void* NewValues)
{
	uint32 BufferIndex = CrossCompiler::PackedTypeNameToTypeIndex(BufferIndexName);
	check(GlobalUniformArraySize != -1);
	check(BufferIndex < CrossCompiler::PACKED_TYPEINDEX_MAX);
	// Written so that ByteOffset + NumBytes cannot wrap around and slip past the bounds check.
	check(ByteOffset <= (uint32)GlobalUniformArraySize && NumBytes <= (uint32)GlobalUniformArraySize - ByteOffset);

	// The dirty range has to cover every vector the write touches. Deriving the count from NumBytes
	// alone misses the last vector when the write starts mid-vector and crosses a float4 boundary
	// (e.g. 16 bytes at offset 8 touch vectors 0 and 1), so compute it from the end of the write.
	const uint32 FirstVector = ByteOffset / SizeOfFloat4;
	const uint32 EndVector = (ByteOffset + NumBytes + SizeOfFloat4 - 1) / SizeOfFloat4;
	const uint32 NumDirtyVectors = (NumBytes > 0) ? (EndVector - FirstVector) : 0u;
	PackedGlobalUniformDirty[BufferIndex].MarkDirtyRange(FirstVector, NumDirtyVectors);

	FMemory::Memcpy(PackedGlobalUniforms[BufferIndex] + ByteOffset, NewValues, NumBytes);
}
/**
 * Commit shader parameters to the given program.
 * For every packed uniform array of the requested stage that has a valid location and a
 * non-empty dirty range, uploads the dirty vectors with glProgramUniform4{f,i,ui}v and then
 * clears that dirty range.
 * @param LinkedProgram - Program whose stage information and GL program name are used for the upload.
 * @param Stage - Shader stage whose packed global arrays are committed.
 */
void FOpenGLShaderParameterCache::CommitPackedGlobals(const FOpenGLLinkedProgram* LinkedProgram, CrossCompiler::EShaderStage Stage)
{
	SCOPE_CYCLE_COUNTER(STAT_OpenGLUniformCommitTime);
	VERIFY_GL_SCOPE();
	const uint32 BytesPerRegister = 16;

	/**
	 * Note that this always uploads the entire uniform array when it is dirty.
	 * The arrays are marked dirty either when the bound shader state changes or
	 * a value in the array is modified. OpenGL actually caches uniforms per-
	 * program. If we shadowed those per-program uniforms we could avoid calling
	 * glUniform4?v for values that have not changed since the last invocation
	 * of the program.
	 *
	 * It's unclear whether the driver does the same thing and whether there is
	 * a performance benefit. Even if there is, this type of caching makes any
	 * multithreading vastly more difficult, so for now uniforms are not cached
	 * per-program.
	 */
	FOpenGLLinkedProgram::FShaderStage const& ShaderStage = LinkedProgram->GetStage(Stage);
	for (int32 PackedUniform = 0; PackedUniform < ShaderStage.PackedUniformInfos.Num(); ++PackedUniform)
	{
		auto const& UniformInfo = ShaderStage.PackedUniformInfos[PackedUniform];
		GLint Location = UniformInfo.Location;
		if (Location < 0)
		{
			// Probably this uniform array was optimized away in a linked program
			continue;
		}

		// Validate the type index BEFORE it is used to address any of the per-type arrays.
		check(UniformInfo.Index < CrossCompiler::PACKED_TYPEINDEX_MAX);

		auto& DirtyRange = PackedGlobalUniformDirty[UniformInfo.Index];
		if (DirtyRange.NumVectors > 0)
		{
			const uint32 NumVectors = ShaderStage.Bindings.PackedGlobalArrays[PackedUniform].Size / BytesPerRegister;
			const uint32 StartVector = DirtyRange.StartVector;
			// NOTE(review): NumVectors - StartVector is unsigned and wraps if the dirty range starts past
			// this program's array; the check below only catches StartVector == NumVectors. Confirm that
			// callers never mark vectors beyond the bound program's array size.
			int32 NumDirtyVectors = FMath::Min(DirtyRange.NumVectors, NumVectors - StartVector);
			check(NumDirtyVectors);

			// Upload starts at the first dirty vector, both in the CPU-side array and in the uniform location.
			const void* UniformData = (uint8*)PackedGlobalUniforms[UniformInfo.Index] + StartVector * BytesPerRegister;
			Location += StartVector;
			switch (UniformInfo.Index)
			{
			case CrossCompiler::PACKED_TYPEINDEX_HIGHP:
			case CrossCompiler::PACKED_TYPEINDEX_MEDIUMP:
			case CrossCompiler::PACKED_TYPEINDEX_LOWP:
				FOpenGL::ProgramUniform4fv(LinkedProgram->Program, Location, NumDirtyVectors, static_cast<const GLfloat*>(UniformData));
				break;
			case CrossCompiler::PACKED_TYPEINDEX_INT:
				FOpenGL::ProgramUniform4iv(LinkedProgram->Program, Location, NumDirtyVectors, static_cast<const GLint*>(UniformData));
				break;
			case CrossCompiler::PACKED_TYPEINDEX_UINT:
				FOpenGL::ProgramUniform4uiv(LinkedProgram->Program, Location, NumDirtyVectors, static_cast<const GLuint*>(UniformData));
				break;
			}

			// The array is clean again once it has been committed.
			DirtyRange.StartVector = 0;
			DirtyRange.NumVectors = 0;
		}
	}
}
/**
 * Copies the contents of emulated uniform buffers into the packed per-type arrays of this cache.
 *
 * When the stage's bindings have bFlattenUB set, the data is copied into the packed global
 * uniform arrays and the touched vectors are marked dirty (no GL upload happens here).
 * Otherwise the data of each emulated buffer whose UniqueID differs from the one last recorded
 * for that slot is copied into the scratch arrays and uploaded immediately with
 * glProgramUniform4{f,i,ui}v.
 *
 * @param LinkedProgram - Program whose stage bindings, upload infos and GL program name are used.
 * @param Stage - Shader stage being committed.
 * @param RHIUniformBuffers - Uniform buffers bound to the stage, indexed by buffer slot.
 * @param UniformBuffersCopyInfo - Copy commands; the loops below rely on them being grouped by SourceUBIndex
 *                                 in ascending order (assumption implied by the LastInfoIndex bookkeeping).
 */
void FOpenGLShaderParameterCache::CommitPackedUniformBuffers(FOpenGLLinkedProgram* LinkedProgram, CrossCompiler::EShaderStage Stage, FRHIUniformBuffer** RHIUniformBuffers, const TArray<CrossCompiler::FUniformBufferCopyInfo>& UniformBuffersCopyInfo)
{
	SCOPE_CYCLE_COUNTER(STAT_OpenGLConstantBufferUpdateTime);
	VERIFY_GL_SCOPE();

	// Uniform Buffers are split into precision/type; the list of RHI UBs is traversed and if a new one was set, its
	// contents are copied per precision/type into corresponding scratch buffers which are then uploaded to the program
	const FOpenGLShaderBindings& Bindings = LinkedProgram->GetStage(Stage).Bindings;
	check(Bindings.NumUniformBuffers <= FOpenGLRHIState::MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE);

	if (Bindings.bFlattenUB)
	{
		int32 LastInfoIndex = 0;
		for (int32 BufferIndex = 0; BufferIndex < Bindings.NumUniformBuffers; ++BufferIndex)
		{
			const FOpenGLUniformBuffer* UniformBuffer = (FOpenGLUniformBuffer*)RHIUniformBuffers[BufferIndex];
			check(UniformBuffer);
			if (!UniformBuffer->bIsEmulatedUniformBuffer)
			{
				continue;
			}

			const uint32* RESTRICT SourceData = UniformBuffer->EmulatedBufferData->Data.GetData();
			for (int32 InfoIndex = LastInfoIndex; InfoIndex < UniformBuffersCopyInfo.Num(); ++InfoIndex)
			{
				const CrossCompiler::FUniformBufferCopyInfo& Info = UniformBuffersCopyInfo[InfoIndex];
				if (Info.SourceUBIndex == BufferIndex)
				{
					check((Info.DestOffsetInFloats + Info.SizeInFloats) * sizeof(float) <= (uint32)GlobalUniformArraySize);
					float* RESTRICT ScratchMem = (float*)PackedGlobalUniforms[Info.DestUBTypeIndex];
					ScratchMem += Info.DestOffsetInFloats;
					FMemory::Memcpy(ScratchMem, SourceData + Info.SourceOffsetInFloats, Info.SizeInFloats * sizeof(float));

					// Mark every float4 the copy touched. The count is derived from the end of the copy so
					// that a copy starting mid-vector and crossing a float4 boundary still dirties its last vector.
					const uint32 FirstVector = Info.DestOffsetInFloats / NumFloatsInFloat4;
					const uint32 EndVector = (Info.DestOffsetInFloats + Info.SizeInFloats + NumFloatsInFloat4 - 1) / NumFloatsInFloat4;
					const uint32 NumDirtyVectors = (Info.SizeInFloats > 0) ? (EndVector - FirstVector) : 0u;
					PackedGlobalUniformDirty[Info.DestUBTypeIndex].MarkDirtyRange(FirstVector, NumDirtyVectors);
				}
				else
				{
					// First command that belongs to another buffer: resume from here for the next buffer.
					LastInfoIndex = InfoIndex;
					break;
				}
			}
		}
	}
	else
	{
		FOpenGLLinkedProgram::FShaderStage const& ShaderStage = LinkedProgram->GetStage(Stage);
		auto& EmulatedUniformBufferSet = ShaderStage.LastEmulatedUniformBufferSet;
		int32 LastCopyInfoIndex = 0;

		for (int32 BufferIndex = 0; BufferIndex < Bindings.NumUniformBuffers; ++BufferIndex)
		{
			const FOpenGLUniformBuffer* UniformBuffer = (FOpenGLUniformBuffer*)RHIUniformBuffers[BufferIndex];
			if (UniformBuffer && !UniformBuffer->bIsEmulatedUniformBuffer)
			{
				continue;
			}

			// Workaround for null UBs (FORT-323429), additional logging here is to give us a chance to investigate the higher level issue causing the null UB.
#if !UE_BUILD_SHIPPING
			UE_CLOG(UniformBuffer == nullptr && EmulatedUniformBufferSet.IsValidIndex(BufferIndex), LogRHI, Fatal, TEXT("CommitPackedUniformBuffers null UB stage %d, idx %d (%d), %s"), Stage, BufferIndex, EmulatedUniformBufferSet.Num(), *LinkedProgram->ProgramKey.ToString());
#endif

			// Only re-copy and re-upload when this slot now holds a different buffer than the one last recorded.
			if (UniformBuffer && EmulatedUniformBufferSet.IsValidIndex(BufferIndex) && EmulatedUniformBufferSet[BufferIndex] != UniformBuffer->UniqueID)
			{
				EmulatedUniformBufferSet[BufferIndex] = UniformBuffer->UniqueID;

				// Go through the list of copy commands and perform the appropriate copy into the scratch buffer
				for (int32 InfoIndex = LastCopyInfoIndex; InfoIndex < UniformBuffersCopyInfo.Num(); ++InfoIndex)
				{
					const CrossCompiler::FUniformBufferCopyInfo& Info = UniformBuffersCopyInfo[InfoIndex];
					if (Info.SourceUBIndex == BufferIndex)
					{
						const uint32* RESTRICT SourceData = UniformBuffer->EmulatedBufferData->Data.GetData();
						SourceData += Info.SourceOffsetInFloats;
						float* RESTRICT ScratchMem = (float*)PackedUniformsScratch[Info.DestUBTypeIndex];
						ScratchMem += Info.DestOffsetInFloats;
						FMemory::Memcpy(ScratchMem, SourceData, Info.SizeInFloats * sizeof(float));
					}
					else if (Info.SourceUBIndex > BufferIndex)
					{
						// Done finding current copies
						LastCopyInfoIndex = InfoIndex;
						break;
					}
					// keep going since we could have skipped this loop when skipping cached UBs...
				}

				// Upload the split buffers to the program
				const auto& UniformBufferUploadInfoList = ShaderStage.PackedUniformBufferInfos[BufferIndex];
				for (int32 InfoIndex = 0; InfoIndex < UniformBufferUploadInfoList.Num(); ++InfoIndex)
				{
					auto& UBInfo = Bindings.PackedUniformBuffers[BufferIndex];
					const auto& UniformInfo = UniformBufferUploadInfoList[InfoIndex];
					if (UniformInfo.Location < 0)
					{
						// Optimized out
						continue;
					}

					const void* RESTRICT UniformData = PackedUniformsScratch[UniformInfo.Index];
					int32 NumVectors = UBInfo[InfoIndex].Size / SizeOfFloat4;
					check(UniformInfo.ArrayType == UBInfo[InfoIndex].TypeName);

					switch (UniformInfo.Index)
					{
					case CrossCompiler::PACKED_TYPEINDEX_HIGHP:
					case CrossCompiler::PACKED_TYPEINDEX_MEDIUMP:
					case CrossCompiler::PACKED_TYPEINDEX_LOWP:
						FOpenGL::ProgramUniform4fv(LinkedProgram->Program, UniformInfo.Location, NumVectors, (GLfloat*)UniformData);
						break;
					case CrossCompiler::PACKED_TYPEINDEX_INT:
						FOpenGL::ProgramUniform4iv(LinkedProgram->Program, UniformInfo.Location, NumVectors, (GLint*)UniformData);
						break;
					case CrossCompiler::PACKED_TYPEINDEX_UINT:
						FOpenGL::ProgramUniform4uiv(LinkedProgram->Program, UniformInfo.Location, NumVectors, (GLuint*)UniformData);
						break;
					}
				}
			}
		}
	}
}
namespace UE
{
namespace OpenGL
{
// Called from the binary file cache when the binary version of a program has been encountered.
void OnGLProgramLoadedFromBinaryCache(const FOpenGLProgramKey& ProgramKey, TUniqueObj<FOpenGLProgramBinary>&& ProgramBinaryData)
{
OGL_BINARYCACHE_STATS_MARKCOMPILED(ProgramKey);
QUICK_SCOPE_CYCLE_COUNTER(STAT_OpenGLOnGLProgramLoadedFromBinaryCache);
// FScopeLock Lock(&GProgramBinaryCacheCS);
FOpenGLLinkedProgram* FoundProgram = GetOpenGLProgramsCache().Find(ProgramKey, false);
const bool bProgramExists = FoundProgram != nullptr;
if (FGLProgramCache::IsUsingLRU())
{
const bool bIsEvicted = bProgramExists && GetOpenGLProgramsCache().IsEvicted(ProgramKey);
// always replace any existing binary data with this.
if (!bProgramExists || bIsEvicted)
{
check(!bProgramExists || FoundProgram->LRUInfo.CachedProgramBinary->GetDataView().IsEmpty());
// Always add programs as evicted, 1st use will create them as programs.
// This will reduce pressure on driver by ensuring only used programs
// are created.
// In this case do not create the GL program.
GetOpenGLProgramsCache().AddOrReplaceEvicted(ProgramKey, MoveTemp(ProgramBinaryData));
}
else
{
// replace the existing program with the incoming data.
// For PSO cache programs this will replace the heap allocated data with a region of the mmapped program file.
FoundProgram->LRUInfo.CachedProgramBinary = MoveTemp(ProgramBinaryData);
}
}
else
{
if (!bProgramExists)
{
GLuint GLProgramId = 0;
bool bSuccess = UE::OpenGL::CreateGLProgramFromBinary(GLProgramId, ProgramBinaryData->GetDataView());
if (!bSuccess)
{
UE_LOG(LogRHI, Log, TEXT("[%s, %d, %d]"), *ProgramKey.ToString(), GLProgramId, ProgramBinaryData->GetDataView().Num());
RHIGetPanicDelegate().ExecuteIfBound(FName("FailedBinaryProgramCreateLoadRequest"));
UE_LOG(LogRHI, Fatal, TEXT("CompleteLoadedGLProgramRequest_internal : Failed to create GL program from binary data! [%s]"), *ProgramKey.ToString());
}
FOpenGLLinkedProgram* NewLinkedProgram = new FOpenGLLinkedProgram(ProgramKey, GLProgramId);
GetOpenGLProgramsCache().Add(ProgramKey, NewLinkedProgram);
}
}
}
}
}
static void TickProgramLRU()
{
if (FGLProgramCache::IsUsingLRU() && GUntouchedProgramEvictTimeSeconds > 0)
{
// FScopeLock Lock(&GProgramBinaryCacheCS);
int FramePace = FPlatformRHIFramePacer::GetFramePace();
int MaxProgramsToEvictPerFrame = 10;
int32 CurrentResidentCount = GetOpenGLProgramsCache().GetLRUSize();
MaxProgramsToEvictPerFrame = FMath::Min(FMath::Max(0, CurrentResidentCount - GProgramLRUResidentCountBeforeEviction), MaxProgramsToEvictPerFrame);
if (MaxProgramsToEvictPerFrame > 0)
{
GetOpenGLProgramsCache().EvictLeastRecentByPredicate(
[FramePace, &MaxProgramsToEvictPerFrame](FOpenGLLinkedProgram* LeastRecentProgram)
{
MaxProgramsToEvictPerFrame--;
uint32 LastFrameAllowed = GFrameNumber - FMath::Min(GFrameNumber, (uint32)(FramePace * GUntouchedProgramEvictTimeSeconds));
return MaxProgramsToEvictPerFrame >= 0 && LeastRecentProgram->LRUInfo.LastTouchedFrame < LastFrameAllowed;
}
);
}
}
}
/**
 * End-of-frame housekeeping: ticks the program LRU, the program binary cache and the
 * texture eviction LRU, then emits the binary cache statistics log.
 */
void FOpenGLDynamicRHI::EndFrameTick()
{
	// Program-related caches first.
	TickProgramLRU();
	FOpenGLProgramBinaryCache::TickBinaryCache();

	// Texture eviction.
	FTextureEvictionLRU::Get().TickEviction();

	OGL_BINARYCACHE_STATS_LOG();
}