// (extraction metadata — original file: 2752 lines, 97 KiB, C++)
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
VulkanPipeline.cpp: Vulkan device RHI implementation.
|
|
=============================================================================*/
|
|
|
|
#include "VulkanRHIPrivate.h"
|
|
#include "VulkanPipeline.h"
|
|
#include "HAL/PlatformFileManager.h"
|
|
#include "HAL/FileManager.h"
|
|
#include "Misc/Paths.h"
|
|
#include "Misc/FileHelper.h"
|
|
#include "Serialization/MemoryReader.h"
|
|
#include "Serialization/MemoryWriter.h"
|
|
#include "VulkanPendingState.h"
|
|
#include "VulkanContext.h"
|
|
#include "GlobalShader.h"
|
|
#include "VulkanLLM.h"
|
|
#include "Misc/ScopeRWLock.h"
|
|
#include "VulkanChunkedPipelineCache.h"
|
|
|
|
// Set to 1 for verbose LRU bookkeeping output via LRUPRINT_DEBUG.
#define LRU_DEBUG 0
#if !UE_BUILD_SHIPPING
// Low-level debug printing for the PSO LRU; compiled out in shipping builds.
#define LRUPRINT(...) FPlatformMisc::LowLevelOutputDebugStringf(__VA_ARGS__)
#if LRU_DEBUG
#define LRUPRINT_DEBUG(...) FPlatformMisc::LowLevelOutputDebugStringf(__VA_ARGS__)
#endif
#else
#define LRUPRINT(...) do{}while(0)
#endif

// Fallback no-op when LRU_DEBUG is disabled (or in shipping builds).
#ifndef LRUPRINT_DEBUG
#define LRUPRINT_DEBUG(...) do{}while(0)
#endif


// Per-platform defaults for the PSO LRU cache: pipeline memory budget
// (scaled by 1024*1024 when fed into CVarLRUMaxPipelineSize) and max PSO count.
#if PLATFORM_ANDROID
#define LRU_MAX_PIPELINE_SIZE 10
#define LRU_PIPELINE_CAPACITY 2048
#else
#define LRU_MAX_PIPELINE_SIZE 512 //needs to be super high to work on pc.
#define LRU_PIPELINE_CAPACITY 8192
#endif


#if !UE_BUILD_SHIPPING
// Live-object counters, used for leak/balance tracking in non-shipping builds.
static TAtomic<uint64> SGraphicsRHICount;
static TAtomic<uint64> SPipelineCount;
static TAtomic<uint64> SPipelineGfxCount;
#endif

// Threshold (seconds) above which an operation is treated as a hitch.
static const double HitchTime = 1.0 / 1000.0;
|
|
|
|
// Debug aid: evict every PSO the moment it is created.
TAutoConsoleVariable<int32> CVarPipelineDebugForceEvictImmediately(
	TEXT("r.Vulkan.PipelineDebugForceEvictImmediately"),
	0,
	TEXT("1: Force all created PSOs to be evicted immediately. Only for debugging"),
	ECVF_RenderThreadSafe | ECVF_ReadOnly);

// Show a preload screen while pre-evicted PSOs are being loaded.
TAutoConsoleVariable<int32> CVarPipelineLRUCacheEvictBinaryPreloadScreen(
	TEXT("r.Vulkan.PipelineLRUCacheEvictBinaryPreloadScreen"),
	0,
	TEXT("1: Use a preload screen while loading preevicted PSOs ala r.Vulkan.PipelineLRUCacheEvictBinary"),
	ECVF_RenderThreadSafe);

// Master switch for the pipeline LRU cache (read once at startup).
TAutoConsoleVariable<int32> CVarEnableLRU(
	TEXT("r.Vulkan.EnablePipelineLRUCache"),
	0,
	TEXT("Pipeline LRU cache.\n")
	TEXT("0: disable LRU\n")
	TEXT("1: Enable LRU"),
	ECVF_RenderThreadSafe | ECVF_ReadOnly);

// Whether PSOs created from the binary caches are instantiated or immediately evicted.
TAutoConsoleVariable<int32> CVarPipelineLRUCacheEvictBinary(
	TEXT("r.Vulkan.PipelineLRUCacheEvictBinary"),
	0,
	TEXT("0: create pipelines in from the binary PSO cache and binary shader cache and evict them only as it fills up.\n")
	TEXT("1: don't create pipelines....just immediately evict them"),
	ECVF_RenderThreadSafe | ECVF_ReadOnly);

// Memory budget for resident pipelines; default derives from LRU_MAX_PIPELINE_SIZE (MiB).
TAutoConsoleVariable<int32> CVarLRUMaxPipelineSize(
	TEXT("r.Vulkan.PipelineLRUSize"),
	LRU_MAX_PIPELINE_SIZE * 1024 * 1024,
	TEXT("Maximum size of shader memory ."),
	ECVF_RenderThreadSafe);

// Maximum number of PSOs kept in the LRU.
// NOTE(review): the cvar name "PipelineLRUCapactiy" is misspelled but shipped;
// renaming would break existing config, so it is kept as-is.
TAutoConsoleVariable<int32> CVarLRUPipelineCapacity(
	TEXT("r.Vulkan.PipelineLRUCapactiy"),
	LRU_PIPELINE_CAPACITY,
	TEXT("Maximum no. of PSOs in LRU."),
	ECVF_RenderThreadSafe| ECVF_ReadOnly);
|
|
|
|
|
|
|
|
|
|
|
|
static TAutoConsoleVariable<int32> GEnablePipelineCacheLoadCvar(
|
|
TEXT("r.Vulkan.PipelineCacheLoad"),
|
|
1,
|
|
TEXT("0 to disable loading the pipeline cache")
|
|
TEXT("1 to enable using pipeline cache")
|
|
);
|
|
|
|
static TAutoConsoleVariable<int32> GPipelineCacheFromShaderPipelineCacheCvar(
|
|
TEXT("r.Vulkan.PipelineCacheFromShaderPipelineCache"),
|
|
PLATFORM_ANDROID,
|
|
TEXT("0 look for a pipeline cache in the normal locations with the normal names.")
|
|
TEXT("1 tie the vulkan pipeline cache to the shader pipeline cache, use the PSOFC guid as part of the filename, etc."),
|
|
ECVF_ReadOnly
|
|
);
|
|
|
|
|
|
static int32 GEnablePipelineCacheCompression = 1;
|
|
static FAutoConsoleVariableRef GEnablePipelineCacheCompressionCvar(
|
|
TEXT("r.Vulkan.PipelineCacheCompression"),
|
|
GEnablePipelineCacheCompression,
|
|
TEXT("Enable/disable compression on the Vulkan pipeline cache disk file\n"),
|
|
ECVF_Default | ECVF_RenderThreadSafe
|
|
);
|
|
|
|
|
|
// Selects which categories of PSO creation are forced onto a single thread
// (workaround for drivers that misbehave under concurrent pipeline creation).
enum class ESingleThreadedPSOCreateMode
{
	None = 0,			// default: allow async creation everywhere
	All = 1,			// serialize creation of all PSOs
	Precompile = 2,		// serialize only precompile-time PSOs
	NonPrecompiled = 3,	// serialize only runtime (non-precompile) PSOs
};

static int32 GVulkanPSOForceSingleThreaded = (int32)ESingleThreadedPSOCreateMode::None;
static FAutoConsoleVariableRef GVulkanPSOForceSingleThreadedCVar(
	TEXT("r.Vulkan.ForcePSOSingleThreaded"),
	GVulkanPSOForceSingleThreaded,
	TEXT("Enable to force singlethreaded creation of PSOs. Only intended as a workaround for buggy drivers\n")
	TEXT("0: (default) Allow Async precompile PSO creation.\n")
	TEXT("1: force singlethreaded creation of all PSOs.\n")
	TEXT("2: force singlethreaded creation of precompile PSOs only.\n")
	TEXT("3: force singlethreaded creation of non-precompile PSOs only."),
	ECVF_ReadOnly | ECVF_RenderThreadSafe
);

// >0 enables age-based eviction: PSOs unused for this many frames are destroyed.
static int32 GVulkanPSOLRUEvictAfterUnusedFrames = 0;
static FAutoConsoleVariableRef GVulkanPSOLRUEvictAfterUnusedFramesCVar(
	TEXT("r.Vulkan.PSOLRUEvictAfterUnusedFrames"),
	GVulkanPSOLRUEvictAfterUnusedFrames,
	TEXT("0: unused PSOs are not removed from the PSO LRU cache. (default)\n")
	TEXT(">0: The number of frames an unused PSO can remain in the PSO LRU cache. When this is exceeded the PSO is destroyed and memory returned to the system. This can save memory with the risk of increased hitching.")
	, ECVF_RenderThreadSafe
);

// Whether evicting a PSO from the LRU also destroys its VkShaderModules
// (saves memory, costs CPU/hitches when the PSO is needed again).
static int32 GVulkanReleaseShaderModuleWhenEvictingPSO = 0;
static FAutoConsoleVariableRef GVulkanReleaseShaderModuleWhenEvictingPSOCVar(
	TEXT("r.Vulkan.ReleaseShaderModuleWhenEvictingPSO"),
	GVulkanReleaseShaderModuleWhenEvictingPSO,
	TEXT("0: shader modules remain when a PSO is removed from the PSO LRU cache. (default)\n")
	TEXT("1: shader modules are destroyed when a PSO is removed from the PSO LRU cache. This can save memory at the risk of increased hitching and cpu cost.")
	,ECVF_RenderThreadSafe
);
|
|
|
|
template <typename TRHIType, typename TVulkanType>
|
|
static inline FSHAHash GetShaderHash(TRHIType* RHIShader)
|
|
{
|
|
if (RHIShader)
|
|
{
|
|
const TVulkanType* VulkanShader = ResourceCast<TRHIType>(RHIShader);
|
|
const FVulkanShader* Shader = static_cast<const FVulkanShader*>(VulkanShader);
|
|
check(Shader);
|
|
return Shader->GetCodeHeader().SourceHash;
|
|
}
|
|
|
|
FSHAHash Dummy;
|
|
return Dummy;
|
|
}
|
|
|
|
// Returns the source hash for the shader bound to the given graphics stage of
// the PSO initializer. Stages compiled out for the platform (mesh/task,
// geometry) are only handled when the corresponding feature define is set.
// An unbound stage yields a zeroed hash via GetShaderHash's null path.
static inline FSHAHash GetShaderHashForStage(const FGraphicsPipelineStateInitializer& PSOInitializer, ShaderStage::EStage Stage)
{
	switch (Stage)
	{
	case ShaderStage::Vertex:		return GetShaderHash<FRHIVertexShader, FVulkanVertexShader>(PSOInitializer.BoundShaderState.VertexShaderRHI);
	case ShaderStage::Pixel:		return GetShaderHash<FRHIPixelShader, FVulkanPixelShader>(PSOInitializer.BoundShaderState.PixelShaderRHI);
#if PLATFORM_SUPPORTS_MESH_SHADERS
	case ShaderStage::Mesh:			return GetShaderHash<FRHIMeshShader, FVulkanMeshShader>(PSOInitializer.BoundShaderState.GetMeshShader());
	case ShaderStage::Task:			return GetShaderHash<FRHIAmplificationShader, FVulkanTaskShader>(PSOInitializer.BoundShaderState.GetAmplificationShader());
#endif
#if VULKAN_SUPPORTS_GEOMETRY_SHADERS
	case ShaderStage::Geometry:		return GetShaderHash<FRHIGeometryShader, FVulkanGeometryShader>(PSOInitializer.BoundShaderState.GetGeometryShader());
#endif
	default:	check(0);	break;	// unknown/unsupported stage for this platform
	}

	// Unreachable in practice; keeps the compiler happy after check(0).
	FSHAHash Dummy;
	return Dummy;
}
|
|
|
|
// Base pipeline wrapper: starts with no VkPipeline handle and no layout.
// The handle is created later; Layout is owned elsewhere (see destructor).
FVulkanPipeline::FVulkanPipeline(FVulkanDevice* InDevice)
	: Device(InDevice)
	, Pipeline(VK_NULL_HANDLE)
	, Layout(nullptr)
{
#if !UE_BUILD_SHIPPING
	// Balance with the decrement in ~FVulkanPipeline for leak tracking.
	SPipelineCount++;
#endif
}
|
|
|
|
// Queues the VkPipeline for deferred destruction (it may still be referenced
// by in-flight command buffers). The Layout pointer is deliberately NOT freed.
FVulkanPipeline::~FVulkanPipeline()
{
#if !UE_BUILD_SHIPPING
	SPipelineCount--;
#endif
	if (Pipeline != VK_NULL_HANDLE)
	{
		// Defer destruction until the GPU can no longer be using the pipeline.
		Device->GetDeferredDeletionQueue().EnqueueResource(VulkanRHI::FDeferredDeletionQueue2::EType::Pipeline, Pipeline);
		Pipeline = VK_NULL_HANDLE;
	}
	/* we do NOT own Layout !*/
}
|
|
|
|
// Compute PSO wrapper; caches whether the shader uses bindless resources and
// bumps the compute-PSO stat counter.
FVulkanComputePipeline::FVulkanComputePipeline(FVulkanDevice* InDevice, FVulkanComputeShader* InShader)
	: FVulkanPipeline(InDevice)
	, FRHIComputePipelineState(InShader)
	, bUsesBindless(InShader->UsesBindless())
{
	INC_DWORD_STAT(STAT_VulkanNumComputePSOs);
}
|
|
|
|
// Notifies the device so it can drop any cached references to this pipeline,
// then updates the stat counter. Base-class dtor handles the VkPipeline handle.
FVulkanComputePipeline::~FVulkanComputePipeline()
{
	Device->NotifyDeletedComputePipeline(this);
	DEC_DWORD_STAT(STAT_VulkanNumComputePSOs);
}
|
|
|
|
// Tears down a graphics PSO: updates debug/stat counters, tells the pipeline
// state cache to forget this PSO, and releases the reference held on each
// bound shader stage.
FVulkanRHIGraphicsPipelineState::~FVulkanRHIGraphicsPipelineState()
{
#if !UE_BUILD_SHIPPING
	SGraphicsRHICount--;
#endif
	DEC_DWORD_STAT(STAT_VulkanNumGraphicsPSOs);

	// Let the cache drop its bookkeeping before the shader refs go away.
	Device->PipelineStateCache->NotifyDeletedGraphicsPSO(this);

	for (int32 StageIndex = 0; StageIndex < ShaderStage::NumGraphicsStages; ++StageIndex)
	{
		if (FVulkanShader* StageShader = VulkanShaders[StageIndex])
		{
			StageShader->Release();
		}
	}
}
|
|
|
|
// Fills ShaderModulesOUT with a VkShaderModule handle for every bound stage.
// Output slots must be empty on entry; unbound stages are left invalid.
void FVulkanRHIGraphicsPipelineState::GetOrCreateShaderModules(TRefCountPtr<FVulkanShaderModule> (&ShaderModulesOUT)[ShaderStage::NumGraphicsStages], FVulkanShader*const* Shaders)
{
	for (int32 StageIndex = 0; StageIndex < ShaderStage::NumGraphicsStages; ++StageIndex)
	{
		// The caller must hand us empty slots; we only ever fill them here.
		check(!ShaderModulesOUT[StageIndex].IsValid());

		FVulkanShader* StageShader = Shaders[StageIndex];
		if (StageShader == nullptr)
		{
			continue; // stage not used by this PSO
		}

		ShaderModulesOUT[StageIndex] = StageShader->GetOrCreateHandle(Desc, Layout, Layout->GetDescriptorSetLayoutHash());
	}
}
|
|
|
|
// Returns the shader's SPIR-V patched against this PSO's descriptor layout.
// The shader must be non-null; patching depends on Desc and Layout.
FVulkanShader::FSpirvCode FVulkanRHIGraphicsPipelineState::GetPatchedSpirvCode(FVulkanShader* Shader)
{
	check(Shader != nullptr);
	return Shader->GetPatchedSpirvCode(Desc, Layout);
}
|
|
|
|
// Drops the cached VkShaderModules of every bound stage (used when evicting
// a PSO to reclaim memory; modules are recreated on demand).
void FVulkanRHIGraphicsPipelineState::PurgeShaderModules(FVulkanShader*const* Shaders)
{
	for (int32 StageIndex = 0; StageIndex < ShaderStage::NumGraphicsStages; ++StageIndex)
	{
		if (FVulkanShader* StageShader = Shaders[StageIndex])
		{
			StageShader->PurgeShaderModules();
		}
	}
}
|
|
|
|
// Snapshot the read-only LRU cvars once at construction; both cvars are
// ECVF_ReadOnly so they cannot change afterwards.
FVulkanPipelineStateCacheManager::FVulkanPipelineStateCacheManager(FVulkanDevice* InDevice)
	: Device(InDevice)
	, bEvictImmediately(false)
	, bPrecompilingCacheLoadedFromFile(false)
{
	bUseLRU = (int32)CVarEnableLRU.GetValueOnAnyThread() != 0;
	LRUUsedPipelineMax = CVarLRUPipelineCapacity.GetValueOnAnyThread();
}
|
|
|
|
|
|
// Shutdown: unhook pipeline-cache delegates, destroy all cached PSOs, then
// free layouts and the VkPipelineCache objects. Order matters: DestroyCache()
// must run before the layout maps are torn down.
FVulkanPipelineStateCacheManager::~FVulkanPipelineStateCacheManager()
{
	// Unregister the delegates added in InitAndLoad (if they were ever bound).
	if (OnShaderPipelineCacheOpenedDelegate.IsValid())
	{
		FShaderPipelineCache::GetCacheOpenedDelegate().Remove(OnShaderPipelineCacheOpenedDelegate);
	}

	if (OnShaderPipelineCachePrecompilationCompleteDelegate.IsValid())
	{
		FShaderPipelineCache::GetPrecompilationCompleteDelegate().Remove(OnShaderPipelineCachePrecompilationCompleteDelegate);
	}
	DestroyCache();

	// Only destroy layouts when quitting
	for (auto& Pair : LayoutMap)
	{
		delete Pair.Value;
	}
	for (auto& Pair : DSetLayoutMap)
	{
		VulkanRHI::vkDestroyDescriptorSetLayout(Device->GetInstanceHandle(), Pair.Value.Handle, VULKAN_CPU_ALLOCATOR);
	}

	{
		//TODO: Save PSOCache here?!
		FScopedPipelineCache PipelineCacheExclusive = GlobalPSOCache.Get(EPipelineCacheAccess::Exclusive);
		VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), VULKAN_CPU_ALLOCATOR);
	}

	{
		FScopedPipelineCache PipelineCacheExclusive = CurrentPrecompilingPSOCache.Get(EPipelineCacheAccess::Exclusive);
		if (PipelineCacheExclusive.Get() != VK_NULL_HANDLE)
		{
			//If CurrentOpenedPSOCache is still valid then it has never received OnShaderPipelineCachePrecompilationComplete callback, so we are not going to save it's content to disk as it's most likely is incomplete at this point
			VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), VULKAN_CPU_ALLOCATOR);
		}
	}
}
|
|
|
|
// Loads binary VkPipelineCache data from each candidate file into 'Cache'.
// Multiple files are merged via vkMergePipelineCaches. Optionally loads the
// companion ".lru" pipeline-size files when the LRU is enabled. Returns true
// if at least one binary cache loaded and no LRU file failed; a VkPipelineCache
// object is always created on exit even when every load fails.
bool FVulkanPipelineStateCacheManager::Load(const TArray<FString>& CacheFilenames, FPipelineCache& Cache)
{
	FScopedPipelineCache PipelineCacheExclusive = Cache.Get(EPipelineCacheAccess::Exclusive);

	bool bResult = false;
	// Try to load device cache first
	for (const FString& CacheFilename : CacheFilenames)
	{
		double BeginTime = FPlatformTime::Seconds();
		FString BinaryCacheFilename = FVulkanPlatform::CreatePSOBinaryCacheFilename(Device, CacheFilename);

		TArray<uint8> DeviceCache;
		if (FFileHelper::LoadFileToArray(DeviceCache, *BinaryCacheFilename, FILEREAD_Silent))
		{
			// Reject caches recorded for a different device/driver build.
			if (FVulkanPlatform::PSOBinaryCacheMatches(Device, DeviceCache))
			{
				VkPipelineCacheCreateInfo PipelineCacheInfo;
				ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
				PipelineCacheInfo.initialDataSize = DeviceCache.Num();
				PipelineCacheInfo.pInitialData = DeviceCache.GetData();

				if (PipelineCacheExclusive.Get() == VK_NULL_HANDLE)
				{
					// First file: create the cache directly from its data.
					VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheExclusive.Get()));
				}
				else
				{
					//TODO: assert on reopening the same cache twice?!
					// if we have one already, create a temp one and merge it
					VkPipelineCache TempPipelineCache;
					VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &TempPipelineCache));
					VERIFYVULKANRESULT(VulkanRHI::vkMergePipelineCaches(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), 1, &TempPipelineCache));
					VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), TempPipelineCache, VULKAN_CPU_ALLOCATOR);
				}

				double EndTime = FPlatformTime::Seconds();
				UE_LOG(LogVulkanRHI, Display, TEXT("FVulkanPipelineStateCacheManager: Loaded binary pipeline cache %s in %.3f seconds"), *BinaryCacheFilename, (float)(EndTime - BeginTime));
				bResult = true;
			}
			else
			{
				UE_LOG(LogVulkanRHI, Error, TEXT("FVulkanPipelineStateCacheManager: Mismatched binary pipeline cache %s"), *BinaryCacheFilename);
			}
		}
		else
		{
			UE_LOG(LogVulkanRHI, Display, TEXT("FVulkanPipelineStateCacheManager: Binary pipeline cache '%s' not found."), *BinaryCacheFilename);
		}
	}

	//TODO: how to load LRU cache as it will have info about PSOs from multiple caches
	if(CVarEnableLRU.GetValueOnAnyThread() != 0)
	{
		for (const FString& CacheFilename : CacheFilenames)
		{
			double BeginTime = FPlatformTime::Seconds();
			FString LruCacheFilename = FVulkanPlatform::CreatePSOBinaryCacheFilename(Device, CacheFilename);
			LruCacheFilename += TEXT(".lru");
			LruCacheFilename.ReplaceInline(TEXT("TempScanVulkanPSO_"), TEXT("VulkanPSO_")); //lru files do not use the rename trick...but are still protected against corruption indirectly

			TArray<uint8> MemFile;
			if (FFileHelper::LoadFileToArray(MemFile, *LruCacheFilename, FILEREAD_Silent))
			{
				FMemoryReader Ar(MemFile);

				FVulkanLRUCacheFile File;
				bool Valid = File.Load(Ar);
				if (!Valid)
				{
					// NOTE(review): on a failed load we still iterate whatever
					// PipelineSizes was populated below — presumably File.Load
					// leaves it empty/partial-but-safe; confirm in FVulkanLRUCacheFile.
					UE_LOG(LogVulkanRHI, Warning, TEXT("Unable to load lru pipeline cache '%s'"), *LruCacheFilename);
					bResult = false;
				}

				// Seed the hash -> pipeline-size table used by the LRU budget.
				for (int32 Index = 0; Index < File.PipelineSizes.Num(); ++Index)
				{
					LRU2SizeList.Add(File.PipelineSizes[Index].ShaderHash, File.PipelineSizes[Index]);
				}
				UE_LOG(LogVulkanRHI, Display, TEXT("Loaded %d LRU size entries for '%s'"), File.PipelineSizes.Num(), *LruCacheFilename);
			}
			else
			{
				UE_LOG(LogVulkanRHI, Warning, TEXT("Unable to load lru pipeline cache '%s'"), *LruCacheFilename);
				bResult = false;
			}
		}
	}

	// Lazily create the cache in case the load failed
	if (PipelineCacheExclusive.Get() == VK_NULL_HANDLE)
	{
		VkPipelineCacheCreateInfo PipelineCacheInfo;
		ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
		VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheExclusive.Get()));
	}

	return bResult;
}
|
|
|
|
// Startup entry point. Depending on cvars, either loads the binary cache
// directly from CacheFilenames, or defers loading to the FShaderPipelineCache
// open/complete delegates (guid-keyed cache files). Always ensures
// GlobalPSOCache ends up with a valid VkPipelineCache object.
void FVulkanPipelineStateCacheManager::InitAndLoad(const TArray<FString>& CacheFilenames)
{
	if (GEnablePipelineCacheLoadCvar.GetValueOnAnyThread() == 0)
	{
		UE_LOG(LogVulkanRHI, Display, TEXT("Not loading pipeline cache per r.Vulkan.PipelineCacheLoad=0"));
	}
	else
	{
		if (GPipelineCacheFromShaderPipelineCacheCvar.GetValueOnAnyThread() == 0)
		{
			// Immediate load from the supplied filenames.
			Load(CacheFilenames, GlobalPSOCache);
		}
		else
		{
			UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager will check for loading, etc when ShaderPipelineCache opens its file"));

#if PLATFORM_ANDROID && USE_ANDROID_FILE
			// @todo Lumin: Use that GetPathForExternalWrite or something?
			// BTW, this is totally bad. We should not platform ifdefs like this, rather the HAL needs to be extended!
			extern FString GExternalFilePath;
			CompiledPSOCacheTopFolderPath = GExternalFilePath / TEXT("VulkanProgramBinaryCache");

#else
			CompiledPSOCacheTopFolderPath = FPaths::ProjectSavedDir() / TEXT("VulkanProgramBinaryCache");
#endif

			// Remove entire ProgramBinaryCache folder if -ClearOpenGLBinaryProgramCache is specified on command line
			if (FParse::Param(FCommandLine::Get(), TEXT("ClearVulkanBinaryProgramCache")))
			{
				UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager: Deleting binary program cache folder for -ClearVulkanBinaryProgramCache: %s"), *CompiledPSOCacheTopFolderPath);
				FPlatformFileManager::Get().GetPlatformFile().DeleteDirectoryRecursively(*CompiledPSOCacheTopFolderPath);
			}

			// Actual loading happens in these callbacks (removed in the destructor).
			OnShaderPipelineCacheOpenedDelegate = FShaderPipelineCache::GetCacheOpenedDelegate().AddRaw(this, &FVulkanPipelineStateCacheManager::OnShaderPipelineCacheOpened);
			OnShaderPipelineCachePrecompilationCompleteDelegate = FShaderPipelineCache::GetPrecompilationCompleteDelegate().AddRaw(this, &FVulkanPipelineStateCacheManager::OnShaderPipelineCachePrecompilationComplete);
		}
	}

	FScopedPipelineCache PipelineCacheExclusive = GlobalPSOCache.Get(EPipelineCacheAccess::Exclusive);
	// Lazily create the cache in case the load failed
	if (PipelineCacheExclusive.Get() == VK_NULL_HANDLE)
	{
		VkPipelineCacheCreateInfo PipelineCacheInfo;
		ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
		VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheExclusive.Get()));
	}
}
|
|
|
|
// Persists the global PSO cache (and, inside SavePSOCache, the LRU size file
// when the LRU is enabled) to the given base filename.
void FVulkanPipelineStateCacheManager::Save(const FString& CacheFilename)
{
	SavePSOCache(CacheFilename, GlobalPSOCache);

	//TODO: Save LRU cache here
}
|
|
|
|
|
|
#if PLATFORM_ANDROID
// Number of external compile-service processes used to precompile Vulkan PSOs
// out-of-process on Android.
// Fix: help text claimed "(4 default)" while the code default is 6.
static int32 GNumRemoteProgramCompileServices = 6;
static FAutoConsoleVariableRef CVarNumRemoteProgramCompileServices(
	TEXT("Android.Vulkan.NumRemoteProgramCompileServices"),
	GNumRemoteProgramCompileServices,
	TEXT("The number of separate processes to make available to compile Vulkan PSOs.\n")
	TEXT("0 to disable use of separate processes to precompile PSOs\n")
	TEXT("valid range is 1-8 (6 default).")
	,
	ECVF_RenderThreadSafe | ECVF_ReadOnly
);
#endif
|
|
|
|
// FShaderPipelineCache "cache opened" callback. Creates the per-guid
// precompile VkPipelineCache, then tries to load the matching binary file
// (named from the guid + vendor/device id). The file is renamed to a temp
// name during the scan so a crash mid-load won't re-read a corrupt file.
// If nothing loads (or binary-evict is on) the precompile is flagged slow so
// a preload screen can be shown.
void FVulkanPipelineStateCacheManager::OnShaderPipelineCacheOpened(FString const& Name, EShaderPlatform Platform, uint32 Count, const FGuid& VersionGuid, FShaderPipelineCache::FShaderCachePrecompileContext& ShaderCachePrecompileContext)
{
	//TODO: support reloading the same cache
	if (CompiledPSOCaches.Contains(VersionGuid))
	{
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager::OnShaderPipelineCacheOpened attempts to load a cache that was already loaded before %s %s"), *Name, *VersionGuid.ToString());
		return;
	}

	CurrentPrecompilingPSOCacheGuid = VersionGuid;

	UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager::OnShaderPipelineCacheOpened %s %d %s"), *Name, Count, *VersionGuid.ToString());

	IPlatformFile& PlatformFile = FPlatformFileManager::Get().GetPlatformFile();

	// Cache files are keyed by guid AND GPU identity so a driver/device change
	// invalidates them via the filename.
	const VkPhysicalDeviceProperties& DeviceProperties = Device->GetDeviceProperties();
	FString BinaryCacheAppendage = FString::Printf(TEXT(".%x.%x"), DeviceProperties.vendorID, DeviceProperties.deviceID);

	CompiledPSOCacheFolderName = CompiledPSOCacheTopFolderPath / TEXT("VulkanPSO_") + VersionGuid.ToString() + BinaryCacheAppendage;
	FString TempName = CompiledPSOCacheTopFolderPath / TEXT("TempScanVulkanPSO_") + VersionGuid.ToString() + BinaryCacheAppendage;

	{
		// Create the empty cache the precompile results will accumulate into.
		FScopedPipelineCache PipelineCacheExclusive = CurrentPrecompilingPSOCache.Get(EPipelineCacheAccess::Exclusive);
		checkf(PipelineCacheExclusive.Get() == VK_NULL_HANDLE, TEXT("Trying to open more than one shader pipeline cache"));

		VkPipelineCacheCreateInfo PipelineCacheInfo;
		ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
		VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheExclusive.Get()));
	}

	if (PlatformFile.FileExists(*CompiledPSOCacheFolderName))
	{
		// Try to move the file to a temporary filename before the scan, so we won't try to read it again if it's corrupted
		PlatformFile.DeleteFile(*TempName);
		PlatformFile.MoveFile(*TempName, *CompiledPSOCacheFolderName);

		TArray<FString> CacheFilenames;
		CacheFilenames.Add(TempName);

		// Rename the file back after a successful scan.
		if (Load(CacheFilenames, CurrentPrecompilingPSOCache))
		{
			bPrecompilingCacheLoadedFromFile = true;
			PlatformFile.MoveFile(*CompiledPSOCacheFolderName, *TempName);

			if (CVarPipelineLRUCacheEvictBinary.GetValueOnAnyThread())
			{
				// Skip instantiating pipelines from the binary cache; see cvar.
				bEvictImmediately = true;
			}
		}
		else
		{
			UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager: PSO cache failed to load, deleting file: %s"), *CompiledPSOCacheFolderName);
			FPlatformFileManager::Get().GetPlatformFile().DeleteFile(*CompiledPSOCacheFolderName);
		}
	}
	else
	{
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager: %s does not exist."), *CompiledPSOCacheFolderName);
	}

	// A full precompile (no file, or evict+preload-screen mode) is slow enough
	// to warrant the slow-task UI; on Android also spin up compile services.
	if (!bPrecompilingCacheLoadedFromFile || (bEvictImmediately && CVarPipelineLRUCacheEvictBinaryPreloadScreen.GetValueOnAnyThread()))
	{
		ShaderCachePrecompileContext.SetPrecompilationIsSlowTask();
#if PLATFORM_ANDROID
		if (GNumRemoteProgramCompileServices)
		{
			FVulkanAndroidPlatform::StartRemoteCompileServices(GNumRemoteProgramCompileServices);
		}
#endif
	}
}
|
|
|
|
// FShaderPipelineCache "precompilation complete" callback. Stops the Android
// compile services, saves the per-guid cache if it wasn't loaded from disk,
// merges it into the global cache, and resets the per-precompile state.
void FVulkanPipelineStateCacheManager::OnShaderPipelineCachePrecompilationComplete(uint32 Count, double Seconds, const FShaderPipelineCache::FShaderCachePrecompileContext& ShaderCachePrecompileContext)
{
	UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanPipelineStateCacheManager::OnShaderPipelineCachePrecompilationComplete"));

#if PLATFORM_ANDROID
	if (FVulkanAndroidPlatform::AreRemoteCompileServicesActive())
	{
		FVulkanAndroidPlatform::StopRemoteCompileServices();
	}
#endif

	bEvictImmediately = false;
	if (!bPrecompilingCacheLoadedFromFile)
	{
		//Save PSO cache only if it failed to load
		SavePSOCache(CompiledPSOCacheFolderName, CurrentPrecompilingPSOCache);
	}

	if (!CompiledPSOCaches.Contains(CurrentPrecompilingPSOCacheGuid))
	{
		// Remember this guid so re-opening the same cache is rejected.
		CompiledPSOCaches.Add(CurrentPrecompilingPSOCacheGuid);

		//Merge this CurrentOpenedPSOCache with global PSOCache
		QUICK_SCOPE_CYCLE_COUNTER(STAT_VulkanPSOCacheMerge);
		FScopedPipelineCache GlobalPipelineCacheExclusive = GlobalPSOCache.Get(EPipelineCacheAccess::Exclusive);
		FScopedPipelineCache CurrentPipelineCacheExclusive = CurrentPrecompilingPSOCache.Get(EPipelineCacheAccess::Exclusive);
		VERIFYVULKANRESULT(VulkanRHI::vkMergePipelineCaches(Device->GetInstanceHandle(), GlobalPipelineCacheExclusive.Get(), 1, &CurrentPipelineCacheExclusive.Get()));
		VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), CurrentPipelineCacheExclusive.Get(), VULKAN_CPU_ALLOCATOR);

		// Null the handle so the destructor knows this cache is already gone.
		CurrentPipelineCacheExclusive.Get() = VK_NULL_HANDLE;
	}

	bPrecompilingCacheLoadedFromFile = false;
	CurrentPrecompilingPSOCacheGuid = FGuid();
}
|
|
|
|
// Serializes the given VkPipelineCache to disk, and (when the LRU is enabled)
// writes the companion ".lru" file of per-PSO pipeline sizes. On a transient
// vkGetPipelineCacheData failure the cache object is recreated empty rather
// than saving a partial blob.
void FVulkanPipelineStateCacheManager::SavePSOCache(const FString& CacheFilename, FPipelineCache& Cache)
{
	FScopedPipelineCache PipelineCacheExclusive = Cache.Get(EPipelineCacheAccess::Exclusive);
	FScopeLock Lock1(&GraphicsPSOLockedCS); //TODO: Do we really need this here?!
	FScopeLock Lock2(&LRUCS);

	// First save Device Cache
	// Two-call pattern: query the blob size, then fetch the data.
	size_t Size = 0;
	VERIFYVULKANRESULT(VulkanRHI::vkGetPipelineCacheData(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), &Size, nullptr));
	// 16 is HeaderSize + HeaderVersion
	if (Size >= 16 + VK_UUID_SIZE)
	{
		TArray<uint8> DeviceCache;
		DeviceCache.AddUninitialized(Size);
		VkResult Result = VulkanRHI::vkGetPipelineCacheData(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), &Size, DeviceCache.GetData());
		if (Result == VK_SUCCESS)
		{
			FString BinaryCacheFilename = FVulkanPlatform::CreatePSOBinaryCacheFilename(Device, CacheFilename);

			if (FFileHelper::SaveArrayToFile(DeviceCache, *BinaryCacheFilename))
			{
				UE_LOG(LogVulkanRHI, Display, TEXT("FVulkanPipelineStateCacheManager: Saved device pipeline cache file '%s', %d bytes"), *BinaryCacheFilename, DeviceCache.Num());
			}
			else
			{
				UE_LOG(LogVulkanRHI, Error, TEXT("FVulkanPipelineStateCacheManager: Failed to save device pipeline cache file '%s', %d bytes"), *BinaryCacheFilename, DeviceCache.Num());
			}
		}
		else if (Result == VK_INCOMPLETE || Result == VK_ERROR_OUT_OF_HOST_MEMORY)
		{
			// The cache contents changed between the size query and the fetch
			// (or memory ran out); drop this cache and start a fresh empty one.
			UE_LOG(LogVulkanRHI, Warning, TEXT("Failed to get Vulkan pipeline cache data. Error %d, %d bytes"), Result, Size);

			//TODO: Resave it when we shutdown the manager?!
			VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), VULKAN_CPU_ALLOCATOR);
			VkPipelineCacheCreateInfo PipelineCacheInfo;
			ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
			VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheExclusive.Get()));
		}
		else
		{
			VERIFYVULKANRESULT(Result);
		}
	}

	if (CVarEnableLRU.GetValueOnAnyThread() != 0)
	{
		// LRU cache file
		// Serialize the hash -> pipeline-size table alongside the binary cache.
		TArray<uint8> MemFile;
		FMemoryWriter Ar(MemFile);
		FVulkanLRUCacheFile File;
		File.Header.Version = FVulkanLRUCacheFile::LRU_CACHE_VERSION;
		File.Header.SizeOfPipelineSizes = (int32)sizeof(FVulkanPipelineSize);
		LRU2SizeList.GenerateValueArray(File.PipelineSizes);
		File.Save(Ar);

		FString LruCacheFilename = FVulkanPlatform::CreatePSOBinaryCacheFilename(Device, CacheFilename);
		LruCacheFilename += TEXT(".lru");

		if (FFileHelper::SaveArrayToFile(MemFile, *LruCacheFilename))
		{
			UE_LOG(LogVulkanRHI, Display, TEXT("FVulkanPipelineStateCacheManager: Saved pipeline lru pipeline cache file '%s', %d hashes, %d bytes"), *LruCacheFilename, LRU2SizeList.Num(), MemFile.Num());
		}
		else
		{
			UE_LOG(LogVulkanRHI, Error, TEXT("FVulkanPipelineStateCacheManager: Failed to save pipeline lru pipeline cache file '%s', %d hashes, %d bytes"), *LruCacheFilename, LRU2SizeList.Num(), MemFile.Num());
		}
	}
}
|
|
|
|
// Serializes a blend-attachment description. The field order defines the
// on-disk layout and must not change without bumping VERSION.
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FBlendAttachment& Attachment)
{
	// Modify VERSION if serialization changes
	Ar << Attachment.bBlend
		<< Attachment.ColorBlendOp
		<< Attachment.SrcColorBlendFactor
		<< Attachment.DstColorBlendFactor
		<< Attachment.AlphaBlendOp
		<< Attachment.SrcAlphaBlendFactor
		<< Attachment.DstAlphaBlendFactor
		<< Attachment.ColorWriteMask;
	return Ar;
}
|
|
|
|
// Captures a Vulkan blend-attachment state into the compact serializable form
// (enums narrowed to uint8 for on-disk size).
void FGfxPipelineDesc::FBlendAttachment::ReadFrom(const VkPipelineColorBlendAttachmentState& InState)
{
	bBlend = InState.blendEnable != VK_FALSE;
	ColorBlendOp = (uint8)InState.colorBlendOp;
	SrcColorBlendFactor = (uint8)InState.srcColorBlendFactor;
	DstColorBlendFactor = (uint8)InState.dstColorBlendFactor;
	AlphaBlendOp = (uint8)InState.alphaBlendOp;
	SrcAlphaBlendFactor = (uint8)InState.srcAlphaBlendFactor;
	DstAlphaBlendFactor = (uint8)InState.dstAlphaBlendFactor;
	ColorWriteMask = (uint8)InState.colorWriteMask;
}
|
|
|
|
// Expands the compact serialized form back into a Vulkan blend-attachment
// state (inverse of ReadFrom).
void FGfxPipelineDesc::FBlendAttachment::WriteInto(VkPipelineColorBlendAttachmentState& Out) const
{
	Out.blendEnable = bBlend ? VK_TRUE : VK_FALSE;
	Out.colorBlendOp = (VkBlendOp)ColorBlendOp;
	Out.srcColorBlendFactor = (VkBlendFactor)SrcColorBlendFactor;
	Out.dstColorBlendFactor = (VkBlendFactor)DstColorBlendFactor;
	Out.alphaBlendOp = (VkBlendOp)AlphaBlendOp;
	Out.srcAlphaBlendFactor = (VkBlendFactor)SrcAlphaBlendFactor;
	Out.dstAlphaBlendFactor = (VkBlendFactor)DstAlphaBlendFactor;
	Out.colorWriteMask = (VkColorComponentFlags)ColorWriteMask;
}
|
|
|
|
|
|
// Captures a Vulkan descriptor-set-layout binding. descriptorCount is assumed
// to always be 1 (enforced by the ensure), so it is not stored.
void FDescriptorSetLayoutBinding::ReadFrom(const VkDescriptorSetLayoutBinding& InState)
{
	Binding = InState.binding;
	ensure(InState.descriptorCount == 1);
	//DescriptorCount = InState.descriptorCount;
	DescriptorType = InState.descriptorType;
	StageFlags = InState.stageFlags;
}
|
|
|
|
// Expands the stored binding back into a Vulkan struct. descriptorCount is
// intentionally not written here (see ReadFrom) — the caller sets it.
void FDescriptorSetLayoutBinding::WriteInto(VkDescriptorSetLayoutBinding& Out) const
{
	Out.binding = Binding;
	//Out.descriptorCount = DescriptorCount;
	Out.descriptorType = (VkDescriptorType)DescriptorType;
	Out.stageFlags = StageFlags;
}
|
|
|
|
// Serializes a descriptor-set-layout binding; field order is the on-disk
// layout — bump VERSION if it changes. DescriptorCount stays unserialized.
FArchive& operator << (FArchive& Ar, FDescriptorSetLayoutBinding& Binding)
{
	// Modify VERSION if serialization changes
	Ar << Binding.Binding
		//<< Binding.DescriptorCount
		<< Binding.DescriptorType
		<< Binding.StageFlags;
	return Ar;
}
|
|
|
|
// Captures a Vulkan vertex-input binding (inputRate narrowed to uint16).
void FGfxPipelineDesc::FVertexBinding::ReadFrom(const VkVertexInputBindingDescription& InState)
{
	Binding = InState.binding;
	InputRate = (uint16)InState.inputRate;
	Stride = InState.stride;
}
|
|
|
|
// Expands the stored vertex-input binding back into the Vulkan struct.
void FGfxPipelineDesc::FVertexBinding::WriteInto(VkVertexInputBindingDescription& Out) const
{
	Out.binding = Binding;
	Out.inputRate = (VkVertexInputRate)InputRate;
	Out.stride = Stride;
}
|
|
|
|
// Serializes a vertex-input binding; field order (Stride, Binding, InputRate)
// is the on-disk layout — bump VERSION if it changes.
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FVertexBinding& Binding)
{
	// Modify VERSION if serialization changes
	Ar << Binding.Stride
		<< Binding.Binding
		<< Binding.InputRate;
	return Ar;
}
|
|
|
|
// Captures a Vulkan vertex attribute description (format stored as uint32).
void FGfxPipelineDesc::FVertexAttribute::ReadFrom(const VkVertexInputAttributeDescription& InState)
{
	Binding = InState.binding;
	Format = (uint32)InState.format;
	Location = InState.location;
	Offset = InState.offset;
}
|
|
|
|
// Expands the stored vertex attribute back into the Vulkan struct.
void FGfxPipelineDesc::FVertexAttribute::WriteInto(VkVertexInputAttributeDescription& Out) const
{
	Out.binding = Binding;
	Out.format = (VkFormat)Format;
	Out.location = Location;
	Out.offset = Offset;
}
|
|
|
|
// Serializes a vertex attribute; field order (Location, Binding, Format,
// Offset) is the on-disk layout — bump VERSION if it changes.
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FVertexAttribute& Attribute)
{
	// Modify VERSION if serialization changes
	Ar << Attribute.Location
		<< Attribute.Binding
		<< Attribute.Format
		<< Attribute.Offset;
	return Ar;
}
|
|
|
|
void FGfxPipelineDesc::FRasterizer::ReadFrom(const VkPipelineRasterizationStateCreateInfo& InState)
|
|
{
|
|
PolygonMode = InState.polygonMode;
|
|
CullMode = InState.cullMode;
|
|
DepthBiasSlopeScale = InState.depthBiasSlopeFactor;
|
|
DepthBiasConstantFactor = InState.depthBiasConstantFactor;
|
|
}
|
|
|
|
void FGfxPipelineDesc::FRasterizer::WriteInto(VkPipelineRasterizationStateCreateInfo& Out) const
{
	// Fixed engine-wide settings.
	Out.frontFace = VK_FRONT_FACE_CLOCKWISE;
	Out.depthClampEnable = VK_FALSE;
	Out.rasterizerDiscardEnable = VK_FALSE;

	// Serialized state.
	Out.polygonMode = static_cast<VkPolygonMode>(PolygonMode);
	Out.cullMode = static_cast<VkCullModeFlags>(CullMode);
	Out.depthBiasSlopeFactor = DepthBiasSlopeScale;
	Out.depthBiasConstantFactor = DepthBiasConstantFactor;
	// Bias is enabled whenever a non-zero constant factor was captured.
	Out.depthBiasEnable = (DepthBiasConstantFactor != 0.0f) ? VK_TRUE : VK_FALSE;
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FRasterizer& Rasterizer)
{
	// Modify VERSION if serialization changes
	// On-disk order is part of the format - do not reorder.
	Ar << Rasterizer.PolygonMode << Rasterizer.CullMode;
	Ar << Rasterizer.DepthBiasSlopeScale << Rasterizer.DepthBiasConstantFactor;
	return Ar;
}
|
|
|
|
void FGfxPipelineDesc::FDepthStencil::ReadFrom(const VkPipelineDepthStencilStateCreateInfo& InState)
|
|
{
|
|
DepthCompareOp = (uint8)InState.depthCompareOp;
|
|
bDepthTestEnable = InState.depthTestEnable != VK_FALSE;
|
|
bDepthWriteEnable = InState.depthWriteEnable != VK_FALSE;
|
|
bDepthBoundsTestEnable = InState.depthBoundsTestEnable != VK_FALSE;
|
|
bStencilTestEnable = InState.stencilTestEnable != VK_FALSE;
|
|
FrontFailOp = (uint8)InState.front.failOp;
|
|
FrontPassOp = (uint8)InState.front.passOp;
|
|
FrontDepthFailOp = (uint8)InState.front.depthFailOp;
|
|
FrontCompareOp = (uint8)InState.front.compareOp;
|
|
FrontCompareMask = (uint8)InState.front.compareMask;
|
|
FrontWriteMask = InState.front.writeMask;
|
|
FrontReference = InState.front.reference;
|
|
BackFailOp = (uint8)InState.back.failOp;
|
|
BackPassOp = (uint8)InState.back.passOp;
|
|
BackDepthFailOp = (uint8)InState.back.depthFailOp;
|
|
BackCompareOp = (uint8)InState.back.compareOp;
|
|
BackCompareMask = (uint8)InState.back.compareMask;
|
|
BackWriteMask = InState.back.writeMask;
|
|
BackReference = InState.back.reference;
|
|
}
|
|
|
|
void FGfxPipelineDesc::FDepthStencil::WriteInto(VkPipelineDepthStencilStateCreateInfo& Out) const
{
	// Depth/global toggles.
	Out.depthCompareOp = static_cast<VkCompareOp>(DepthCompareOp);
	Out.depthTestEnable = bDepthTestEnable;
	Out.depthWriteEnable = bDepthWriteEnable;
	Out.depthBoundsTestEnable = bDepthBoundsTestEnable;
	Out.stencilTestEnable = bStencilTestEnable;

	// Front-facing stencil state.
	Out.front.failOp = static_cast<VkStencilOp>(FrontFailOp);
	Out.front.passOp = static_cast<VkStencilOp>(FrontPassOp);
	Out.front.depthFailOp = static_cast<VkStencilOp>(FrontDepthFailOp);
	Out.front.compareOp = static_cast<VkCompareOp>(FrontCompareOp);
	Out.front.compareMask = FrontCompareMask;
	Out.front.writeMask = FrontWriteMask;
	Out.front.reference = FrontReference;

	// Back-facing stencil state.
	Out.back.failOp = static_cast<VkStencilOp>(BackFailOp);
	Out.back.passOp = static_cast<VkStencilOp>(BackPassOp);
	Out.back.depthFailOp = static_cast<VkStencilOp>(BackDepthFailOp);
	Out.back.compareOp = static_cast<VkCompareOp>(BackCompareOp);
	Out.back.compareMask = BackCompareMask;
	Out.back.writeMask = BackWriteMask;
	Out.back.reference = BackReference;
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FDepthStencil& DepthStencil)
{
	// Modify VERSION if serialization changes
	// On-disk order is part of the format - do not reorder.
	Ar << DepthStencil.DepthCompareOp;
	Ar << DepthStencil.bDepthTestEnable << DepthStencil.bDepthWriteEnable;
	Ar << DepthStencil.bDepthBoundsTestEnable << DepthStencil.bStencilTestEnable;

	// Front-facing stencil state.
	Ar << DepthStencil.FrontFailOp << DepthStencil.FrontPassOp << DepthStencil.FrontDepthFailOp;
	Ar << DepthStencil.FrontCompareOp << DepthStencil.FrontCompareMask;
	Ar << DepthStencil.FrontWriteMask << DepthStencil.FrontReference;

	// Back-facing stencil state.
	Ar << DepthStencil.BackFailOp << DepthStencil.BackPassOp << DepthStencil.BackDepthFailOp;
	Ar << DepthStencil.BackCompareOp << DepthStencil.BackCompareMask;
	Ar << DepthStencil.BackWriteMask << DepthStencil.BackReference;

	return Ar;
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FAttachmentRef::ReadFrom(const VkAttachmentReference& InState)
|
|
{
|
|
Attachment = InState.attachment;
|
|
Layout = (uint64)InState.layout;
|
|
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FAttachmentRef::WriteInto(VkAttachmentReference& Out) const
{
	// Rebuild the Vulkan attachment reference from the serialized fields.
	Out.layout = static_cast<VkImageLayout>(Layout);
	Out.attachment = Attachment;
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FRenderTargets::FAttachmentRef& AttachmentRef)
{
	// Modify VERSION if serialization changes
	// On-disk order: Attachment, Layout - keep stable.
	Ar << AttachmentRef.Attachment << AttachmentRef.Layout;
	return Ar;
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FStencilAttachmentRef::ReadFrom(const VkAttachmentReferenceStencilLayout& InState)
|
|
{
|
|
Layout = (uint64)InState.stencilLayout;
|
|
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FStencilAttachmentRef::WriteInto(VkAttachmentReferenceStencilLayout& Out) const
{
	// Rebuild the separate stencil layout from the serialized value.
	Out.stencilLayout = static_cast<VkImageLayout>(Layout);
}
|
|
|
|
// Serializes the stencil attachment reference (layout only).
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FRenderTargets::FStencilAttachmentRef& AttachmentRef)
{
	// Modify VERSION if serialization changes
	Ar << AttachmentRef.Layout;
	return Ar;
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FAttachmentDesc::ReadFrom(const VkAttachmentDescription &InState)
|
|
{
|
|
Format = (uint32)InState.format;
|
|
Flags = (uint8)InState.flags;
|
|
Samples = (uint8)InState.samples;
|
|
LoadOp = (uint8)InState.loadOp;
|
|
StoreOp = (uint8)InState.storeOp;
|
|
StencilLoadOp = (uint8)InState.stencilLoadOp;
|
|
StencilStoreOp = (uint8)InState.stencilStoreOp;
|
|
InitialLayout = (uint64)InState.initialLayout;
|
|
FinalLayout = (uint64)InState.finalLayout;
|
|
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FAttachmentDesc::WriteInto(VkAttachmentDescription& Out) const
{
	// Rebuild the Vulkan attachment description from the compact serialized fields.
	Out.format = static_cast<VkFormat>(Format);
	Out.flags = Flags;
	Out.samples = static_cast<VkSampleCountFlagBits>(Samples);

	// Load/store behavior.
	Out.loadOp = static_cast<VkAttachmentLoadOp>(LoadOp);
	Out.storeOp = static_cast<VkAttachmentStoreOp>(StoreOp);
	Out.stencilLoadOp = static_cast<VkAttachmentLoadOp>(StencilLoadOp);
	Out.stencilStoreOp = static_cast<VkAttachmentStoreOp>(StencilStoreOp);

	// Layout transitions.
	Out.initialLayout = static_cast<VkImageLayout>(InitialLayout);
	Out.finalLayout = static_cast<VkImageLayout>(FinalLayout);
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FRenderTargets::FAttachmentDesc& AttachmentDesc)
{
	// Modify VERSION if serialization changes
	// On-disk order is part of the format - do not reorder.
	Ar << AttachmentDesc.Format << AttachmentDesc.Flags << AttachmentDesc.Samples;
	Ar << AttachmentDesc.LoadOp << AttachmentDesc.StoreOp;
	Ar << AttachmentDesc.StencilLoadOp << AttachmentDesc.StencilStoreOp;
	Ar << AttachmentDesc.InitialLayout << AttachmentDesc.FinalLayout;

	return Ar;
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FStencilAttachmentDesc::ReadFrom(const VkAttachmentDescriptionStencilLayout& InState)
|
|
{
|
|
InitialLayout = (uint64)InState.stencilInitialLayout;
|
|
FinalLayout = (uint64)InState.stencilFinalLayout;
|
|
}
|
|
|
|
void FGfxPipelineDesc::FRenderTargets::FStencilAttachmentDesc::WriteInto(VkAttachmentDescriptionStencilLayout& Out) const
{
	// Rebuild the separate stencil initial/final layouts from the serialized values.
	Out.stencilFinalLayout = static_cast<VkImageLayout>(FinalLayout);
	Out.stencilInitialLayout = static_cast<VkImageLayout>(InitialLayout);
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FRenderTargets::FStencilAttachmentDesc& StencilAttachmentDesc)
{
	// Modify VERSION if serialization changes
	// On-disk order: InitialLayout, FinalLayout - keep stable.
	Ar << StencilAttachmentDesc.InitialLayout << StencilAttachmentDesc.FinalLayout;
	return Ar;
}
|
|
|
|
// Captures an FVulkanRenderTargetLayout into this serializable render-target description.
// Inverse of WriteInto; the fixed-size reference/description arrays are copied in full
// (UE_ARRAY_COUNT), so the serialized arrays always hold the maximum element count.
void FGfxPipelineDesc::FRenderTargets::ReadFrom(const FVulkanRenderTargetLayout& RTLayout)
{
	NumAttachments = RTLayout.NumAttachmentDescriptions;
	NumColorAttachments = RTLayout.NumColorAttachments;

	// Bitfield members are normalized to bool via != 0.
	bHasDepthStencil = RTLayout.bHasDepthStencil != 0;
	bHasResolveAttachments = RTLayout.bHasResolveAttachments != 0;
	bHasDepthStencilResolve = RTLayout.bHasDepthStencilResolve != 0;
	bHasFragmentDensityAttachment = RTLayout.bHasFragmentDensityAttachment != 0;
	NumUsedClearValues = RTLayout.NumUsedClearValues;

	RenderPassCompatibleHash = RTLayout.GetRenderPassCompatibleHash();

	Extent3D.X = RTLayout.Extent.Extent3D.width;
	Extent3D.Y = RTLayout.Extent.Extent3D.height;
	Extent3D.Z = RTLayout.Extent.Extent3D.depth;

	// Copy a fixed-size VkAttachmentReference array into the serializable TArray.
	auto CopyAttachmentRefs = [&](TArray<FGfxPipelineDesc::FRenderTargets::FAttachmentRef>& Dest, const VkAttachmentReference* Source, uint32 Count)
	{
		for (uint32 Index = 0; Index < Count; ++Index)
		{
			FGfxPipelineDesc::FRenderTargets::FAttachmentRef& New = Dest.AddDefaulted_GetRef();
			New.ReadFrom(Source[Index]);
		}
	};
	CopyAttachmentRefs(ColorAttachments, RTLayout.ColorReferences, UE_ARRAY_COUNT(RTLayout.ColorReferences));
	CopyAttachmentRefs(ResolveAttachments, RTLayout.ResolveReferences, UE_ARRAY_COUNT(RTLayout.ResolveReferences));
	Depth.ReadFrom(RTLayout.DepthReference);
	Stencil.ReadFrom(RTLayout.StencilReference);
	FragmentDensity.ReadFrom(RTLayout.FragmentDensityReference);

	// Attachment descriptions: one entry per slot of the layout's fixed Desc array.
	// WriteInto relies on Descriptions having exactly this count.
	Descriptions.AddZeroed(UE_ARRAY_COUNT(RTLayout.Desc));
	for (int32 Index = 0; Index < UE_ARRAY_COUNT(RTLayout.Desc); ++Index)
	{
		Descriptions[Index].ReadFrom(RTLayout.Desc[Index]);
	}
	StencilDescription.ReadFrom(RTLayout.StencilDesc);
}
|
|
|
|
// Reconstructs an FVulkanRenderTargetLayout from this serialized description.
// Inverse of ReadFrom; assumes Descriptions was filled with UE_ARRAY_COUNT(Out.Desc)
// entries (as ReadFrom guarantees) - indexing below is unguarded.
void FGfxPipelineDesc::FRenderTargets::WriteInto(FVulkanRenderTargetLayout& Out) const
{
	Out.NumAttachmentDescriptions = NumAttachments;
	Out.NumColorAttachments = NumColorAttachments;

	Out.bHasDepthStencil = bHasDepthStencil;
	Out.bHasResolveAttachments = bHasResolveAttachments;
	Out.bHasDepthStencilResolve = bHasDepthStencilResolve;
	Out.bHasFragmentDensityAttachment = bHasFragmentDensityAttachment;
	Out.NumUsedClearValues = NumUsedClearValues;

	// NOTE(review): this ensure fires unconditionally whenever WriteInto runs -
	// presumably this path is not expected to be exercised (or the serialized hash
	// is not considered trustworthy here). Confirm intent before relying on it.
	ensure(0);
	Out.RenderPassCompatibleHash = RenderPassCompatibleHash;

	Out.Extent.Extent3D.width = Extent3D.X;
	Out.Extent.Extent3D.height = Extent3D.Y;
	Out.Extent.Extent3D.depth = Extent3D.Z;

	// Copy the serialized TArray back into the layout's fixed-size reference array.
	// Assumes Source has at least Count elements (ReadFrom wrote the full array).
	auto CopyAttachmentRefs = [&](const TArray<FGfxPipelineDesc::FRenderTargets::FAttachmentRef>& Source, VkAttachmentReference* Dest, uint32 Count)
	{
		for (uint32 Index = 0; Index < Count; ++Index, ++Dest)
		{
			Source[Index].WriteInto(*Dest);
		}
	};
	CopyAttachmentRefs(ColorAttachments, Out.ColorReferences, UE_ARRAY_COUNT(Out.ColorReferences));
	CopyAttachmentRefs(ResolveAttachments, Out.ResolveReferences, UE_ARRAY_COUNT(Out.ResolveReferences));
	Depth.WriteInto(Out.DepthReference);
	Stencil.WriteInto(Out.StencilReference);
	FragmentDensity.WriteInto(Out.FragmentDensityReference);

	for (int32 Index = 0; Index < UE_ARRAY_COUNT(Out.Desc); ++Index)
	{
		Descriptions[Index].WriteInto(Out.Desc[Index]);
	}
	StencilDescription.WriteInto(Out.StencilDesc);
}
|
|
|
|
// Serializes/deserializes the render-target description for the PSO cache.
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc::FRenderTargets& RTs)
{
	// Modify VERSION if serialization changes
	Ar << RTs.NumAttachments;
	Ar << RTs.NumColorAttachments;
	Ar << RTs.NumUsedClearValues;

	Ar << RTs.ColorAttachments;
	Ar << RTs.ResolveAttachments;
	Ar << RTs.Depth;
	Ar << RTs.Stencil;
	Ar << RTs.FragmentDensity;

	Ar << RTs.Descriptions;
	Ar << RTs.StencilDescription;

	Ar << RTs.bHasDepthStencil;
	Ar << RTs.bHasResolveAttachments;
	Ar << RTs.bHasDepthStencilResolve;
	// Fix: bHasFragmentDensityAttachment is captured by ReadFrom and consumed by
	// WriteInto, but was never serialized - a round-tripped desc silently dropped
	// the fragment-density flag. Adding it changes the on-disk format, so bump
	// the cache VERSION alongside this change.
	Ar << RTs.bHasFragmentDensityAttachment;
	Ar << RTs.RenderPassCompatibleHash;
	Ar << RTs.Extent3D;

	return Ar;
}
|
|
|
|
// Serializes/deserializes a full graphics pipeline description for the PSO cache.
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc& Entry)
{
	// Modify VERSION if serialization changes
	Ar << Entry.VertexInputKey;
	Ar << Entry.RasterizationSamples;
	Ar << Entry.ControlPoints;
	Ar << Entry.Topology;

	Ar << Entry.ColorAttachmentStates;

	Ar << Entry.DescriptorSetLayoutBindings;

	Ar << Entry.VertexBindings;
	Ar << Entry.VertexAttributes;
	Ar << Entry.Rasterizer;

	Ar << Entry.DepthStencil;

#if VULKAN_USE_SHADERKEYS
	for (uint64& ShaderKey : Entry.ShaderKeys)
	{
		Ar << ShaderKey;
	}
#else
	for (int32 Index = 0; Index < UE_ARRAY_COUNT(Entry.ShaderHashes.Stages); ++Index)
	{
		Ar << Entry.ShaderHashes.Stages[Index];
	}
#endif
	Ar << Entry.RenderTargets;

	// VRS enums are stored as raw uint8 so the format is independent of enum width.
	uint8 ShadingRate = static_cast<uint8>(Entry.ShadingRate);
	uint8 Combiner = static_cast<uint8>(Entry.Combiner);

	Ar << ShadingRate;
	Ar << Combiner;

	if (Ar.IsLoading())
	{
		// Fix: the deserialized values were previously read into the locals and then
		// discarded, leaving Entry.ShadingRate/Entry.Combiner at their pre-load values
		// for any desc loaded from the cache. Copy them back into the entry.
		// No on-disk format change, so no VERSION bump is required.
		Entry.ShadingRate = static_cast<EVRSShadingRate>(ShadingRate);
		Entry.Combiner = static_cast<EVRSRateCombiner>(Combiner);
	}

	Ar << Entry.UseAlphaToCoverage;

	return Ar;
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FGfxPipelineDesc* Entry)
{
	// Pointer convenience overload: forward to the reference serializer.
	return Ar << *Entry;
}
|
|
|
|
FArchive& operator << (FArchive& Ar, FVulkanPipelineSize& PS)
{
	// Serializes the (shader hash -> pipeline binary size) record used by the LRU cache.
	Ar << PS.ShaderHash << PS.PipelineSize;
	return Ar;
}
|
|
|
|
|
|
FVulkanPSOKey FGfxPipelineDesc::CreateKey2() const
{
	// Build the PSO lookup key by running this desc through its archive serializer.
	FVulkanPSOKey Key;
	Key.GenerateFromArchive([this](FArchive& Ar)
	{
		// operator<< takes a non-const reference; writing to an archive does not mutate.
		Ar << const_cast<FGfxPipelineDesc&>(*this);
	});
	return Key;
}
|
|
|
|
// Map Unreal VRS combiner operation enums to Vulkan enums.
// Keyed by the EVRSRateCombiner value cast to uint8 (see the VRS setup in
// CreateGfxPipelineFromEntry); missing keys would assert inside TMap::operator[].
static const TMap<uint8, VkFragmentShadingRateCombinerOpKHR> FragmentCombinerOpMap
{
	{ VRSRB_Passthrough, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR },
	{ VRSRB_Override, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR },
	{ VRSRB_Min, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR },
	{ VRSRB_Max, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR },
	{ VRSRB_Sum, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR }, // No concept of Sum in Vulkan - fall back to max.
	// @todo: Add "VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR"?
};
|
|
|
|
// Builds a newline-separated, human-readable list of the shader hashes bound to a
// graphics pipeline (used in failure logging). Null stage entries are skipped; mesh/task
// and geometry stages are only included when compiled in.
static FString GfxShaderHashesToString(FVulkanShader* Shaders[ShaderStage::NumGraphicsStages])
{
	FString ShaderHashes = "";
	if (Shaders[ShaderStage::Vertex])
	{
		ShaderHashes += TEXT("VS: ") + static_cast<FVulkanVertexShader*>(Shaders[ShaderStage::Vertex])->GetHash().ToString() + TEXT("\n");
	}
	if (Shaders[ShaderStage::Pixel] )
	{
		ShaderHashes += TEXT("PS: ") + static_cast<FVulkanPixelShader*>(Shaders[ShaderStage::Pixel])->GetHash().ToString() + TEXT("\n");
	}
#if PLATFORM_SUPPORTS_MESH_SHADERS
	if (Shaders[ShaderStage::Mesh])
	{
		ShaderHashes += TEXT("MS: ") + static_cast<FVulkanMeshShader*>(Shaders[ShaderStage::Mesh])->GetHash().ToString() + TEXT("\n");
	}
	if (Shaders[ShaderStage::Task])
	{
		ShaderHashes += TEXT("AS: ") + static_cast<FVulkanTaskShader*>(Shaders[ShaderStage::Task])->GetHash().ToString() + TEXT("\n");
	}
#endif
#if VULKAN_SUPPORTS_GEOMETRY_SHADERS
	if (Shaders[ShaderStage::Geometry])
	{
		ShaderHashes += TEXT("GS: ") + static_cast<FVulkanGeometryShader*>(Shaders[ShaderStage::Geometry])->GetHash().ToString() + TEXT("\n");
	}
#endif
	return ShaderHashes;
}
|
|
|
|
// Builds the VkGraphicsPipelineCreateInfo for a PSO from its serialized FGfxPipelineDesc
// and creates the Vulkan pipeline via CreateVKPipeline.
// May substitute a null pixel shader when the platform requires one. Returns false (and
// logs the shader hashes) if pipeline creation fails.
// Note: all Vk*CreateInfo structs below live on this stack frame; they must stay alive
// until CreateVKPipeline returns, which is why everything is declared in this scope.
bool FVulkanPipelineStateCacheManager::CreateGfxPipelineFromEntry(FVulkanRHIGraphicsPipelineState* PSO, FVulkanShader* Shaders[ShaderStage::NumGraphicsStages], FGraphicsPipelineStateInitializer::EPSOPrecacheCompileType PSOCompileType)
{
	VkPipeline* Pipeline = &PSO->VulkanPipeline;
	const FGfxPipelineDesc* GfxEntry = &PSO->Desc;
	// Platforms that cannot run without a pixel shader get the global null PS bound.
	if (Shaders[ShaderStage::Pixel] == nullptr && !FVulkanPlatform::SupportsNullPixelShader())
	{
		Shaders[ShaderStage::Pixel] = ResourceCast(TShaderMapRef<FNULLPS>(GetGlobalShaderMap(GMaxRHIFeatureLevel)).GetPixelShader());
	}

	TRefCountPtr<FVulkanShaderModule> ShaderModules[ShaderStage::NumGraphicsStages];

	PSO->GetOrCreateShaderModules(ShaderModules, Shaders);

	// Pipeline
	VkGraphicsPipelineCreateInfo PipelineInfo;
	ZeroVulkanStruct(PipelineInfo, VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
	PipelineInfo.layout = PSO->Layout->GetPipelineLayout();

	// Color Blend
	VkPipelineColorBlendStateCreateInfo CBInfo;
	ZeroVulkanStruct(CBInfo, VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO);
	CBInfo.attachmentCount = GfxEntry->ColorAttachmentStates.Num();
	VkPipelineColorBlendAttachmentState BlendStates[MaxSimultaneousRenderTargets];
	FMemory::Memzero(BlendStates);
	// InOutMask has one bit per render target the pixel shader actually writes;
	// default to all-on when there is no pixel shader bound.
	uint32 ColorWriteMask = 0xffffffff;
	if(Shaders[ShaderStage::Pixel])
	{
		ColorWriteMask = Shaders[ShaderStage::Pixel]->CodeHeader.InOutMask;
	}
	for (int32 Index = 0; Index < GfxEntry->ColorAttachmentStates.Num(); ++Index)
	{
		GfxEntry->ColorAttachmentStates[Index].WriteInto(BlendStates[Index]);

		if(0 == (ColorWriteMask & 1)) //clear write mask of rendertargets not written by pixelshader.
		{
			BlendStates[Index].colorWriteMask = 0;
		}
		ColorWriteMask >>= 1;
	}
	CBInfo.pAttachments = BlendStates;
	CBInfo.blendConstants[0] = 1.0f;
	CBInfo.blendConstants[1] = 1.0f;
	CBInfo.blendConstants[2] = 1.0f;
	CBInfo.blendConstants[3] = 1.0f;

	// Viewport (actual viewport/scissor rects are dynamic state - see below)
	VkPipelineViewportStateCreateInfo VPInfo;
	ZeroVulkanStruct(VPInfo, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO);
	VPInfo.viewportCount = 1;
	VPInfo.scissorCount = 1;

	// Multisample
	VkPipelineMultisampleStateCreateInfo MSInfo;
	ZeroVulkanStruct(MSInfo, VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO);
	MSInfo.rasterizationSamples = (VkSampleCountFlagBits)FMath::Max<uint16>(1u, GfxEntry->RasterizationSamples);
	MSInfo.alphaToCoverageEnable = GfxEntry->UseAlphaToCoverage;

	// Shader stages: pack the valid stages densely into ShaderStages[0..stageCount).
	VkPipelineShaderStageCreateInfo ShaderStages[ShaderStage::NumGraphicsStages];
	FMemory::Memzero(ShaderStages);
	PipelineInfo.stageCount = 0;
	PipelineInfo.pStages = ShaderStages;
	// main_00000000_00000000
	ANSICHAR EntryPoints[ShaderStage::NumGraphicsStages][24];
	VkPipelineShaderStageRequiredSubgroupSizeCreateInfo RequiredSubgroupSizeCreateInfo[ShaderStage::NumGraphicsStages];
	for (int32 ShaderStage = 0; ShaderStage < ShaderStage::NumGraphicsStages; ++ShaderStage)
	{
		if (!ShaderModules[ShaderStage].IsValid() || (Shaders[ShaderStage] == nullptr))
		{
			continue;
		}
		const ShaderStage::EStage CurrStage = (ShaderStage::EStage)ShaderStage;

		ShaderStages[PipelineInfo.stageCount].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		const VkShaderStageFlagBits Stage = UEFrequencyToVKStageBit(ShaderStage::GetFrequencyForGfxStage(CurrStage));
		ShaderStages[PipelineInfo.stageCount].stage = Stage;
		ShaderStages[PipelineInfo.stageCount].module = ShaderModules[CurrStage]->GetVkShaderModule();
		// Entry point name must outlive vkCreateGraphicsPipelines; stored in EntryPoints.
		Shaders[ShaderStage]->GetEntryPoint(EntryPoints[PipelineInfo.stageCount], 24);
		ShaderStages[PipelineInfo.stageCount].pName = EntryPoints[PipelineInfo.stageCount];

		// Optionally pin the subgroup (wave) size when the shader requests one and the
		// device supports it for this stage/size (VK_EXT_subgroup_size_control).
		if (Device->GetOptionalExtensions().HasEXTSubgroupSizeControl)
		{
			const FVulkanShaderHeader& ShaderHeader = Shaders[ShaderStage]->GetCodeHeader();
			if (ShaderHeader.WaveSize > 0)
			{
				// Check if supported by this stage and Check if requested size is supported
				const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& SubgroupSizeControlProperties = Device->GetOptionalExtensionProperties().SubgroupSizeControlProperties;
				const bool bSupportedStage = (VKHasAllFlags(SubgroupSizeControlProperties.requiredSubgroupSizeStages, Stage));
				const bool bSupportedSize = ((ShaderHeader.WaveSize >= SubgroupSizeControlProperties.minSubgroupSize) && (ShaderHeader.WaveSize <= SubgroupSizeControlProperties.maxSubgroupSize));
				if (bSupportedStage && bSupportedSize)
				{
					ZeroVulkanStruct(RequiredSubgroupSizeCreateInfo[PipelineInfo.stageCount], VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
					RequiredSubgroupSizeCreateInfo[PipelineInfo.stageCount].requiredSubgroupSize = ShaderHeader.WaveSize;
					ShaderStages[PipelineInfo.stageCount].pNext = &RequiredSubgroupSizeCreateInfo[PipelineInfo.stageCount];
				}
			}
		}

		PipelineInfo.stageCount++;
	}

	check(PipelineInfo.stageCount != 0);

	// Vertex Input. The structure is mandatory even without vertex attributes.
	VkPipelineVertexInputStateCreateInfo VBInfo;
	ZeroVulkanStruct(VBInfo, VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO);
	TArray<VkVertexInputBindingDescription, TInlineAllocator<32>> VBBindings;
	for (const FGfxPipelineDesc::FVertexBinding& SourceBinding : GfxEntry->VertexBindings)
	{
		VkVertexInputBindingDescription& Binding = VBBindings.AddDefaulted_GetRef();
		SourceBinding.WriteInto(Binding);
	}
	VBInfo.vertexBindingDescriptionCount = VBBindings.Num();
	VBInfo.pVertexBindingDescriptions = VBBindings.GetData();
	TArray<VkVertexInputAttributeDescription, TInlineAllocator<32>> VBAttributes;
	for (const FGfxPipelineDesc::FVertexAttribute& SourceAttr : GfxEntry->VertexAttributes)
	{
		VkVertexInputAttributeDescription& Attr = VBAttributes.AddDefaulted_GetRef();
		SourceAttr.WriteInto(Attr);
	}
	VBInfo.vertexAttributeDescriptionCount = VBAttributes.Num();
	VBInfo.pVertexAttributeDescriptions = VBAttributes.GetData();
	PipelineInfo.pVertexInputState = &VBInfo;

	PipelineInfo.pColorBlendState = &CBInfo;
	PipelineInfo.pMultisampleState = &MSInfo;
	PipelineInfo.pViewportState = &VPInfo;

	PipelineInfo.renderPass = PSO->RenderPass->GetHandle();
	PipelineInfo.subpass = GfxEntry->SubpassIndex;

	// Input assembly
	VkPipelineInputAssemblyStateCreateInfo InputAssembly;
	ZeroVulkanStruct(InputAssembly, VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO);
	InputAssembly.topology = (VkPrimitiveTopology)GfxEntry->Topology;

	PipelineInfo.pInputAssemblyState = &InputAssembly;

	// Rasterizer: start from engine defaults, then overlay the serialized state.
	VkPipelineRasterizationStateCreateInfo RasterizerState;
	FVulkanRasterizerState::ResetCreateInfo(RasterizerState);
	GfxEntry->Rasterizer.WriteInto(RasterizerState);

	// Depth/stencil
	VkPipelineDepthStencilStateCreateInfo DepthStencilState;
	ZeroVulkanStruct(DepthStencilState, VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO);
	GfxEntry->DepthStencil.WriteInto(DepthStencilState);

	PipelineInfo.pRasterizationState = &RasterizerState;
	PipelineInfo.pDepthStencilState = &DepthStencilState;

	// Dynamic state: viewport/scissor/stencil-ref/depth-bounds are set at draw time.
	VkPipelineDynamicStateCreateInfo DynamicState;
	ZeroVulkanStruct(DynamicState, VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO);
	VkDynamicState DynamicStatesEnabled[VK_DYNAMIC_STATE_RANGE_SIZE];
	DynamicState.pDynamicStates = DynamicStatesEnabled;
	FMemory::Memzero(DynamicStatesEnabled);
	DynamicStatesEnabled[DynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
	DynamicStatesEnabled[DynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
	DynamicStatesEnabled[DynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
	DynamicStatesEnabled[DynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;

	PipelineInfo.pDynamicState = &DynamicState;

	// VRS is in play if a non-1x1 pipeline rate was requested, or an attachment rate
	// image exists and the combiner is not a pure passthrough.
	const bool bUsingVariableRateShading = PSO->Desc.ShadingRate != EVRSShadingRate::VRSSR_1x1 || (PSO->RenderPass->Layout.bHasFragmentDensityAttachment && PSO->Desc.Combiner != EVRSRateCombiner::VRSRB_Passthrough);

	// Must stay in scope until pipeline creation - chained via PipelineInfo.pNext.
	VkPipelineFragmentShadingRateStateCreateInfoKHR PipelineFragmentShadingRate;
	if (GRHISupportsPipelineVariableRateShading && GRHIVariableRateShadingImageDataType == VRSImage_Palette && bUsingVariableRateShading)
	{
		const VkExtent2D FragmentSize = Device->GetBestMatchedFragmentSize(PSO->Desc.ShadingRate);
		VkFragmentShadingRateCombinerOpKHR PipelineToPrimitiveCombinerOperation = FragmentCombinerOpMap[(uint8)PSO->Desc.Combiner];

		ZeroVulkanStruct(PipelineFragmentShadingRate, VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
		PipelineFragmentShadingRate.fragmentSize = FragmentSize;
		PipelineFragmentShadingRate.combinerOps[0] = PipelineToPrimitiveCombinerOperation;
		PipelineFragmentShadingRate.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR; // @todo: This needs to be specified too.

		PipelineInfo.pNext = (void*)&PipelineFragmentShadingRate;
	}

	if (PSO->UsesBindless())
	{
		PipelineInfo.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
	}

	VkResult Result = VK_ERROR_INITIALIZATION_FAILED;

	// Time the creation so hitchy compiles can be surfaced in the log.
	double BeginTime = FPlatformTime::Seconds();

	Result = CreateVKPipeline(PSO, Shaders, PipelineInfo, PSOCompileType);

	if (Result != VK_SUCCESS)
	{
		FString ShaderHashes = GfxShaderHashesToString(Shaders);

		UE_LOG(LogVulkanRHI, Error, TEXT("Failed to create graphics pipeline.\nShaders in pipeline: %s"), *ShaderHashes);
		return false;
	}

	double EndTime = FPlatformTime::Seconds();
	double Delta = EndTime - BeginTime;
	if (Delta > HitchTime)
	{
		UE_LOG(LogVulkanRHI, Verbose, TEXT("Hitchy gfx pipeline key CS (%.3f ms)"), (float)(Delta * 1000.0));
	}

	INC_DWORD_STAT(STAT_VulkanNumPSOs);
	return true;
}
|
|
|
|
#if PLATFORM_ANDROID
|
|
|
|
// Android-only: compiles a graphics PSO out-of-process via the remote compile service,
// then merges the resulting pipeline cache blob into DestPipelineCache.
// PipelineLock guards DestPipelineCache reads/merges. Returns VK_SUCCESS only when the
// service produced a cache and the merge succeeded; the caller falls back to in-process
// compilation otherwise.
static VkResult CreatePSOWithExternalService(FVulkanDevice* Device, FGraphicsPipelineStateInitializer::EPSOPrecacheCompileType PSOCompileType, FVulkanRHIGraphicsPipelineState* PSO, FVulkanShader* Shaders[ShaderStage::NumGraphicsStages], const VkGraphicsPipelineCreateInfo& PipelineInfo, VkPipelineCache DestPipelineCache, FRWLock& PipelineLock)
{
	VkResult Result = VK_ERROR_INITIALIZATION_FAILED;
	// Patched SPIR-V is sent to the service; VS/PS only on this path.
	FVulkanShader::FSpirvCode VS = PSO->GetPatchedSpirvCode(Shaders[ShaderStage::Vertex]);
	FVulkanShader::FSpirvCode PS = PSO->GetPatchedSpirvCode(Shaders[ShaderStage::Pixel]);
	TArrayView<uint32_t> VSCode = VS.GetCodeView();
	TArrayView<uint32_t> PSCode = PS.GetCodeView();
	size_t AfterSize = 0;

	VkPipelineCache LocalPipelineCache = VK_NULL_HANDLE;
	const FGfxPipelineDesc* GfxEntry = &PSO->Desc;

	TArray<uint8> InitialCacheData;
	bool bSupplyDestPSOCacheData = false; // this can be an optimization, but only if the PSO compile is able to use content from an existing cache.
	if( bSupplyDestPSOCacheData )
	{
		QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_ExternalInitialCache);
		FRWScopeLock Lock(PipelineLock, SLT_Write);
		// Standard two-call pattern: query size, then fetch the cache bytes.
		size_t InitialCacheSize = 0;
		VulkanRHI::vkGetPipelineCacheData(Device->GetInstanceHandle(), DestPipelineCache, &InitialCacheSize, nullptr);
		InitialCacheData.SetNumUninitialized(InitialCacheSize);
		VulkanRHI::vkGetPipelineCacheData(Device->GetInstanceHandle(), DestPipelineCache, &InitialCacheSize, InitialCacheData.GetData());
	}
	FString FailLog;
	// Blocks while the external service compiles; returns a freshly created local cache
	// containing the compiled pipeline (or VK_NULL_HANDLE on failure).
	LocalPipelineCache = FVulkanPlatform::PrecompilePSO(Device, InitialCacheData, PSOCompileType, &PipelineInfo, GfxEntry, &PSO->RenderPass->GetLayout(), VSCode, PSCode, AfterSize,&FailLog);

	if (ensure(LocalPipelineCache != VK_NULL_HANDLE))
	{
		{
			QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_ExternalMergeResult);
			FRWScopeLock Lock(PipelineLock, SLT_Write);
			Result = VK_SUCCESS;
			// Fold the service's result into the shared destination cache under the lock.
			VERIFYVULKANRESULT(VulkanRHI::vkMergePipelineCaches(Device->GetInstanceHandle(), DestPipelineCache, 1, &LocalPipelineCache));
		}
		// The temporary cache is no longer needed once merged.
		VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), LocalPipelineCache, VULKAN_CPU_ALLOCATOR);
	}
	else
	{
		UE_LOG(LogVulkanRHI, Error, TEXT("Android RemoteCompileServices Failed to create graphics pipeline (%s).\nShaders in pipeline %s"),*FailLog, *GfxShaderHashesToString(Shaders));
	}
	return Result;
}
|
|
#endif
|
|
|
|
VkResult FVulkanPipelineStateCacheManager::CreateVKPipeline(FVulkanRHIGraphicsPipelineState* PSO, FVulkanShader* Shaders[ShaderStage::NumGraphicsStages], const VkGraphicsPipelineCreateInfo& PipelineInfo, FGraphicsPipelineStateInitializer::EPSOPrecacheCompileType PSOCompileType)
|
|
{
|
|
bool bIsPrecompileJob = PSOCompileType != FGraphicsPipelineStateInitializer::EPSOPrecacheCompileType::NotSet;
|
|
if(FVulkanChunkedPipelineCacheManager::IsEnabled())
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_VKPIPELINE);
|
|
|
|
// Use chunk caching and bypass FVulkanPipelineStateCacheManager's PSO caching
|
|
// Placeholder PSO size - TODO: remove pipeline cache size stuff.
|
|
PSO->PipelineCacheSize = 20 * 1024; // This is only required bUseLRU == true.
|
|
return FVulkanChunkedPipelineCacheManager::Get().CreatePSO(PSO, bIsPrecompileJob, FVulkanChunkedPipelineCacheManager::FPSOCreateCallbackFunc<FVulkanRHIGraphicsPipelineState>(
|
|
[&](FVulkanChunkedPipelineCacheManager::FPSOCreateFuncParams<FVulkanRHIGraphicsPipelineState>& Params)
|
|
{
|
|
FVulkanRHIGraphicsPipelineState* PSO = Params.PSO;
|
|
VkPipelineCache& PipelineCache = Params.DestPipelineCache;
|
|
FVulkanChunkedPipelineCacheManager::EPSOOperation PSOOperation = Params.PSOOperation;
|
|
|
|
check(PSO->VulkanPipeline == 0);
|
|
check(PSOOperation == FVulkanChunkedPipelineCacheManager::EPSOOperation::CreateAndStorePSO || PSOOperation == FVulkanChunkedPipelineCacheManager::EPSOOperation::CreateIfPresent);
|
|
VkResult Result = VK_ERROR_UNKNOWN;
|
|
|
|
if(PSOOperation == FVulkanChunkedPipelineCacheManager::EPSOOperation::CreateIfPresent)
|
|
{
|
|
const bool bCanTestForExistence = Device->GetOptionalExtensions().HasEXTPipelineCreationCacheControl;
|
|
if (bCanTestForExistence)
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_TestCreate);
|
|
FRWScopeLock Lock(Params.DestPipelineCacheLock, SLT_ReadOnly);
|
|
VkGraphicsPipelineCreateInfo TestPipelineInfo = PipelineInfo;
|
|
TestPipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT;
|
|
Result = VulkanRHI::vkCreateGraphicsPipelines(Device->GetInstanceHandle(), PipelineCache, 1, &TestPipelineInfo, VULKAN_CPU_ALLOCATOR, &PSO->VulkanPipeline);
|
|
}
|
|
else
|
|
{
|
|
// if we cant test we must create.
|
|
Result = VK_PIPELINE_COMPILE_REQUIRED_EXT;
|
|
}
|
|
return Result;
|
|
}
|
|
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_vkCreateGraphicsPipeline);
|
|
#if PLATFORM_ANDROID
|
|
if (FVulkanAndroidPlatform::AreRemoteCompileServicesActive() && bIsPrecompileJob)
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_ExternalCreate);
|
|
Result = CreatePSOWithExternalService(Device, PSOCompileType, PSO, Shaders, PipelineInfo, PipelineCache, Params.DestPipelineCacheLock);
|
|
}
|
|
// if the external service did not produce a result the following will create it in-process as a best effort fallback.
|
|
#endif
|
|
if (Result != VK_SUCCESS) //-V547
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_vkCreate);
|
|
FRWScopeLock Lock(Params.DestPipelineCacheLock, SLT_ReadOnly);
|
|
Result = VulkanRHI::vkCreateGraphicsPipelines(Device->GetInstanceHandle(), PipelineCache, 1, &PipelineInfo, VULKAN_CPU_ALLOCATOR, &PSO->VulkanPipeline);
|
|
}
|
|
return Result;
|
|
}));
|
|
}
|
|
|
|
VkPipeline* Pipeline = &PSO->VulkanPipeline;
|
|
|
|
FPipelineCache& Cache = bIsPrecompileJob ? CurrentPrecompilingPSOCache : GlobalPSOCache;
|
|
|
|
VkPipelineCache LocalPipelineCache = VK_NULL_HANDLE;
|
|
VkResult Result = VK_ERROR_INITIALIZATION_FAILED;
|
|
uint32 PSOSize = 0;
|
|
bool bWantPSOSize = false;
|
|
const FGfxPipelineDesc* GfxEntry = &PSO->Desc;
|
|
uint64 ShaderHash = 0;
|
|
bool bValidateServicePSO = false;
|
|
if (bUseLRU)
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_VulkanPSOLRUSizeLookup);
|
|
|
|
#if VULKAN_USE_SHADERKEYS
|
|
ShaderHash = GfxEntry->ShaderKeyShared;
|
|
#else
|
|
ShaderHash = GfxEntry->ShaderHashes.Hash;
|
|
#endif
|
|
{
|
|
FScopeLock Lock(&LRUCS);
|
|
FVulkanPipelineSize* Found = LRU2SizeList.Find(ShaderHash);
|
|
if (Found)
|
|
{
|
|
PSOSize = Found->PipelineSize;
|
|
}
|
|
else
|
|
{
|
|
bWantPSOSize = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if PLATFORM_ANDROID
|
|
if (bIsPrecompileJob && FVulkanAndroidPlatform::AreRemoteCompileServicesActive() /*&&
|
|
CVarPipelineLRUCacheEvictBinary.GetValueOnAnyThread()*/)
|
|
{
|
|
FVulkanShader::FSpirvCode VS = PSO->GetPatchedSpirvCode(Shaders[ShaderStage::Vertex]);
|
|
FVulkanShader::FSpirvCode PS = PSO->GetPatchedSpirvCode(Shaders[ShaderStage::Pixel]);
|
|
TArrayView<uint32_t> VSCode = VS.GetCodeView();
|
|
TArrayView<uint32_t> PSCode = PS.GetCodeView();
|
|
size_t AfterSize = 0;
|
|
FString FailLog;
|
|
LocalPipelineCache = FVulkanPlatform::PrecompilePSO(Device, MakeArrayView<uint8>(nullptr,0), PSOCompileType, &PipelineInfo, GfxEntry, &PSO->RenderPass->GetLayout(), VSCode, PSCode, AfterSize,&FailLog);
|
|
|
|
if (ensure(LocalPipelineCache != VK_NULL_HANDLE))
|
|
{
|
|
Pipeline[0] = VK_NULL_HANDLE;
|
|
Result = VK_SUCCESS;
|
|
|
|
// enable bValidateServicePSO to compare PSOService result against engine's result.
|
|
//bValidateServicePSO = true;
|
|
if(bValidateServicePSO)
|
|
{
|
|
Result = VK_ERROR_INITIALIZATION_FAILED;
|
|
bWantPSOSize = true;
|
|
VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), LocalPipelineCache, VULKAN_CPU_ALLOCATOR);
|
|
LocalPipelineCache = VK_NULL_HANDLE;
|
|
}
|
|
|
|
if(bWantPSOSize)
|
|
{
|
|
PSOSize = AfterSize;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
FString ShaderHashes = GfxShaderHashesToString(Shaders);
|
|
UE_LOG(LogVulkanRHI, Error, TEXT("Android RemoteCompileServices Failed to create graphics pipeline (%s).\nShaders in pipeline: %s"), *FailLog, *ShaderHashes);
|
|
}
|
|
}
|
|
#endif
|
|
// Disabling 'V547: Expression is always true'
|
|
// The precompile code above can set Result to success on Android platforms.
|
|
if(Result != VK_SUCCESS) //-V547
|
|
{
|
|
if (bWantPSOSize)
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_VulkanPSOCreationTimeLRU);
|
|
|
|
// We create a single pipeline cache for this create so we can observe the size for LRU cache's accounting.
|
|
// measuring deltas from the global PipelineCache is not thread safe.
|
|
VkPipelineCacheCreateInfo PipelineCacheInfo;
|
|
ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
|
|
VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(Device->GetInstanceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &LocalPipelineCache));
|
|
Result = VulkanRHI::vkCreateGraphicsPipelines(Device->GetInstanceHandle(), LocalPipelineCache, 1, &PipelineInfo, VULKAN_CPU_ALLOCATOR, Pipeline);
|
|
if (bValidateServicePSO)
|
|
{
|
|
size_t Diff = 0;
|
|
if (ensure(LocalPipelineCache != VK_NULL_HANDLE))
|
|
{
|
|
VulkanRHI::vkGetPipelineCacheData(Device->GetInstanceHandle(), LocalPipelineCache, &Diff, nullptr);
|
|
}
|
|
UE_CLOG(Diff != PSOSize, LogVulkanRHI, Warning, TEXT("PSO service size mismatches engine size! [PSOService = %d, Game Process = %d]"), PSOSize, Diff);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_VulkanPSOVulkanCreationTime);
|
|
FScopedPipelineCache PipelineCacheShared = Cache.Get(EPipelineCacheAccess::Shared);
|
|
Result = VulkanRHI::vkCreateGraphicsPipelines(Device->GetInstanceHandle(), PipelineCacheShared.Get(), 1, &PipelineInfo, VULKAN_CPU_ALLOCATOR, Pipeline);
|
|
}
|
|
}
|
|
|
|
if (LocalPipelineCache != VK_NULL_HANDLE)
|
|
{
|
|
if (bWantPSOSize)
|
|
{
|
|
FScopeLock Lock(&LRUCS);
|
|
FVulkanPipelineSize* Found = LRU2SizeList.Find(ShaderHash);
|
|
if (Found == nullptr) // Check we're not beaten to it..
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_Calc_LRU_Size);
|
|
size_t Diff = 0;
|
|
|
|
if (LocalPipelineCache != VK_NULL_HANDLE)
|
|
{
|
|
VulkanRHI::vkGetPipelineCacheData(Device->GetInstanceHandle(), LocalPipelineCache, &Diff, nullptr);
|
|
}
|
|
|
|
if (!Diff)
|
|
{
|
|
UE_LOG(LogVulkanRHI, Warning, TEXT("Shader size was computed as zero, using 20k instead."));
|
|
Diff = 20 * 1024;
|
|
}
|
|
FVulkanPipelineSize PipelineSize;
|
|
PipelineSize.ShaderHash = ShaderHash;
|
|
PipelineSize.PipelineSize = (uint32)Diff;
|
|
LRU2SizeList.Add(ShaderHash, PipelineSize);
|
|
PSOSize = Diff;
|
|
}
|
|
else
|
|
{
|
|
PSOSize = Found->PipelineSize;
|
|
}
|
|
}
|
|
|
|
FScopedPipelineCache PipelineCacheExclusive = Cache.Get(EPipelineCacheAccess::Exclusive);
|
|
if (PipelineCacheExclusive.Get() != VK_NULL_HANDLE)
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_VulkanPSOCacheMerge);
|
|
VERIFYVULKANRESULT(VulkanRHI::vkMergePipelineCaches(Device->GetInstanceHandle(), PipelineCacheExclusive.Get(), 1, &LocalPipelineCache));
|
|
}
|
|
VulkanRHI::vkDestroyPipelineCache(Device->GetInstanceHandle(), LocalPipelineCache, VULKAN_CPU_ALLOCATOR);
|
|
}
|
|
|
|
VERIFYVULKANRESULT(Result);
|
|
|
|
PSO->PipelineCacheSize = PSOSize;
|
|
|
|
return Result;
|
|
}
|
|
|
|
VkResult FVulkanPipelineStateCacheManager::CreateRayTracingPipeline(const VkRayTracingPipelineCreateInfoKHR& CreateInfo, bool bIsPartial, VkPipeline& OutPipeline)
{
	// Issues the driver call against a given pipeline cache handle. The scoped
	// cache objects below outlive the call, so passing the raw handle is safe.
	const auto CreateWithCache = [&](VkPipelineCache CacheHandle)
	{
		return VulkanDynamicAPI::vkCreateRayTracingPipelinesKHR(
			Device->GetInstanceHandle(),
			VK_NULL_HANDLE,  // Deferred Operation
			CacheHandle,     // Pipeline Cache
			1,
			&CreateInfo,
			VULKAN_CPU_ALLOCATOR,
			&OutPipeline);
	};

	if (bIsPartial)
	{
		// Partial compiles prefer the precompile cache when one is currently active.
		FScopedPipelineCache PrecompileCache = CurrentPrecompilingPSOCache.Get(EPipelineCacheAccess::Shared);
		if (PrecompileCache.Get() != VK_NULL_HANDLE)
		{
			return CreateWithCache(PrecompileCache.Get());
		}
	}

	// Non-partial creates (and partial creates without an active precompile cache)
	// go through the global PSO cache.
	FScopedPipelineCache GlobalCache = GlobalPSOCache.Get(EPipelineCacheAccess::Shared);
	return CreateWithCache(GlobalCache.Get());
}
|
|
|
|
void FVulkanPipelineStateCacheManager::DestroyCache()
|
|
{
|
|
VkDevice DeviceHandle = Device->GetInstanceHandle();
|
|
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_DestroyCache_PSOLock);
|
|
FScopeLock Lock1(&GraphicsPSOLockedCS);
|
|
int idx = 0;
|
|
for (auto& Pair : GraphicsPSOLockedMap)
|
|
{
|
|
FVulkanRHIGraphicsPipelineState* Pipeline = Pair.Value;
|
|
FPlatformMisc::LowLevelOutputDebugStringf(TEXT("Leaked PSO %05d: RefCount=%d Handle=0x%p\n"), idx++, Pipeline->GetRefCount(), Pipeline);
|
|
}
|
|
LRU2SizeList.Reset();
|
|
|
|
#if LRU_DEBUG
|
|
LRUDump();
|
|
#endif
|
|
|
|
// Compute pipelines already deleted...
|
|
ComputePipelineEntries.Reset();
|
|
}
|
|
|
|
void FVulkanPipelineStateCacheManager::RebuildCache()
|
|
{
|
|
if (IsInGameThread())
|
|
{
|
|
FlushRenderingCommands();
|
|
}
|
|
DestroyCache();
|
|
}
|
|
|
|
// Gathers the per-stage shader hashes from a PSO initializer's bound shader state
// and folds them into the combined hash (via Finalize). Stages compiled out by the
// platform macros below simply keep their default hash.
FVulkanShaderHashes::FVulkanShaderHashes(const FGraphicsPipelineStateInitializer& PSOInitializer)
{
	Stages[ShaderStage::Vertex] = GetShaderHash<FRHIVertexShader, FVulkanVertexShader>(PSOInitializer.BoundShaderState.VertexShaderRHI);
	Stages[ShaderStage::Pixel] = GetShaderHash<FRHIPixelShader, FVulkanPixelShader>(PSOInitializer.BoundShaderState.PixelShaderRHI);
#if PLATFORM_SUPPORTS_MESH_SHADERS
	Stages[ShaderStage::Mesh] = GetShaderHash<FRHIMeshShader, FVulkanMeshShader>(PSOInitializer.BoundShaderState.GetMeshShader());
	Stages[ShaderStage::Task] = GetShaderHash<FRHIAmplificationShader, FVulkanTaskShader>(PSOInitializer.BoundShaderState.GetAmplificationShader());
#endif
#if VULKAN_SUPPORTS_GEOMETRY_SHADERS
	Stages[ShaderStage::Geometry] = GetShaderHash<FRHIGeometryShader, FVulkanGeometryShader>(PSOInitializer.BoundShaderState.GetGeometryShader());
#endif
	// Combine the per-stage hashes into the single Hash member.
	Finalize();
}
|
|
|
|
FVulkanShaderHashes::FVulkanShaderHashes()
{
	// Empty hash set: zero combined hash and all per-stage hashes cleared.
	Hash = 0;
	FMemory::Memzero(Stages);
}
|
|
|
|
FVulkanLayout* FVulkanPipelineStateCacheManager::FindOrAddLayout(const FVulkanDescriptorSetsLayoutInfo& DescriptorSetLayoutInfo, bool bGfxLayout, bool bUsesBindless)
{
	// The layout map is shared across threads; hold the lock for the entire
	// find-or-create so two threads cannot build the same layout twice.
	FScopeLock Lock(&LayoutMapCS);

	FVulkanLayout** ExistingLayout = LayoutMap.Find(DescriptorSetLayoutInfo);
	if (ExistingLayout != nullptr)
	{
		// A cached layout must agree on whether it was built for graphics.
		check(bGfxLayout == (*ExistingLayout)->IsGfxLayout());
		return *ExistingLayout;
	}

	// Cache miss: build, compile, and publish a new layout under the same lock.
	FVulkanLayout* NewLayout = new FVulkanLayout(Device, bGfxLayout, bUsesBindless);
	NewLayout->DescriptorSetLayout.CopyFrom(DescriptorSetLayoutInfo);
	NewLayout->Compile(DSetLayoutMap);

	LayoutMap.Add(DescriptorSetLayoutInfo, NewLayout);
	return NewLayout;
}
|
|
|
|
// Translates an engine primitive type into the matching Vulkan topology enum.
// Only the four list/strip types below are supported; anything else asserts and
// yields VK_PRIMITIVE_TOPOLOGY_MAX_ENUM. OutControlPoints is always set to zero
// (no patch-list topologies are produced here).
static inline VkPrimitiveTopology UEToVulkanTopologyType(const FVulkanDevice* InDevice, EPrimitiveType PrimitiveType, uint16& OutControlPoints)
{
	OutControlPoints = 0;

	VkPrimitiveTopology Topology = VK_PRIMITIVE_TOPOLOGY_MAX_ENUM;
	switch (PrimitiveType)
	{
		case PT_PointList:
			Topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
			break;
		case PT_LineList:
			Topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
			break;
		case PT_TriangleList:
			Topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
			break;
		case PT_TriangleStrip:
			Topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
			break;
		default:
			checkf(false, TEXT("Unsupported EPrimitiveType %d"), (uint32)PrimitiveType);
			break;
	}

	return Topology;
}
|
|
|
|
// Fills an FGfxPipelineDesc (and the matching descriptor-set layout info) from an
// engine PSO initializer. The resulting desc is the hashable description used as
// the lookup/creation key for graphics pipelines (see CreateKey2 at the call site).
void FVulkanPipelineStateCacheManager::CreateGfxEntry(const FGraphicsPipelineStateInitializer& PSOInitializer, FVulkanDescriptorSetsLayoutInfo& DescriptorSetLayoutInfo, FGfxPipelineDesc* Desc)
{
	FGfxPipelineDesc* OutGfxEntry = Desc;

	FVulkanShader* Shaders[ShaderStage::NumGraphicsStages];
	GetVulkanGfxShaders(PSOInitializer.BoundShaderState, Shaders);

	FVulkanVertexInputStateInfo VertexInputState;

	{
		// NOTE(review): BSI appears unused in this scope (the code below reads
		// PSOInitializer.BoundShaderState directly) — confirm and consider removing.
		const FBoundShaderStateInput& BSI = PSOInitializer.BoundShaderState;

		FUniformBufferGatherInfo UBGatherInfo;
		uint32 NumActiveShaders = 0;
		uint32 NumBindlessShaders = 0;

		// Accumulates one stage's bindings into the descriptor-set layout info and
		// counts active/bindless stages for the all-or-nothing bindless check below.
		auto ProcessShaderStage = [&DescriptorSetLayoutInfo, &UBGatherInfo, &NumActiveShaders, &NumBindlessShaders](VkShaderStageFlagBits StageFlag, ShaderStage::EStage Stage, FVulkanShader* Shader)
		{
			if (Shader)
			{
				const FVulkanShaderHeader& Header = Shader->GetCodeHeader();
				DescriptorSetLayoutInfo.ProcessBindingsForStage(StageFlag, Stage, Header, UBGatherInfo);
				NumActiveShaders++;
				if (Shader->UsesBindless())
				{
					NumBindlessShaders++;
				}
			}
		};

		// Vertex input layout comes from the vertex declaration filtered by the
		// vertex shader's input/output mask.
		if (Shaders[ShaderStage::Vertex])
		{
			const FVulkanShaderHeader& VSHeader = Shaders[ShaderStage::Vertex]->GetCodeHeader();
			VertexInputState.Generate(ResourceCast(PSOInitializer.BoundShaderState.VertexDeclarationRHI), VSHeader.InOutMask);
		}

		if (Shaders[ShaderStage::Pixel] && Shaders[ShaderStage::Pixel]->GetCodeHeader().InputAttachmentInfos.Num())
		{
			// input attachements can't exist in a first sub-pass
			check(PSOInitializer.SubpassHint != ESubpassHint::None);
			check(PSOInitializer.SubpassIndex != 0);
		}

		ProcessShaderStage(VK_SHADER_STAGE_VERTEX_BIT, ShaderStage::Vertex, Shaders[ShaderStage::Vertex]);
		ProcessShaderStage(VK_SHADER_STAGE_FRAGMENT_BIT, ShaderStage::Pixel, Shaders[ShaderStage::Pixel]);

#if PLATFORM_SUPPORTS_MESH_SHADERS
		ProcessShaderStage(VK_SHADER_STAGE_MESH_BIT_EXT, ShaderStage::Mesh, Shaders[ShaderStage::Mesh]);
		ProcessShaderStage(VK_SHADER_STAGE_TASK_BIT_EXT, ShaderStage::Task, Shaders[ShaderStage::Task]);
#endif

#if VULKAN_SUPPORTS_GEOMETRY_SHADERS
		ProcessShaderStage(VK_SHADER_STAGE_GEOMETRY_BIT, ShaderStage::Geometry, Shaders[ShaderStage::Geometry]);
#endif

		checkf((NumBindlessShaders == 0) || (NumBindlessShaders == NumActiveShaders), TEXT("All shaders must be bindless or non-bindless."));

		// Second pass
		const int32 NumImmutableSamplers = PSOInitializer.ImmutableSamplerState.ImmutableSamplers.Num();
		TArrayView<FRHISamplerState*> ImmutableSamplers(NumImmutableSamplers > 0 ? &(FRHISamplerState*&)PSOInitializer.ImmutableSamplerState.ImmutableSamplers[0] : nullptr, NumImmutableSamplers);
		DescriptorSetLayoutInfo.FinalizeBindings<false>(*Device, UBGatherInfo, ImmutableSamplers, (NumBindlessShaders != 0));
	}

	OutGfxEntry->SubpassIndex = PSOInitializer.SubpassIndex;

	FVulkanBlendState* BlendState = ResourceCast(PSOInitializer.BlendState);

	// Alpha-to-coverage is only meaningful when MSAA is active.
	OutGfxEntry->UseAlphaToCoverage = PSOInitializer.NumSamples > 1 && BlendState->Initializer.bUseAlphaToCoverage ? 1 : 0;

	OutGfxEntry->RasterizationSamples = PSOInitializer.NumSamples;
	OutGfxEntry->Topology = (uint32)UEToVulkanTopologyType(Device, PSOInitializer.PrimitiveType, OutGfxEntry->ControlPoints);
	uint32 NumRenderTargets = PSOInitializer.ComputeNumValidRenderTargets();

	// The subpass hints below narrow the color attachment count for later subpasses.
	if (PSOInitializer.SubpassHint == ESubpassHint::DeferredShadingSubpass && PSOInitializer.SubpassIndex >= 2)
	{
		// GBuffer attachements are not used as output in a shading sub-pass
		// Only SceneColor is used as a color attachment
		NumRenderTargets = 1;
	}

	if (PSOInitializer.SubpassHint == ESubpassHint::DepthReadSubpass && PSOInitializer.SubpassIndex >= 1)
	{
		// Only SceneColor is used as a color attachment after the first subpass (not SceneDepthAux)
		NumRenderTargets = 1;
	}

	if (PSOInitializer.SubpassHint == ESubpassHint::CustomResolveSubpass)
	{
		NumRenderTargets = 1; // This applies to base and depth passes as well. One render target for base and depth, another one for custom resolve.
		if (PSOInitializer.SubpassIndex >= 2)
		{
			// the resolve subpass renders to a non MSAA surface
			OutGfxEntry->RasterizationSamples = 1;
		}
	}

	// Capture per-render-target blend state.
	OutGfxEntry->ColorAttachmentStates.AddUninitialized(NumRenderTargets);
	for (int32 Index = 0; Index < OutGfxEntry->ColorAttachmentStates.Num(); ++Index)
	{
		OutGfxEntry->ColorAttachmentStates[Index].ReadFrom(BlendState->BlendStates[Index]);
	}

	// Capture vertex bindings/attributes generated from the vertex declaration above.
	{
		const VkPipelineVertexInputStateCreateInfo& VBInfo = VertexInputState.GetInfo();
		OutGfxEntry->VertexBindings.AddUninitialized(VBInfo.vertexBindingDescriptionCount);
		for (uint32 Index = 0; Index < VBInfo.vertexBindingDescriptionCount; ++Index)
		{
			OutGfxEntry->VertexBindings[Index].ReadFrom(VBInfo.pVertexBindingDescriptions[Index]);
		}

		OutGfxEntry->VertexAttributes.AddUninitialized(VBInfo.vertexAttributeDescriptionCount);
		for (uint32 Index = 0; Index < VBInfo.vertexAttributeDescriptionCount; ++Index)
		{
			OutGfxEntry->VertexAttributes[Index].ReadFrom(VBInfo.pVertexAttributeDescriptions[Index]);
		}
	}

	// Flatten the descriptor-set layout bindings into the desc so they take part in hashing.
	const TArray<FVulkanDescriptorSetsLayout::FSetLayout>& Layouts = DescriptorSetLayoutInfo.GetLayouts();
	OutGfxEntry->DescriptorSetLayoutBindings.AddDefaulted(Layouts.Num());
	for (int32 Index = 0; Index < Layouts.Num(); ++Index)
	{
		for (int32 SubIndex = 0; SubIndex < Layouts[Index].LayoutBindings.Num(); ++SubIndex)
		{
			FDescriptorSetLayoutBinding& Binding = OutGfxEntry->DescriptorSetLayoutBindings[Index].AddDefaulted_GetRef();
			Binding.ReadFrom(Layouts[Index].LayoutBindings[SubIndex]);
		}
	}

	OutGfxEntry->Rasterizer.ReadFrom(ResourceCast(PSOInitializer.RasterizerState)->RasterizerState);
	{
		VkPipelineDepthStencilStateCreateInfo DSInfo;
		ResourceCast(PSOInitializer.DepthStencilState)->SetupCreateInfo(PSOInitializer, DSInfo);
		OutGfxEntry->DepthStencil.ReadFrom(DSInfo);
	}

	int32 NumShaders = 0;
#if VULKAN_USE_SHADERKEYS
	// Combine the per-stage shader keys into one shared key; each stage's key is
	// weighted by a distinct large prime so swapping keys between stages changes
	// the combined value.
	uint64 SharedKey = 0;
	uint64 Primes[] = {
		6843488303525203279llu,
		3095754086865563867llu,
		8242695776924673527llu,
		7556751872809527943llu,
		8278265491465149053llu,
		1263027877466626099llu,
		2698115308251696101llu,
	};
	static_assert(sizeof(Primes) / sizeof(Primes[0]) >= ShaderStage::NumGraphicsStages);
	for (int32 Index = 0; Index < ShaderStage::NumGraphicsStages; ++Index)
	{
		FVulkanShader* Shader = Shaders[Index];
		uint64 Key = 0;
		if (Shader)
		{
			Key = Shader->GetShaderKey();
			++NumShaders;
		}
		OutGfxEntry->ShaderKeys[Index] = Key;
		SharedKey += Key * Primes[Index];
	}
	OutGfxEntry->ShaderKeyShared = SharedKey;
#else
	// Fallback path: identify stages by their SHA hashes instead of shader keys.
	for (int32 Index = 0; Index < ShaderStage::NumGraphicsStages; ++Index)
	{
		FVulkanShader* Shader = Shaders[Index];
		if (Shader)
		{
			check(Shader->Spirv.Num() != 0);

			FSHAHash Hash = GetShaderHashForStage(PSOInitializer, (ShaderStage::EStage)Index);
			OutGfxEntry->ShaderHashes.Stages[Index] = Hash;

			++NumShaders;
		}
	}
	OutGfxEntry->ShaderHashes.Finalize();
#endif
	check(NumShaders > 0);

	// Record the render target layout derived from the initializer.
	FVulkanRenderTargetLayout RTLayout(PSOInitializer);
	OutGfxEntry->RenderTargets.ReadFrom(RTLayout);

	// Shading rate:
	OutGfxEntry->ShadingRate = PSOInitializer.bAllowVariableRateShading ? PSOInitializer.ShadingRate : EVRSShadingRate::VRSSR_1x1;
	OutGfxEntry->Combiner = PSOInitializer.bAllowVariableRateShading ? EVRSRateCombiner::VRSRB_Max : EVRSRateCombiner::VRSRB_Passthrough; // Forces using the 1x1 rate over any fragment density attachment when VRS is disallowed in material settings
}
|
|
|
|
|
|
|
|
|
|
|
|
// Constructs the RHI-side graphics PSO wrapper: captures the pipeline desc and a
// deep copy of its lookup key, takes references on every bound shader, and
// validates that bindless usage is all-or-nothing across the active stages.
// The actual VkPipeline handle is created later (VulkanPipeline starts null).
FVulkanRHIGraphicsPipelineState::FVulkanRHIGraphicsPipelineState(FVulkanDevice* InDevice, const FGraphicsPipelineStateInitializer& PSOInitializer_, const FGfxPipelineDesc& InDesc, FVulkanPSOKey* VulkanKey)
	: bIsRegistered(false)
	, PrimitiveType(PSOInitializer_.PrimitiveType)
	, VulkanPipeline(0)
	, Device(InDevice)
	, Desc(InDesc)
	, VulkanKey(VulkanKey->CopyDeep())
{
#if !UE_BUILD_SHIPPING
	SGraphicsRHICount++;
#endif

	// Collect the per-stage shader pointers from the bound shader state.
	FMemory::Memset(VulkanShaders, 0, sizeof(VulkanShaders));
	VulkanShaders[ShaderStage::Vertex] = static_cast<FVulkanVertexShader*>(PSOInitializer_.BoundShaderState.VertexShaderRHI);
#if PLATFORM_SUPPORTS_MESH_SHADERS
	VulkanShaders[ShaderStage::Mesh] = static_cast<FVulkanMeshShader*>(PSOInitializer_.BoundShaderState.GetMeshShader());
	VulkanShaders[ShaderStage::Task] = static_cast<FVulkanTaskShader*>(PSOInitializer_.BoundShaderState.GetAmplificationShader());
#endif
// NOTE(review): CreateGfxEntry guards geometry with VULKAN_SUPPORTS_GEOMETRY_SHADERS
// while this uses PLATFORM_SUPPORTS_GEOMETRY_SHADERS — confirm the difference is intentional.
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
	VulkanShaders[ShaderStage::Geometry] = static_cast<FVulkanGeometryShader*>(PSOInitializer_.BoundShaderState.GetGeometryShader());
#endif
	VulkanShaders[ShaderStage::Pixel] = static_cast<FVulkanPixelShader*>(PSOInitializer_.BoundShaderState.PixelShaderRHI);

	// AddRef every active shader so they stay alive for the PSO's lifetime, and
	// count bindless usage for the mixed-mode check below.
	uint32 ActiveShaderCount = 0;
	uint32 BindlessShaderCount = 0;
	for (int32 ShaderStageIndex = 0; ShaderStageIndex < ShaderStage::NumGraphicsStages; ShaderStageIndex++)
	{
		if (VulkanShaders[ShaderStageIndex] != nullptr)
		{
			VulkanShaders[ShaderStageIndex]->AddRef();

			ActiveShaderCount++;
			if (VulkanShaders[ShaderStageIndex]->UsesBindless())
			{
				BindlessShaderCount++;
			}
		}
	}
	checkf((BindlessShaderCount == 0) || (ActiveShaderCount == BindlessShaderCount), TEXT("Pipelines can't be created with mix of bindless and non-bindless shaders."));
	bUsesBindless = (BindlessShaderCount != 0);

#if VULKAN_PSO_CACHE_DEBUG
	// Debug-only copies to aid PSO cache inspection.
	PixelShaderRHI = PSOInitializer_.BoundShaderState.PixelShaderRHI;
	VertexShaderRHI = PSOInitializer_.BoundShaderState.VertexShaderRHI;
	VertexDeclarationRHI = PSOInitializer_.BoundShaderState.VertexDeclarationRHI;

#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
	GeometryShaderRHI = PSOInitializer_.BoundShaderState.GeometryShaderRHI;
#endif

	PSOInitializer = PSOInitializer_;
#endif
	PrecacheKey = RHIComputePrecachePSOHash(PSOInitializer_);
	INC_DWORD_STAT(STAT_VulkanNumGraphicsPSOs);
	INC_DWORD_STAT_BY(STAT_VulkanPSOKeyMemory, this->VulkanKey.GetDataRef().Num());
}
|
|
|
|
// Called when a graphics PSO is being destroyed. Unregisters it from the cache
// map (when registered) and releases its VkPipeline, routing through the LRU
// when the LRU is enabled so its accounting stays consistent.
void FVulkanPipelineStateCacheManager::NotifyDeletedGraphicsPSO(FRHIGraphicsPipelineState* PSO)
{
	FVulkanRHIGraphicsPipelineState* VkPSO = (FVulkanRHIGraphicsPipelineState*)PSO;
	Device->NotifyDeletedGfxPipeline(VkPSO);
	FVulkanPSOKey& Key = VkPSO->VulkanKey;
	DEC_DWORD_STAT_BY(STAT_VulkanPSOKeyMemory, Key.GetDataRef().Num());
	if(VkPSO->bIsRegistered)
	{
		// Registered PSOs live in GraphicsPSOLockedMap; remove under the map lock.
		FScopeLock Lock(&GraphicsPSOLockedCS);
		FVulkanRHIGraphicsPipelineState** Contained = GraphicsPSOLockedMap.Find(Key);
		check(Contained && *Contained == PSO);
		VkPSO->bIsRegistered = false;
		if(bUseLRU)
		{
			// LRU path: LRURemove handles the pipeline teardown and LRU accounting.
			LRURemove(*Contained);
			check((*Contained)->LRUNode == 0);
		}
		else
		{
			// Non-LRU path: destroy the VkPipeline directly.
			(*Contained)->DeleteVkPipeline(true);
			check(VkPSO->GetVulkanPipeline() == 0 );
		}
		GraphicsPSOLockedMap.Remove(Key);
	}
	else
	{
		// Unregistered PSO: it must not still be in the map under the same key.
		FScopeLock Lock(&GraphicsPSOLockedCS);
		FVulkanRHIGraphicsPipelineState** Contained = GraphicsPSOLockedMap.Find(Key);
		if (Contained && *Contained == VkPSO)
		{
			check(0);
		}
		VkPSO->DeleteVkPipeline(true);
	}
}
|
|
|
|
|
|
// Serializes graphics PSO creation when GVulkanPSOForceSingleThreaded requests it;
// see the comments in RHICreateGraphicsPipelineState (used to work around driver bugs).
static FCriticalSection CreateGraphicsPSOMutex;
|
|
|
|
// Find-or-create entry point for graphics PSOs. Builds the hashable desc/key,
// returns any already-cached PSO, otherwise creates a new one (optionally
// deferring the actual VkPipeline creation for evict-immediately precache jobs)
// and publishes it in GraphicsPSOLockedMap, resolving create races by keeping
// the first published instance.
FGraphicsPipelineStateRHIRef FVulkanPipelineStateCacheManager::RHICreateGraphicsPipelineState(const FGraphicsPipelineStateInitializer& Initializer)
{
	QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_NEW);

	// Optional lock for PSO creation, GVulkanPSOForceSingleThreaded is used to work around driver bugs.
	// GVulkanPSOForceSingleThreaded == Precompile can be used when the driver internally serializes PSO creation, this option reduces the driver queue size.
	// We stall precompile PSOs which increases the likelihood for non-precompile PSO to jump the queue.
	// Not using GraphicsPSOLockedCS as the create could take a long time on some platforms, holding GraphicsPSOLockedCS the whole time could cause hitching.
	const ESingleThreadedPSOCreateMode ThreadingMode = (ESingleThreadedPSOCreateMode)GVulkanPSOForceSingleThreaded;
	const bool bIsPrecache = Initializer.bFromPSOFileCache || Initializer.bPSOPrecache;
	bool bShouldLock = ThreadingMode == ESingleThreadedPSOCreateMode::All
		|| (ThreadingMode == ESingleThreadedPSOCreateMode::Precompile && bIsPrecache)
		|| (ThreadingMode == ESingleThreadedPSOCreateMode::NonPrecompiled && !bIsPrecache);

	UE::TConditionalScopeLock PSOSingleThreadedLock(CreateGraphicsPSOMutex, bShouldLock);

	// Build the pipeline desc and its lookup key from the initializer.
	FVulkanPSOKey Key;
	FGfxPipelineDesc Desc;
	FVulkanDescriptorSetsLayoutInfo DescriptorSetLayoutInfo;
	{

		SCOPE_CYCLE_COUNTER(STAT_VulkanPSOHeaderInitTime);
		CreateGfxEntry(Initializer, DescriptorSetLayoutInfo, &Desc);
		Key = Desc.CreateKey2();
	}


	FVulkanRHIGraphicsPipelineState* NewPSO = 0;
	{
		// Fast path: return the cached PSO if one already exists for this key.
		SCOPE_CYCLE_COUNTER(STAT_VulkanPSOLookupTime);
		FScopeLock Lock(&GraphicsPSOLockedCS);
		{
			FVulkanRHIGraphicsPipelineState** PSO = GraphicsPSOLockedMap.Find(Key);
			if(PSO)
			{
				check(*PSO);
				if(!bIsPrecache)
				{
					// Mark the PSO as recently used; precache hits do not touch the LRU.
					LRUTouch(*PSO);
				}
				return *PSO;
			}
		}
	}



	{
		// Workers can be creating PSOs while FRHIResource::FlushPendingDeletes is running on the RHI thread
		// so let it get enqueued for a delete with Release() instead. Only used for failed or duplicate PSOs...
		auto DeleteNewPSO = [](FVulkanRHIGraphicsPipelineState* PSOPtr)
		{
			PSOPtr->AddRef();
			const uint32 RefCount = PSOPtr->Release();
			check(RefCount == 0);
		};

		SCOPE_CYCLE_COUNTER(STAT_VulkanPSOCreationTime);
		NewPSO = new FVulkanRHIGraphicsPipelineState(Device, Initializer, Desc, &Key);
		{
			// Resolve (or create) the pipeline layout for this descriptor-set layout.
			FVulkanLayout* Layout = FindOrAddLayout(DescriptorSetLayoutInfo, true, NewPSO->UsesBindless());
			NewPSO->Layout = Layout;
			NewPSO->bHasInputAttachments = Layout->GetDescriptorSetsLayout().HasInputAttachments();
		}
		NewPSO->RenderPass = Device->GetImmediateContext().PrepareRenderPassForPSOCreation(Initializer);
		{
			const FBoundShaderStateInput& BSI = Initializer.BoundShaderState;
			for (int32 StageIdx = 0; StageIdx < ShaderStage::NumGraphicsStages; ++StageIdx)
			{
				NewPSO->ShaderKeys[StageIdx] = GetShaderKeyForGfxStage(BSI, (ShaderStage::EStage)StageIdx);
			}

			if (Initializer.BoundShaderState.VertexDeclarationRHI)
			{
				check(BSI.VertexShaderRHI);
				FVulkanVertexShader* VS = ResourceCast(BSI.VertexShaderRHI);
				const FVulkanShaderHeader& VSHeader = VS->GetCodeHeader();
				NewPSO->VertexInputState.Generate(ResourceCast(Initializer.BoundShaderState.VertexDeclarationRHI), VSHeader.InOutMask);
			}

			// Create the VkPipeline now unless this is a precache job that would be
			// evicted immediately anyway (debug CVar can force the deferred path).
			if((!bIsPrecache || !LRUEvictImmediately())
#if !UE_BUILD_SHIPPING
				&& 0 == CVarPipelineDebugForceEvictImmediately.GetValueOnAnyThread()
#endif
				)
			{

				// Create the pipeline
				double BeginTime = FPlatformTime::Seconds();
				FVulkanShader* VulkanShaders[ShaderStage::NumGraphicsStages];
				GetVulkanGfxShaders(Initializer.BoundShaderState, VulkanShaders);

				// Sanity check: the keys captured on the PSO match the bound shaders.
				for (int32 StageIdx = 0; StageIdx < ShaderStage::NumGraphicsStages; ++StageIdx)
				{
					uint64 key = GetShaderKeyForGfxStage(BSI, (ShaderStage::EStage)StageIdx);
					check(key == NewPSO->ShaderKeys[StageIdx]);
				}


				QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_PART0);

				if(!CreateGfxPipelineFromEntry(NewPSO, VulkanShaders, Initializer.GetPSOPrecacheCompileType()))
				{
					DeleteNewPSO(NewPSO);
					return nullptr;
				}
				// Recover if we failed to create the pipeline.
				double EndTime = FPlatformTime::Seconds();
				double Delta = EndTime - BeginTime;
				if (Delta > HitchTime)
				{
					UE_LOG(LogVulkanRHI, Verbose, TEXT("Hitchy gfx pipeline (%.3f ms)"), (float)(Delta * 1000.0));
				}
			}
			// Publish under the map lock; the first thread to register wins.
			FScopeLock Lock(&GraphicsPSOLockedCS);
			FVulkanRHIGraphicsPipelineState** MapPSO = GraphicsPSOLockedMap.Find(Key);
			if(MapPSO)//another thread could end up creating it.
			{
				DeleteNewPSO(NewPSO);
				NewPSO = *MapPSO;
			}
			else
			{
				GraphicsPSOLockedMap.Add(MoveTemp(Key), NewPSO);
				if (bUseLRU && NewPSO->VulkanPipeline != VK_NULL_HANDLE)
				{
					QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_LRU_PSOLock);
					// we add only created pipelines to the LRU
					FScopeLock LockRU(&LRUCS);
					NewPSO->bIsRegistered = true;
					LRUTrim(NewPSO->PipelineCacheSize);
					LRUAdd(NewPSO);
					if(bIsPrecache)
					{
						// immediately evict precache PSOs from the LRU.
						// precache PSOs can saturate the LRU. precache PSOs can end up being trimmed/evicted in the same frame which LRUTrim does not expect.
						// This means we are LRU-ing rendered PSOs only.
						LRURemove(NewPSO);
					}
				}
				else
				{
					NewPSO->bIsRegistered = true;
				}
			}
		}
	}
	return NewPSO;
}
|
|
|
|
|
|
// Thin RHI entry point: adds stat/LLM scoping, then forwards to the device's
// pipeline state cache manager which does the actual find-or-create.
FGraphicsPipelineStateRHIRef FVulkanDynamicRHI::RHICreateGraphicsPipelineState(const FGraphicsPipelineStateInitializer& PSOInitializer)
{
#if VULKAN_ENABLE_AGGRESSIVE_STATS
	SCOPE_CYCLE_COUNTER(STAT_VulkanGetOrCreatePipeline);
#endif
	QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState);
	LLM_SCOPE_VULKAN(ELLMTagVulkan::VulkanShaders);

	return Device->PipelineStateCache->RHICreateGraphicsPipelineState(PSOInitializer);
}
|
|
|
|
FVulkanComputePipeline* FVulkanPipelineStateCacheManager::RHICreateComputePipelineState(FRHIComputeShader* ComputeShaderRHI)
{
	// Route through the device's pipeline state cache so identical compute
	// shaders share a single pipeline object.
	return Device->GetPipelineStateCache()->GetOrCreateComputePipeline(ResourceCast(ComputeShaderRHI));
}
|
|
|
|
// Thin RHI entry point: adds stat/LLM scoping, then forwards the compute shader
// to the device's pipeline state cache manager.
FComputePipelineStateRHIRef FVulkanDynamicRHI::RHICreateComputePipelineState(const FComputePipelineStateInitializer& Initializer)
{
#if VULKAN_ENABLE_AGGRESSIVE_STATS
	SCOPE_CYCLE_COUNTER(STAT_VulkanGetOrCreatePipeline);
#endif
	QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateComputePipelineState);
	LLM_SCOPE_VULKAN(ELLMTagVulkan::VulkanShaders);

	return Device->PipelineStateCache->RHICreateComputePipelineState(Initializer.ComputeShader);
}
|
|
|
|
// Find-or-create for compute pipelines, keyed by the shader's key.
// Fix: the pointer returned by TMap::Find is now compared against nullptr instead
// of the literal 0 (same behavior, idiomatic and unambiguous).
FVulkanComputePipeline* FVulkanPipelineStateCacheManager::GetOrCreateComputePipeline(FVulkanComputeShader* ComputeShader)
{
	check(ComputeShader);
	const uint64 Key = ComputeShader->GetShaderKey();
	{
		// Fast path: another thread may already have published this pipeline.
		FRWScopeLock ScopeLock(ComputePipelineLock, SLT_ReadOnly);
		FVulkanComputePipeline** ComputePipelinePtr = ComputePipelineEntries.Find(Key);
		if (ComputePipelinePtr)
		{
			return *ComputePipelinePtr;
		}
	}

	// create pipeline of entry + store entry
	double BeginTime = FPlatformTime::Seconds();

	FVulkanComputePipeline* ComputePipeline = CreateComputePipelineFromShader(ComputeShader);

	double EndTime = FPlatformTime::Seconds();
	double Delta = EndTime - BeginTime;
	if (Delta > HitchTime)
	{
		UE_LOG(LogVulkanRHI, Verbose, TEXT("Hitchy compute pipeline key CS (%.3f ms)"), (float)(Delta * 1000.0));
	}

	{
		// Publish only if we weren't beaten to it by another thread.
		// NOTE(review): when this thread loses the race, the freshly created
		// pipeline is still returned (and never stored), so two callers can hold
		// distinct pipelines for the same key — confirm this is intended.
		FRWScopeLock ScopeLock(ComputePipelineLock, SLT_Write);
		if (ComputePipelineEntries.Find(Key) == nullptr)
		{
			ComputePipelineEntries.FindOrAdd(Key) = ComputePipeline;
		}
	}

	return ComputePipeline;
}
|
|
|
|
// Builds a new compute pipeline for the given shader: derives the descriptor-set
// layout from the shader header, fills VkComputePipelineCreateInfo, and creates
// the VkPipeline through the shared global pipeline cache. On driver failure the
// returned wrapper is marked invalid (it is still returned, not deleted).
FVulkanComputePipeline* FVulkanPipelineStateCacheManager::CreateComputePipelineFromShader(FVulkanComputeShader* Shader)
{
	FVulkanComputePipeline* Pipeline = new FVulkanComputePipeline(Device, Shader);

	// Gather the compute stage's bindings and resolve/compile the pipeline layout.
	FVulkanDescriptorSetsLayoutInfo DescriptorSetLayoutInfo;
	const FVulkanShaderHeader& CSHeader = Shader->GetCodeHeader();
	FUniformBufferGatherInfo UBGatherInfo;
	DescriptorSetLayoutInfo.ProcessBindingsForStage(VK_SHADER_STAGE_COMPUTE_BIT, ShaderStage::Compute, CSHeader, UBGatherInfo);
	DescriptorSetLayoutInfo.FinalizeBindings<true>(*Device, UBGatherInfo, TArrayView<FRHISamplerState*>(), Shader->UsesBindless());
	FVulkanLayout* Layout = FindOrAddLayout(DescriptorSetLayoutInfo, false, Shader->UsesBindless());
	checkSlow(!Layout->IsGfxLayout());

	TRefCountPtr<FVulkanShaderModule> ShaderModule = Shader->GetOrCreateHandle(Layout, Layout->GetDescriptorSetLayoutHash());

	VkComputePipelineCreateInfo PipelineInfo;
	ZeroVulkanStruct(PipelineInfo, VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
	PipelineInfo.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	PipelineInfo.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
	PipelineInfo.stage.module = ShaderModule->GetVkShaderModule();
	// main_00000000_00000000
	// EntryPoint must stay alive until vkCreateComputePipelines returns; 24 chars
	// covers the "main_XXXXXXXX_XXXXXXXX" pattern above plus the terminator.
	ANSICHAR EntryPoint[24];
	Shader->GetEntryPoint(EntryPoint, 24);
	PipelineInfo.stage.pName = EntryPoint;
	PipelineInfo.layout = Layout->GetPipelineLayout();

	if (Shader->UsesBindless())
	{
		PipelineInfo.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
	}

	// Optionally pin the subgroup size when the shader requests a wave size and
	// the device supports VK_EXT_subgroup_size_control for compute.
	// Declared at this scope so the pNext chain stays valid for the create call.
	VkPipelineShaderStageRequiredSubgroupSizeCreateInfo RequiredSubgroupSizeCreateInfo;
	if ((CSHeader.WaveSize > 0) && Device->GetOptionalExtensions().HasEXTSubgroupSizeControl)
	{
		// Check if supported by this stage
		const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& SubgroupSizeControlProperties = Device->GetOptionalExtensionProperties().SubgroupSizeControlProperties;
		if (VKHasAllFlags(SubgroupSizeControlProperties.requiredSubgroupSizeStages, VK_SHADER_STAGE_COMPUTE_BIT))
		{
			// Check if requested size is supported
			if ((CSHeader.WaveSize >= SubgroupSizeControlProperties.minSubgroupSize) && (CSHeader.WaveSize <= SubgroupSizeControlProperties.maxSubgroupSize))
			{
				ZeroVulkanStruct(RequiredSubgroupSizeCreateInfo, VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
				RequiredSubgroupSizeCreateInfo.requiredSubgroupSize = CSHeader.WaveSize;
				PipelineInfo.stage.pNext = &RequiredSubgroupSizeCreateInfo;
			}
		}
	}

	VkResult Result;
	{
		QUICK_SCOPE_CYCLE_COUNTER(STAT_VulkanComputePSOCreate);
		FScopedPipelineCache PipelineCacheShared = GlobalPSOCache.Get(EPipelineCacheAccess::Shared);
		Result = VulkanRHI::vkCreateComputePipelines(Device->GetInstanceHandle(), PipelineCacheShared.Get(), 1, &PipelineInfo, VULKAN_CPU_ALLOCATOR, &Pipeline->Pipeline);
	}

	if (Result != VK_SUCCESS)
	{
		FString ComputeHash = Shader->GetHash().ToString();
		UE_LOG(LogVulkanRHI, Error, TEXT("Failed to create compute pipeline.\nShaders in pipeline: CS: %s"), *ComputeHash);
		Pipeline->SetValid(false);
	}

	Pipeline->Layout = Layout;

	INC_DWORD_STAT(STAT_VulkanNumPSOs);

	return Pipeline;
}
|
|
|
|
void FVulkanPipelineStateCacheManager::NotifyDeletedComputePipeline(FVulkanComputePipeline* Pipeline)
{
	// A compute pipeline is going away: drop its entry from the shader-key ->
	// pipeline map so a stale pointer is never handed out again.
	if (FVulkanComputeShader* ComputeShader = ResourceCast(Pipeline->GetComputeShader()))
	{
		FRWScopeLock ScopeLock(ComputePipelineLock, SLT_Write);
		ComputePipelineEntries.Remove(ComputeShader->GetShaderKey());
	}
}
|
|
|
|
template<typename T>
|
|
static bool SerializeArray(FArchive& Ar, TArray<T>& Array)
|
|
{
|
|
int32 Num = Array.Num();
|
|
Ar << Num;
|
|
if (Ar.IsLoading())
|
|
{
|
|
if (Num < 0)
|
|
{
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
Array.SetNum(Num);
|
|
for (int32 Index = 0; Index < Num; ++Index)
|
|
{
|
|
Ar << Array[Index];
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int32 Index = 0; Index < Num; ++Index)
|
|
{
|
|
Ar << Array[Index];
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// Writes the LRU cache file: header (version + record size) followed by the
// pipeline-size array. Field order must mirror Load() exactly.
void FVulkanPipelineStateCacheManager::FVulkanLRUCacheFile::Save(FArchive& Ar)
{
	// Modify VERSION if serialization changes
	Ar << Header.Version;
	// Record sizeof(FVulkanPipelineSize) so Load() can detect layout drift.
	Ar << Header.SizeOfPipelineSizes;

	SerializeArray(Ar, PipelineSizes);
}
|
|
|
|
// Reads the LRU cache file written by Save(). Returns false (and logs a
// warning) on any version, struct-size, or archive-data mismatch so the
// caller can fall back to an empty cache.
bool FVulkanPipelineStateCacheManager::FVulkanLRUCacheFile::Load(FArchive& Ar)
{
	// Modify VERSION if serialization changes
	Ar << Header.Version;
	if (Header.Version != LRU_CACHE_VERSION)
	{
		UE_LOG(LogVulkanRHI, Warning, TEXT("Unable to load lru pipeline cache due to mismatched Version %d != %d"), Header.Version, (int32)LRU_CACHE_VERSION);
		return false;
	}

	// Guard against FVulkanPipelineSize layout changes that were not
	// accompanied by a version bump.
	Ar << Header.SizeOfPipelineSizes;
	if (Header.SizeOfPipelineSizes != (int32)(sizeof(FVulkanPipelineSize)))
	{
		UE_LOG(LogVulkanRHI, Warning, TEXT("Unable to load lru pipeline cache due to mismatched size of FVulkanPipelineSize %d != %d; forgot to bump up LRU_CACHE_VERSION?"), Header.SizeOfPipelineSizes, (int32)sizeof(FVulkanPipelineSize));
		return false;
	}

	// SerializeArray rejects archives with a negative element count.
	if (!SerializeArray(Ar, PipelineSizes))
	{
		UE_LOG(LogVulkanRHI, Warning, TEXT("Unable to load lru pipeline cache due to invalid archive data!"));
		return false;
	}

	return true;
}
|
|
|
|
|
|
|
|
void GetVulkanGfxShaders(const FBoundShaderStateInput& BSI, FVulkanShader* OutShaders[ShaderStage::NumGraphicsStages])
|
|
{
|
|
FMemory::Memzero(OutShaders, ShaderStage::NumGraphicsStages * sizeof(*OutShaders));
|
|
|
|
OutShaders[ShaderStage::Vertex] = ResourceCast(BSI.VertexShaderRHI);
|
|
|
|
if (BSI.PixelShaderRHI)
|
|
{
|
|
OutShaders[ShaderStage::Pixel] = ResourceCast(BSI.PixelShaderRHI);
|
|
}
|
|
|
|
if (BSI.GetMeshShader())
|
|
{
|
|
OutShaders[ShaderStage::Mesh] = ResourceCast(BSI.GetMeshShader());
|
|
}
|
|
|
|
if (BSI.GetAmplificationShader())
|
|
{
|
|
OutShaders[ShaderStage::Task] = ResourceCast(BSI.GetAmplificationShader());
|
|
}
|
|
|
|
if (BSI.GetGeometryShader())
|
|
{
|
|
#if VULKAN_SUPPORTS_GEOMETRY_SHADERS
|
|
OutShaders[ShaderStage::Geometry] = ResourceCast(BSI.GetGeometryShader());
|
|
#else
|
|
ensureMsgf(0, TEXT("Geometry not supported!"));
|
|
#endif
|
|
}
|
|
}
|
|
|
|
// Overload used for evicted PSOs: resolves the shader objects by looking up
// the pipeline state's cached shader keys in the device's shader factory.
void GetVulkanGfxShaders(FVulkanDevice* Device, const FVulkanRHIGraphicsPipelineState& GfxPipelineState, FVulkanShader* OutShaders[ShaderStage::NumGraphicsStages])
{
	// Clear the table so stages the lookup does not fill stay null.
	FMemory::Memzero(OutShaders, ShaderStage::NumGraphicsStages * sizeof(*OutShaders));
	Device->GetShaderFactory().LookupGfxShaders(GfxPipelineState.ShaderKeys, OutShaders);
}
|
|
|
|
void FVulkanPipelineStateCacheManager::TickLRU()
|
|
{
|
|
if(FVulkanChunkedPipelineCacheManager::IsEnabled())
|
|
{
|
|
FVulkanChunkedPipelineCacheManager::Get().Tick();
|
|
}
|
|
|
|
if (!bUseLRU || GVulkanPSOLRUEvictAfterUnusedFrames == 0)
|
|
{
|
|
return;
|
|
}
|
|
|
|
FScopeLock Lock(&LRUCS);
|
|
const int MaxEvictsPerTick = 5;
|
|
for(int i = 0 ; i<MaxEvictsPerTick; i++)
|
|
{
|
|
FVulkanRHIGraphicsPipelineStateLRUNode* Node = LRU.GetTail();
|
|
if (!Node)
|
|
{
|
|
return;
|
|
}
|
|
|
|
TRefCountPtr<FVulkanRHIGraphicsPipelineState> PSO = Node->GetValue();
|
|
bool bTimeToDie = PSO->LRUFrame + GVulkanPSOLRUEvictAfterUnusedFrames < GFrameNumberRenderThread;
|
|
if (bTimeToDie)
|
|
{
|
|
LRUPRINT_DEBUG(TEXT("Evicting after %d frames of unuse (%d : %d) %d\n"), GVulkanPSOLRUEvictAfterUnusedFrames, PSO->LRUFrame, GFrameNumberRenderThread, PSO->PipelineCacheSize);
|
|
LRURemove(PSO);
|
|
}
|
|
else
|
|
{
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void FVulkanPipelineStateCacheManager::LRUDump()
|
|
{
|
|
#if !UE_BUILD_SHIPPING
|
|
uint32 tid = FPlatformTLS::GetCurrentThreadId();
|
|
LRUPRINT(TEXT("//***** LRU DUMP *****\\\\\n"));
|
|
FVulkanRHIGraphicsPipelineStateLRUNode* Node= LRU.GetHead();
|
|
uint32_t Size = 0;
|
|
uint32_t Index = 0;
|
|
while(Node)
|
|
{
|
|
FVulkanRHIGraphicsPipelineState* PSO = Node->GetValue();
|
|
Size += PSO->PipelineCacheSize;
|
|
LRUPRINT(TEXT("\t%08x PSO %p :: %d :: %06d \\ %06d\n"), tid, PSO, PSO->LRUFrame, PSO->PipelineCacheSize, Size);
|
|
Node = Node->GetNextNode();
|
|
Index++;
|
|
}
|
|
LRUPRINT(TEXT("\\\\***** LRU DUMP *****//\n"));
|
|
#endif
|
|
}
|
|
|
|
|
|
|
|
bool FVulkanPipelineStateCacheManager::LRUEvictImmediately()
|
|
{
|
|
return bEvictImmediately && CVarEnableLRU.GetValueOnAnyThread() != 0;
|
|
}
|
|
|
|
|
|
// Evicts PSOs from the cold end of the LRU until `nSpaceNeeded` additional
// bytes fit within the size budget and the entry count is within its cap.
void FVulkanPipelineStateCacheManager::LRUTrim(uint32 nSpaceNeeded)
{
	if (!bUseLRU)
	{
		return;
	}

	const uint32 tid = FPlatformTLS::GetCurrentThreadId();
	const uint32 MaxSize = (uint32)CVarLRUMaxPipelineSize.GetValueOnAnyThread();

	while (LRUUsedPipelineSize + nSpaceNeeded > MaxSize || LRUUsedPipelineCount > LRUUsedPipelineMax)
	{
		LRUPRINT_DEBUG(TEXT("%d EVICTING %d + %d > %d || %d > %d\n"), tid, LRUUsedPipelineSize , nSpaceNeeded, MaxSize ,LRUUsedPipelineCount ,LRUUsedPipelineMax);
		LRUEvictOne();
	}
}
|
|
|
|
void FVulkanPipelineStateCacheManager::LRUDebugEvictAll()
|
|
{
|
|
check(bUseLRU);
|
|
FScopeLock Lock(&LRUCS);
|
|
int Count = 0;
|
|
while(LRUEvictOne(true))
|
|
Count++;
|
|
|
|
LRUPRINT_DEBUG(TEXT("Evicted %d\n"), Count);
|
|
}
|
|
|
|
// Inserts a freshly created PSO at the hot end of the LRU and accounts for
// its cache size. Callers must trim (LRUTrim) before adding.
void FVulkanPipelineStateCacheManager::LRUAdd(FVulkanRHIGraphicsPipelineState* PSO)
{
	if (!bUseLRU)
	{
		return;
	}

	FScopeLock Lock(&LRUCS);

	// Must not already be tracked, and must carry a live VkPipeline.
	check(PSO->LRUNode == 0);
	check(PSO->GetVulkanPipeline());

	const uint32 MaxSize = (uint32)CVarLRUMaxPipelineSize.GetValueOnAnyThread();
	const uint32 PSOSize = PSO->PipelineCacheSize;

	LRUUsedPipelineSize += PSOSize;
	LRUUsedPipelineCount += 1;

	SET_DWORD_STAT(STAT_VulkanNumPSOLRUSize, LRUUsedPipelineSize);
	SET_DWORD_STAT(STAT_VulkanNumPSOLRU, LRUUsedPipelineCount);

	check(LRUUsedPipelineSize <= MaxSize); //should always be trimmed before.

	// Newest entries live at the head; remember our node for O(1) touch/remove.
	LRU.AddHead(PSO);
	PSO->LRUNode = LRU.GetHead();
	PSO->LRUFrame = GFrameNumberRenderThread;
	LRUPRINT_DEBUG(TEXT("LRUADD %p .. Frame %d :: %d VKPSO %08x, cache size %d\n"), PSO, PSO->LRUFrame, GFrameNumberRenderThread, PSO->GetVulkanPipeline(), PSOSize);
}
|
|
|
|
// Marks a PSO as used this frame. If it is resident in the LRU it is moved to
// the head; if it was evicted, the VkPipeline is recreated on the spot (an
// "LRU miss") and the PSO is re-inserted after trimming to make room.
void FVulkanPipelineStateCacheManager::LRUTouch(FVulkanRHIGraphicsPipelineState* PSO)
{
	if(!bUseLRU)
	{
		return;
	}
	FScopeLock Lock(&LRUCS);
	// Invariant: a PSO has a VkPipeline if and only if it has an LRU node.
	check((PSO->GetVulkanPipeline() == 0) == (PSO->LRUNode == 0));

	if(PSO->LRUNode)
	{
		check(PSO->GetVulkanPipeline());
		// Resident: promote to the head (skip if already there).
		if(PSO->LRUNode != LRU.GetHead())
		{
			LRU.RemoveNode(PSO->LRUNode, false);
			LRU.AddHead(PSO->LRUNode);
		}
		PSO->LRUFrame = GFrameNumberRenderThread;
	}
	else
	{
		PSO->LRUFrame = GFrameNumberRenderThread;
		if(!PSO->GetVulkanPipeline())
		{
			// Create the pipeline
			double BeginTime = FPlatformTime::Seconds();
			FVulkanShader* VulkanShaders[ShaderStage::NumGraphicsStages];

			// Resolve shaders from the cached shader keys (the original BSI
			// is no longer available for an evicted PSO).
			GetVulkanGfxShaders(Device, *PSO, VulkanShaders);

			QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_LRUMiss);

			if (!CreateGfxPipelineFromEntry(PSO, VulkanShaders, FGraphicsPipelineStateInitializer::EPSOPrecacheCompileType::NotSet))
			{
				check(0);
			}
			double EndTime = FPlatformTime::Seconds();
			double Delta = EndTime - BeginTime;
			// Report recreations slow enough to be a visible hitch.
			if (Delta > HitchTime)
			{
				UE_LOG(LogVulkanRHI, Verbose, TEXT("Hitchy gfx pipeline (%.3f ms)"), (float)(Delta * 1000.0));
			}

			if(bUseLRU)
			{
				// Make room for the recreated pipeline, then track it again.
				LRUTrim(PSO->PipelineCacheSize);
				LRUAdd(PSO);
			}
		}
		else
		{
			// Unreachable given the invariant asserted above.
			check(PSO->LRUNode);
		}
	}
}
|
|
|
|
void FVulkanRHIGraphicsPipelineState::DeleteVkPipeline(bool bImmediate)
|
|
{
|
|
if (VulkanPipeline != VK_NULL_HANDLE)
|
|
{
|
|
if (bImmediate)
|
|
{
|
|
VulkanRHI::vkDestroyPipeline(Device->GetInstanceHandle(), VulkanPipeline, VULKAN_CPU_ALLOCATOR);
|
|
}
|
|
else
|
|
{
|
|
Device->GetDeferredDeletionQueue().EnqueueResource(VulkanRHI::FDeferredDeletionQueue2::EType::Pipeline, VulkanPipeline);
|
|
}
|
|
VulkanPipeline = VK_NULL_HANDLE;
|
|
}
|
|
|
|
Device->PipelineStateCache->LRUCheckNotInside(this);
|
|
}
|
|
|
|
// Debug validation: walks the entire LRU list and asserts the given PSO is
// not linked anywhere in it, and that its own node pointer is cleared.
// Fix: removed the `Size` and `Index` locals, which were initialized but
// never read or updated; renamed `foo` to a descriptive name.
void FVulkanPipelineStateCacheManager::LRUCheckNotInside(FVulkanRHIGraphicsPipelineState* PSO)
{
	FScopeLock Lock(&LRUCS);

	FVulkanRHIGraphicsPipelineStateLRUNode* Node = LRU.GetHead();
	while (Node)
	{
		FVulkanRHIGraphicsPipelineState* Entry = Node->GetValue();
		if (Entry == PSO)
		{
			check(0 == Entry->LRUNode);
		}
		check(Entry != PSO);
		Node = Node->GetNextNode();
	}
	check(0 == PSO->LRUNode);
}
|
|
|
|
// Unlinks a PSO from the LRU, releases its size/count accounting, destroys
// its VkPipeline (immediately when it has been idle long enough, deferred
// otherwise), and optionally purges the shader modules it referenced.
void FVulkanPipelineStateCacheManager::LRURemove(FVulkanRHIGraphicsPipelineState* PSO)
{
	check(bUseLRU);

	if (PSO->LRUNode == 0)
	{
		// Not tracked: it must not be holding a live pipeline either.
		check(0 == PSO->GetVulkanPipeline());
		return;
	}

	// Idle for more than the in-flight frame window => safe to destroy now.
	const bool bImmediate = PSO->LRUFrame + 3 < GFrameNumberRenderThread;
	LRU.RemoveNode(PSO->LRUNode);
	PSO->LRUNode = 0;

	LRUUsedPipelineSize -= PSO->PipelineCacheSize;
	LRUUsedPipelineCount--;

	PSO->DeleteVkPipeline(bImmediate);

	if (GVulkanReleaseShaderModuleWhenEvictingPSO)
	{
		// Also drop the shader modules; they will be rebuilt on an LRU miss.
		for (int ShaderStageIndex = 0; ShaderStageIndex < ShaderStage::NumGraphicsStages; ShaderStageIndex++)
		{
			if (PSO->VulkanShaders[ShaderStageIndex] != nullptr)
			{
				PSO->VulkanShaders[ShaderStageIndex]->PurgeShaderModules();
			}
		}
	}

	SET_DWORD_STAT(STAT_VulkanNumPSOLRUSize, LRUUsedPipelineSize);
	SET_DWORD_STAT(STAT_VulkanNumPSOLRU, LRUUsedPipelineCount);
}
|
|
|
|
bool FVulkanPipelineStateCacheManager::LRUEvictOne(bool bOnlyOld)
|
|
{
|
|
check(bUseLRU);
|
|
uint32 tid = FPlatformTLS::GetCurrentThreadId();
|
|
FVulkanRHIGraphicsPipelineStateLRUNode* Node = LRU.GetTail();
|
|
check(Node != 0);
|
|
TRefCountPtr<FVulkanRHIGraphicsPipelineState> PSO = Node->GetValue();
|
|
|
|
bool bImmediate = PSO->LRUFrame + 3 < GFrameNumberRenderThread;
|
|
if(bOnlyOld && !bImmediate)
|
|
{
|
|
return false;
|
|
}
|
|
check(PSO->LRUFrame != GFrameNumberRenderThread);
|
|
|
|
LRURemove(PSO);
|
|
return true;
|
|
}
|
|
|
|
void FVulkanPipelineStateCacheManager::LRURemoveAll()
|
|
{
|
|
if (!bUseLRU)
|
|
{
|
|
return;
|
|
}
|
|
check(0);
|
|
}
|
|
|
|
|