Files
UnrealEngine/Engine/Source/Runtime/VulkanRHI/Private/VulkanChunkedPipelineCache.cpp
2025-05-18 13:04:45 +08:00

1254 lines
47 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VulkanChunkedPipelineCache.cpp:
=============================================================================*/
#include "VulkanChunkedPipelineCache.h"
#include "VulkanRHIPrivate.h"
#include "VulkanPipeline.h"
#include "HAL/PlatformFileManager.h"
#include "HAL/FileManager.h"
#include "Misc/Paths.h"
#include "Misc/FileHelper.h"
#include "Serialization/MemoryReader.h"
#include "Serialization/MemoryWriter.h"
#include "VulkanPendingState.h"
#include "VulkanContext.h"
#include "GlobalShader.h"
#include "VulkanLLM.h"
#include "Misc/ScopeRWLock.h"
#include "HAL/PlatformFramePacer.h"
#include "Templates/Greater.h"
#include "Async/MappedFileHandle.h"
/*
* The chunked PSO cache is a collection of multiple VK PSO cache objects (referred to as cache chunks).
* Cache chunks only accumulate; we do not currently delete them, except at startup when the entire cache is removed because it is deemed oversized.
*
* FVulkanChunkedPipelineCacheManagerImpl implements the external API, it contains:
* a map of PSO precache hash -> cache chunk key, a map of chunkkey->FVulkanPipelineCacheChunk,
* and an LRU of recently used cache chunks, (used when cache eviction is active)
*
* when a PSO is created its hash is checked against the PSOPrecacheHash->cache chunk map, if found FVulkanPipelineCacheChunk is responsible for creating and managing the VK PSO object.
* If not found the most recent FVulkanPipelineCacheChunk is queried for capacity, if space is available the new PSO is added to the chunk.
* If the most recent chunk is full, it is closed and its data is appended to the permanent storage file. a new chunk is created and the PSO is added to it.
*
* At startup the entire cache file is scanned to build the PSOPrecacheHash->chunk map, the entire cache is mmapped but the PSO cache data is not initially loaded.
* FVulkanPipelineCacheChunk will load its data from the mmapped cache file on demand. the cache is then considered 'resident'
*
* If the sum of the resident cache chunks goes beyond a cvar specified memory threshold then the least recently used cache chunk is evicted. (eviction means the VK PSO cache object is destroyed and its memory freed up.)
* Only chunks that are mmapped can be evicted.
*
*/
#define LOGCACHEINFO 0
#if LOGCACHEINFO
#include "ProfilingDebugging/ScopedTimers.h"
#define FScopedTimeToLog FScopedDurationTimeLogger
#else
// Do-nothing timer used when LOGCACHEINFO is disabled: the constructor accepts
// and discards its arguments, so timing call sites compile to nothing.
class FScopedTimeToLog
{
public:
	FScopedTimeToLog(FString /*Message*/, class FOutputDevice* /*Output*/ = nullptr)
	{
	}
};
#endif
#include "Containers/LockFreeList.h"
#if PLATFORM_ANDROID && USE_ANDROID_FILE
// TODO:
extern FString GExternalFilePath;
#endif
namespace UE
{
namespace Vulkan
{
// Backing value for r.Vulkan.UseChunkedPSOCache. Its initial value is the PLATFORM_ANDROID macro.
static int32 GUseChunkedPSOCache = PLATFORM_ANDROID;
// Console variable bound to GUseChunkedPSOCache, registered read-only. The help text is currently empty.
static FAutoConsoleVariableRef GVulkanGUseNewCacheCodeCVar(
TEXT("r.Vulkan.UseChunkedPSOCache"),
GUseChunkedPSOCache,
TEXT("\n")
TEXT("")
, ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// Shorthand for the device exposed by the global Vulkan RHI object.
static FVulkanDevice* GetVulkanDevice()
{
	FVulkanDevice* const Device = GVulkanRHI->GetDevice();
	return Device;
}
// Shorthand for the VkDevice handle of the device returned by GetVulkanDevice().
static VkDevice GetVulkanDeviceHandle()
{
	FVulkanDevice* const Device = GetVulkanDevice();
	return Device->GetInstanceHandle();
}
// Tuning values for the chunked PSO cache. Each one is exposed through a read-only console variable below.
// GMaxPSOsPerChunk: PSO count at which a chunk stops accepting new PSOs (see ReservePendingPSO).
int32 GMaxPSOsPerChunk = 20;
// GTargetResidentCacheSizeMb: target for the combined size of resident chunks, in MB.
int32 GTargetResidentCacheSizeMb = 40;
// GMaxTotalCacheSizeMb: size threshold for the cache file, in MB, checked when the cache is loaded.
int32 GMaxTotalCacheSizeMb = 300;
// GUntouchedChunkEvictTimeSeconds: how long a chunk must be unused before it can be evicted, in seconds.
int32 GUntouchedChunkEvictTimeSeconds = 60;
// r.Vulkan.ChunkedPSOCache.MaxSingleCachePSOCount -> GMaxPSOsPerChunk
static FAutoConsoleVariableRef GMaxSingleCachePSOCountCVar(
TEXT("r.Vulkan.ChunkedPSOCache.MaxSingleCachePSOCount"),
GMaxPSOsPerChunk,
TEXT("The target PSO count for an individual PSO cache.\n")
TEXT("existing caches with different PSO counts are discarded at startup.\n")
TEXT("(default) 20")
, ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// r.Vulkan.ChunkedPSOCache.TargetResidentCacheSizeMb -> GTargetResidentCacheSizeMb
static FAutoConsoleVariableRef GTargetResidentCacheSizeCVar(
TEXT("r.Vulkan.ChunkedPSOCache.TargetResidentCacheSizeMb"),
GTargetResidentCacheSizeMb,
TEXT("A target resident cache size in MB, if the combined memory usage of all the currently loaded cache chunks is above this threshold\n")
TEXT("the least recently used chunks will be considered for eviction.\n")
TEXT("(default) 40")
, ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// r.Vulkan.ChunkedPSOCache.MaxTotalCacheSizeMb -> GMaxTotalCacheSizeMb
static FAutoConsoleVariableRef GMaxTotalCacheSizeMbCVar(
TEXT("r.Vulkan.ChunkedPSOCache.MaxTotalCacheSizeMb"),
GMaxTotalCacheSizeMb,
TEXT("At startup, if the entire cache is above this threshold the cache will be deleted\n")
TEXT("and rebuilt during the subsequent run.\n")
TEXT("(default) 300\n")
TEXT("0 to disable cache size limit, note that the cache will grow indefinitely.")
, ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// When true (and the platform supports it) the cache file is accessed through a memory mapping; see CanMemoryMapChunkedPSOCache().
static bool GMemoryMapChunkedPSOCache = true;
// r.Vulkan.MemoryMapChunkedPSOCache -> GMemoryMapChunkedPSOCache
static FAutoConsoleVariableRef CVarMemoryMapChunkedPSOCache(
TEXT("r.Vulkan.MemoryMapChunkedPSOCache"),
GMemoryMapChunkedPSOCache,
TEXT("If true enabled memory mapping of the chunked vulkan PSO cache. (default)\n")
TEXT("\n")
TEXT("")
,
ECVF_ReadOnly | ECVF_RenderThreadSafe
);
// r.Vulkan.ChunkedPSOCache.ChunkEvictTime -> GUntouchedChunkEvictTimeSeconds
static FAutoConsoleVariableRef GChunkEvictTimeCVar(
TEXT("r.Vulkan.ChunkedPSOCache.ChunkEvictTime"),
GUntouchedChunkEvictTimeSeconds,
TEXT("Time in seconds for a cache chunk to be unused before it can be evicted from ram.\n")
TEXT("(default) 60")
, ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// True only when the platform reports memory mapped file support and the
// r.Vulkan.MemoryMapChunkedPSOCache setting is enabled.
bool CanMemoryMapChunkedPSOCache()
{
	if (!FPlatformProperties::SupportsMemoryMappedFiles())
	{
		return false;
	}
	return GMemoryMapChunkedPSOCache;
}
// Guards the lazily built version key in GetPSOBinaryCacheVersionedFolder().
static FCriticalSection CacheVersionedFolderCriticalSection;
/**
 * Returns the name of the versioned sub folder used for the binary PSO cache.
 *
 * The key is composed of the shader format name, a CRC of the driver's pipelineCacheUUID,
 * the vendor ID and the device ID. On Android the device build number and the optional
 * "VulkanProgramCacheKey" config rule variable are appended as well.
 *
 * The key is built once and cached. The lock is taken on every call and the key is
 * assembled in a local before being published, so a concurrent caller can never
 * observe (or copy) a partially built key.
 */
static FString GetPSOBinaryCacheVersionedFolder()
{
	static FString BinaryCacheVersionKey;
	FScopeLock Lock(&CacheVersionedFolderCriticalSection);
	if (BinaryCacheVersionKey.IsEmpty())
	{
		FString NewKey;
		NewKey.Append(LegacyShaderPlatformToShaderFormat(GMaxRHIShaderPlatform).ToString());
		const VkPhysicalDeviceProperties& DeviceProperties = GetVulkanDevice()->GetDeviceProperties();
		NewKey.Append(FString::Printf(TEXT(".%x.%x.%x"), FCrc::MemCrc32(DeviceProperties.pipelineCacheUUID, VK_UUID_SIZE), DeviceProperties.vendorID, DeviceProperties.deviceID));
#if PLATFORM_ANDROID
		// Apparently we can't rely on version alone to assume binary compatibility.
		// Some devices have reported binary compatibility errors after minor OS updates even though the driver version number does not change.
		const FString BuildNumber = FAndroidMisc::GetDeviceBuildNumber();
		NewKey.Append(BuildNumber);
		// Optional configrule variable for triggering a rebuild of the cache.
		const FString* ConfigRulesVulkanProgramKey = FAndroidMisc::GetConfigRulesVariable(TEXT("VulkanProgramCacheKey"));
		if (ConfigRulesVulkanProgramKey && !ConfigRulesVulkanProgramKey->IsEmpty())
		{
			NewKey.Append(*ConfigRulesVulkanProgramKey);
		}
#endif
		// Publish only the fully assembled key.
		BinaryCacheVersionKey = MoveTemp(NewKey);
	}
	check(!BinaryCacheVersionKey.IsEmpty());
	return BinaryCacheVersionKey;
}
// Root folder for RHI caches: <base>/RHICache/<RHI name>. The base is GExternalFilePath
// on Android builds with USE_ANDROID_FILE, otherwise the project's Saved directory.
// The path is computed once and cached in a function-local static.
static FString GetRHICacheRootFolder()
{
	static FString RHICacheTopFolderPath =
#if PLATFORM_ANDROID && USE_ANDROID_FILE
		GExternalFilePath
#else
		FPaths::ProjectSavedDir()
#endif
		/ TEXT("RHICache") / GVulkanRHI->GetName();
	return RHICacheTopFolderPath;
}
// Key type identifying a cache chunk.
using FPipelineCacheChunkKey = uint32;
// Doubly linked list of chunk keys used as the LRU of cache chunks, and its node type.
using FVulkanRHIGraphicsPipelineStateLRU = TDoubleLinkedList<FPipelineCacheChunkKey>;
using FVulkanPipelineCacheChunkLRUNode = FVulkanRHIGraphicsPipelineStateLRU::TDoubleLinkedListNode;
// Running sum of the sizes of all resident chunks; chunks add their CacheSize when loaded/saved and subtract it when unloaded.
std::atomic<int32> TotalResidentCacheSize = 0;
}
}
// File-local shorthand for the PSO operation enum declared on FVulkanChunkedPipelineCacheManager.
using EPSOOperation = FVulkanChunkedPipelineCacheManager::EPSOOperation;
// this class manages a file that combines all of the cache chunks
// It manages access to each chunk via a single mmap alloc. mmap support is relied on for perf, as a fallback where it's not supported synchronous file access is used.
class FVulkanCombinedChunkCacheFile
{
// Handle used to memory map the cache file. It is left null when opening the mapping fails.
TUniquePtr<IMappedFileHandle> MappedCacheFile;
// Guards MappedRegion: written under a write lock in UpdateMapping, held as a read lock by readers returned from GetReader.
mutable FRWLock MappingLock;
// Current mapping of the cache file, covering [0, Size) as last passed to UpdateMapping.
TUniquePtr<IMappedFileRegion> MappedRegion;
// Archive used to write cache chunks and the file header; created by OpenWriteHandle.
TUniquePtr<FArchive> PSOFileWriter = nullptr;
// File format version written to and validated against the file header.
static const uint32 CacheFileVersion = 5;
// Name of the combined cache file inside the versioned cache folder.
constexpr static TCHAR FileName[] = TEXT("VulkanPSOChunks");
// Re-maps the cache file so that the mapped region covers bytes [0, Size).
// Takes the mapping write lock, so it waits for any outstanding mapped readers.
// When there is no mapped file handle (memory mapping is disabled, unsupported, or
// opening the mapping failed) there is nothing to map: the region is cleared
// instead of dereferencing a null handle.
void UpdateMapping(uint32 Size)
{
	FRWScopeLock Lock(MappingLock, SLT_Write);
	if (!MappedCacheFile.IsValid())
	{
		MappedRegion.Reset();
		return;
	}
	MappedRegion = TUniquePtr<IMappedFileRegion>(MappedCacheFile->MapRegion(0, Size));
}
public:
// Accessor for the single shared instance, constructed on first use.
static FVulkanCombinedChunkCacheFile& Get()
{
	static FVulkanCombinedChunkCacheFile Singleton;
	return Singleton;
}
// Bare file name (no directory) of the combined cache file.
const TCHAR* GetFilename() const
{
	return FileName;
}
// Full path of the combined cache file: <RHI cache root>/<versioned folder>/<FileName>.
// The path is computed once. A function-local static initializer is used so the first
// call is safe even when several threads race to it (the previous "if empty then assign"
// form could be observed mid-assignment by a concurrent caller).
const FString& GetFullCachePath()
{
	static const FString FullCachePath = []()
	{
		const FString RootCacheFolder = UE::Vulkan::GetRHICacheRootFolder();
		const FString CacheSubDir = UE::Vulkan::GetPSOBinaryCacheVersionedFolder();
		const FString CombinedCacheSubDir = FPaths::Combine(RootCacheFolder, CacheSubDir);
		return FPaths::Combine(CombinedCacheSubDir, FileName);
	}();
	return FullCachePath;
}
// CRC of the settings that shape how chunks are built (currently only GMaxPSOsPerChunk).
// The value is stored in the file header; a cache written with a different value is
// rejected on load, because its chunks would not honour the current cvar limits.
static uint32 GetCacheBuildingParamHash()
{
	const uint32 Seed = 0;
	return FCrc::MemCrc32(&UE::Vulkan::GMaxPSOsPerChunk, sizeof(UE::Vulkan::GMaxPSOsPerChunk), Seed);
}
// Serializes the file header at the archive's current position. Field order is:
// file version, precache PSO hash version, build parameter hash, last valid offset.
// ReadAllCacheChunks reads the fields back in the same order.
static void WriteFileHeader(FArchive& Archive, uint32 LastValidOffset)
{
	uint32 FileVersion = CacheFileVersion;
	uint32 HashVersion = FVulkanDynamicRHI::GetPrecachePSOHashVersion();
	uint32 BuildParamHash = GetCacheBuildingParamHash();
	Archive << FileVersion;
	Archive << HashVersion;
	Archive << BuildParamHash;
	Archive << LastValidOffset;
}
// Bundles an archive positioned on cache data with, when the data is memory mapped,
// a pointer to the mapped bytes and the read lock that keeps the mapping in place.
// Members are declared so that the lock is released before the archive is destroyed.
class FPSOArchiveReader
{
	const uint8* PSOBytes;
	TUniquePtr<FArchive> Archive;
	TUniquePtr<FRWScopeLock> MappingLock;

public:
	// Reader over memory mapped data; takes ownership of the archive and of the mapping lock.
	explicit FPSOArchiveReader(TUniquePtr<FArchive>&& InArchive, const uint8* MappedBytes, TUniquePtr<FRWScopeLock>&& LockIn)
		: PSOBytes(MappedBytes)
		, Archive(MoveTemp(InArchive))
		, MappingLock(MoveTemp(LockIn))
	{
	}

	// Reader without mapped data (GetData() returns null); takes ownership of the archive.
	explicit FPSOArchiveReader(TUniquePtr<FArchive>&& InArchive)
		: PSOBytes(nullptr)
		, Archive(MoveTemp(InArchive))
	{
	}

	// True when an archive is held.
	bool IsValid() const
	{
		return Archive.IsValid();
	}

	// Mapped bytes backing the archive, or null when the archive is not memory mapped.
	const uint8* GetData() const
	{
		return PSOBytes;
	}

	// The underlying archive (may be null).
	FArchive* GetArchive()
	{
		return Archive.Get();
	}
};
// Validates the cache file header and invokes OnFoundCacheChunk for every chunk stored in the file.
//
// ArchiveReader must hold a valid archive positioned at the start of the file.
// Returns false, without invoking the callback, when the file version, the precache hash
// version or the build parameter hash do not match, or when the header records no valid data.
// When memory mapping is enabled the file is also mapped up to the last valid offset.
// A failure to open the mapping is logged and is no longer followed by a map attempt
// through the null handle.
bool ReadAllCacheChunks(FPSOArchiveReader& ArchiveReader, TUniqueFunction<void(FPSOArchiveReader& ArchiveReader)> OnFoundCacheChunk)
{
	const FString& FullCachePath = GetFullCachePath();
	FArchive& Archive = *ArchiveReader.GetArchive();
	uint32 Version;
	Archive << Version;
	if (Version != CacheFileVersion)
	{
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: incorrect Cache file version (%d, expected %d)"), Version, CacheFileVersion);
		return false;
	}
	uint32 PrecacheHashVersion;
	Archive << PrecacheHashVersion;
	if (PrecacheHashVersion != FVulkanDynamicRHI::GetPrecachePSOHashVersion())
	{
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: mismatched hash version (%d, expected %d)"), PrecacheHashVersion, FVulkanDynamicRHI::GetPrecachePSOHashVersion());
		return false;
	}
	uint32 ParamHash;
	uint32 LastValidOffset;
	Archive << ParamHash;
	Archive << LastValidOffset;
	if (LastValidOffset == 0 || ParamHash != GetCacheBuildingParamHash())
	{
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: Cache file params have changed (%d, %x=%x)"), LastValidOffset, ParamHash, GetCacheBuildingParamHash());
		return false;
	}
	if (UE::Vulkan::CanMemoryMapChunkedPSOCache())
	{
		FOpenMappedResult Result = FPlatformFileManager::Get().GetPlatformFile().OpenMappedEx(*FullCachePath);
		MappedCacheFile = Result.HasError() ? nullptr : Result.StealValue();
		if (MappedCacheFile.IsValid())
		{
			UpdateMapping(LastValidOffset);
		}
		else
		{
			UE_LOG(LogVulkanRHI, Warning, TEXT("FVulkanChunkedPipelineCacheManager: failed to memory map %s"), *FullCachePath);
		}
	}
	// Each callback is expected to consume exactly one chunk from the archive.
	while (Archive.Tell() < LastValidOffset)
	{
		OnFoundCacheChunk(ArchiveReader);
	}
	check(Archive.Tell() == LastValidOffset);
	return true;
}
// Opens the cache file and calls OnFoundCacheChunk for each cache chunk it contains,
// then prepares the write handle used to append new chunks.
//
// If the existing file cannot be read, fails validation, or exceeds
// r.Vulkan.ChunkedPSOCache.MaxTotalCacheSizeMb, the file is recreated with a fresh header.
// A limit of 0 (or less) disables the size check, as documented on the cvar.
// The limit is computed in 64 bits so that large MB values do not overflow.
// Returns true when the write handle was opened.
bool LoadAllCacheChunks(TUniqueFunction<void(FPSOArchiveReader& ArchiveReader)> OnFoundCacheChunk)
{
	const FString& FullCachePath = GetFullCachePath();
	IPlatformFile& PlatformFile = FPlatformFileManager::Get().GetPlatformFile();
	const FFileStatData Stats = PlatformFile.GetStatData(*FullCachePath);
	bool bReadSuccess = false;
	const int64 MaxCacheSizeBytes = static_cast<int64>(UE::Vulkan::GMaxTotalCacheSizeMb) * 1024 * 1024;
	const bool bSizeLimitDisabled = UE::Vulkan::GMaxTotalCacheSizeMb <= 0;
	// TODO: simplistic 'GC', blow cache away and rebuild.
	// This should be acceptable as the cache rebuild needs to be as transparent as possible..
	if (bSizeLimitDisabled || Stats.FileSize < MaxCacheSizeBytes)
	{
		FPSOArchiveReader ArchiveReader(TUniquePtr<FArchive>(IFileManager::Get().CreateFileReader(*FullCachePath, FILEREAD_AllowWrite)), nullptr, nullptr);
		if (ArchiveReader.IsValid())
		{
			UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: loading %s"), *FullCachePath);
			bReadSuccess = ReadAllCacheChunks(ArchiveReader, MoveTemp(OnFoundCacheChunk));
		}
		else
		{
			UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: Cache file could not open %s for reading"), *FullCachePath);
		}
	}
	else
	{
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: deleted oversized cache (%" INT64_FMT ", %" INT64_FMT ")"), Stats.FileSize, MaxCacheSizeBytes);
	}
	// prepare the output handle. Appending keeps the chunks that were just read; otherwise the file is recreated.
	bool bWriteSuccess = OpenWriteHandle(bReadSuccess);
	check(bWriteSuccess);
	if (!bReadSuccess && bWriteSuccess)
	{
		// fill in the new header and flush to ensure there's actually something for 'OpenMapped' to use.
		WriteFileHeader(*PSOFileWriter, 0);
		PSOFileWriter->Flush();
	}
	if (UE::Vulkan::CanMemoryMapChunkedPSOCache() && !MappedCacheFile.IsValid())
	{
		FOpenMappedResult Result = FPlatformFileManager::Get().GetPlatformFile().OpenMappedEx(*FullCachePath);
		MappedCacheFile = Result.HasError() ? nullptr : Result.StealValue();
	}
	return bWriteSuccess;
}
// Returns a reader positioned at Offset within the cache file.
//
// When memory mapping is enabled and a mapping covering Offset exists, the reader views the
// mapped bytes directly and holds the mapping read lock for its lifetime, so the mapping
// cannot be replaced while the reader is alive.
// Otherwise (mapping disabled, missing, or too small) the file is opened for synchronous
// reading. In that case the returned reader may be invalid if the file cannot be opened.
FPSOArchiveReader GetReader(uint32 Offset)
{
	if (UE::Vulkan::CanMemoryMapChunkedPSOCache())
	{
		// Take the lock before inspecting MappedRegion, as UpdateMapping may replace it concurrently.
		TUniquePtr<FRWScopeLock> MappingScopeLock(new FRWScopeLock(FVulkanCombinedChunkCacheFile::Get().MappingLock, SLT_ReadOnly));
		if (ensure(MappedRegion.IsValid() && static_cast<int64>(Offset) < MappedRegion->GetMappedSize()))
		{
			FMemoryView MemView(MappedRegion->GetMappedPtr() + Offset, MappedRegion->GetMappedSize() - Offset);
			return FPSOArchiveReader(TUniquePtr<FArchive>(new FMemoryReaderView(MemView)), (const uint8*)MemView.GetData(), MoveTemp(MappingScopeLock));
		}
		// No usable mapping: the lock is released at the end of this scope and we fall back to file access.
	}
	FPSOArchiveReader Reader(TUniquePtr<FArchive>(IFileManager::Get().CreateFileReader(*GetFullCachePath(), FILEREAD_AllowWrite)));
	if (Reader.GetArchive())
	{
		Reader.GetArchive()->Seek(Offset);
	}
	return Reader;
}
bool OpenWriteHandle(bool bAppend)
{
check(PSOFileWriter == nullptr);
const FString& FullCachePath = GetFullCachePath();
uint32 WriteFlags = EFileWrite::FILEWRITE_AllowRead | (bAppend ? EFileWrite::FILEWRITE_Append : 0);
PSOFileWriter = TUniquePtr<FArchive>(IFileManager::Get().CreateFileWriter(*FullCachePath, WriteFlags ));
UE_CLOG(PSOFileWriter, LogRHI, Log, TEXT("Opened binary cache for write (%s)"), *FullCachePath);
UE_CLOG(PSOFileWriter == nullptr, LogRHI, Warning, TEXT("Failed to open Vulkan binary cache output file. (%s)"), *FullCachePath);
UE_CLOG(PSOFileWriter && (PSOFileWriter->IsError() || PSOFileWriter->IsCriticalError()), LogRHI, Error, TEXT("Vulkan binary cache output archive error (%s, %d,%d)"), *FullCachePath, PSOFileWriter->IsError(), PSOFileWriter->IsCriticalError());
return PSOFileWriter != nullptr;
}
// The archive opened by OpenWriteHandle; asserts that it exists.
FArchive& GetWriter()
{
	check(PSOFileWriter.IsValid());
	return *PSOFileWriter;
}
void FlushWriteHandle()
{
FArchive& Archive = *PSOFileWriter;
Archive.Flush();
uint32 CurrentOffsetPos = Archive.Tell();
Archive.Seek(0);
WriteFileHeader(Archive, CurrentOffsetPos);
Archive.Seek(CurrentOffsetPos);
Archive.Flush();
// Bring the mmap up the new write position.
UpdateMapping(CurrentOffsetPos);
}
};
// File-scope shorthand for the reader type nested in FVulkanCombinedChunkCacheFile.
using FPSOArchiveReader = FVulkanCombinedChunkCacheFile::FPSOArchiveReader;
// A FVulkanPipelineCacheChunk represents a single instance of the VK PSO cache object.
// GMaxPSOsPerChunk is used to limit the size of a chunk during creation.
class FVulkanPipelineCacheChunk
{
// Node type of the LRU list of chunk keys.
using FVulkanPipelineCacheChunkLRUNode = UE::Vulkan::FVulkanPipelineCacheChunkLRUNode;
// LRU list node associated with this chunk (not owned here: it is only stored and returned). May be null.
FVulkanPipelineCacheChunkLRUNode* LRUNode;
public:
// Creates a chunk in the Initialized state, optionally associated with an LRU node.
explicit FVulkanPipelineCacheChunk(FVulkanPipelineCacheChunkLRUNode* LRUNodeIn = nullptr)
	: LRUNode(LRUNodeIn)
{
}
// Destroys the Vulkan pipeline cache object if one is currently held.
~FVulkanPipelineCacheChunk()
{
	if (PipelineCacheObj == VK_NULL_HANDLE)
	{
		return;
	}
	VulkanRHI::vkDestroyPipelineCache(UE::Vulkan::GetVulkanDeviceHandle(), PipelineCacheObj, VULKAN_CPU_ALLOCATOR);
}
// Associates this chunk with a node of the LRU list.
void SetLRUNode(FVulkanPipelineCacheChunkLRUNode* InLRUNode)
{
	LRUNode = InLRUNode;
}

// The LRU list node associated with this chunk (may be null).
FVulkanPipelineCacheChunkLRUNode* GetLRUNode()
{
	return LRUNode;
}

// Offset of this chunk's record within the combined cache file; passed to
// FVulkanCombinedChunkCacheFile::GetReader when the chunk is reloaded.
uint32 OffsetWithinFile = 0;

// Records where this chunk's record starts in the combined cache file.
void SetCacheOffset(uint32 OffsetWithinFileIn)
{
	OffsetWithinFile = OffsetWithinFileIn;
}

// Offset of this chunk's record within the combined cache file.
uint32 GetCacheOffset()
{
	return OffsetWithinFile;
}
void InitNewCache()
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineChunk_init);
FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_Write);
FRWScopeLock Lock(CacheStateLock, SLT_Write);
VkPipelineCacheCreateInfo PipelineCacheInfo;
ZeroVulkanStruct(PipelineCacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(UE::Vulkan::GetVulkanDeviceHandle(), &PipelineCacheInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheObj));
CacheState = ECacheState::Building;
check(LastSaveSize == 0);
check(BinaryCacheFileInfo.Filename.IsEmpty());
BinaryCacheFileInfo.Filename = GenerateNewFileName();
}
// Records that the chunk is in use on the current frame and, if its Vulkan cache object has been
// evicted, reloads it from the combined cache file.
// Fast path: only read locks are taken, and the function returns as soon as the state is not FinalizedEvicted.
// Slow path: the cache-object lock is upgraded to a write lock and the state is re-checked under the
// state write lock before reinstating, because the state may have changed while the lock was upgraded.
void Touch()
{
LastUsedFrame = GFrameNumber;
FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_ReadOnly);
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_Chunk_Touch);
FRWScopeLock Lock(CacheStateLock, SLT_ReadOnly);
if (CacheState != ECacheState::FinalizedEvicted)
{
// Nothing to reload for this state.
return;
}
}
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_Chunk_TouchReinstate);
PipelineLock.ReleaseReadOnlyLockAndAcquireWriteLock_USE_WITH_CAUTION();
FRWScopeLock Lock(CacheStateLock, SLT_Write);
// Re-check under the write lock before reinstating.
if(CacheState == ECacheState::FinalizedEvicted)
{
ReinstateDriverBlobInternal();
}
}
}
// Result of looking up a PSO, passed to CreatePSO to select how the PSO is created and recorded.
enum class EPSOCacheFindResult
{
NotFound, // PSO was not found and should contribute to the binary cache.
MatchedExisting, // The PSO hash was not found but the PSO would not create a new entry in the cache, so we say there is a match in the cache.
Found, // PSO hash has an entry in the cache (presumably the PSO hash -> cache chunk map; confirm against the manager).
};
template<class TPipelineState>
bool PSORequiresCompile(TPipelineState* Initializer, FVulkanChunkedPipelineCacheManager::FPSOCreateCallbackFunc<TPipelineState>& PSOCreateFunc)
{
const bool bCanTestForExistence = UE::Vulkan::GetVulkanDevice()->GetOptionalExtensions().HasEXTPipelineCreationCacheControl;
if(!bCanTestForExistence)
{
return true;
}
FVulkanChunkedPipelineCacheManager::FPSOCreateFuncParams<TPipelineState> Params(Initializer, PipelineCacheObj, EPSOOperation::CreateIfPresent, PipelineCacheObjLock);
VkResult Result = PSOCreateFunc(Params);
check(Result == VK_SUCCESS || Result == VK_PIPELINE_COMPILE_REQUIRED_EXT);
return Result != VK_SUCCESS;
}
// Creates a PSO through PSOCreateFunc using this chunk's Vulkan pipeline cache object, and records the PSO in the chunk.
//
// Initializer: pipeline state whose precache key identifies the PSO.
// PSOCacheFindResult: how the PSO relates to the cache (see EPSOCacheFindResult):
//   Found           - the PSO is created against the cache; nothing is recorded.
//   NotFound        - the PSO is created against the cache (chunk must be Building or Closing); its hash is recorded,
//                     the pending-compile count reserved by ReservePendingPSO is released, and if the chunk is
//                     Closing with no compiles left pending the chunk is saved to storage.
//   MatchedExisting - no PSO is created (VK_SUCCESS is returned); only the hash is recorded.
// PSOCreateFunc: callback that performs the actual Vulkan pipeline creation.
// Returns the VkResult of PSOCreateFunc, or VK_SUCCESS for MatchedExisting.
template<class TPipelineState>
VkResult CreatePSO(TPipelineState* Initializer, EPSOCacheFindResult PSOCacheFindResult, FVulkanChunkedPipelineCacheManager::FPSOCreateCallbackFunc<TPipelineState> PSOCreateFunc)
{
FScopedTimeToLog Timer(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: FVulkanPipelineCacheChunk.CreatePSO tot %d "), FPlatformTLS::GetCurrentThreadId()));
// Mark the chunk as used and make sure its cache object is resident before creating against it.
Touch();
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_CreatePSO);
VkResult retcode;
uint64 PSOHash;
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_CreatePSOFUNC);
FScopedTimeToLog Timer2(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: FVulkanPipelineCacheChunk.CreatePSO(lock tot %d "), FPlatformTLS::GetCurrentThreadId()));
PSOHash = GetPrecacheKey(Initializer);
if (PSOCacheFindResult == EPSOCacheFindResult::Found)
{
// it's possible to still add a new PSO if we have hash collisions or imperfect PSO hash calc.
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_CacheManagerCreatePSO_WARM);
FVulkanChunkedPipelineCacheManager::FPSOCreateFuncParams<TPipelineState> Params (Initializer, PipelineCacheObj, EPSOOperation::CreateAndStorePSO, PipelineCacheObjLock);
retcode = PSOCreateFunc(Params);
}
else if(PSOCacheFindResult == EPSOCacheFindResult::NotFound)
{
{
FRWScopeLock Lock(CacheStateLock, SLT_ReadOnly);
check(CacheState == ECacheState::Building || CacheState == ECacheState::Closing);
}
// Even though we know we're modifying the cache, we're not taking the write lock.
// Holding the write lock for the duration of the create is too costly, better to let the driver manage this.
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_CacheManagerCreatePSO_COLD);
FVulkanChunkedPipelineCacheManager::FPSOCreateFuncParams<TPipelineState> Params(Initializer, PipelineCacheObj, EPSOOperation::CreateAndStorePSO, PipelineCacheObjLock);
retcode = PSOCreateFunc(Params);
}
}
else
{
// MatchedExisting: nothing to create.
retcode = VK_SUCCESS;
}
}
if (PSOCacheFindResult != EPSOCacheFindResult::Found)
{
// if EPSOCacheFindResult::MatchedExisting we record the hash only.
// 'existing' pso's will not contribute to the cache, we dont consider them for pending compiles.
FRWScopeLock Lock(CacheStateLock, SLT_Write);
// The 64 bit hash is stored directly in the pointer-sized payload of the lock free list.
static_assert(sizeof(void*) == sizeof(uint64));
PSOsToBeFlushed2.Push((void*)PSOHash);
TotalNumPSOs++;
if (PSOCacheFindResult == EPSOCacheFindResult::NotFound)
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_Chunk_AddTo);
FScopedTimeToLog Timer3(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: FVulkanPipelineCacheChunk.CreatePSO AddTo tot %d "), FPlatformTLS::GetCurrentThreadId()));
TotalNumUniquePSOs++;
// Release the slot reserved by ReservePendingPSO.
check(PendingAddToCompiles.load() != 0);
--PendingAddToCompiles;
// The last pending compile of a chunk that reached capacity writes the chunk out.
if (CacheState == ECacheState::Closing && PendingAddToCompiles.load() == 0)
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineState_CREATE_Chunk_Flush);
#if LOGCACHEINFO
UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: create pso - Chunk Capacity reached %s, Finalizing as No pending jobs remain.."), *BinaryCacheFileInfo.Filename);
#endif
SavePSOCacheInternal();
}
}
}
return retcode;
}
// Reserve is protected by the ChunkedPipelineCacheLock mutex.
// Reserves a slot in this chunk for a PSO that is about to be compiled into it.
// The chunk must be in the Building state. The pending-compile counter is incremented, and when the
// number of unique PSOs plus pending compiles reaches GMaxPSOsPerChunk the chunk moves to the Closing state.
// The matching decrement happens in CreatePSO for NotFound PSOs.
void ReservePendingPSO()
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineChunk_Reserve);
FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_ReadOnly);
FRWScopeLock Lock(CacheStateLock, SLT_ReadOnly); // take the cache readlock,
check(CacheState == ECacheState::Building);
++PendingAddToCompiles;
if ((TotalNumUniquePSOs + PendingAddToCompiles) >= UE::Vulkan::GMaxPSOsPerChunk)
{
// Upgrade to the state write lock, then re-evaluate the capacity condition before changing state.
Lock.ReleaseReadOnlyLockAndAcquireWriteLock_USE_WITH_CAUTION();
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineChunk_ReserveWrite);
if ((TotalNumUniquePSOs + PendingAddToCompiles) >= UE::Vulkan::GMaxPSOsPerChunk)
{
// become closed..
#if LOGCACHEINFO
UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: Chunk Capacity reached , cachestate %d, CacheSize %d, TotalNumPSOs %d, TotalNumUniquePSOs %d, PendingAddToCompiles %d, %s, pending finalize.."), CacheState, CacheSize, TotalNumPSOs.Load(EMemoryOrder::Relaxed), TotalNumUniquePSOs.Load(EMemoryOrder::Relaxed), PendingAddToCompiles.load(), *BinaryCacheFileInfo.Filename);
#endif
CacheState = ECacheState::Closing;
}
}
}
// Logs a one-line summary of this chunk (name, PSO counts, size, last used frame,
// residency and state), prefixed with LogInfo. Both chunk locks are held for reading.
void LogStats(FString&& LogInfo)
{
	FRWScopeLock CacheObjReadLock(PipelineCacheObjLock, SLT_ReadOnly);
	FRWScopeLock StateReadLock(CacheStateLock, SLT_ReadOnly);
	UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: %s Cache name %s num PSOs %d (%d unique), cache size %d, last used %llu, resident %d, state %d"),
		*LogInfo,
		*BinaryCacheFileInfo.Filename,
		TotalNumPSOs.Load(EMemoryOrder::Relaxed),
		TotalNumUniquePSOs.Load(EMemoryOrder::Relaxed),
		CacheSize,
		LastUsedFrame.load(),
		CacheState != ECacheState::FinalizedEvicted,
		(int)CacheState);
}
bool CanBeEvicted() const
{
FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_ReadOnly);
FRWScopeLock Lock(CacheStateLock, SLT_ReadOnly); // take the cache readlock,
return CacheState == ECacheState::Finalized;
}
bool CheckCapacityReached()
{
FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_ReadOnly);
FRWScopeLock Lock(CacheStateLock, SLT_ReadOnly); // take the cache readlock,
return CacheState != ECacheState::Building;
}
// Size this chunk currently contributes to resident memory: zero when evicted, otherwise CacheSize.
uint32 GetResidentSize() const
{
	FRWScopeLock CacheObjReadLock(PipelineCacheObjLock, SLT_ReadOnly);
	FRWScopeLock StateReadLock(CacheStateLock, SLT_ReadOnly);
	if (CacheState == ECacheState::FinalizedEvicted)
	{
		return 0;
	}
	return CacheSize;
}
// Frame number recorded by the most recent Touch().
uint64 GetLastUsedFrame() const
{
	const uint64 Frame = LastUsedFrame.load();
	return Frame;
}
void Unload()
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineChunk_Unload);
FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_Write);
FRWScopeLock Lock(CacheStateLock, SLT_Write);
check(CacheState == ECacheState::Finalized);
#if LOGCACHEINFO
UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: Evicting cache %s, %d bytes"), *GetCombinedFilePath(), CacheSize);
#endif
// remove vulkan object from mem
VulkanRHI::vkDestroyPipelineCache(UE::Vulkan::GetVulkanDeviceHandle(), PipelineCacheObj, VULKAN_CPU_ALLOCATOR);
PipelineCacheObj = VK_NULL_HANDLE;
CacheState = ECacheState::FinalizedEvicted;
UE::Vulkan::TotalResidentCacheSize -= CacheSize;
}
// How much of a chunk record to load:
//   LoadAsEvicted - read the record's metadata and skip over the driver data (no Vulkan object is created).
//   LoadAllData   - also read the driver data and create the Vulkan pipeline cache object from it.
enum class ECacheChunkLoadType { LoadAsEvicted, LoadAllData };

// Initializes this chunk from the record at the archive's current position.
// ArchiveReader: reader positioned at the start of the chunk record; advanced past the record on return.
// PSOsFoundOUT: must be non-null and empty; receives the PSO hashes stored in the record.
// LoadType: see ECacheChunkLoadType. A chunk loaded as evicted ends in FinalizedEvicted, otherwise in Building.
// The chunk must be in the Initialized state. Both chunk locks are held for writing throughout.
void Load(FPSOArchiveReader& ArchiveReader, TArray<uint64>* PSOsFoundOUT, ECacheChunkLoadType LoadType)
{
	// Use a dedicated profiling scope so load time is not attributed to the Reserve scope.
	QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_RHICreateGraphicsPipelineChunk_Load);
	FRWScopeLock PipelineLock(PipelineCacheObjLock, SLT_Write);
	FRWScopeLock Lock(CacheStateLock, SLT_Write);
	FArchive& Archive = *ArchiveReader.GetArchive();
	check(PSOsFoundOUT != nullptr && PSOsFoundOUT->IsEmpty());
	check(CacheState == ECacheState::Initialized);
	// Remember where the record starts so it can be re-read when the chunk is reinstated.
	SetCacheOffset((uint32)Archive.Tell());
	LoadInternal(ArchiveReader, PSOsFoundOUT, LoadType);
	// if we're loading as evicted then we're in a completed (RO) state.
	CacheState = LoadType == ECacheChunkLoadType::LoadAsEvicted ? ECacheState::FinalizedEvicted : ECacheState::Building;
}
private:
mutable FRWLock PipelineCacheObjLock; // This locks access to the PipelineCacheObj, used for create/destroy operations. individual PSO creates use read lock, driver is thread safe, this lock can be held for 100s of ms.
// Guards CacheState and the bookkeeping that changes with it. Where both locks are taken, PipelineCacheObjLock is taken first.
mutable FRWLock CacheStateLock;
// The Vulkan pipeline cache object backing this chunk; VK_NULL_HANDLE while not created or evicted.
VkPipelineCache PipelineCacheObj = VK_NULL_HANDLE;
// TArray<uint64> PSOsToBeFlushed;
// PSO hashes recorded by CreatePSO that have not yet been written to storage. Each 64 bit hash is stored directly as the pointer payload.
TLockFreePointerListUnordered<void, PLATFORM_CACHE_LINE_SIZE> PSOsToBeFlushed2;
int32 PSOsNotFlushed = 0;
// Size in bytes of the driver cache data, as last loaded from or saved to storage.
int32 CacheSize = 0;
// Set to CacheSize when a chunk record is loaded; expected to be 0 for a new cache (see InitNewCache).
int32 LastSaveSize = 0;
TAtomic<int32> TotalNumPSOs = 0; // the number of PSO hashes that are known to be represented in the cache.
TAtomic<int32> TotalNumUniquePSOs = 0; // the number of PSOs that have contributed to the cache.
static inline const TCHAR TempFileSuffix[] = TEXT("write");
// Naming and location information for this chunk's data.
struct FFileInfo
{
// Generated name of this chunk (see GenerateNewFileName); used in log output and GetCombinedFilePath.
FString Filename;
// Archive position of the raw driver data within the chunk record; 0xffffffff until loaded or saved.
uint32 RawDataOffset = 0xffffffff;
} BinaryCacheFileInfo;
// Frame number of the most recent Touch().
std::atomic<uint64> LastUsedFrame = 0;
// Number of PSO compiles reserved through ReservePendingPSO that have not yet completed in CreatePSO.
std::atomic<uint64> PendingAddToCompiles = 0;
// Builds <RHI cache root>/<versioned folder>/<this chunk's name>.
FString GetCombinedFilePath() const
{
	const FString VersionedCacheDir = FPaths::Combine(UE::Vulkan::GetRHICacheRootFolder(), UE::Vulkan::GetPSOBinaryCacheVersionedFolder());
	return FPaths::Combine(VersionedCacheDir, BinaryCacheFileInfo.Filename);
}
// Generates a unique name for a new chunk: "VulkanPSO_" followed by a freshly created GUID.
// (The platform file lookup that was previously performed here was never used and has been removed.)
static FString GenerateNewFileName()
{
	return FString(TEXT("VulkanPSO_")) + FGuid::NewGuid().ToString();
}
void ReinstateDriverBlobInternal()
{
FScopedTimeToLog Timer(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: ReinstateDriverBlobInternal tot %d "), FPlatformTLS::GetCurrentThreadId()));
check(CacheState == ECacheState::FinalizedEvicted);
FPSOArchiveReader ArchiveReader = FVulkanCombinedChunkCacheFile::Get().GetReader(GetCacheOffset());
if(ensure(ArchiveReader.IsValid()))
{
LoadInternal(ArchiveReader, nullptr, ECacheChunkLoadType::LoadAllData);
CacheState = ECacheState::Finalized;
}
}
// Creates the Vulkan pipeline cache object, seeding it with previously saved driver data.
// CacheBytes: the driver data; it is handed to the driver as the cache's initial data.
// Only valid while the chunk is in the FinalizedEvicted or Initialized state.
void CreateVKCacheInternal(TConstArrayView<uint8> CacheBytes)
{
	FScopedTimeToLog Timer(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: CreateVKCacheInternal %s "), *BinaryCacheFileInfo.Filename));
	check(CacheState == ECacheState::FinalizedEvicted || CacheState == ECacheState::Initialized);
	QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_CacheManagerLoadCache);

	VkPipelineCacheCreateInfo CreateInfo;
	ZeroVulkanStruct(CreateInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	CreateInfo.pInitialData = CacheBytes.GetData();
	CreateInfo.initialDataSize = CacheBytes.Num();
	VERIFYVULKANRESULT(VulkanRHI::vkCreatePipelineCache(UE::Vulkan::GetVulkanDeviceHandle(), &CreateInfo, VULKAN_CPU_ALLOCATOR, &PipelineCacheObj));
}
// Hash used to identify a graphics PSO within the cache: the pipeline state's PrecacheKey.
uint64 GetPrecacheKey(const FVulkanRHIGraphicsPipelineState* GraphicsPipelineState)
{
	const uint64 Key = GraphicsPipelineState->PrecacheKey;
	return Key;
}
void SavePSOCacheInternal()
{
static FCriticalSection ArchiveMutex;
FScopeLock Lock(&ArchiveMutex);
CacheState = ECacheState::PendingFlush;
FArchive& Archive = FVulkanCombinedChunkCacheFile::Get().GetWriter();
SavePSOCacheInternal(Archive);
FVulkanCombinedChunkCacheFile::Get().FlushWriteHandle();
}
// Lifecycle of a cache chunk. Transitions visible in this class:
// Initialized -> Building (InitNewCache, or Load with LoadAllData) -> Closing (ReservePendingPSO at capacity)
// -> PendingFlush -> Finalized (SavePSOCacheInternal) <-> FinalizedEvicted (Unload / Touch).
// A chunk loaded with LoadAsEvicted goes directly from Initialized to FinalizedEvicted.
enum class ECacheState
{
Unknown,
Initialized,
Building, // still accumulating PSOs
Closing, // cache reached the size limit waiting for remaining PSOs to complete.
PendingFlush, // capacity reached and all pending PSOs are stored. Cache has not been flushed to storage.
Finalized, // a finished and resident cache. Finalized caches are always backed by storage.
FinalizedEvicted, // a finished cache that is not resident in RAM.
};
// Current lifecycle state; accessed under CacheStateLock.
ECacheState CacheState = ECacheState::Initialized;
// Reads one chunk record from the archive's current position.
// Record layout, as read here: int32 unique PSO count, int32 driver data size, driver data bytes,
// then (read only when PSOsInFileOUT is provided) uint32 hash count followed by that many uint64 hashes.
//
// ArchiveReader: reader positioned at the start of the record.
// PSOsInFileOUT: optional; when non-null it receives the record's PSO hashes and TotalNumPSOs is set from it.
// LoadType: LoadAllData creates the Vulkan cache object from the driver data and adds CacheSize to the
//           global resident total; otherwise the driver data is skipped.
// No locks are taken here; callers hold the chunk locks.
void LoadInternal(FPSOArchiveReader& ArchiveReader, TArray<uint64>* PSOsInFileOUT, ECacheChunkLoadType LoadType)
{
FScopedTimeToLog Timer1(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: LoadInternal TOT ")));
FArchive& Archive = *ArchiveReader.GetArchive();
int32 TotalNumUniquePSOsRead;
Archive << TotalNumUniquePSOsRead;
TotalNumUniquePSOs = TotalNumUniquePSOsRead;
Archive << CacheSize;
LastSaveSize = CacheSize;
// The archive is now positioned on the first byte of the driver data.
BinaryCacheFileInfo.RawDataOffset = Archive.Tell();
if (LoadType == ECacheChunkLoadType::LoadAllData)
{
if (ArchiveReader.GetData())
{
// Memory mapped reader: hand the driver a view straight into the mapped bytes.
TConstArrayView<uint8> CacheBytes;
{
FScopedTimeToLog Timer3(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: LoadInternal MappedSerialize %d"), CacheSize));
// this avoids a pointless memcpy when mmapping is in use.
const uint8* RawData = ArchiveReader.GetData() + Archive.Tell();
CacheBytes = MakeArrayView(RawData, CacheSize);
Archive.Seek(Archive.Tell() + CacheSize);
}
CreateVKCacheInternal(CacheBytes);
}
else
{
// No mapped bytes available: copy the driver data out of the archive into a temporary buffer.
TArray<uint8> CacheBytes;
{
FScopedTimeToLog Timer3(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: LoadInternal Serialize %d"), CacheSize));
CacheBytes.SetNumUninitialized(CacheSize);
Archive.Serialize(CacheBytes.GetData(), CacheSize);;
check(!Archive.IsError());
}
CreateVKCacheInternal(CacheBytes);
}
UE::Vulkan::TotalResidentCacheSize += CacheSize;
}
else
{
// Not loading the driver data: skip over it.
Archive.Seek(Archive.Tell() + CacheSize);
}
if (PSOsInFileOUT)
{
uint32 NumHashes = 0;
Archive << NumHashes;
PSOsInFileOUT->SetNumUninitialized(NumHashes);
Archive.Serialize(PSOsInFileOUT->GetData(), (int64)PSOsInFileOUT->Num() * PSOsInFileOUT->GetTypeSize());
TotalNumPSOs = PSOsInFileOUT->Num();
}
}
// Save this cache chunk to the archive. We have the state writelock but cache object read lock, its possible for the cache to be changed (added to).
//
// Record layout written: int32 unique PSO count, uint32 driver data size, driver data bytes,
// uint32 hash count, then that many 64 bit PSO hashes.
// The chunk must be in PendingFlush; on return it is Finalized and its size has been added to the
// global resident total.
//
// The size stored in the record, and CacheSize, is the byte count actually returned by the driver
// in the second vkGetPipelineCacheData call. Because the cache can grow between the size query and
// the data query, that call may return VK_INCOMPLETE with fewer bytes than the buffer holds; using
// the returned count keeps the record header, the written bytes and the resident accounting in
// agreement and avoids writing uninitialized trailing bytes. Any other failure is logged and the
// chunk is saved with an empty driver blob so the file layout stays consistent.
void SavePSOCacheInternal(FArchive& Archive)
{
	FScopedTimeToLog Timer(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: SavePSOCacheInternal TOT %s "), *BinaryCacheFileInfo.Filename));
	check(CacheState == ECacheState::PendingFlush);
	QUICK_SCOPE_CYCLE_COUNTER(STAT_Vulkan_CacheManagerWriteCache);
	TArray<uint8> CacheBytes;
	size_t TotalSize = 0;
	{
		FScopedTimeToLog Timer2(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: SavePSOCacheInternal get vk data %s "), *BinaryCacheFileInfo.Filename));
		// First call queries the size, second call retrieves the data.
		VkResult Result = VulkanRHI::vkGetPipelineCacheData(UE::Vulkan::GetVulkanDeviceHandle(), PipelineCacheObj, &TotalSize, nullptr);
		if (Result == VK_SUCCESS)
		{
			CacheBytes.SetNumUninitialized(TotalSize);
			Result = VulkanRHI::vkGetPipelineCacheData(UE::Vulkan::GetVulkanDeviceHandle(), PipelineCacheObj, &TotalSize, CacheBytes.GetData());
		}
		if (Result != VK_SUCCESS && Result != VK_INCOMPLETE)
		{
			UE_LOG(LogVulkanRHI, Warning, TEXT("FVulkanChunkedPipelineCacheManager: vkGetPipelineCacheData failed (%d), saving %s without driver data"), (int32)Result, *BinaryCacheFileInfo.Filename);
			TotalSize = 0;
		}
	}
	CacheSize = (uint32)TotalSize;
	{
		FScopedTimeToLog Timer4(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: SavePSOCacheInternal serialize %s %zu "), *BinaryCacheFileInfo.Filename, TotalSize));
		// Remember where this record starts so the chunk can be reloaded after eviction.
		SetCacheOffset((uint32)Archive.Tell());
		int32 TotalNumUniquePSOsWrite = TotalNumUniquePSOs.Load(EMemoryOrder::Relaxed);
		Archive << TotalNumUniquePSOsWrite;
		// Only the bytes the driver actually produced are written.
		uint32 RawDataSize = (uint32)TotalSize;
		Archive << RawDataSize;
		BinaryCacheFileInfo.RawDataOffset = Archive.Tell();
		Archive.Serialize(CacheBytes.GetData(), RawDataSize);
		TArray<void*> FlushedPSOHashes;
		PSOsToBeFlushed2.PopAll(FlushedPSOHashes);
		// Just write out the void* as uint64s.
		static_assert(sizeof(void*) == sizeof(uint64));
		uint32 HashCount = FlushedPSOHashes.Num();
		Archive << HashCount;
		Archive.Serialize(FlushedPSOHashes.GetData(), (int64)FlushedPSOHashes.Num() * FlushedPSOHashes.GetTypeSize());
		check(!Archive.IsError());
		CacheState = ECacheState::Finalized;
		UE::Vulkan::TotalResidentCacheSize += CacheSize;
	}
}
};
//////////////////////////////////////////////////////////////////////////
class FVulkanChunkedPipelineCacheManagerImpl
{
mutable FRWLock ChunkedPipelineCacheLock; // This guards access to the manager but not individual chunks within the map.
using FCacheChunkKey = UE::Vulkan::FPipelineCacheChunkKey;
using FCacheChunksMap = TMap<FCacheChunkKey, TUniquePtr<FVulkanPipelineCacheChunk>>;
uint32 NumChunks = 0;
FCacheChunksMap CacheChunksMap;
// track number of PSOs created/found. logging use only.
std::atomic<int32> FoundPSOs = 0;
TMap<uint64, FCacheChunkKey> PrecachePSOToCacheChunkMap;
FCriticalSection LRUCS;
using FVulkanPipelineCacheChunkLRUNode = UE::Vulkan::FVulkanPipelineCacheChunkLRUNode;
using FVulkanRHIGraphicsPipelineStateLRU = UE::Vulkan::FVulkanRHIGraphicsPipelineStateLRU;
UE::Vulkan::FVulkanRHIGraphicsPipelineStateLRU CacheChunkLRU;
public:
static FVulkanChunkedPipelineCacheManagerImpl& Get()
{
static FVulkanChunkedPipelineCacheManagerImpl VulkanPipelineCacheManager;
return VulkanPipelineCacheManager;
}
FVulkanChunkedPipelineCacheManagerImpl()
{
LoadAllCaches();
}
// Creates (or skips creating) a PSO through the cache chunk that owns it.
// The chunk is located or allocated via GetOrAddCache. Precompile jobs whose PSO is already recorded in a chunk
// return VK_SUCCESS without creating anything. Otherwise the chunk creates the PSO and, if the chunk's
// last-used frame changed as a result, its LRU node is moved to the head of CacheChunkLRU.
template<class TPipelineState>
VkResult CreatePSO(TPipelineState* GraphicsPipelineState, bool bIsPrecompileJob, FVulkanChunkedPipelineCacheManager::FPSOCreateCallbackFunc<TPipelineState> PSOCreateFunc)
{
	FScopedTimeToLog Timer(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: CreatePSO (precomp %d) tot %d "), bIsPrecompileJob, FPlatformTLS::GetCurrentThreadId()));
	FCacheChunkKey ChunkKey;
	FVulkanPipelineCacheChunk::EPSOCacheFindResult FindResult;
	FVulkanPipelineCacheChunk* Chunk = GetOrAddCache(PSOCreateFunc, GraphicsPipelineState, FindResult, ChunkKey);

	// No need to hold ChunkedPipelineCacheLock from here on: a chunk is never removed once it has been added,
	// and there should be no PSO create tasks during cache shutdown.

	// Do not create precompile PSOs that are already cached.
	const bool bAlreadyCached = (FindResult == FVulkanPipelineCacheChunk::EPSOCacheFindResult::Found);
	if (bIsPrecompileJob && bAlreadyCached)
	{
		return VK_SUCCESS;
	}

	FScopedTimeToLog Timer4(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: CreatePSO actualcreate %d "), FPlatformTLS::GetCurrentThreadId()));
	const uint64 LastUsedBefore = Chunk->GetLastUsedFrame();
	const VkResult Result = Chunk->CreatePSO(GraphicsPipelineState, FindResult, MoveTemp(PSOCreateFunc));
	const uint64 LastUsedAfter = Chunk->GetLastUsedFrame();
	if (LastUsedAfter == LastUsedBefore)
	{
		// The chunk's last-used frame did not change, the LRU order is left as it is.
		return Result;
	}

	FScopedTimeToLog Timer3(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: CreatePSO lru %d "), FPlatformTLS::GetCurrentThreadId()));
	FScopeLock Lock(&LRUCS);
	FVulkanPipelineCacheChunkLRUNode* ChunkNode = Chunk->GetLRUNode();
	if (CacheChunkLRU.GetHead() != ChunkNode)
	{
		// If the chunk was evicted it may not be in the LRU yet, only unlink nodes that are actually linked.
		const bool bIsLinked = ChunkNode->GetNextNode() || ChunkNode->GetPrevNode();
		if (bIsLinked)
		{
			CacheChunkLRU.RemoveNode(ChunkNode, false);
		}
		CacheChunkLRU.AddHead(ChunkNode);
	}
	return Result;
}
// Periodic update: optionally logs cache statistics (LOGCACHEINFO builds) and, when a resident cache
// size target is configured, tries to unload cache chunks.
void Tick()
{
#if LOGCACHEINFO
	{
		// Stats are only logged when the number of tracked precache PSOs has changed since the last log.
		FRWScopeLock Lock(ChunkedPipelineCacheLock, SLT_ReadOnly);
		static int32 LastLoggedPSOCount = 0;
		const int32 CurrentPSOCount = PrecachePSOToCacheChunkMap.Num();
		if (LastLoggedPSOCount != CurrentPSOCount)
		{
			UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: Total precache PSOs stored %d, Num Chunks %d"), PrecachePSOToCacheChunkMap.Num(), CacheChunksMap.Num());
			for (auto& ChunkPair : CacheChunksMap)
			{
				ChunkPair.Value.Get()->LogStats(FString::Printf(TEXT("Chunk id %d"), ChunkPair.Key));
			}
			UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: PSOs created from cache %d "), FoundPSOs.load());
			LastLoggedPSOCount = CurrentPSOCount;
		}
	}
#endif
	if (!UE::Vulkan::GTargetResidentCacheSizeMb)
	{
		// No resident size target set, nothing to unload.
		return;
	}
	TryUnloadCacheChunks();
}
private:
bool LoadAllCaches()
{
FRWScopeLock WriteLock(ChunkedPipelineCacheLock, SLT_Write);
check(CacheChunksMap.IsEmpty() && PrecachePSOToCacheChunkMap.IsEmpty());
// the CacheSubDir is specific to the device+driver+shaderplatform.
// We clean out everything else from the cache folder.
const FString RootCacheFolder = UE::Vulkan::GetRHICacheRootFolder();
const FString CacheSubDir = UE::Vulkan::GetPSOBinaryCacheVersionedFolder();
const FString CombinedCacheSubDir = FPaths::Combine(RootCacheFolder, CacheSubDir);
// delete anything unexpected from the RHI cache root folder.
IPlatformFile& PlatformFile = FPlatformFileManager::Get().GetPlatformFile();
TArray<FString> FoundFiles;
IFileManager::Get().FindFiles(FoundFiles, *(RootCacheFolder / TEXT("*")), true, true);
for (FString& FoundFile : FoundFiles)
{
const FString FullPath = (RootCacheFolder / FoundFile);
const bool bIsDir = PlatformFile.DirectoryExists(*FullPath);
if (FoundFile != CacheSubDir || !bIsDir)
{
bool bSuccess;
if (bIsDir)
{
bSuccess = PlatformFile.DeleteDirectoryRecursively(*FullPath);
}
else
{
bSuccess = PlatformFile.DeleteFile(*FullPath);
}
UE_LOG(LogRHI, Verbose, TEXT("FVulkanChunkedPipelineCacheManagerImpl: Deleting %s %s"), bIsDir ? TEXT("dir") : TEXT("file"), *FullPath);
UE_CLOG(!bSuccess, LogRHI, Warning, TEXT("FVulkanChunkedPipelineCacheManagerImpl: Failed to delete %s"), *FullPath);
}
}
FoundFiles.Reset();
IFileManager::Get().FindFiles(FoundFiles, *(CombinedCacheSubDir / TEXT("*")), true, false);
for (FString& FoundFile : FoundFiles)
{
if (!FoundFile.Equals(FVulkanCombinedChunkCacheFile::Get().GetFilename()))
{
FString FullPath = FPaths::Combine(CombinedCacheSubDir, FoundFile);
UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: deleting unexpected Cache %s."), *FullPath);
PlatformFile.DeleteFile(*FullPath);
}
}
bool bReadFailed = false;
TUniqueFunction<void(FPSOArchiveReader& ArchiveReader)> OnFoundCacheChunk = [this, &bReadFailed](FPSOArchiveReader& ArchiveReader)
{
TUniquePtr<FVulkanPipelineCacheChunk> PendingChunk = MakeUnique<FVulkanPipelineCacheChunk>();
TArray<uint64> PSOsOut;
PendingChunk->Load(ArchiveReader, &PSOsOut, FVulkanPipelineCacheChunk::ECacheChunkLoadType::LoadAsEvicted);
FCacheChunkKey NewChunkKey = ++NumChunks;
check(!CacheChunksMap.Contains(NewChunkKey));
FVulkanPipelineCacheChunkLRUNode* NewLRUNode = new FVulkanPipelineCacheChunkLRUNode(NewChunkKey);
{
FScopeLock Lock(&LRUCS);
CacheChunkLRU.AddHead(NewLRUNode);
PendingChunk->SetLRUNode(NewLRUNode);
}
CacheChunksMap.Emplace(NewChunkKey, MoveTemp(PendingChunk));
for (uint64 PSOHash : PSOsOut)
{
PrecachePSOToCacheChunkMap.Add(PSOHash, NewChunkKey);
}
};
bool bSuccess = FVulkanCombinedChunkCacheFile::Get().LoadAllCacheChunks(MoveTemp(OnFoundCacheChunk));
if (!bSuccess)
{
UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: failed to open chunked PSO cache file, PSO caching is disabled."));
UE::Vulkan::GUseChunkedPSOCache = 0;
}
return bSuccess;
}
// Returns the key used to look a graphics PSO up in PrecachePSOToCacheChunkMap (the state's PrecacheKey).
static uint64 GetPrecacheHash(const FVulkanRHIGraphicsPipelineState* GFXState)
{
	return GFXState->PrecacheKey;
}
// Finds the cache chunk that holds (or will hold) the PSO described by Initializer.
// - If the PSO's precache hash is already mapped, returns that chunk with FindResult = Found.
// - Otherwise, on the first pass (bTryAdd == false) FindResult is set to NotFound, Lock is switched from read-only
//   to write, and the lookup is repeated with bTryAdd == true in case the PSO was added in the meantime.
// - With bTryAdd == true and still no mapping, the newest chunk is used (a new one is created when there is none or
//   it has reached capacity) and the hash is mapped to it. FindResult becomes MatchedExisting when the chunk reports
//   that the PSO does not require a compile; otherwise a pending PSO slot is reserved.
// ChunkKeyOUT receives the key of the returned chunk. The returned pointer is never null.
template<class TPipelineState, class TInitializer>
FVulkanPipelineCacheChunk* GetChunk(FVulkanChunkedPipelineCacheManager::FPSOCreateCallbackFunc<TPipelineState>& PSOCreateFunc, const TInitializer& Initializer, FVulkanPipelineCacheChunk::EPSOCacheFindResult& FindResult, FCacheChunkKey& ChunkKeyOUT, FRWScopeLock& Lock, const bool bTryAdd = false)
{
	using EFindResult = FVulkanPipelineCacheChunk::EPSOCacheFindResult;

	FVulkanPipelineCacheChunk* ReturnChunk = nullptr;
	const uint64 PSOPrecacheKey = GetPrecacheHash(Initializer);

	if (const FCacheChunkKey* const KnownChunkKey = PrecachePSOToCacheChunkMap.Find(PSOPrecacheKey))
	{
		// A chunk already holds this PSO.
		ReturnChunk = CacheChunksMap.FindChecked(*KnownChunkKey).Get();
		++FoundPSOs;
		FindResult = EFindResult::Found;
		ChunkKeyOUT = *KnownChunkKey;
	}
	else if (!bTryAdd)
	{
		FindResult = EFindResult::NotFound;
		// Try again with the write lock, add if it's still missing.
		Lock.ReleaseReadOnlyLockAndAcquireWriteLock_USE_WITH_CAUTION();
		return GetChunk(PSOCreateFunc, Initializer, FindResult, ChunkKeyOUT, Lock, true);
	}

	if (ReturnChunk)
	{
		return ReturnChunk;
	}

	// From here on the PSO is unknown and we are adding it.
	check(bTryAdd);

	// The newest chunk is the one keyed by the current chunk count.
	if (TUniquePtr<FVulkanPipelineCacheChunk>* NewestChunk = CacheChunksMap.Find(NumChunks))
	{
		ReturnChunk = NewestChunk->Get();
	}

	if (ReturnChunk == nullptr || ReturnChunk->CheckCapacityReached())
	{
		// No usable chunk: create a new one and put it at the head of the LRU.
		FCacheChunkKey NewChunk = ++NumChunks;
		check(!CacheChunksMap.Contains(NewChunk));
		FVulkanPipelineCacheChunkLRUNode* NewLRUNode = new FVulkanPipelineCacheChunkLRUNode(NewChunk);
		{
			FScopeLock LRULock(&LRUCS);
			CacheChunkLRU.AddHead(NewLRUNode);
			ReturnChunk = CacheChunksMap.Emplace(NewChunk, MakeUnique<FVulkanPipelineCacheChunk>(NewLRUNode)).Get();
		}
		ReturnChunk->InitNewCache();
	}

	if (!ReturnChunk->PSORequiresCompile(Initializer, PSOCreateFunc))
	{
#if LOGCACHEINFO
		UE_LOG(LogVulkanRHI, Log, TEXT("FVulkanChunkedPipelineCacheManager: redundant PSO .."));
#endif
		FindResult = EFindResult::MatchedExisting;
	}
	else
	{
		ReturnChunk->ReservePendingPSO();
	}

	// NumChunks is the key of the chunk selected (or created) above.
	ChunkKeyOUT = NumChunks;
	PrecachePSOToCacheChunkMap.Add(PSOPrecacheKey, NumChunks);

	check(ReturnChunk);
	return ReturnChunk;
}
// Entry point for chunk lookup: takes ChunkedPipelineCacheLock read-only and defers to GetChunk,
// which may switch the lock to write in order to add the PSO.
template<class TPipelineState, class TInitializer>
FVulkanPipelineCacheChunk* GetOrAddCache(FVulkanChunkedPipelineCacheManager::FPSOCreateCallbackFunc<TPipelineState>& PSOCreateFunc, const TInitializer& Initializer, FVulkanPipelineCacheChunk::EPSOCacheFindResult& FindResult, FCacheChunkKey& ChunkKeyOUT)
{
	FScopedTimeToLog Timer(FString::Printf(TEXT("FVulkanChunkedPipelineCacheManager: GetOrAddCache tot %d "), FPlatformTLS::GetCurrentThreadId()));
	FRWScopeLock Lock(ChunkedPipelineCacheLock, SLT_ReadOnly);
	FVulkanPipelineCacheChunk* const Chunk = GetChunk(PSOCreateFunc, Initializer, FindResult, ChunkKeyOUT, Lock);
	return Chunk;
}
void TryUnloadCacheChunks()
{
const uint32 FramePace = FPlatformRHIFramePacer::GetFramePace();
const uint32 LastFrameRequired = GFrameNumber - FMath::Min(GFrameNumber, (uint32)(FramePace * UE::Vulkan::GUntouchedChunkEvictTimeSeconds));
FRWScopeLock CacheLock(ChunkedPipelineCacheLock, SLT_ReadOnly);
FScopeLock LRULock(&LRUCS);
int32 CurrentResidentSize = UE::Vulkan::TotalResidentCacheSize;
const int32 TargetResidentCacheSizeBytes = (UE::Vulkan::GTargetResidentCacheSizeMb * 1024 * 1024);
const int32 MaxToUnloadPerTick = 3;
// unload oldest
FVulkanPipelineCacheChunkLRUNode* CurrentNode = CacheChunkLRU.GetTail();
for (int32 UnloadCount = 0; CurrentNode && UnloadCount < MaxToUnloadPerTick && CurrentResidentSize > TargetResidentCacheSizeBytes;)
{
FVulkanPipelineCacheChunkLRUNode* NextNode = CurrentNode->GetPrevNode();
UE::Vulkan::FPipelineCacheChunkKey ChunkKey = CurrentNode->GetValue();
FVulkanPipelineCacheChunk* FoundCacheChunk = CacheChunksMap.FindChecked(ChunkKey).Get();
int CacheSize = FoundCacheChunk->GetResidentSize();
if (FoundCacheChunk->CanBeEvicted())
{
if (LastFrameRequired < FoundCacheChunk->GetLastUsedFrame())
{
// exit, everything else will be too recent
break;
}
FoundCacheChunk->Unload();
CacheChunkLRU.RemoveNode(CurrentNode, false);
CurrentResidentSize -= CacheSize;
UnloadCount++;
}
CurrentNode = NextNode;
}
}
};
//////////////////////////////////////////////////////////////////////////
// public interface:
// File-scope manager instance returned by FVulkanChunkedPipelineCacheManager::Get().
// Its VulkanPipelineCacheManagerImpl is created in Init() and cleared in Shutdown().
static FVulkanChunkedPipelineCacheManager VulkanPipelineCacheManager;
// True while the chunked PSO cache is switched on (GUseChunkedPSOCache is non-zero).
bool FVulkanChunkedPipelineCacheManager::IsEnabled()
{
	const bool bChunkedCacheEnabled = (UE::Vulkan::GUseChunkedPSOCache != 0);
	return bChunkedCacheEnabled;
}
// Creates the manager implementation when the chunked PSO cache is enabled; does nothing otherwise.
// Must not be called while an implementation already exists.
void FVulkanChunkedPipelineCacheManager::Init()
{
	if (UE::Vulkan::GUseChunkedPSOCache != 0)
	{
		check(!VulkanPipelineCacheManager.VulkanPipelineCacheManagerImpl.IsValid());
		VulkanPipelineCacheManager.VulkanPipelineCacheManagerImpl = MakeUnique<FVulkanChunkedPipelineCacheManagerImpl>();
	}
}
// Releases the manager implementation, if one was created by Init().
// This intentionally does not look at GUseChunkedPSOCache: loading the caches can set that flag to 0 on failure
// after the implementation has already been constructed, and the implementation must still be released here
// rather than lingering until the file-scope manager instance is destroyed.
void FVulkanChunkedPipelineCacheManager::Shutdown()
{
	// Resetting an empty pointer is a no-op, so this is safe when Init() never created an implementation.
	VulkanPipelineCacheManager.VulkanPipelineCacheManagerImpl.Reset();
}
// Returns the file-scope manager instance.
// Asserts that the chunked PSO cache is enabled and that Init() has created the implementation.
FVulkanChunkedPipelineCacheManager& FVulkanChunkedPipelineCacheManager::Get()
{
	FVulkanChunkedPipelineCacheManager& Manager = VulkanPipelineCacheManager;
	check(UE::Vulkan::GUseChunkedPSOCache);
	check(VulkanPipelineCacheManager.VulkanPipelineCacheManagerImpl.IsValid());
	return Manager;
}
// Public PSO creation entry point: forwards to the implementation's CreatePSO and returns its result.
// Asserts that the chunked PSO cache is enabled.
template<class TPipelineState>
VkResult FVulkanChunkedPipelineCacheManager::CreatePSO(TPipelineState* GraphicsPipelineState, bool bIsPrecompileJob, FPSOCreateCallbackFunc<TPipelineState> PSOCreateFunc)
{
	check(UE::Vulkan::GUseChunkedPSOCache);
	const VkResult CreateResult = VulkanPipelineCacheManagerImpl->CreatePSO(GraphicsPipelineState, bIsPrecompileJob, MoveTemp(PSOCreateFunc));
	return CreateResult;
}
// Ticks the implementation while the chunked PSO cache is enabled; does nothing when it is disabled.
void FVulkanChunkedPipelineCacheManager::Tick()
{
	if (UE::Vulkan::GUseChunkedPSOCache != 0)
	{
		VulkanPipelineCacheManagerImpl->Tick();
	}
}
// Explicit instantiation of CreatePSO for graphics pipeline states; the template body is defined in this file.
template VkResult FVulkanChunkedPipelineCacheManager::CreatePSO(FVulkanRHIGraphicsPipelineState* GraphicsPipelineState, bool bIsPrecompileJob, FPSOCreateCallbackFunc<FVulkanRHIGraphicsPipelineState> PSOCreateFunc);