// Copyright Epic Games, Inc. All Rights Reserved. // Implementation of D3D12 Pipelinestate related functions //----------------------------------------------------------------------------- // Include Files //----------------------------------------------------------------------------- #include "D3D12PipelineState.h" #include "D3D12RHIPrivate.h" #include "Hash/CityHash.h" #include "PipelineStateCache.h" static TAutoConsoleVariable CVarPSOStallWarningThresholdInMs( TEXT("D3D12.PSO.StallWarningThresholdInMs"), 100.0f, TEXT("Sets a threshold of when to logs messages about stalls due to PSO creation.\n") TEXT("Value is in milliseconds. (100 is the default)\n"), ECVF_ReadOnly); bool GLowLevelCacheKeepPrecachedPSOs = false; static FAutoConsoleVariableRef CVarPSOPrecacheKeepLowLevel( TEXT("D3D12.PSOPrecache.KeepLowLevel"), GLowLevelCacheKeepPrecachedPSOs, TEXT("Keep in memory the d3d12 PSO blob for precached PSOs. Consumes more memory but reduces stalls.\n"), ECVF_ReadOnly ); bool GLowLevelCacheKeepUsedPSOs = false; static FAutoConsoleVariableRef CVarLowLevelCacheKeepUsedPSOs( TEXT("D3D12.PSO.KeepUsedPSOsInLowLevelCache"), GLowLevelCacheKeepUsedPSOs, TEXT("Keep in memory the d3d12 PSO blob for used PSOs. Consumes more memory but reduces potential stalls.\n"), ECVF_ReadOnly ); /// @cond DOXYGEN_WARNINGS static void TranslateRenderTargetFormats( const FGraphicsPipelineStateInitializer& PsoInit, D3D12_RT_FORMAT_ARRAY& RTFormatArray, DXGI_FORMAT& DSVFormat) { RTFormatArray.NumRenderTargets = PsoInit.ComputeNumValidRenderTargets(); for (uint32 RTIdx = 0; RTIdx < PsoInit.RenderTargetsEnabled; ++RTIdx) { checkSlow(PsoInit.RenderTargetFormats[RTIdx] == PF_Unknown || GPixelFormats[PsoInit.RenderTargetFormats[RTIdx]].Supported); DXGI_FORMAT PlatformFormat = (DXGI_FORMAT)GPixelFormats[PsoInit.RenderTargetFormats[RTIdx]].PlatformFormat; ETextureCreateFlags Flags = PsoInit.RenderTargetFlags[RTIdx]; RTFormatArray.RTFormats[RTIdx] = UE::DXGIUtilities::FindShaderResourceFormat(UE::DXGIUtilities::GetPlatformTextureResourceFormat(PlatformFormat, Flags), EnumHasAnyFlags(Flags, ETextureCreateFlags::SRGB)); } checkSlow(PsoInit.DepthStencilTargetFormat == PF_Unknown || GPixelFormats[PsoInit.DepthStencilTargetFormat].Supported); DXGI_FORMAT PlatformFormat = (DXGI_FORMAT)GPixelFormats[PsoInit.DepthStencilTargetFormat].PlatformFormat; DSVFormat = UE::DXGIUtilities::FindDepthStencilFormat(UE::DXGIUtilities::GetPlatformTextureResourceFormat(PlatformFormat, PsoInit.DepthStencilTargetFlag)); } static FD3D12LowLevelGraphicsPipelineStateDesc GetLowLevelGraphicsPipelineStateDesc(const FGraphicsPipelineStateInitializer& Initializer, const FD3D12RootSignature* RootSignature) { FD3D12LowLevelGraphicsPipelineStateDesc Desc{}; Desc.pRootSignature = RootSignature; Desc.Desc.pRootSignature = RootSignature->GetRootSignature(); #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS Desc.Desc.BlendState = Initializer.BlendState ? FD3D12DynamicRHI::ResourceCast(Initializer.BlendState)->Desc : CD3DX12_BLEND_DESC(D3D12_DEFAULT); #endif // #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS #if !D3D12_USE_DERIVED_PSO Desc.Desc.SampleMask = 0xFFFFFFFF; Desc.Desc.RasterizerState = Initializer.RasterizerState ? FD3D12DynamicRHI::ResourceCast(Initializer.RasterizerState)->Desc : CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); Desc.Desc.DepthStencilState = Initializer.DepthStencilState ? CD3DX12_DEPTH_STENCIL_DESC1(FD3D12DynamicRHI::ResourceCast(Initializer.DepthStencilState)->Desc) : CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEFAULT); #endif // !D3D12_USE_DERIVED_PSO Desc.Desc.PrimitiveTopologyType = D3D12PrimitiveTypeToTopologyType(TranslatePrimitiveType(Initializer.PrimitiveType)); TranslateRenderTargetFormats(Initializer, Desc.Desc.RTFormatArray, Desc.Desc.DSVFormat); Desc.Desc.SampleDesc.Count = Initializer.NumSamples; Desc.Desc.SampleDesc.Quality = GetMaxMSAAQuality(Initializer.NumSamples); if (FD3D12VertexDeclaration* InputLayout = (FD3D12VertexDeclaration*) Initializer.BoundShaderState.VertexDeclarationRHI) { Desc.Desc.InputLayout.NumElements = InputLayout->VertexElements.Num(); Desc.Desc.InputLayout.pInputElementDescs = InputLayout->VertexElements.GetData(); Desc.InputLayoutHash = InputLayout->HashNoStrides; // Vertex stream stride does not affect the D3D12 PSO } #define COPY_SHADER(Initial, Name) \ if (FD3D12##Name##Shader* Shader = (FD3D12##Name##Shader*) Initializer.BoundShaderState.Get##Name##Shader()) \ { \ Desc.Desc.Initial##S = Shader->GetShaderBytecode(); \ Desc.Initial##SHash = Shader->GetBytecodeHash(); \ } COPY_SHADER(V, Vertex); COPY_SHADER(M, Mesh); COPY_SHADER(A, Amplification); COPY_SHADER(P, Pixel); COPY_SHADER(G, Geometry); #undef COPY_SHADER #if D3D12RHI_NEEDS_VENDOR_EXTENSIONS #define EXT_SHADER(Initial, Name) \ if (FD3D12##Name##Shader* Shader = (FD3D12##Name##Shader*) Initializer.BoundShaderState.Get##Name##Shader()) \ { \ if (Shader->VendorExtensions.Num() > 0) \ { \ Desc.Initial##SExtensions = &Shader->VendorExtensions; \ } \ } EXT_SHADER(V, Vertex); EXT_SHADER(M, Mesh); EXT_SHADER(A, Amplification); EXT_SHADER(P, Pixel); EXT_SHADER(G, Geometry); #undef EXT_SHADER #endif #if !D3D12_USE_DERIVED_PSO // TODO: [PSO API] For now, keep DBT enabled, if available, until it is added as part of a member to the Initializer's DepthStencilState Desc.Desc.DepthStencilState.DepthBoundsTestEnable = GSupportsDepthBoundsTest && Initializer.bDepthBounds; #endif // The blend state is liable to have non default values for RenderTarget indices that exceed // NumRenderTargets, this will in turn cause them to have a different `D3D12_PIPELINE_STATE_STREAM_DESC` // as far as the driver is concerned. // Which will then cause a hash collision when using driver level pipeline caching, due to the hash // only being derived from the active render target blend states. #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS for (UINT RenderTarget = Desc.Desc.RTFormatArray.NumRenderTargets; RenderTarget < 8; ++RenderTarget) { Desc.Desc.BlendState.RenderTarget[RenderTarget] = D3D12_RENDER_TARGET_BLEND_DESC{}; } #endif // !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS Desc.bFromPSOFileCache = Initializer.bFromPSOFileCache; return Desc; } static FD3D12ComputePipelineStateDesc GetComputePipelineStateDesc(const FD3D12ComputeShader* ComputeShader, const FD3D12RootSignature* RootSignature) { FD3D12ComputePipelineStateDesc Desc{}; Desc.pRootSignature = RootSignature; Desc.Desc.pRootSignature = RootSignature->GetRootSignature(); Desc.Desc.CS = ComputeShader->GetShaderBytecode(); Desc.CSHash = ComputeShader->GetBytecodeHash(); #if D3D12RHI_NEEDS_VENDOR_EXTENSIONS if (ComputeShader->VendorExtensions.Num() > 0) { Desc.Extensions = &ComputeShader->VendorExtensions; } #endif return Desc; } FD3D12PipelineStateWorker::FD3D12PipelineStateWorker(FD3D12Adapter* Adapter, const ComputePipelineCreationArgs& InArgs) : FD3D12AdapterChild(Adapter) , bIsGraphics(false) { CreationArgs.ComputeArgs = new ComputePipelineCreationArgs_POD(); CreationArgs.ComputeArgs->Init(InArgs.Args); }; FD3D12PipelineStateWorker::FD3D12PipelineStateWorker(FD3D12Adapter* Adapter, const GraphicsPipelineCreationArgs& InArgs) : FD3D12AdapterChild(Adapter) , bIsGraphics(true) { CreationArgs.GraphicsArgs = new GraphicsPipelineCreationArgs_POD(); CreationArgs.GraphicsArgs->Init(InArgs.Args); }; /// @endcond uint64 FD3D12PipelineStateCacheBase::HashData(const void* Data, int32 NumBytes) { return CityHash64((const char*) Data, NumBytes); } uint64 FD3D12PipelineStateCacheBase::HashPSODesc(const FD3D12LowLevelGraphicsPipelineStateDesc& Desc) { struct GraphicsPSOData { ShaderBytecodeHash VSHash; ShaderBytecodeHash MSHash; ShaderBytecodeHash ASHash; ShaderBytecodeHash GSHash; ShaderBytecodeHash PSHash; uint32 InputLayoutHash; #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS uint8 AlphaToCoverageEnable; uint8 IndependentBlendEnable; #endif // #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS #if !D3D12_USE_DERIVED_PSO uint32 SampleMask; D3D12_RASTERIZER_DESC RasterizerState; D3D12_DEPTH_STENCIL_DESC1 DepthStencilState; #endif // #if !D3D12_USE_DERIVED_PSO D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBStripCutValue; D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType; DXGI_FORMAT DSVFormat; DXGI_SAMPLE_DESC SampleDesc; uint32 NodeMask; D3D12_PIPELINE_STATE_FLAGS Flags; }; struct RenderTargetData { DXGI_FORMAT Format; #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS D3D12_RENDER_TARGET_BLEND_DESC BlendDesc; #endif // #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS }; const int32 NumRenderTargets = Desc.Desc.RTFormatArray.NumRenderTargets; const size_t GraphicsPSODataSize = sizeof(GraphicsPSOData); const size_t RenderTargetDataSize = NumRenderTargets * sizeof(RenderTargetData); const size_t TotalDataSize = GraphicsPSODataSize + RenderTargetDataSize; char Data[GraphicsPSODataSize + 8 * sizeof(RenderTargetData)]; FMemory::Memzero(Data, TotalDataSize); GraphicsPSOData* PSOData = (GraphicsPSOData*) Data; RenderTargetData* RTData = (RenderTargetData*) (Data + GraphicsPSODataSize); PSOData->VSHash = Desc.VSHash; PSOData->MSHash = Desc.MSHash; PSOData->ASHash = Desc.ASHash; PSOData->GSHash = Desc.GSHash; PSOData->PSHash = Desc.PSHash; PSOData->InputLayoutHash = Desc.InputLayoutHash; #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS PSOData->AlphaToCoverageEnable = Desc.Desc.BlendState.AlphaToCoverageEnable; PSOData->IndependentBlendEnable = Desc.Desc.BlendState.IndependentBlendEnable; #endif // #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS #if !D3D12_USE_DERIVED_PSO PSOData->SampleMask = Desc.Desc.SampleMask; PSOData->RasterizerState = Desc.Desc.RasterizerState; PSOData->DepthStencilState = Desc.Desc.DepthStencilState; #endif // #if !D3D12_USE_DERIVED_PSO PSOData->IBStripCutValue = Desc.Desc.IBStripCutValue; PSOData->PrimitiveTopologyType = Desc.Desc.PrimitiveTopologyType; PSOData->DSVFormat = Desc.Desc.DSVFormat; PSOData->SampleDesc = Desc.Desc.SampleDesc; PSOData->NodeMask = Desc.Desc.NodeMask; PSOData->Flags = Desc.Desc.Flags; for (int32 RT = 0; RT < NumRenderTargets; RT++) { RTData[RT].Format = Desc.Desc.RTFormatArray.RTFormats[RT]; #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS RTData[RT].BlendDesc = Desc.Desc.BlendState.RenderTarget[RT]; #endif // #if !D3D12_USE_DERIVED_PSO || D3D12_USE_DERIVED_PSO_SHADER_EXPORTS } return HashData(Data, TotalDataSize); } uint64 FD3D12PipelineStateCacheBase::HashPSODesc(const FD3D12ComputePipelineStateDesc& Desc) { struct ComputePSOData { ShaderBytecodeHash CSHash; UINT NodeMask; D3D12_PIPELINE_STATE_FLAGS Flags; }; ComputePSOData Data; FMemory::Memzero(Data); Data.CSHash = Desc.CSHash; Data.NodeMask = Desc.Desc.NodeMask; Data.Flags = Desc.Desc.Flags; return HashData(&Data, sizeof(Data)); } FD3D12PipelineStateCacheBase::FD3D12PipelineStateCacheBase(FD3D12Adapter* InParent) : FD3D12AdapterChild(InParent) { } FD3D12PipelineStateCacheBase::~FD3D12PipelineStateCacheBase() { CleanupPipelineStateCaches(); } FD3D12PipelineState::FD3D12PipelineState(FD3D12Adapter* Parent) : FD3D12AdapterChild(Parent) , FD3D12MultiNodeGPUObject(FRHIGPUMask::All(), FRHIGPUMask::All()) //Create on all, visible on all , Worker(nullptr) , InitState(PSOInitState::Uninitialized) { INC_DWORD_STAT(STAT_D3D12NumPSOs); } FD3D12PipelineState::~FD3D12PipelineState() { check(!UsePSORefCounting() || GetRefCount() == 0); if (Worker) { Worker->EnsureCompletion(true); delete Worker; Worker = nullptr; } DEC_DWORD_STAT(STAT_D3D12NumPSOs); } bool FD3D12PipelineState::UsePSORefCounting() { #if D3D12_USE_DERIVED_PSO return false; #else bool bShouldRefCountForPSOPrecaching = PipelineStateCache::IsPSOPrecachingEnabled() && !GLowLevelCacheKeepPrecachedPSOs; bool bShouldRefCountForUsedPSOs = !GLowLevelCacheKeepUsedPSOs; return bShouldRefCountForPSOPrecaching || bShouldRefCountForUsedPSOs; #endif } ID3D12PipelineState* FD3D12PipelineState::InternalGetPipelineState() { // Only one thread should finalize any tasks and set the cached pipeline state. // Other threads can wait efficiently behind the lock. Using Sleep() can wake threads 1ms too late. FRWScopeLock Lock(GetPipelineStateMutex, SLT_Write); if (Worker) { check(InitState == PSOInitState::Uninitialized); Worker->EnsureCompletion(true); check(Worker->IsWorkDone()); PipelineState = Worker->GetTask().PSO.GetReference(); // Cleanup the worker. delete Worker; Worker = nullptr; InitState = (PipelineState.GetReference() != nullptr)? PSOInitState::Initialized : PSOInitState::CreationFailed; } else { // Busy-wait for the PSO. This avoids giving up our time slice. if (InitState == PSOInitState::Uninitialized) { double StartTime = FPlatformTime::Seconds(); double BusyWaitWarningTime = CVarPSOStallWarningThresholdInMs.GetValueOnAnyThread() * 0.001; while (InitState == PSOInitState::Uninitialized) { const double Time = FPlatformTime::Seconds(); if (Time - StartTime > BusyWaitWarningTime) { UE_LOG(LogD3D12RHI, Log, TEXT("Waited for PSO creation for %fms"), BusyWaitWarningTime * 1000.0); BusyWaitWarningTime *= 2.0; } } } } return PipelineState.GetReference(); } FD3D12PipelineStateCommonData::FD3D12PipelineStateCommonData(const FD3D12RootSignature* InRootSignature, FD3D12PipelineState* InPipelineState) : RootSignature(InRootSignature) , PipelineState(InPipelineState) { } FD3D12GraphicsPipelineState::FD3D12GraphicsPipelineState( const FGraphicsPipelineStateInitializer& Initializer, const FD3D12RootSignature* InRootSignature, FD3D12PipelineState* InPipelineState) : FD3D12PipelineStateCommonData(InRootSignature, InPipelineState) , PipelineStateInitializer(Initializer) , StreamStrides(InPlace, 0) { // hold on to bound RHI resources PipelineStateInitializer.BoundShaderState.AddRefResources(); if (PipelineStateInitializer.BlendState) { PipelineStateInitializer.BlendState->AddRef(); } if (PipelineStateInitializer.RasterizerState) { PipelineStateInitializer.RasterizerState->AddRef(); } if (PipelineStateInitializer.DepthStencilState) { PipelineStateInitializer.DepthStencilState->AddRef(); } if (Initializer.BoundShaderState.VertexDeclarationRHI) { StreamStrides = static_cast(Initializer.BoundShaderState.VertexDeclarationRHI)->StreamStrides; } bShaderNeedsGlobalConstantBuffer[SF_Vertex] = GetVertexShader() && GetVertexShader()->UsesGlobalUniformBuffer(); bShaderNeedsGlobalConstantBuffer[SF_Mesh] = GetMeshShader() && GetMeshShader()->UsesGlobalUniformBuffer(); bShaderNeedsGlobalConstantBuffer[SF_Amplification] = GetAmplificationShader() && GetAmplificationShader()->UsesGlobalUniformBuffer(); bShaderNeedsGlobalConstantBuffer[SF_Pixel] = GetPixelShader() && GetPixelShader()->UsesGlobalUniformBuffer(); bShaderNeedsGlobalConstantBuffer[SF_Geometry] = GetGeometryShader() && GetGeometryShader()->UsesGlobalUniformBuffer(); // GRHISupportsPipelineStateSortKey SetSortKey(InPipelineState->GetContextSortKey()); } FD3D12GraphicsPipelineState::~FD3D12GraphicsPipelineState() { // At this point the object is not safe to use in the PSO cache. // Currently, the PSO cache manages the lifetime but we could potentially // stop doing an AddRef() and remove the PipelineState from any caches at this point. #if D3D12_USE_DERIVED_PSO delete PipelineState; PipelineState = nullptr; #else if (FD3D12PipelineState::UsePSORefCounting() && PipelineState != nullptr) { uint32 RefCount = PipelineState->Release(); check(RefCount > 0); // precache PSO are here to avoid hitches at runtime when we want to create one that is actually used. We don't need to keep them // around as this can add up to a lot of system memory bool bRemovePrecachedPSO = PipelineStateInitializer.bPSOPrecache && !GLowLevelCacheKeepPrecachedPSOs; bool bRemoveUsedPSO = !PipelineStateInitializer.bPSOPrecache && !GLowLevelCacheKeepUsedPSOs; if (RefCount == 1 && (bRemoveUsedPSO || bRemovePrecachedPSO)) { FD3D12DynamicRHI* D3D12RHI = FD3D12DynamicRHI::GetD3DRHI(); FD3D12PipelineStateCache& PSOCache = D3D12RHI->GetAdapter().GetPSOCache(); // NB: it's possible that this remove will not do anything because another thread might have requested the same PSO since we entered the if PSOCache.RemoveFromLowLevelCache(PipelineState, PipelineStateInitializer, RootSignature); } } #endif // release bound RHI resources PipelineStateInitializer.BoundShaderState.ReleaseResources(); if (PipelineStateInitializer.BlendState) { PipelineStateInitializer.BlendState->Release(); } if (PipelineStateInitializer.RasterizerState) { PipelineStateInitializer.RasterizerState->Release(); } if (PipelineStateInitializer.DepthStencilState) { PipelineStateInitializer.DepthStencilState->Release(); } } FD3D12ComputePipelineState::FD3D12ComputePipelineState(const FComputePipelineStateInitializer& InInitializer, const FD3D12RootSignature* InRootSignature, FD3D12PipelineState* InPipelineState) : FRHIComputePipelineState(InInitializer.ComputeShader) , FD3D12PipelineStateCommonData(InRootSignature, InPipelineState) { FD3D12ComputeShader* D3D12ComputeShader = static_cast(InInitializer.ComputeShader); bShaderNeedsGlobalConstantBuffer = D3D12ComputeShader && D3D12ComputeShader->UsesGlobalUniformBuffer(); bPSOPrecache = InInitializer.bPSOPrecache; } FD3D12ComputePipelineState::~FD3D12ComputePipelineState() { // At this point the object is not safe to use in the PSO cache. // Currently, the PSO cache manages the lifetime but we could potentially // stop doing an AddRef() and remove the PipelineState from any caches at this point. if (FD3D12PipelineState::UsePSORefCounting() && PipelineState != nullptr) { uint32 RefCount = PipelineState->Release(); check(RefCount > 0); bool bRemovePrecachedPSO = bPSOPrecache && !GLowLevelCacheKeepPrecachedPSOs; bool bRemoveUsedPSO = !bPSOPrecache && !GLowLevelCacheKeepUsedPSOs; if (RefCount == 1 && (bRemoveUsedPSO || bRemovePrecachedPSO)) { FD3D12DynamicRHI* D3D12RHI = FD3D12DynamicRHI::GetD3DRHI(); FD3D12PipelineStateCache& PSOCache = D3D12RHI->GetAdapter().GetPSOCache(); // NB: it's possible that this remove will not do anything because another thread might have requested the same PSO since we entered the if PSOCache.RemoveFromLowLevelCache(PipelineState, static_cast(GetComputeShader()), RootSignature); } } } void FD3D12PipelineStateCacheBase::CleanupPipelineStateCaches() { #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE // The runtime caches manage the lifetime of their FD3D12GraphicsPipelineState and FD3D12ComputePipelineState. // We need to release them. { FRWScopeLock Lock(InitializerToGraphicsPipelineMapMutex, FRWScopeLockType::SLT_Write); for (auto Pair : InitializerToGraphicsPipelineMap) { FD3D12GraphicsPipelineState* GraphicsPipelineState = Pair.Value; ensure(GIsRHIInitialized || (!GIsRHIInitialized && GraphicsPipelineState->GetRefCount() == 1)); GraphicsPipelineState->Release(); } InitializerToGraphicsPipelineMap.Reset(); } { FRWScopeLock Lock(ComputeShaderToComputePipelineMapMutex, FRWScopeLockType::SLT_Write); for (auto Pair : ComputeShaderToComputePipelineMap) { FD3D12ComputePipelineState* ComputePipelineState = Pair.Value; ensure(GIsRHIInitialized || (!GIsRHIInitialized && ComputePipelineState->GetRefCount() == 1)); ComputePipelineState->Release(); } ComputeShaderToComputePipelineMap.Reset(); } #endif { FRWScopeLock Lock(LowLevelGraphicsPipelineStateCacheMutex, FRWScopeLockType::SLT_Write); for (auto Iter = LowLevelGraphicsPipelineStateCache.CreateConstIterator(); Iter; ++Iter) { const FD3D12PipelineState* const PipelineState = Iter.Value(); if (FD3D12PipelineState::UsePSORefCounting()) { if (PipelineState) { PipelineState->Release(); } } else { delete PipelineState; } } LowLevelGraphicsPipelineStateCache.Empty(); } { FRWScopeLock Lock(ComputePipelineStateCacheMutex, FRWScopeLockType::SLT_Write); for (auto Iter = ComputePipelineStateCache.CreateConstIterator(); Iter; ++Iter) { const FD3D12PipelineState* const PipelineState = Iter.Value(); if (FD3D12PipelineState::UsePSORefCounting()) { if (PipelineState) { PipelineState->Release(); } } else { delete PipelineState; } } ComputePipelineStateCache.Empty(); } } #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE FD3D12GraphicsPipelineState* FD3D12PipelineStateCacheBase::AddToRuntimeCache(const FGraphicsPipelineStateInitializer& Initializer, uint32 InitializerHash, const FD3D12RootSignature* RootSignature, FD3D12PipelineState* PipelineState) { // Lifetime managed by the runtime cache. AddRef() so the upper level doesn't delete the FD3D12GraphicsPipelineState objects while they're still in the runtime cache. // One alternative is to remove the object from the runtime cache in the FD3D12GraphicsPipelineState destructor. FD3D12GraphicsPipelineState* const GraphicsPipelineState = new FD3D12GraphicsPipelineState(Initializer, RootSignature, PipelineState); GraphicsPipelineState->AddRef(); check(GraphicsPipelineState && InitializerHash != 0); check(GraphicsPipelineState->PipelineState != nullptr); { FRWScopeLock Lock(InitializerToGraphicsPipelineMapMutex, FRWScopeLockType::SLT_Write); InitializerToGraphicsPipelineMap.Add(FInitializerToGPSOMapKey(&GraphicsPipelineState->PipelineStateInitializer, InitializerHash), GraphicsPipelineState); } INC_DWORD_STAT(STAT_PSOGraphicsNumHighlevelCacheEntries); return GraphicsPipelineState; } #endif FD3D12PipelineState* FD3D12PipelineStateCacheBase::FindInLowLevelCache(const FD3D12LowLevelGraphicsPipelineStateDesc& Desc) { check(Desc.CombinedHash != 0); { FRWScopeLock Lock(LowLevelGraphicsPipelineStateCacheMutex, FRWScopeLockType::SLT_ReadOnly); FD3D12PipelineState** Found = LowLevelGraphicsPipelineStateCache.Find(Desc); if (Found) { INC_DWORD_STAT(STAT_PSOGraphicsLowlevelCacheHit); if (FD3D12PipelineState::UsePSORefCounting()) { (*Found)->AddRef().CheckAtLeast(2); } return *Found; } } INC_DWORD_STAT(STAT_PSOGraphicsLowlevelCacheMiss); return nullptr; } FD3D12PipelineState* FD3D12PipelineStateCacheBase::CreateAndAddToLowLevelCache(const FD3D12LowLevelGraphicsPipelineStateDesc& Desc) { // Add PSO to low level cache. FD3D12PipelineState* PipelineState = nullptr; AddToLowLevelCache(Desc, &PipelineState, [this](FD3D12PipelineState** PipelineState, const FD3D12LowLevelGraphicsPipelineStateDesc& Desc) { OnPSOCreated(*PipelineState, Desc); }); return PipelineState; } void FD3D12PipelineStateCacheBase::RemoveFromLowLevelCache(FD3D12PipelineState* PipelineState, const FGraphicsPipelineStateInitializer& PipelineStateInitializer, const FD3D12RootSignature* RootSignature) { if (!FD3D12PipelineState::UsePSORefCounting()) { checkNoEntry(); return; } FRWScopeLock Lock(LowLevelGraphicsPipelineStateCacheMutex, FRWScopeLockType::SLT_Write); // By the time we reach this function, it's possible another thread made a request for the same PSO if (PipelineState->Release() == 0) { FD3D12LowLevelGraphicsPipelineStateDesc LowLevelDesc = GetLowLevelGraphicsPipelineStateDesc(PipelineStateInitializer, RootSignature); LowLevelDesc.Desc.NodeMask = FRHIGPUMask::All().GetNative(); LowLevelDesc.CombinedHash = FD3D12PipelineStateCacheBase::HashPSODesc(LowLevelDesc); int32 ElementsRemoved = LowLevelGraphicsPipelineStateCache.Remove(LowLevelDesc); ensure(ElementsRemoved == 1); } else { // If another thread requested the pipeline state, we need to restore the refcount we just decremented PipelineState->AddRef().CheckAtLeast(2); } } void FD3D12PipelineStateCacheBase::RemoveFromLowLevelCache(FD3D12PipelineState* PipelineState, const FD3D12ComputeShader* ComputeShader, const FD3D12RootSignature* RootSignature) { if (!FD3D12PipelineState::UsePSORefCounting()) { checkNoEntry(); return; } FRWScopeLock Lock(ComputePipelineStateCacheMutex, FRWScopeLockType::SLT_Write); // By the time we reach this function, it's possible another thread made a request for the same PSO if (PipelineState->Release() == 0) { FD3D12ComputePipelineStateDesc LowLevelDesc = GetComputePipelineStateDesc(ComputeShader, RootSignature); LowLevelDesc.Desc.NodeMask = FRHIGPUMask::All().GetNative(); LowLevelDesc.CombinedHash = FD3D12PipelineStateCacheBase::HashPSODesc(LowLevelDesc); int32 ElementsRemoved = ComputePipelineStateCache.Remove(LowLevelDesc); ensure(ElementsRemoved == 1); } else { // If another thread requested the pipeline state, we need to restore the refcount we just decremented PipelineState->AddRef().CheckAtLeast(2); } } void FD3D12PipelineStateCacheBase::AddToLowLevelCache(const FD3D12LowLevelGraphicsPipelineStateDesc& Desc, FD3D12PipelineState** OutPipelineState, const FPostCreateGraphicCallback& PostCreateCallback) { check(Desc.CombinedHash != 0); // Double check the desc doesn't already exist while the lock is taken. // This avoids having multiple threads try to create the same PSO. { FRWScopeLock Lock(LowLevelGraphicsPipelineStateCacheMutex, FRWScopeLockType::SLT_Write); FD3D12PipelineState** const PipelineState = LowLevelGraphicsPipelineStateCache.Find(Desc); if (PipelineState) { // This desc already exists. *OutPipelineState = *PipelineState; if (FD3D12PipelineState::UsePSORefCounting()) { (*OutPipelineState)->AddRef(); } return; } // Add the FD3D12PipelineState object to the cache, but don't actually create the underlying PSO yet while the lock is taken. // This allows multiple threads to create different PSOs at the same time. INC_DWORD_STAT(STAT_PSOGraphicsNumLowlevelCacheEntries); FD3D12PipelineState* NewPipelineState = new FD3D12PipelineState(GetParentAdapter()); LowLevelGraphicsPipelineStateCache.Add(Desc, NewPipelineState); // This AddRef is for the low level cache if (FD3D12PipelineState::UsePSORefCounting()) { NewPipelineState->AddRef(); } *OutPipelineState = NewPipelineState; // This AddRef is for the FD3D12GraphicsPipelineState requesting it if (FD3D12PipelineState::UsePSORefCounting()) { (*OutPipelineState)->AddRef(); } } // Create the underlying PSO and then perform any other additional tasks like cleaning up/adding to caches, etc. PostCreateCallback(OutPipelineState, Desc); } #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE FD3D12ComputePipelineState* FD3D12PipelineStateCacheBase::AddToRuntimeCache(FD3D12ComputeShader* ComputeShader, FD3D12PipelineState* PipelineState) { // Lifetime managed by the runtime cache. AddRef() so the upper level doesn't delete the FD3D12ComputePipelineState objects while they're still in the runtime cache. // One alternative is to remove the object from the runtime cache in the FD3D12ComputePipelineState destructor. FD3D12ComputePipelineState* const ComputePipelineState = new FD3D12ComputePipelineState(ComputeShader, PipelineState); ComputePipelineState->AddRef(); check(ComputePipelineState && ComputeShader != nullptr); check(ComputePipelineState->PipelineState != nullptr); { FRWScopeLock Lock(ComputeShaderToComputePipelineMapMutex, FRWScopeLockType::SLT_Write); ComputeShaderToComputePipelineMap.Add(ComputeShader, ComputePipelineState); } INC_DWORD_STAT(STAT_PSOComputeNumHighlevelCacheEntries); return ComputePipelineState; } #endif FD3D12PipelineState* FD3D12PipelineStateCacheBase::FindInLowLevelCache(const FD3D12ComputePipelineStateDesc& Desc) { check(Desc.CombinedHash != 0); { FRWScopeLock Lock(ComputePipelineStateCacheMutex, FRWScopeLockType::SLT_ReadOnly); FD3D12PipelineState** Found = ComputePipelineStateCache.Find(Desc); if (Found) { INC_DWORD_STAT(STAT_PSOComputeLowlevelCacheHit); if (FD3D12PipelineState::UsePSORefCounting()) { (*Found)->AddRef().CheckAtLeast(2); } return *Found; } } INC_DWORD_STAT(STAT_PSOComputeLowlevelCacheMiss); return nullptr; } FD3D12PipelineState* FD3D12PipelineStateCacheBase::CreateAndAddToLowLevelCache(const FD3D12ComputePipelineStateDesc& Desc) { // Add PSO to low level cache. FD3D12PipelineState* PipelineState = nullptr; AddToLowLevelCache(Desc, &PipelineState, [&](FD3D12PipelineState* PipelineState, const FD3D12ComputePipelineStateDesc& Desc) { OnPSOCreated(PipelineState, Desc); }); return PipelineState; } void FD3D12PipelineStateCacheBase::AddToLowLevelCache(const FD3D12ComputePipelineStateDesc& Desc, FD3D12PipelineState** OutPipelineState, const FPostCreateComputeCallback& PostCreateCallback) { check(Desc.CombinedHash != 0); // Double check the desc doesn't already exist while the lock is taken. // This avoids having multiple threads try to create the same PSO. { FRWScopeLock Lock(ComputePipelineStateCacheMutex, FRWScopeLockType::SLT_Write); FD3D12PipelineState** const PipelineState = ComputePipelineStateCache.Find(Desc); if (PipelineState) { // This desc already exists. *OutPipelineState = *PipelineState; if (FD3D12PipelineState::UsePSORefCounting()) { (*OutPipelineState)->AddRef(); } return; } // Add the FD3D12PipelineState object to the cache, but don't actually create the underlying PSO yet while the lock is taken. // This allows multiple threads to create different PSOs at the same time. INC_DWORD_STAT(STAT_PSOComputeNumLowlevelCacheEntries); FD3D12PipelineState* NewPipelineState = new FD3D12PipelineState(GetParentAdapter()); ComputePipelineStateCache.Add(Desc, NewPipelineState); // This AddRef is for the low level cache. if (FD3D12PipelineState::UsePSORefCounting()) { NewPipelineState->AddRef(); } *OutPipelineState = NewPipelineState; // This AddRef is for the FD3D12ComputePipelineState requesting it. if (FD3D12PipelineState::UsePSORefCounting()) { (*OutPipelineState)->AddRef(); } } // Create the underlying PSO and then perform any other additional tasks like cleaning up/adding to caches, etc. PostCreateCallback(*OutPipelineState, Desc); } #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE FD3D12GraphicsPipelineState* FD3D12PipelineStateCacheBase::FindInRuntimeCache(const FGraphicsPipelineStateInitializer& Initializer, uint32& OutHash) { OutHash = HashData(&Initializer, sizeof(Initializer)); { FRWScopeLock Lock(InitializerToGraphicsPipelineMapMutex, FRWScopeLockType::SLT_ReadOnly); FD3D12GraphicsPipelineState** GraphicsPipelineState = InitializerToGraphicsPipelineMap.Find(FInitializerToGPSOMapKey(&Initializer, OutHash)); if (GraphicsPipelineState) { INC_DWORD_STAT(STAT_PSOGraphicsHighlevelCacheHit); return *GraphicsPipelineState; } } INC_DWORD_STAT(STAT_PSOGraphicsHighlevelCacheMiss); return nullptr; } #endif FD3D12GraphicsPipelineState* FD3D12PipelineStateCacheBase::FindInLoadedCache( const FGraphicsPipelineStateInitializer& Initializer, #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE uint32 InitializerHash, #endif const FD3D12RootSignature* RootSignature, FD3D12LowLevelGraphicsPipelineStateDesc& OutLowLevelDesc) { // TODO: For now PSOs will be created on every node of the LDA chain. OutLowLevelDesc = GetLowLevelGraphicsPipelineStateDesc(Initializer, RootSignature); OutLowLevelDesc.Desc.NodeMask = FRHIGPUMask::All().GetNative(); OutLowLevelDesc.CombinedHash = FD3D12PipelineStateCacheBase::HashPSODesc(OutLowLevelDesc); // First try to find the PSO in the low level cache that can be populated from disk. FD3D12PipelineState* PipelineState = FindInLowLevelCache(OutLowLevelDesc); #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE if (PipelineState) { // Add the PSO to the runtime cache for better performance next time. return AddToRuntimeCache(Initializer, InitializerHash, RootSignature, PipelineState); } // TODO: Try to load from a PipelineLibrary now instead of at Create time. return nullptr; #else if (PipelineState && PipelineState->IsValid()) { return new FD3D12GraphicsPipelineState(Initializer, RootSignature, PipelineState); } return nullptr; #endif } FD3D12GraphicsPipelineState* FD3D12PipelineStateCacheBase::CreateAndAdd( const FGraphicsPipelineStateInitializer& Initializer, #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE uint32 InitializerHash, #endif const FD3D12RootSignature* RootSignature, const FD3D12LowLevelGraphicsPipelineStateDesc& LowLevelDesc) { FD3D12PipelineState* const PipelineState = CreateAndAddToLowLevelCache(LowLevelDesc); #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE if (PipelineState == nullptr) { return nullptr; } // Add the PSO to the runtime cache for better performance next time. return AddToRuntimeCache(Initializer, InitializerHash, RootSignature, PipelineState); #else if (PipelineState && PipelineState->IsValid()) { return new FD3D12GraphicsPipelineState(Initializer, RootSignature, PipelineState); } return nullptr; #endif } #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE FD3D12ComputePipelineState* FD3D12PipelineStateCacheBase::FindInRuntimeCache(const FD3D12ComputeShader* ComputeShader) { { FRWScopeLock Lock(ComputeShaderToComputePipelineMapMutex, FRWScopeLockType::SLT_ReadOnly); FD3D12ComputePipelineState** ComputePipelineState = ComputeShaderToComputePipelineMap.Find(ComputeShader); if (ComputePipelineState) { INC_DWORD_STAT(STAT_PSOComputeHighlevelCacheHit); return *ComputePipelineState; } } INC_DWORD_STAT(STAT_PSOComputeHighlevelCacheMiss); return nullptr; } #endif FD3D12ComputePipelineState* FD3D12PipelineStateCacheBase::FindInLoadedCache(const FComputePipelineStateInitializer& Initializer, const FD3D12RootSignature* RootSignature, FD3D12ComputePipelineStateDesc& OutLowLevelDesc) { // TODO: For now PSOs will be created on every node of the LDA chain. OutLowLevelDesc = GetComputePipelineStateDesc(static_cast(Initializer.ComputeShader), RootSignature); OutLowLevelDesc.Desc.NodeMask = FRHIGPUMask::All().GetNative(); OutLowLevelDesc.CombinedHash = FD3D12PipelineStateCacheBase::HashPSODesc(OutLowLevelDesc); // First try to find the PSO in the low level cache that can be populated from disk. FD3D12PipelineState* PipelineState = FindInLowLevelCache(OutLowLevelDesc); #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE if (PipelineState) { // Add the PSO to the runtime cache for better performance next time. return AddToRuntimeCache(ComputeShader, PipelineState); } // TODO: Try to load from a PipelineLibrary now instead of at Create time. return nullptr; #else if (PipelineState && PipelineState->IsValid()) { return new FD3D12ComputePipelineState(Initializer, RootSignature, PipelineState); } return nullptr; #endif } FD3D12ComputePipelineState* FD3D12PipelineStateCacheBase::CreateAndAdd(const FComputePipelineStateInitializer& Initializer, const FD3D12RootSignature* RootSignature, const FD3D12ComputePipelineStateDesc& LowLevelDesc) { FD3D12PipelineState* const PipelineState = CreateAndAddToLowLevelCache(LowLevelDesc); #if D3D12RHI_USE_HIGH_LEVEL_PSO_CACHE // Add the PSO to the runtime cache for better performance next time. return AddToRuntimeCache(ComputeShader, PipelineState); #else if (PipelineState && PipelineState->IsValid()) { return new FD3D12ComputePipelineState(Initializer, RootSignature, PipelineState); } return nullptr; #endif }