// Copyright Epic Games, Inc. All Rights Reserved.

// Implementation of Device Context State Caching to improve draw
// thread performance by removing redundant device context calls.

#include "D3D12RHIPrivate.h"
#include "D3D12ExplicitDescriptorCache.h"

// This value defines how many descriptors will be in the device global descriptor heap. This heap contains all shader visible view descriptors.
// Other shader visible descriptor heaps (e.g. OnlineViewHeap) are allocated from this pool. Non-visible heaps (e.g. LocalViewHeap) are allocated as standalone.
int32 GGlobalResourceDescriptorHeapSize = 1000 * 1000;
static FAutoConsoleVariableRef CVarGlobalResourceDescriptorHeapSize(
TEXT("D3D12.GlobalResourceDescriptorHeapSize"),
GGlobalResourceDescriptorHeapSize,
TEXT("Global resource descriptor heap size"),
ECVF_ReadOnly
);

int32 GGlobalSamplerDescriptorHeapSize = 2048;
static FAutoConsoleVariableRef CVarGlobalSamplerDescriptorHeapSize(
TEXT("D3D12.GlobalSamplerDescriptorHeapSize"),
GGlobalSamplerDescriptorHeapSize,
TEXT("Global sampler descriptor heap size"),
ECVF_ReadOnly
);

// This value defines how many descriptors will be in the device local view heap.
// This should be tweaked for each title as heaps require VRAM. The default value of 500k takes up ~16MB
int32 GLocalViewHeapSize = 500 * 1000;
static FAutoConsoleVariableRef CVarLocalViewHeapSize(
TEXT("D3D12.LocalViewHeapSize"),
GLocalViewHeapSize,
TEXT("Local view heap size"),
ECVF_ReadOnly
);

int32 GGlobalSamplerHeapSize = 2048;
static FAutoConsoleVariableRef CVarGlobalSamplerHeapSize(
TEXT("D3D12.GlobalSamplerHeapSize"),
GGlobalSamplerHeapSize,
TEXT("Global sampler descriptor heap size"),
ECVF_ReadOnly
);

// This value defines how many descriptors will be in the device online view heap which
// is shared across contexts to allow the driver to eliminate redundant descriptor heap sets.
// This should be tweaked for each title as heaps require VRAM. The default value of 500k takes up ~16MB
int32 GOnlineDescriptorHeapSize = 500 * 1000;
static FAutoConsoleVariableRef CVarOnlineDescriptorHeapSize(
TEXT("D3D12.OnlineDescriptorHeapSize"),
GOnlineDescriptorHeapSize,
TEXT("Online descriptor heap size"),
ECVF_ReadOnly
);

int32 GOnlineDescriptorHeapBlockSize = 2000;
static FAutoConsoleVariableRef CVarOnlineDescriptorHeapBlockSize(
TEXT("D3D12.OnlineDescriptorHeapBlockSize"),
GOnlineDescriptorHeapBlockSize,
TEXT("Block size for sub allocations on the global view descriptor heap."),
ECVF_ReadOnly
);

int32 GBindlessOnlineDescriptorHeapSize = 500 * 1000;
static FAutoConsoleVariableRef CVarBindlessOnlineDescriptorHeapSize(
TEXT("D3D12.BindlessOnlineDescriptorHeapSize"),
GBindlessOnlineDescriptorHeapSize,
TEXT("Online descriptor heap size"),
ECVF_ReadOnly
);

int32 GBindlessOnlineDescriptorHeapBlockSize = 2000;
static FAutoConsoleVariableRef CVarBindlessOnlineDescriptorHeapBlockSize(
TEXT("D3D12.BindlessOnlineDescriptorHeapBlockSize"),
GBindlessOnlineDescriptorHeapBlockSize,
TEXT("Block size for sub allocations on the global view descriptor heap."),
ECVF_ReadOnly
);

inline bool operator!=(D3D12_CPU_DESCRIPTOR_HANDLE lhs, D3D12_CPU_DESCRIPTOR_HANDLE rhs)
|
|
{
|
|
return lhs.ptr != rhs.ptr;
|
|
}
|
|
|
|
#if D3D12_STATE_CACHE_RUNTIME_TOGGLE
|
|
|
|
// Default the state caching system to on.
|
|
bool GD3D12SkipStateCaching = false;
|
|
|
|
// A self registering exec helper to check for the TOGGLESTATECACHE command.
|
|
class FD3D12ToggleStateCacheExecHelper : public FSelfRegisteringExec
|
|
{
|
|
virtual bool Exec(class UWorld* InWorld, const TCHAR* Cmd, FOutputDevice& Ar)
|
|
{
|
|
if (FParse::Command(&Cmd, TEXT("TOGGLESTATECACHE")))
|
|
{
|
|
GD3D12SkipStateCaching = !GD3D12SkipStateCaching;
|
|
Ar.Log(FString::Printf(TEXT("D3D12 State Caching: %s"), GD3D12SkipStateCaching ? TEXT("OFF") : TEXT("ON")));
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
static FD3D12ToggleStateCacheExecHelper GD3D12ToggleStateCacheExecHelper;
|
|
|
|
#endif // D3D12_STATE_CACHE_RUNTIME_TOGGLE
|
|
|
|
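// Construct the state cache for a single command context / GPU node. Caches the adapter's resource
// binding tier, validates the configured heap sizes against the device limits (checkCode), initializes
// the descriptor cache with the local view heap and sampler descriptor counts, and resets all cached
// state so the first ApplyState sets everything.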
FD3D12StateCache::FD3D12StateCache(FD3D12CommandContext& Context, FRHIGPUMask Node)
|
|
: FD3D12DeviceChild(Context.Device)
|
|
, FD3D12SingleNodeGPUObject(Node)
|
|
, CmdContext(Context)
|
|
, DescriptorCache(Context, Node)
|
|
{
|
|
FD3D12Adapter* Adapter = Parent->GetParentAdapter();
|
|
|
|
// Cache the resource binding tier
|
|
ResourceBindingTier = Adapter->GetResourceBindingTier();
|
|
|
|
const uint32 NumSamplerDescriptors = NUM_SAMPLER_DESCRIPTORS;
|
|
|
|
checkCode(
|
|
const int32 MaximumResourceHeapSize = Adapter->GetMaxDescriptorsForHeapType(ERHIDescriptorHeapType::Standard);
|
|
const int32 MaximumSamplerHeapSize = Adapter->GetMaxDescriptorsForHeapType(ERHIDescriptorHeapType::Sampler);
|
|
|
|
check(GLocalViewHeapSize <= MaximumResourceHeapSize || MaximumResourceHeapSize < 0);
|
|
check(GOnlineDescriptorHeapSize <= MaximumResourceHeapSize || MaximumResourceHeapSize < 0);
|
|
|
|
check(NumSamplerDescriptors <= MaximumSamplerHeapSize);
|
|
);
|
|
|
|
DescriptorCache.Init(GLocalViewHeapSize, NumSamplerDescriptors);
|
|
|
|
ClearState();
|
|
}
|
|
|
|
void FD3D12StateCache::ClearState()
|
|
{
|
|
PipelineState = {};
|
|
DirtyState();
|
|
}
|
|
|
|
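// Clears every cached SRV binding. bSRVSCleared lets repeated calls early-out when no SRV has been
// bound since the last clear.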
void FD3D12StateCache::ClearSRVs()
|
|
{
|
|
if (bSRVSCleared)
|
|
{
|
|
return;
|
|
}
|
|
|
|
PipelineState.Common.SRVCache.Clear();
|
|
|
|
bSRVSCleared = true;
|
|
}
|
|
|
|
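// Unbinds any cached SRVs and/or UAVs (selected by ShaderParameterTypeMask) on the given shader stage
// that reference the supplied resource location, so stale views are not left bound.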
void FD3D12StateCache::ClearResourceViewCaches(EShaderFrequency ShaderFrequency, FD3D12ResourceLocation*& ResourceLocation, EShaderParameterTypeMask ShaderParameterTypeMask)
|
|
{
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12ClearShaderResourceViewsTime);
|
|
|
|
if (EnumHasAnyFlags(ShaderParameterTypeMask, EShaderParameterTypeMask::SRVMask))
|
|
{
|
|
if (PipelineState.Common.SRVCache.MaxBoundIndex[ShaderFrequency] >= 0)
|
|
{
|
|
auto& CurrentShaderResourceViews = PipelineState.Common.SRVCache.Views[ShaderFrequency];
|
|
for (int32 i = 0; i <= PipelineState.Common.SRVCache.MaxBoundIndex[ShaderFrequency]; ++i)
|
|
{
|
|
if (CurrentShaderResourceViews[i] && CurrentShaderResourceViews[i]->GetResourceLocation() == ResourceLocation)
|
|
{
|
|
SetShaderResourceView(ShaderFrequency, nullptr, i);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (EnumHasAnyFlags(ShaderParameterTypeMask, EShaderParameterTypeMask::UAVMask))
|
|
{
|
|
auto& CurrentShaderResourceViews = PipelineState.Common.UAVCache.Views[ShaderFrequency];
|
|
for (int32 i = 0; i < MAX_UAVS; ++i)
|
|
{
|
|
if (CurrentShaderResourceViews[i] && CurrentShaderResourceViews[i]->GetResourceLocation() == ResourceLocation)
|
|
{
|
|
SetUAV(ShaderFrequency, i, nullptr);
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
void FD3D12StateCache::FlushComputeShaderCache(bool bForce)
|
|
{
|
|
if (bForce)
|
|
{
|
|
CmdContext.AddUAVBarrier();
|
|
INC_DWORD_STAT(STAT_D3D12UAVBarriers);
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::DirtyStateForNewCommandList()
|
|
{
|
|
// Dirty state that doesn't align with command list defaults.
|
|
|
|
// Always need to set PSOs and root signatures
|
|
PipelineState.Common.bNeedSetPSO = true;
|
|
PipelineState.Common.bNeedSetRootConstants = true;
|
|
PipelineState.Compute.bNeedSetRootSignature = true;
|
|
PipelineState.Graphics.bNeedSetRootSignature = true;
|
|
bNeedSetPrimitiveTopology = true;
|
|
|
|
if (PipelineState.Graphics.VBCache.BoundVBMask) { bNeedSetVB = true; }
|
|
|
|
// IndexBuffers are set in DrawIndexed*() calls, so there's no way to depend on previously set IndexBuffers without making a new DrawIndexed*() call.
|
|
PipelineState.Graphics.IBCache.Clear();
|
|
|
|
if (PipelineState.Graphics.CurrentNumberOfRenderTargets || PipelineState.Graphics.CurrentDepthStencilTarget) { bNeedSetRTs = true; }
|
|
if (PipelineState.Graphics.CurrentNumberOfViewports) { bNeedSetViewports = true; }
|
|
if (PipelineState.Graphics.CurrentNumberOfScissorRects) { bNeedSetScissorRects = true; }
|
|
|
|
if (PipelineState.Graphics.CurrentBlendFactor[0] != D3D12_DEFAULT_BLEND_FACTOR_RED ||
|
|
PipelineState.Graphics.CurrentBlendFactor[1] != D3D12_DEFAULT_BLEND_FACTOR_GREEN ||
|
|
PipelineState.Graphics.CurrentBlendFactor[2] != D3D12_DEFAULT_BLEND_FACTOR_BLUE ||
|
|
PipelineState.Graphics.CurrentBlendFactor[3] != D3D12_DEFAULT_BLEND_FACTOR_ALPHA)
|
|
{
|
|
bNeedSetBlendFactor = true;
|
|
}
|
|
|
|
if (PipelineState.Graphics.CurrentReferenceStencil != D3D12_DEFAULT_STENCIL_REFERENCE) { bNeedSetStencilRef = true; }
|
|
|
|
if (PipelineState.Graphics.MinDepth != 0.0 ||
|
|
PipelineState.Graphics.MaxDepth != 1.0)
|
|
{
|
|
bNeedSetDepthBounds = GSupportsDepthBoundsTest;
|
|
}
|
|
|
|
bNeedSetShadingRate = GRHISupportsPipelineVariableRateShading;
|
|
bNeedSetShadingRateImage = GRHISupportsAttachmentVariableRateShading;
|
|
|
|
// Always dirty View and Sampler bindings. We detect the slots that are actually used at Draw/Dispatch time.
|
|
PipelineState.Common.SRVCache.DirtyAll();
|
|
PipelineState.Common.UAVCache.DirtyAll();
|
|
PipelineState.Common.CBVCache.DirtyAll();
|
|
PipelineState.Common.SamplerCache.DirtyAll();
|
|
}
|
|
|
|
void FD3D12StateCache::DirtyState()
|
|
{
|
|
// Mark bits dirty so the next call to ApplyState will set all this state again
|
|
PipelineState.Common.bNeedSetPSO = true;
|
|
PipelineState.Common.bNeedSetRootConstants = true;
|
|
PipelineState.Compute.bNeedSetRootSignature = true;
|
|
PipelineState.Graphics.bNeedSetRootSignature = true;
|
|
bNeedSetVB = true;
|
|
bNeedSetRTs = true;
|
|
bNeedSetViewports = true;
|
|
bNeedSetScissorRects = true;
|
|
bNeedSetPrimitiveTopology = true;
|
|
bNeedSetBlendFactor = true;
|
|
bNeedSetStencilRef = true;
|
|
bNeedSetDepthBounds = GSupportsDepthBoundsTest;
|
|
bNeedSetShadingRate = GRHISupportsPipelineVariableRateShading;
|
|
bNeedSetShadingRateImage = GRHISupportsAttachmentVariableRateShading;
|
|
PipelineState.Common.SRVCache.DirtyAll();
|
|
PipelineState.Common.UAVCache.DirtyAll();
|
|
PipelineState.Common.CBVCache.DirtyAll();
|
|
PipelineState.Common.SamplerCache.DirtyAll();
|
|
}
|
|
|
|
void FD3D12StateCache::DirtyViewDescriptorTables()
|
|
{
|
|
// Mark the CBV/SRV/UAV descriptor tables dirty for the current root signature.
|
|
// Note: Descriptor table state is undefined at the beginning of a command list and after descriptor heaps are changed on a command list.
|
|
// This will cause the next call to ApplyState to copy and set these descriptors again.
|
|
PipelineState.Common.SRVCache.DirtyAll();
|
|
PipelineState.Common.UAVCache.DirtyAll();
|
|
PipelineState.Common.CBVCache.DirtyAll(GDescriptorTableCBVSlotMask); // Only mark descriptor table slots as dirty.
|
|
}
|
|
|
|
void FD3D12StateCache::DirtySamplerDescriptorTables()
|
|
{
|
|
// Mark the sampler descriptor tables dirty for the current root signature.
|
|
// Note: Descriptor table state is undefined at the beginning of a command list and after descriptor heaps are changed on a command list.
|
|
// This will cause the next call to ApplyState to copy and set these descriptors again.
|
|
PipelineState.Common.SamplerCache.DirtyAll();
|
|
}
|
|
|
|
void FD3D12StateCache::SetViewport(const D3D12_VIEWPORT& Viewport)
|
|
{
|
|
if ((PipelineState.Graphics.CurrentNumberOfViewports != 1 || FMemory::Memcmp(&PipelineState.Graphics.CurrentViewport[0], &Viewport, sizeof(D3D12_VIEWPORT))) || GD3D12SkipStateCaching)
|
|
{
|
|
FMemory::Memcpy(&PipelineState.Graphics.CurrentViewport[0], &Viewport, sizeof(D3D12_VIEWPORT));
|
|
PipelineState.Graphics.CurrentNumberOfViewports = 1;
|
|
bNeedSetViewports = true;
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::SetViewports(uint32 Count, const D3D12_VIEWPORT* const Viewports)
|
|
{
|
|
check(Count < UE_ARRAY_COUNT(PipelineState.Graphics.CurrentViewport));
|
|
if ((PipelineState.Graphics.CurrentNumberOfViewports != Count || FMemory::Memcmp(&PipelineState.Graphics.CurrentViewport[0], Viewports, sizeof(D3D12_VIEWPORT) * Count)) || GD3D12SkipStateCaching)
|
|
{
|
|
FMemory::Memcpy(&PipelineState.Graphics.CurrentViewport[0], Viewports, sizeof(D3D12_VIEWPORT) * Count);
|
|
PipelineState.Graphics.CurrentNumberOfViewports = Count;
|
|
bNeedSetViewports = true;
|
|
}
|
|
}
|
|
|
|
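// Debug sanity check: verifies the scissor rect is not inverted and lies fully within the given
// viewport, reporting both rectangles through ensureMsgf when it does not.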
static void ValidateScissorRect(const D3D12_VIEWPORT& Viewport, const D3D12_RECT& ScissorRect)
|
|
{
|
|
bool bScissorRectValid = true;
|
|
bScissorRectValid = bScissorRectValid && ScissorRect.left >= (LONG)Viewport.TopLeftX;
|
|
bScissorRectValid = bScissorRectValid && ScissorRect.top >= (LONG)Viewport.TopLeftY;
|
|
bScissorRectValid = bScissorRectValid && ScissorRect.right <= (LONG)Viewport.TopLeftX + (LONG)Viewport.Width;
|
|
bScissorRectValid = bScissorRectValid && ScissorRect.bottom <= (LONG)Viewport.TopLeftY + (LONG)Viewport.Height;
|
|
bScissorRectValid = bScissorRectValid && ScissorRect.left <= ScissorRect.right && ScissorRect.top <= ScissorRect.bottom;
|
|
|
|
ensureMsgf(bScissorRectValid,
|
|
TEXT("Scissor invalid with current Viewport. Scissor: [left:%li, top:%li, right:%li, bottom:%li]. Viewport: [left:%li, top:%li, right:%li, bottom:%li]")
|
|
, ScissorRect.left
|
|
, ScissorRect.top
|
|
, ScissorRect.right
|
|
, ScissorRect.bottom
|
|
, (LONG)Viewport.TopLeftX
|
|
, (LONG)Viewport.TopLeftY
|
|
, (LONG)Viewport.TopLeftX + (LONG)Viewport.Width
|
|
, (LONG)Viewport.TopLeftY + (LONG)Viewport.Height);
|
|
}
|
|
|
|
void FD3D12StateCache::SetScissorRect(const D3D12_RECT& ScissorRect)
|
|
{
|
|
ValidateScissorRect(PipelineState.Graphics.CurrentViewport[0], ScissorRect);
|
|
|
|
if ((PipelineState.Graphics.CurrentNumberOfScissorRects != 1 || FMemory::Memcmp(&PipelineState.Graphics.CurrentScissorRects[0], &ScissorRect, sizeof(D3D12_RECT))) || GD3D12SkipStateCaching)
|
|
{
|
|
FMemory::Memcpy(&PipelineState.Graphics.CurrentScissorRects[0], &ScissorRect, sizeof(D3D12_RECT));
|
|
PipelineState.Graphics.CurrentNumberOfScissorRects = 1;
|
|
bNeedSetScissorRects = true;
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::SetScissorRects(uint32 Count, const D3D12_RECT* const ScissorRects)
|
|
{
|
|
check(Count < UE_ARRAY_COUNT(PipelineState.Graphics.CurrentScissorRects));
|
|
|
|
for (uint32 Rect = 0; Rect < Count; ++Rect)
|
|
{
|
|
ValidateScissorRect(PipelineState.Graphics.CurrentViewport[Rect], ScissorRects[Rect]);
|
|
}
|
|
|
|
if ((PipelineState.Graphics.CurrentNumberOfScissorRects != Count || FMemory::Memcmp(&PipelineState.Graphics.CurrentScissorRects[0], ScissorRects, sizeof(D3D12_RECT) * Count)) || GD3D12SkipStateCaching)
|
|
{
|
|
FMemory::Memcpy(&PipelineState.Graphics.CurrentScissorRects[0], ScissorRects, sizeof(D3D12_RECT) * Count);
|
|
PipelineState.Graphics.CurrentNumberOfScissorRects = Count;
|
|
bNeedSetScissorRects = true;
|
|
}
|
|
}
|
|
|
|
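// Binds the descriptor heaps selected by the bindless flag through the descriptor cache. Returns true
// if the bound heaps actually changed, which invalidates any previously set descriptor tables.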
bool FD3D12StateCache::InternalSetDescriptorHeaps(bool bBindless)
|
|
{
|
|
ED3D12SetDescriptorHeapsFlags Flags = ED3D12SetDescriptorHeapsFlags::None;
|
|
if (bBindless)
|
|
{
|
|
EnumAddFlags(Flags, ED3D12SetDescriptorHeapsFlags::Bindless);
|
|
}
|
|
|
|
return DescriptorCache.SetDescriptorHeaps(Flags);
|
|
}
|
|
|
|
inline bool ShouldSkipStage(uint32 Stage)
|
|
{
|
|
return ((Stage == SF_Mesh || Stage == SF_Amplification) && !GRHISupportsMeshShadersTier0);
|
|
}
|
|
|
|
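// Sets the compute or graphics root signature if it is marked dirty (or bForce is true). Binding a new
// root signature leaves all root parameters undefined, so the view, sampler and constant caches for
// that pipeline are dirtied and root constants are flagged for re-set. Returns true if a root
// signature was actually set.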
bool FD3D12StateCache::InternalSetRootSignature(ED3D12PipelineType InPipelineType, const FD3D12RootSignature* InRootSignature, bool bForce)
|
|
{
|
|
bool bWasRootSignatureChanged = false;
|
|
|
|
if (InPipelineType == ED3D12PipelineType::Compute)
|
|
{
|
|
if (PipelineState.Compute.bNeedSetRootSignature || bForce)
|
|
{
|
|
CmdContext.GraphicsCommandList()->SetComputeRootSignature(InRootSignature->GetRootSignature());
|
|
PipelineState.Compute.bNeedSetRootSignature = false;
|
|
|
|
// After setting a root signature, all root parameters are undefined and must be set again.
|
|
PipelineState.Common.SRVCache.DirtyCompute();
|
|
PipelineState.Common.UAVCache.DirtyCompute();
|
|
PipelineState.Common.SamplerCache.DirtyCompute();
|
|
PipelineState.Common.CBVCache.DirtyCompute();
|
|
PipelineState.Common.bNeedSetRootConstants = true;
|
|
|
|
bWasRootSignatureChanged = true;
|
|
}
|
|
}
|
|
else if (InPipelineType == ED3D12PipelineType::Graphics)
|
|
{
|
|
// See if we need to set a graphics root signature
|
|
if (PipelineState.Graphics.bNeedSetRootSignature || bForce)
|
|
{
|
|
CmdContext.GraphicsCommandList()->SetGraphicsRootSignature(InRootSignature->GetRootSignature());
|
|
PipelineState.Graphics.bNeedSetRootSignature = false;
|
|
|
|
// After setting a root signature, all root parameters are undefined and must be set again.
|
|
PipelineState.Common.SRVCache.DirtyGraphics();
|
|
PipelineState.Common.UAVCache.DirtyGraphics();
|
|
PipelineState.Common.SamplerCache.DirtyGraphics();
|
|
PipelineState.Common.CBVCache.DirtyGraphics();
|
|
PipelineState.Common.bNeedSetRootConstants = true;
|
|
|
|
bWasRootSignatureChanged = true;
|
|
}
|
|
}
|
|
|
|
return bWasRootSignatureChanged;
|
|
}
|
|
|
|
void FD3D12StateCache::InternalSetPipelineState(FD3D12PipelineState* InPipelineState)
|
|
{
|
|
// See if we need to set our PSO:
|
|
// In D3D11, you could Set dispatch arguments, then set Draw arguments, then call Draw/Dispatch/Draw/Dispatch without setting arguments again.
|
|
// In D3D12, we need to understand when the app switches between Draw/Dispatch and make sure the correct PSO is set.
|
|
|
|
ID3D12PipelineState* const CurrentD3DPipelineState = PipelineState.Common.CurrentPipelineStateObject;
|
|
ID3D12PipelineState* const PendingD3DPipelineState = InPipelineState->GetPipelineState();
|
|
|
|
if (PipelineState.Common.bNeedSetPSO || CurrentD3DPipelineState == nullptr || CurrentD3DPipelineState != PendingD3DPipelineState)
|
|
{
|
|
PipelineState.Common.CurrentPipelineStateObject = PendingD3DPipelineState;
|
|
|
|
CmdContext.GraphicsCommandList()->SetPipelineState(PendingD3DPipelineState);
|
|
|
|
PipelineState.Common.bNeedSetPSO = false;
|
|
}
|
|
}
|
|
|
|
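// Called immediately before a Draw/Dispatch. Ensures the command list is open, binds descriptor heaps,
// the root signature and the PSO, then copies any dirty view/sampler descriptors for the stages used
// by the current pipeline into the online heaps, updates root constants, and flushes pending resource
// barriers.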
void FD3D12StateCache::ApplyState(ERHIPipeline HardwarePipe, ED3D12PipelineType PipelineType)
|
|
{
|
|
// Make sure the command list is opened before we start caching anything.
|
|
// There are certain situations where no command list actions were taken before here and we need it to be open before we cache anything.
|
|
CmdContext.OpenIfNotAlready();
|
|
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12ApplyStateTime);
|
|
const bool bForceState = false;
|
|
if (bForceState)
|
|
{
|
|
// Mark all state as dirty.
|
|
DirtyState();
|
|
}
|
|
|
|
#if PLATFORM_SUPPORTS_VIRTUAL_TEXTURES
|
|
CmdContext.FlushTextureCacheIfNeeded();
|
|
#endif
|
|
|
|
FD3D12PipelineStateCommonData* PSOCommonData = nullptr;
|
|
|
|
// PSO
|
|
if (PipelineType == ED3D12PipelineType::Compute)
|
|
{
|
|
PSOCommonData = GetComputePipelineState();
|
|
}
|
|
else if (PipelineType == ED3D12PipelineType::Graphics)
|
|
{
|
|
PSOCommonData = GetGraphicsPipelineState();
|
|
}
|
|
else
|
|
{
|
|
checkf(false, TEXT("Unexpected pipeline type: %d"), (uint32)PipelineType);
|
|
return;
|
|
}
|
|
|
|
const bool bShaderUsesBindlessResources = PSOCommonData->RootSignature->UsesDynamicResources();
|
|
const bool bShaderUsesBindlessSamplers = PSOCommonData->RootSignature->UsesDynamicSamplers();
|
|
|
|
const bool bShaderUsesBindfulResources = PSOCommonData->RootSignature->HasTableResources();
|
|
const bool bShaderUsesBindfulSamplers = PSOCommonData->RootSignature->HasSamplers();
|
|
|
|
const bool bApplyResourceTables = !bShaderUsesBindlessResources && bShaderUsesBindfulResources;
|
|
const bool bApplySamplerTables = !bShaderUsesBindlessSamplers && bShaderUsesBindfulSamplers;
|
|
|
|
bool bBindlessDescriptorHeaps = false;
|
|
bool bSetExplicitDescriptorCache = false;
|
|
|
|
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
{
|
|
FD3D12BindlessDescriptorManager& BindlessManager = GetParentDevice()->GetBindlessDescriptorManager();
|
|
|
|
// Require explicit descriptor heaps when the shader uses bindless resources but bindless is only configured for ray tracing.
|
|
if (bShaderUsesBindlessResources && BindlessManager.GetConfiguration() == ERHIBindlessConfiguration::RayTracing)
|
|
{
|
|
// Samplers should be bindless as well
|
|
check(bShaderUsesBindlessSamplers);
|
|
|
|
// Should not have any bindful SRVs or UAVs
|
|
check(!bShaderUsesBindfulResources);
|
|
|
|
// Everything is bindless, so no CBV, SRV or UAV view data is required
|
|
FD3D12ExplicitDescriptorCache TransientDescriptorCache(GetParentDevice(), 1);
|
|
TransientDescriptorCache.Init(0, 0, 0, ERHIBindlessConfiguration::RayTracing);
|
|
|
|
GetDescriptorCache()->SetExplicitDescriptorCache(TransientDescriptorCache);
|
|
|
|
bSetExplicitDescriptorCache = true;
|
|
}
|
|
else if (IsBindlessFullyEnabled(BindlessManager.GetConfiguration()))
|
|
{
|
|
check(bShaderUsesBindlessResources || !bShaderUsesBindfulResources);
|
|
check(bShaderUsesBindlessSamplers || !bShaderUsesBindfulSamplers);
|
|
|
|
bBindlessDescriptorHeaps = true;
|
|
}
|
|
else if (IsBindlessEnabledForAnyGraphics(BindlessManager.GetConfiguration()))
|
|
{
|
|
const bool bAnyBindless = (bShaderUsesBindlessResources || bShaderUsesBindlessSamplers);
|
|
const bool bAnyBindful = (bShaderUsesBindfulResources || bShaderUsesBindfulSamplers);
|
|
|
|
check(bAnyBindless != bAnyBindful || !bAnyBindful);
|
|
|
|
bBindlessDescriptorHeaps = bAnyBindless;
|
|
}
|
|
else
|
|
{
|
|
check(!bShaderUsesBindlessResources && !bShaderUsesBindlessSamplers);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
const bool bDescriptorHeapsChanged = bSetExplicitDescriptorCache || InternalSetDescriptorHeaps(bBindlessDescriptorHeaps);
|
|
const bool bRootSignatureChanged = InternalSetRootSignature(PipelineType, PSOCommonData->RootSignature, bDescriptorHeapsChanged);
|
|
|
|
// Ensure the correct graphics PSO is set.
|
|
InternalSetPipelineState(PSOCommonData->PipelineState);
|
|
|
|
if (bRootSignatureChanged)
|
|
{
|
|
CmdContext.BindDiagnosticBuffer(PSOCommonData->RootSignature, PipelineType);
|
|
}
|
|
|
|
// Need to cache compute budget, as we need to reset after PSO changes
|
|
if (PipelineType == ED3D12PipelineType::Compute && CmdContext.IsAsyncComputeContext())
|
|
{
|
|
CmdContext.SetAsyncComputeBudgetInternal(PipelineState.Compute.ComputeBudget);
|
|
}
|
|
|
|
if (PipelineType == ED3D12PipelineType::Graphics)
|
|
{
|
|
// Setup non-heap bindings
|
|
if (bNeedSetVB)
|
|
{
|
|
bNeedSetVB = false;
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12ApplyStateSetVertexBufferTime);
|
|
DescriptorCache.SetVertexBuffers(PipelineState.Graphics.VBCache);
|
|
}
|
|
if (bNeedSetViewports)
|
|
{
|
|
bNeedSetViewports = false;
|
|
CmdContext.GraphicsCommandList()->RSSetViewports(PipelineState.Graphics.CurrentNumberOfViewports, PipelineState.Graphics.CurrentViewport);
|
|
}
|
|
if (bNeedSetScissorRects)
|
|
{
|
|
bNeedSetScissorRects = false;
|
|
CmdContext.GraphicsCommandList()->RSSetScissorRects(PipelineState.Graphics.CurrentNumberOfScissorRects, PipelineState.Graphics.CurrentScissorRects);
|
|
}
|
|
if (bNeedSetPrimitiveTopology)
|
|
{
|
|
bNeedSetPrimitiveTopology = false;
|
|
CmdContext.GraphicsCommandList()->IASetPrimitiveTopology(PipelineState.Graphics.CurrentPrimitiveTopology);
|
|
}
|
|
if (bNeedSetBlendFactor)
|
|
{
|
|
bNeedSetBlendFactor = false;
|
|
CmdContext.GraphicsCommandList()->OMSetBlendFactor(PipelineState.Graphics.CurrentBlendFactor);
|
|
}
|
|
if (bNeedSetStencilRef)
|
|
{
|
|
bNeedSetStencilRef = false;
|
|
CmdContext.GraphicsCommandList()->OMSetStencilRef(PipelineState.Graphics.CurrentReferenceStencil);
|
|
}
|
|
if (bNeedSetRTs)
|
|
{
|
|
bNeedSetRTs = false;
|
|
DescriptorCache.SetRenderTargets(PipelineState.Graphics.RenderTargetArray, PipelineState.Graphics.CurrentNumberOfRenderTargets, PipelineState.Graphics.CurrentDepthStencilTarget);
|
|
}
|
|
if (bNeedSetDepthBounds)
|
|
{
|
|
bNeedSetDepthBounds = false;
|
|
CmdContext.SetDepthBounds(PipelineState.Graphics.MinDepth, PipelineState.Graphics.MaxDepth);
|
|
}
|
|
|
|
// The shading rate image can influence RSSetShadingRate, because we want to set the combiners to passthrough/passthrough if the rate = 1x1 and the SRI is nullptr
|
|
if (bNeedSetShadingRate || bNeedSetShadingRateImage)
|
|
{
|
|
bNeedSetShadingRate = false;
|
|
bNeedSetShadingRateImage = false;
|
|
CmdContext.SetShadingRate(PipelineState.Graphics.DrawShadingRate, PipelineState.Graphics.ShadingRateImage, PipelineState.Graphics.Combiners);
|
|
}
|
|
}
|
|
|
|
// Note that ray tracing pipeline shares state with compute
|
|
const uint32 StartStage = PipelineType == ED3D12PipelineType::Graphics ? 0 : SF_Compute;
|
|
const uint32 EndStage = PipelineType == ED3D12PipelineType::Graphics ? SF_Compute : SF_NumStandardFrequencies;
|
|
|
|
//
|
|
// Reserve space in descriptor heaps
|
|
// Since this can cause heap rollover (which causes old bindings to become invalid), the reserve must be done atomically
|
|
//
|
|
|
|
// Samplers
|
|
if (bApplySamplerTables)
|
|
{
|
|
ApplySamplerTables(PSOCommonData->RootSignature, StartStage, EndStage);
|
|
}
|
|
|
|
if (bApplyResourceTables)
|
|
{
|
|
ApplyResourceTables(PSOCommonData->RootSignature, StartStage, EndStage);
|
|
}
|
|
else if (bShaderUsesBindlessResources)
|
|
{
|
|
ApplyBindlessResources(PSOCommonData->RootSignature, StartStage, EndStage);
|
|
}
|
|
|
|
ApplyConstants(PSOCommonData->RootSignature, StartStage, EndStage);
|
|
|
|
int8 RootConstantsSlot = PSOCommonData->RootSignature->GetRootConstantsSlot();
|
|
if (PipelineState.Common.bNeedSetRootConstants && RootConstantsSlot >= 0)
|
|
{
|
|
PipelineState.Common.bNeedSetRootConstants = false;
|
|
|
|
uint32 UERootConstants[4];
|
|
UERootConstants[0] = PipelineState.Common.ShaderRootConstants.X;
|
|
UERootConstants[1] = PipelineState.Common.ShaderRootConstants.Y;
|
|
UERootConstants[2] = PipelineState.Common.ShaderRootConstants.Z;
|
|
UERootConstants[3] = PipelineState.Common.ShaderRootConstants.W;
|
|
|
|
if (PipelineType == ED3D12PipelineType::Compute)
|
|
{
|
|
CmdContext.GraphicsCommandList()->SetComputeRoot32BitConstants(RootConstantsSlot, 4, &UERootConstants[0], 0);
|
|
}
|
|
else if (PipelineType == ED3D12PipelineType::Graphics) //-V547
|
|
{
|
|
CmdContext.GraphicsCommandList()->SetGraphicsRoot32BitConstants(RootConstantsSlot, 4, &UERootConstants[0], 0);
|
|
}
|
|
else
|
|
{
|
|
checkNoEntry();
|
|
}
|
|
}
|
|
|
|
// Flush any needed resource barriers
|
|
CmdContext.FlushResourceBarriers();
|
|
|
|
#if ASSERT_RESOURCE_STATES
|
|
bool bSucceeded = AssertResourceStates(PipelineType);
|
|
check(bSucceeded);
|
|
#endif
|
|
}
|
|
|
|
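// Builds and binds UAV/SRV (and, when enabled, CBV) descriptor tables for every dirty slot used by the
// current shaders. View heap space is reserved up front; if the reservation forces a heap rollover,
// the required slot counts are recalculated once before retrying.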
void FD3D12StateCache::ApplyResourceTables(const FD3D12RootSignature* const pRootSignature, uint32 StartStage, uint32 EndStage)
|
|
{
|
|
const bool bUAVs = pRootSignature->HasUAVs();
|
|
const bool bSRVs = pRootSignature->HasSRVs();
|
|
const bool bCBVs = pRootSignature->HasCBVs();
|
|
|
|
// Determine what resource bind slots are dirty for the current shaders and how many descriptor table slots we need.
|
|
// We only set dirty resources that can be used for the upcoming Draw/Dispatch.
|
|
SRVSlotMask CurrentShaderDirtySRVSlots[SF_NumStandardFrequencies] = {};
|
|
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
CBVSlotMask CurrentShaderDirtyCBVSlots[SF_NumStandardFrequencies] = {};
|
|
#endif
|
|
UAVSlotMask CurrentShaderDirtyUAVSlots[SF_NumStandardFrequencies] = {};
|
|
uint32 NumUAVs = 0;
|
|
uint32 NumSRVs[SF_NumStandardFrequencies] = {};
|
|
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
uint32 NumCBVs[SF_NumStandardFrequencies] = {};
|
|
#endif
|
|
uint32 NumViews = 0;
|
|
|
|
EShaderFrequency ComputeUAVStages[] = {SF_Compute};
|
|
EShaderFrequency GraphicsUAVStages[] = {SF_Vertex, SF_Pixel};
|
|
|
|
TArrayView<EShaderFrequency> UAVStages = StartStage == SF_Compute ? MakeArrayView(ComputeUAVStages) : MakeArrayView(GraphicsUAVStages);
|
|
|
|
for (uint32 iTries = 0; iTries < 2; ++iTries)
|
|
{
|
|
if (bUAVs)
|
|
{
|
|
for (EShaderFrequency UAVStage : UAVStages)
|
|
{
|
|
const UAVSlotMask CurrentShaderUAVRegisterMask = BitMask<UAVSlotMask>(PipelineState.Common.CurrentShaderUAVCounts[UAVStage]);
|
|
CurrentShaderDirtyUAVSlots[UAVStage] = CurrentShaderUAVRegisterMask & PipelineState.Common.UAVCache.DirtySlotMask[UAVStage];
|
|
if (CurrentShaderDirtyUAVSlots[UAVStage])
|
|
{
|
|
if (ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2)
|
|
{
|
|
// Tier 1 and 2 HW requires the full number of UAV descriptors defined in the root signature's descriptor table.
|
|
NumUAVs = pRootSignature->MaxUAVCount(UAVStage);
|
|
}
|
|
else
|
|
{
|
|
NumUAVs = PipelineState.Common.CurrentShaderUAVCounts[UAVStage];
|
|
}
|
|
|
|
check(NumUAVs > 0 && NumUAVs <= MAX_UAVS);
|
|
NumViews += NumUAVs;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (uint32 Stage = StartStage; Stage < EndStage; ++Stage)
|
|
{
|
|
if (ShouldSkipStage(Stage))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (bSRVs)
|
|
{
|
|
// Note this code assumes the starting register is index 0.
|
|
const SRVSlotMask CurrentShaderSRVRegisterMask = BitMask<SRVSlotMask>(PipelineState.Common.CurrentShaderSRVCounts[Stage]);
|
|
CurrentShaderDirtySRVSlots[Stage] = CurrentShaderSRVRegisterMask & PipelineState.Common.SRVCache.DirtySlotMask[Stage];
|
|
if (CurrentShaderDirtySRVSlots[Stage])
|
|
{
|
|
if (ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1)
|
|
{
|
|
// Tier 1 HW requires the full number of SRV descriptors defined in the root signature's descriptor table.
|
|
NumSRVs[Stage] = pRootSignature->MaxSRVCount(Stage);
|
|
}
|
|
else
|
|
{
|
|
NumSRVs[Stage] = PipelineState.Common.CurrentShaderSRVCounts[Stage];
|
|
}
|
|
|
|
check(NumSRVs[Stage] > 0 && NumSRVs[Stage] <= MAX_SRVS);
|
|
NumViews += NumSRVs[Stage];
|
|
}
|
|
}
|
|
|
|
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
if (bCBVs)
|
|
{
|
|
const CBVSlotMask CurrentShaderCBVRegisterMask = BitMask<CBVSlotMask>(PipelineState.Common.CurrentShaderCBCounts[Stage]);
|
|
CurrentShaderDirtyCBVSlots[Stage] = CurrentShaderCBVRegisterMask & PipelineState.Common.CBVCache.DirtySlotMask[Stage];
|
|
if (CurrentShaderDirtyCBVSlots[Stage])
|
|
{
|
|
if (ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1)
|
|
{
|
|
// Tier 1 HW requires the full number of CBV descriptors defined in the root signature's descriptor table.
|
|
NumCBVs[Stage] = pRootSignature->MaxCBVCount(Stage);
|
|
}
|
|
else
|
|
{
|
|
NumCBVs[Stage] = PipelineState.Common.CurrentShaderCBCounts[Stage];
|
|
}
|
|
|
|
check(NumCBVs[Stage] > 0 && NumCBVs[Stage] <= MAX_CBS);
|
|
NumViews += NumCBVs[Stage];
|
|
}
|
|
}
|
|
#endif
|
|
// Note: CBVs don't currently use descriptor tables but we still need to know what resource point slots are dirty.
|
|
}
|
|
|
|
// See if the descriptor slots will fit
|
|
if (!DescriptorCache.GetCurrentViewHeap()->CanReserveSlots(NumViews))
|
|
{
|
|
const bool bDescriptorHeapsChanged = DescriptorCache.GetCurrentViewHeap()->RollOver();
|
|
if (bDescriptorHeapsChanged)
|
|
{
|
|
// If descriptor heaps changed, then all our tables are dirty again and we need to recalculate the number of slots we need.
|
|
NumViews = 0;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// We can reserve slots in the descriptor heap, no need to loop again.
|
|
break;
|
|
}
|
|
|
|
uint32 ViewHeapSlot = DescriptorCache.GetCurrentViewHeap()->ReserveSlots(NumViews);
|
|
|
|
// Unordered access views
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_D3D12ApplyStateSetUAVTime);
|
|
for (EShaderFrequency UAVStage : UAVStages)
|
|
{
|
|
if (CurrentShaderDirtyUAVSlots[UAVStage])
|
|
{
|
|
const D3D12_GPU_DESCRIPTOR_HANDLE BindDescriptor = DescriptorCache.BuildUAVTable(UAVStage, pRootSignature, PipelineState.Common.UAVCache, CurrentShaderDirtyUAVSlots[UAVStage], NumUAVs, ViewHeapSlot);
|
|
DescriptorCache.SetUAVTable(UAVStage, pRootSignature, PipelineState.Common.UAVCache, NumUAVs, BindDescriptor);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Shader resource views
|
|
if (bSRVs)
|
|
{
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12ApplyStateSetSRVTime);
|
|
FD3D12ShaderResourceViewCache& SRVCache = PipelineState.Common.SRVCache;
|
|
|
|
for (uint32 Index = StartStage; Index < EndStage; Index++)
|
|
{
|
|
if (CurrentShaderDirtySRVSlots[Index])
|
|
{
|
|
const D3D12_GPU_DESCRIPTOR_HANDLE BindDescriptor = DescriptorCache.BuildSRVTable(static_cast<EShaderFrequency>(Index), pRootSignature, SRVCache, CurrentShaderDirtySRVSlots[Index], NumSRVs[Index], ViewHeapSlot);
|
|
DescriptorCache.SetSRVTable(static_cast<EShaderFrequency>(Index), pRootSignature, SRVCache, NumSRVs[Index], BindDescriptor);
|
|
}
|
|
}
|
|
}
|
|
|
|
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
// Constant buffers
|
|
if (bCBVs)
|
|
{
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12ApplyStateSetConstantBufferTime);
|
|
FD3D12ConstantBufferCache& CBVCache = PipelineState.Common.CBVCache;
|
|
|
|
for (uint32 Index = StartStage; Index < EndStage; Index++)
|
|
{
|
|
if (CurrentShaderDirtyCBVSlots[Index])
|
|
{
|
|
DescriptorCache.SetConstantBufferViews(static_cast<EShaderFrequency>(Index), pRootSignature, CBVCache, CurrentShaderDirtyCBVSlots[Index], NumCBVs[Index], ViewHeapSlot);
|
|
}
|
|
}
|
|
}
|
|
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
}
|
|
|
|
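// Bindless path: flushes the queued bindless SRV/UAV references for each active stage into the
// descriptor cache, then (when constant buffer views are enabled) still binds dirty CBV descriptors
// through the online view heap.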
void FD3D12StateCache::ApplyBindlessResources(const FD3D12RootSignature* const pRootSignature, uint32 StartStage, uint32 EndStage)
|
|
{
|
|
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
for (uint32 Index = StartStage; Index < EndStage; Index++)
|
|
{
|
|
DescriptorCache.PrepareBindlessViews(
|
|
static_cast<EShaderFrequency>(Index)
|
|
, PipelineState.Common.QueuedBindlessSRVs[Index]
|
|
, PipelineState.Common.QueuedBindlessUAVs[Index]);
|
|
|
|
PipelineState.Common.QueuedBindlessSRVs[Index].Reset();
|
|
PipelineState.Common.QueuedBindlessUAVs[Index].Reset();
|
|
}
|
|
|
|
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
if (pRootSignature && pRootSignature->HasCBVs())
|
|
{
|
|
FD3D12ConstantBufferCache& CBVCache = PipelineState.Common.CBVCache;
|
|
|
|
CBVSlotMask CurrentShaderDirtyCBVSlots[SF_NumStandardFrequencies] = {};
|
|
uint32 NumCBVs[SF_NumStandardFrequencies] = {};
|
|
|
|
uint32 NumViews = 0;
|
|
|
|
for (uint32 iTries = 0; iTries < 2; ++iTries)
|
|
{
|
|
for (uint32 Stage = StartStage; Stage < EndStage; ++Stage)
|
|
{
|
|
if (ShouldSkipStage(Stage))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
const uint32 ConstantBufferCount = PipelineState.Common.CurrentShaderCBCounts[Stage];
|
|
|
|
const CBVSlotMask CurrentShaderCBVRegisterMask = BitMask<CBVSlotMask>(ConstantBufferCount);
|
|
CurrentShaderDirtyCBVSlots[Stage] = CurrentShaderCBVRegisterMask & CBVCache.DirtySlotMask[Stage];
|
|
if (CurrentShaderDirtyCBVSlots[Stage])
|
|
{
|
|
check(ConstantBufferCount > 0 && ConstantBufferCount <= MAX_CBS);
|
|
|
|
NumCBVs[Stage] = ConstantBufferCount;
|
|
NumViews += ConstantBufferCount;
|
|
}
|
|
// Note: CBVs don't currently use descriptor tables but we still need to know what resource point slots are dirty.
|
|
}
|
|
|
|
// See if the descriptor slots will fit
|
|
if (!DescriptorCache.GetCurrentViewHeap()->CanReserveSlots(NumViews))
|
|
{
|
|
if (DescriptorCache.GetCurrentViewHeap()->RollOver())
|
|
{
|
|
// If descriptor heaps changed, then all our tables are dirty again and we need to recalculate the number of slots we need.
|
|
NumViews = 0;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
uint32 ViewHeapSlot = DescriptorCache.GetCurrentViewHeap()->ReserveSlots(NumViews);
|
|
|
|
for (uint32 Index = StartStage; Index < EndStage; Index++)
|
|
{
|
|
if (CurrentShaderDirtyCBVSlots[Index])
|
|
{
|
|
DescriptorCache.SetConstantBufferViews(static_cast<EShaderFrequency>(Index), pRootSignature, CBVCache, CurrentShaderDirtyCBVSlots[Index], NumCBVs[Index], ViewHeapSlot);
|
|
}
|
|
}
|
|
}
|
|
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
|
|
#endif // PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
}
|
|
|
|
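// Root-CBV path (used when D3D12RHI_USE_CONSTANT_BUFFER_VIEWS is disabled): binds the dirty constant
// buffers of the active stages as root constant buffer views.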
void FD3D12StateCache::ApplyConstants(const FD3D12RootSignature* const pRootSignature, uint32 StartStage, uint32 EndStage)
|
|
{
|
|
#if !D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
// Determine what resource bind slots are dirty for the current shaders and how many descriptor table slots we need.
|
|
// We only set dirty resources that can be used for the upcoming Draw/Dispatch.
|
|
CBVSlotMask CurrentShaderDirtyCBVSlots[SF_NumStandardFrequencies] = {};
|
|
|
|
for (uint32 Stage = StartStage; Stage < EndStage; ++Stage)
|
|
{
|
|
if (ShouldSkipStage(Stage))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
const CBVSlotMask CurrentShaderCBVRegisterMask = BitMask<CBVSlotMask>(PipelineState.Common.CurrentShaderCBCounts[Stage]);
|
|
CurrentShaderDirtyCBVSlots[Stage] = CurrentShaderCBVRegisterMask & PipelineState.Common.CBVCache.DirtySlotMask[Stage];
|
|
// Note: CBVs don't currently use descriptor tables but we still need to know what resource point slots are dirty.
|
|
}
|
|
|
|
// Constant buffers
|
|
{
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12ApplyStateSetConstantBufferTime);
|
|
FD3D12ConstantBufferCache& CBVCache = PipelineState.Common.CBVCache;
|
|
|
|
for (uint32 Index = StartStage; Index < EndStage; Index++)
|
|
{
|
|
if (CurrentShaderDirtyCBVSlots[Index])
|
|
{
|
|
DescriptorCache.SetRootConstantBuffers(static_cast<EShaderFrequency>(Index), pRootSignature, CBVCache, CurrentShaderDirtyCBVSlots[Index], this);
|
|
}
|
|
}
|
|
}
|
|
#endif // !D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
|
|
}
|
|
|
|
|
|
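// Binds sampler descriptor tables for all dirty sampler slots. When the shared global sampler heap is
// in use, previously built tables are looked up by their sampler IDs and reused; on a cache miss the
// context switches to its local sampler heap and builds the tables there instead.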
void FD3D12StateCache::ApplySamplerTables(const FD3D12RootSignature* const pRootSignature, uint32 StartStage, uint32 EndStage)
|
|
{
|
|
bool HighLevelCacheMiss = false;
|
|
|
|
FD3D12SamplerStateCache& Cache = PipelineState.Common.SamplerCache;
|
|
SamplerSlotMask CurrentShaderDirtySamplerSlots[SF_NumStandardFrequencies] = {};
|
|
uint32 NumSamplers[SF_NumStandardFrequencies + 1] = {};
|
|
|
|
const auto& pfnCalcSamplersNeeded = [&]()
|
|
{
|
|
NumSamplers[SF_NumStandardFrequencies] = 0;
|
|
|
|
for (uint32 Stage = StartStage; Stage < EndStage; ++Stage)
|
|
{
|
|
if (ShouldSkipStage(Stage))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
// Note this code assumes the starting register is index 0.
|
|
const SamplerSlotMask CurrentShaderSamplerRegisterMask = BitMask<SamplerSlotMask>(PipelineState.Common.CurrentShaderSamplerCounts[Stage]);
|
|
CurrentShaderDirtySamplerSlots[Stage] = CurrentShaderSamplerRegisterMask & Cache.DirtySlotMask[Stage];
|
|
if (CurrentShaderDirtySamplerSlots[Stage])
|
|
{
|
|
if (ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1)
|
|
{
|
|
// Tier 1 HW requires the full number of sampler descriptors defined in the root signature.
|
|
NumSamplers[Stage] = pRootSignature->MaxSamplerCount(Stage);
|
|
}
|
|
else
|
|
{
|
|
NumSamplers[Stage] = PipelineState.Common.CurrentShaderSamplerCounts[Stage];
|
|
}
|
|
|
|
check(NumSamplers[Stage] > 0 && NumSamplers[Stage] <= MAX_SAMPLERS);
|
|
NumSamplers[SF_NumStandardFrequencies] += NumSamplers[Stage];
|
|
}
|
|
}
|
|
};
|
|
|
|
pfnCalcSamplersNeeded();
|
|
|
|
if (DescriptorCache.UsingGlobalSamplerHeap())
|
|
{
|
|
auto& GlobalSamplerSet = DescriptorCache.GetLocalSamplerSet();
|
|
|
|
for (uint32 Stage = StartStage; Stage < EndStage; Stage++)
|
|
{
|
|
if (ShouldSkipStage(Stage))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (CurrentShaderDirtySamplerSlots[Stage] && NumSamplers[Stage])
|
|
{
|
|
SamplerSlotMask& CurrentDirtySlotMask = Cache.DirtySlotMask[Stage];
|
|
FD3D12SamplerState** Samplers = Cache.States[Stage];
|
|
|
|
FD3D12UniqueSamplerTable Table;
|
|
Table.Key.Count = NumSamplers[Stage];
|
|
|
|
for (uint32 i = 0; i < NumSamplers[Stage]; i++)
|
|
{
|
|
Table.Key.SamplerID[i] = Samplers[i] ? Samplers[i]->ID : 0;
|
|
FD3D12SamplerStateCache::CleanSlot(CurrentDirtySlotMask, i);
|
|
}
|
|
|
|
FD3D12UniqueSamplerTable* CachedTable = GlobalSamplerSet.Find(Table);
|
|
if (CachedTable)
|
|
{
|
|
// Make sure the global sampler heap is really set on the command list before we try to find a cached descriptor table for it.
|
|
check(DescriptorCache.IsHeapSet(GetParentDevice()->GetGlobalSamplerHeap().GetHeap()));
|
|
check(CachedTable->GPUHandle.ptr);
|
|
if (Stage == SF_Compute)
|
|
{
|
|
const uint32 RDTIndex = pRootSignature->SamplerRDTBindSlot(EShaderFrequency(Stage));
|
|
CmdContext.GraphicsCommandList()->SetComputeRootDescriptorTable(RDTIndex, CachedTable->GPUHandle);
|
|
}
|
|
else
|
|
{
|
|
const uint32 RDTIndex = pRootSignature->SamplerRDTBindSlot(EShaderFrequency(Stage));
|
|
CmdContext.GraphicsCommandList()->SetGraphicsRootDescriptorTable(RDTIndex, CachedTable->GPUHandle);
|
|
}
|
|
|
|
// We changed the descriptor table, so all resources bound to slots outside of the table's range are now dirty.
|
|
// If a shader needs to use resources bound to these slots later, we need to set the descriptor table again to ensure those
|
|
// descriptors are valid.
|
|
const SamplerSlotMask OutsideCurrentTableRegisterMask = ~BitMask<SamplerSlotMask>(Table.Key.Count);
|
|
Cache.Dirty(static_cast<EShaderFrequency>(Stage), OutsideCurrentTableRegisterMask);
|
|
}
|
|
else
|
|
{
|
|
HighLevelCacheMiss = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!HighLevelCacheMiss)
|
|
{
|
|
// Success, all the tables were found in the high level heap
|
|
INC_DWORD_STAT_BY(STAT_NumReusedSamplerOnlineDescriptors, NumSamplers[SF_NumStandardFrequencies]);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (HighLevelCacheMiss)
|
|
{
|
|
// Move to per context heap strategy
|
|
const bool bDescriptorHeapsChanged = DescriptorCache.SwitchToContextLocalSamplerHeap();
|
|
if (bDescriptorHeapsChanged)
|
|
{
|
|
// If descriptor heaps changed, then all our tables are dirty again and we need to recalculate the number of slots we need.
|
|
pfnCalcSamplersNeeded();
|
|
}
|
|
}
|
|
|
|
FD3D12OnlineHeap* const SamplerHeap = DescriptorCache.GetCurrentSamplerHeap();
|
|
check(DescriptorCache.UsingGlobalSamplerHeap() == false);
|
|
check(SamplerHeap != &GetParentDevice()->GetGlobalSamplerHeap());
|
|
check(DescriptorCache.IsHeapSet(SamplerHeap->GetHeap()));
|
|
check(!DescriptorCache.IsHeapSet(GetParentDevice()->GetGlobalSamplerHeap().GetHeap()));
|
|
|
|
if (!SamplerHeap->CanReserveSlots(NumSamplers[SF_NumStandardFrequencies]))
|
|
{
|
|
const bool bDescriptorHeapsChanged = SamplerHeap->RollOver();
|
|
if (bDescriptorHeapsChanged)
|
|
{
|
|
// If descriptor heaps changed, then all our tables are dirty again and we need to recalculate the number of slots we need.
|
|
pfnCalcSamplersNeeded();
|
|
}
|
|
}
|
|
uint32 SamplerHeapSlot = SamplerHeap->ReserveSlots(NumSamplers[SF_NumStandardFrequencies]);
|
|
|
|
for (uint32 Index = StartStage; Index < EndStage; Index++)
|
|
{
|
|
if (CurrentShaderDirtySamplerSlots[Index])
|
|
{
|
|
D3D12_GPU_DESCRIPTOR_HANDLE BindDescriptor = DescriptorCache.BuildSamplerTable(static_cast<EShaderFrequency>(Index), pRootSignature, Cache, CurrentShaderDirtySamplerSlots[Index], NumSamplers[Index], SamplerHeapSlot);
|
|
DescriptorCache.SetSamplerTable(static_cast<EShaderFrequency>(Index), pRootSignature, Cache, NumSamplers[Index], BindDescriptor);
|
|
}
|
|
}
|
|
|
|
SamplerHeap->SetNextSlot(SamplerHeapSlot);
|
|
}
|
|
|
|
//#if ASSERT_RESOURCE_STATES
|
|
#if 0
|
|
/** Determine if two views intersect */
|
|
static inline bool ResourceViewsIntersect(FD3D12View* pLeftView, FD3D12View* pRightView)
|
|
{
|
|
if (pLeftView == nullptr || pRightView == nullptr)
|
|
{
|
|
// Cannot intersect if at least one is null
|
|
return false;
|
|
}
|
|
|
|
if ((void*)pLeftView == (void*)pRightView)
|
|
{
|
|
// The same view is not treated as an intersection
|
|
return false;
|
|
}
|
|
|
|
FD3D12Resource* pRTVResource = pLeftView->GetResource();
|
|
FD3D12Resource* pSRVResource = pRightView->GetResource();
|
|
if (pRTVResource != pSRVResource)
|
|
{
|
|
// Not the same resource
|
|
return false;
|
|
}
|
|
|
|
// Same resource, so see if their subresources overlap
|
|
return !pLeftView->DoesNotOverlap(*pRightView);
|
|
}
|
|
|
|
bool FD3D12StateCache::AssertResourceStates(ED3D12PipelineType PipelineType)
|
|
{
|
|
// This requires the debug layer
|
|
#if !D3D12_SUPPORTS_DEBUG_COMMAND_LIST
|
|
UE_LOG(LogD3D12RHI, Log, TEXT("*** AssertResourceStates requires the debug layer ***"));
|
|
return true;
|
|
#else
|
|
// Can only verify resource states if the debug layer is used
|
|
static const bool bWithD3DDebug = GRHIGlobals.IsDebugLayerEnabled;
|
|
if (!bWithD3DDebug)
|
|
{
|
|
UE_LOG(LogD3D12RHI, Fatal, TEXT("*** AssertResourceStates requires the debug layer ***"));
|
|
return false;
|
|
}
|
|
|
|
//
|
|
// Verify common pipeline state
|
|
//
|
|
|
|
// Note that ray tracing pipeline shares state with compute
|
|
const uint32 StartStage = PipelineType == ED3D12PipelineType::Graphics ? 0 : SF_Compute;
|
|
const uint32 EndStage = PipelineType == ED3D12PipelineType::Graphics ? SF_Compute : SF_NumStandardFrequencies;
|
|
|
|
bool bSRVIntersectsWithDepth = false;
|
|
bool bSRVIntersectsWithStencil = false;
|
|
for (uint32 Stage = StartStage; Stage < EndStage; Stage++)
|
|
{
|
|
if (ShouldSkipStage(Stage))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
// UAVs
|
|
{
|
|
const uint32 numUAVs = PipelineState.Common.CurrentShaderUAVCounts[Stage];
|
|
for (uint32 i = 0; i < numUAVs; i++)
|
|
{
|
|
FD3D12UnorderedAccessView *pCurrentView = PipelineState.Common.UAVCache.Views[Stage][i];
|
|
if (!AssertResourceState(CmdContext.GraphicsCommandList().Get(), pCurrentView, D3D12_RESOURCE_STATE_UNORDERED_ACCESS))
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// SRVs
|
|
{
|
|
const uint32 numSRVs = PipelineState.Common.CurrentShaderSRVCounts[Stage];
|
|
for (uint32 i = 0; i < numSRVs; i++)
|
|
{
|
|
FD3D12ShaderResourceView* pCurrentView = PipelineState.Common.SRVCache.Views[Stage][i];
|
|
D3D12_RESOURCE_STATES expectedState = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
|
|
|
FD3D12Resource* Resource = pCurrentView ? pCurrentView->GetResource() : nullptr;
if (Resource && Resource->IsDepthStencilResource())
|
|
{
|
|
expectedState = expectedState | D3D12_RESOURCE_STATE_DEPTH_READ;
|
|
|
|
// Sanity check that we don't have a read/write hazard between the DSV and SRV.
|
|
FD3D12DepthStencilView* DSV = PipelineState.Graphics.CurrentDepthStencilTarget;
|
|
if (ResourceViewsIntersect(DSV, pCurrentView))
|
|
{
|
|
const D3D12_DEPTH_STENCIL_VIEW_DESC &DSVDesc = DSV->GetDesc();
|
|
|
|
const bool bHasDepth = DSV->HasDepth();
|
|
const bool bHasStencil = DSV->HasStencil();
|
|
|
|
const bool bWritableDepth = bHasDepth && (DSVDesc.Flags & D3D12_DSV_FLAG_READ_ONLY_DEPTH) == 0;
|
|
const bool bWritableStencil = bHasStencil && (DSVDesc.Flags & D3D12_DSV_FLAG_READ_ONLY_STENCIL) == 0;
|
|
|
|
if (pCurrentView->IsStencilPlaneResource())
|
|
{
|
|
bSRVIntersectsWithStencil = true;
|
|
if (bWritableStencil)
|
|
{
|
|
// DSV is being used for stencil write and this SRV is being used for read which is not supported.
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (pCurrentView->IsDepthPlaneResource())
|
|
{
|
|
bSRVIntersectsWithDepth = true;
|
|
if (bWritableDepth)
|
|
{
|
|
// DSV is being used for depth write and this SRV is being used for read which is not supported.
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!AssertResourceState(CmdContext.GraphicsCommandList().Get(), pCurrentView, expectedState))
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Note: There is nothing special to check for compute and ray tracing pipelines
|
|
if (PipelineType == ED3D12PipelineType::Graphics)
|
|
{
|
|
//
|
|
// Verify graphics pipeline state
|
|
//
|
|
|
|
// DSV
|
|
{
|
|
FD3D12DepthStencilView* pCurrentView = PipelineState.Graphics.CurrentDepthStencilTarget;
|
|
|
|
if (pCurrentView)
|
|
{
|
|
// Check if the depth/stencil resource has an SRV bound
|
|
const D3D12_DEPTH_STENCIL_VIEW_DESC& desc = pCurrentView->GetDesc();
|
|
const bool bDepthIsReadOnly = !!(desc.Flags & D3D12_DSV_FLAG_READ_ONLY_DEPTH);
|
|
const bool bStencilIsReadOnly = !!(desc.Flags & D3D12_DSV_FLAG_READ_ONLY_STENCIL);
|
|
|
|
// Decompose the view into the subresources (depth and stencil are on different planes)
|
|
FD3D12Resource* pResource = pCurrentView->GetResource();
|
|
for (uint32 SubresourceIndex : pCurrentView->GetViewSubset())
|
|
{
|
|
uint16 MipSlice;
|
|
uint16 ArraySlice;
|
|
uint8 PlaneSlice;
|
|
D3D12DecomposeSubresource(SubresourceIndex,
|
|
pResource->GetMipLevels(),
|
|
pResource->GetArraySize(),
|
|
MipSlice, ArraySlice, PlaneSlice);
|
|
|
|
D3D12_RESOURCE_STATES expectedState;
|
|
if (PlaneSlice == 0)
|
|
{
|
|
// Depth plane
|
|
expectedState = bDepthIsReadOnly ? D3D12_RESOURCE_STATE_DEPTH_READ : D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
|
if (bSRVIntersectsWithDepth)
|
|
{
|
|
// Depth SRVs just contain the depth plane
|
|
check(bDepthIsReadOnly);
|
|
expectedState |=
|
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
|
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Stencil plane
|
|
expectedState = bStencilIsReadOnly ? D3D12_RESOURCE_STATE_DEPTH_READ : D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
|
if (bSRVIntersectsWithStencil)
|
|
{
|
|
// Stencil SRVs just contain the stencil plane
|
|
check(bStencilIsReadOnly);
|
|
expectedState |=
|
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
|
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
|
}
|
|
}
|
|
|
|
bool bGoodState = !!CmdContext.DebugCommandList()->AssertResourceState(pResource->GetResource(), SubresourceIndex, expectedState);
|
|
if (!bGoodState)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// RTV
|
|
{
|
|
const uint32 numRTVs = UE_ARRAY_COUNT(PipelineState.Graphics.RenderTargetArray);
|
|
for (uint32 i = 0; i < numRTVs; i++)
|
|
{
|
|
FD3D12RenderTargetView* pCurrentView = PipelineState.Graphics.RenderTargetArray[i];
|
|
if (!AssertResourceState(CmdContext.GraphicsCommandList().Get(), pCurrentView, D3D12_RESOURCE_STATE_RENDER_TARGET))
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// TODO: Verify vertex buffer, index buffer, and constant buffer state.
|
|
}
|
|
|
|
return true;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
void FD3D12StateCache::SetRootConstants(const FUint32Vector4& Constants)
|
|
{
|
|
if (Constants != PipelineState.Common.ShaderRootConstants)
|
|
{
|
|
PipelineState.Common.ShaderRootConstants = Constants;
|
|
PipelineState.Common.bNeedSetRootConstants = true;
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::ClearUAVs(EShaderFrequency ShaderStage)
|
|
{
|
|
FD3D12UnorderedAccessViewCache& Cache = PipelineState.Common.UAVCache;
|
|
const bool bIsCompute = ShaderStage == SF_Compute;
|
|
|
|
for (uint32 i = 0; i < MAX_UAVS; ++i)
|
|
{
|
|
if(Cache.Views[ShaderStage][i] != nullptr)
|
|
{
|
|
FD3D12UnorderedAccessViewCache::DirtySlot(Cache.DirtySlotMask[ShaderStage], i);
|
|
}
|
|
Cache.Views[ShaderStage][i] = nullptr;
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::SetUAV(EShaderFrequency ShaderStage, uint32 SlotIndex, FD3D12UnorderedAccessView* UAV, uint32 InitialCount)
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_D3D12SetUnorderedAccessViewTime);
|
|
|
|
FD3D12UnorderedAccessViewCache& Cache = PipelineState.Common.UAVCache;
|
|
if (Cache.Views[ShaderStage][SlotIndex] == UAV)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// When setting UAVs for Graphics, it wipes out all existing bound resources.
|
|
const bool bIsCompute = ShaderStage == SF_Compute;
|
|
Cache.StartSlot[ShaderStage] = bIsCompute ? FMath::Min(SlotIndex, Cache.StartSlot[ShaderStage]) : 0;
|
|
|
|
Cache.Views[ShaderStage][SlotIndex] = UAV;
|
|
FD3D12UnorderedAccessViewCache::DirtySlot(Cache.DirtySlotMask[ShaderStage], SlotIndex);
|
|
|
|
if (UAV)
|
|
{
|
|
Cache.Resources[ShaderStage][SlotIndex] = UAV->GetResource();
|
|
|
|
FD3D12Resource* CounterResource = UAV->GetCounterResource();
|
|
if (CounterResource)
|
|
{
|
|
checkNoEntry(); // @todo fix this. UAV counters are not threadsafe. Initialization could happen out-of-order
|
|
/*&& (!UAV->IsCounterResourceInitialized() || InitialCount != -1))
|
|
{
|
|
FD3D12Device* Device = CounterResource->GetParentDevice();
|
|
FD3D12ResourceLocation UploadBufferLocation(Device);
|
|
|
|
uint32* CounterUploadHeapData = static_cast<uint32*>(CmdContext.ConstantsAllocator.Allocate(sizeof(uint32), UploadBufferLocation, nullptr));
|
|
|
|
// Initialize the counter to 0 if it's not been previously initialized and the UAVInitialCount is -1, if not use the value that was passed.
|
|
*CounterUploadHeapData = (!UAV->IsCounterResourceInitialized() && InitialCount == -1) ? 0 : InitialCount;
|
|
|
|
// Transition to copy dest
|
|
CmdContext.TransitionResource(CounterResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST, 0);
|
|
CmdContext.FlushResourceBarriers();
|
|
|
|
CmdContext.GraphicsCommandList()->CopyBufferRegion(
|
|
CounterResource->GetResource(),
|
|
0,
|
|
UploadBufferLocation.GetResource()->GetResource(),
|
|
UploadBufferLocation.GetOffsetFromBaseOfResource(),
|
|
4);
|
|
|
|
// Restore UAV state
|
|
CmdContext.TransitionResource(CounterResource, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0);
|
|
|
|
CmdContext.UpdateResidency(CounterResource);
|
|
|
|
UAV->MarkCounterResourceInitialized();
|
|
}*/
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Cache.Resources[ShaderStage][SlotIndex] = nullptr;
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::SetBlendFactor(const float BlendFactor[4])
|
|
{
|
|
if (FMemory::Memcmp(PipelineState.Graphics.CurrentBlendFactor, BlendFactor, sizeof(PipelineState.Graphics.CurrentBlendFactor)))
|
|
{
|
|
FMemory::Memcpy(PipelineState.Graphics.CurrentBlendFactor, BlendFactor, sizeof(PipelineState.Graphics.CurrentBlendFactor));
|
|
bNeedSetBlendFactor = true;
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::SetStencilRef(uint32 StencilRef)
|
|
{
|
|
if (PipelineState.Graphics.CurrentReferenceStencil != StencilRef)
|
|
{
|
|
PipelineState.Graphics.CurrentReferenceStencil = StencilRef;
|
|
bNeedSetStencilRef = true;
|
|
}
|
|
}
|
|
|
|
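// Caches the sampler/SRV/CB/UAV counts declared by the shader now bound to this stage (zero when the
// stage is cleared) and marks the stage's uniform buffers dirty because the shader changed.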
void FD3D12StateCache::SetNewShaderData(EShaderFrequency InFrequency, const FD3D12ShaderData* InShaderData)
|
|
{
|
|
PipelineState.Common.CurrentShaderSamplerCounts[InFrequency] = InShaderData ? InShaderData->ResourceCounts.NumSamplers : 0;
|
|
PipelineState.Common.CurrentShaderSRVCounts[InFrequency] = InShaderData ? InShaderData->ResourceCounts.NumSRVs : 0;
|
|
PipelineState.Common.CurrentShaderCBCounts[InFrequency] = InShaderData ? InShaderData->ResourceCounts.NumCBs : 0;
|
|
PipelineState.Common.CurrentShaderUAVCounts[InFrequency] = InShaderData ? InShaderData->ResourceCounts.NumUAVs : 0;
|
|
|
|
// Shader changed so its resource table is dirty
|
|
SetDirtyUniformBuffers(CmdContext, InFrequency);
|
|
}
|
|
|
|
void FD3D12StateCache::SetComputePipelineState(FD3D12ComputePipelineState* ComputePipelineState)
|
|
{
|
|
check(ComputePipelineState);
|
|
|
|
FD3D12ComputePipelineState* CurrentComputePipelineState = PipelineState.Compute.CurrentPipelineStateObject;
|
|
const bool bForceSet = CurrentComputePipelineState == nullptr;
|
|
|
|
if (bForceSet || CurrentComputePipelineState != ComputePipelineState)
|
|
{
|
|
if (bForceSet || CurrentComputePipelineState->RootSignature != ComputePipelineState->RootSignature)
|
|
{
|
|
PipelineState.Compute.bNeedSetRootSignature = true;
|
|
}
|
|
|
|
if (bForceSet || CurrentComputePipelineState->GetComputeShader() != ComputePipelineState->GetComputeShader())
|
|
{
|
|
SetNewShaderData(SF_Compute, FD3D12DynamicRHI::ResourceCast(ComputePipelineState->GetComputeShader()));
|
|
}
|
|
|
|
// Save the PSO
|
|
PipelineState.Common.bNeedSetPSO = true;
|
|
PipelineState.Compute.CurrentPipelineStateObject = ComputePipelineState;
|
|
|
|
// Set the PSO
|
|
InternalSetPipelineState(ComputePipelineState->PipelineState);
|
|
}
|
|
}
|
|
|
|
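// Caches the new graphics PSO. Per-stage shader data, the root signature dirty flag, stream strides
// and the primitive topology are only updated when they differ from the currently cached values.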
void FD3D12StateCache::SetGraphicsPipelineState(FD3D12GraphicsPipelineState* GraphicsPipelineState)
|
|
{
|
|
check(GraphicsPipelineState);
|
|
|
|
FD3D12GraphicsPipelineState* CurrentGraphicsPipelineState = PipelineState.Graphics.CurrentPipelineStateObject;
|
|
const bool bForceSet = CurrentGraphicsPipelineState == nullptr;
|
|
|
|
if (bForceSet || CurrentGraphicsPipelineState != GraphicsPipelineState)
|
|
{
|
|
if (bForceSet || CurrentGraphicsPipelineState->GetVertexShader() != GraphicsPipelineState->GetVertexShader())
|
|
{
|
|
SetNewShaderData(SF_Vertex, GraphicsPipelineState->GetVertexShader());
|
|
}
|
|
|
|
#if PLATFORM_SUPPORTS_MESH_SHADERS
|
|
if (bForceSet || CurrentGraphicsPipelineState->GetMeshShader() != GraphicsPipelineState->GetMeshShader())
|
|
{
|
|
SetNewShaderData(SF_Mesh, GraphicsPipelineState->GetMeshShader());
|
|
}
|
|
|
|
if (bForceSet || CurrentGraphicsPipelineState->GetAmplificationShader() != GraphicsPipelineState->GetAmplificationShader())
|
|
{
|
|
SetNewShaderData(SF_Amplification, GraphicsPipelineState->GetAmplificationShader());
|
|
}
|
|
#endif
|
|
|
|
if (bForceSet || CurrentGraphicsPipelineState->GetPixelShader() != GraphicsPipelineState->GetPixelShader())
|
|
{
|
|
SetNewShaderData(SF_Pixel, GraphicsPipelineState->GetPixelShader());
|
|
}
|
|
|
|
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
|
|
if (bForceSet || CurrentGraphicsPipelineState->GetGeometryShader() != GraphicsPipelineState->GetGeometryShader())
|
|
{
|
|
SetNewShaderData(SF_Geometry, GraphicsPipelineState->GetGeometryShader());
|
|
}
|
|
#endif
|
|
|
|
// See if we need to change the root signature
|
|
if (bForceSet || CurrentGraphicsPipelineState->RootSignature != GraphicsPipelineState->RootSignature)
|
|
{
|
|
PipelineState.Graphics.bNeedSetRootSignature = true;
|
|
}
|
|
|
|
PipelineState.Graphics.StreamStrides = GraphicsPipelineState->StreamStrides;
|
|
|
|
// Save the PSO
|
|
PipelineState.Common.bNeedSetPSO = true;
|
|
PipelineState.Graphics.CurrentPipelineStateObject = GraphicsPipelineState;
|
|
|
|
EPrimitiveType PrimitiveType = GraphicsPipelineState->PipelineStateInitializer.PrimitiveType;
|
|
if (PipelineState.Graphics.CurrentPrimitiveType != PrimitiveType)
|
|
{
|
|
PipelineState.Graphics.CurrentPrimitiveType = PrimitiveType;
|
|
PipelineState.Graphics.CurrentPrimitiveTopology = GetD3D12PrimitiveType(PrimitiveType);
|
|
bNeedSetPrimitiveTopology = true;
|
|
|
|
static_assert(PT_Num == 6, "This computation needs to be updated, matching that of GetVertexCountForPrimitiveCount()");
|
|
PipelineState.Graphics.PrimitiveTypeFactor = (PrimitiveType == PT_TriangleList) ? 3 : (PrimitiveType == PT_LineList) ? 2 : (PrimitiveType == PT_RectList) ? 3 : 1;
|
|
PipelineState.Graphics.PrimitiveTypeOffset = (PrimitiveType == PT_TriangleStrip) ? 2 : 0;
|
|
}
|
|
|
|
// Set the PSO
|
|
InternalSetPipelineState(GraphicsPipelineState->PipelineState);
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::InternalSetIndexBuffer(FD3D12Resource* Resource)
|
|
{
|
|
CmdContext.UpdateResidency(Resource);
|
|
CmdContext.GraphicsCommandList()->IASetIndexBuffer(&PipelineState.Graphics.IBCache.CurrentIndexBufferView);
|
|
|
|
if (Resource->RequiresResourceStateTracking())
|
|
{
|
|
check(Resource->GetSubresourceCount() == 1);
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::InternalSetStreamSource(FD3D12ResourceLocation* VertexBufferLocation, uint32 StreamIndex, uint32 Stride, uint32 Offset)
|
|
{
|
|
// If we have a vertex buffer location, that location should also have an underlying resource.
|
|
check(VertexBufferLocation == nullptr || VertexBufferLocation->GetResource());
|
|
|
|
check(StreamIndex < ARRAYSIZE(PipelineState.Graphics.VBCache.CurrentVertexBufferResources));
|
|
|
|
__declspec(align(16)) D3D12_VERTEX_BUFFER_VIEW NewView;
|
|
NewView.BufferLocation = (VertexBufferLocation) ? VertexBufferLocation->GetGPUVirtualAddress() + Offset : 0;
|
|
NewView.StrideInBytes = Stride;
|
|
NewView.SizeInBytes = (VertexBufferLocation) ? VertexBufferLocation->GetSize() - Offset : 0; // Make sure we account for how much we offset into the VB
|
|
|
|
D3D12_VERTEX_BUFFER_VIEW& CurrentView = PipelineState.Graphics.VBCache.CurrentVertexBufferViews[StreamIndex];
|
|
|
|
if (NewView.BufferLocation != CurrentView.BufferLocation ||
|
|
NewView.StrideInBytes != CurrentView.StrideInBytes ||
|
|
NewView.SizeInBytes != CurrentView.SizeInBytes ||
|
|
GD3D12SkipStateCaching)
|
|
{
|
|
bNeedSetVB = true;
|
|
PipelineState.Graphics.VBCache.CurrentVertexBufferResources[StreamIndex] = VertexBufferLocation;
|
|
|
|
if (VertexBufferLocation != nullptr)
|
|
{
|
|
PipelineState.Graphics.VBCache.Resources[StreamIndex] = VertexBufferLocation->GetResource();
|
|
FMemory::Memcpy(CurrentView, NewView);
|
|
PipelineState.Graphics.VBCache.BoundVBMask |= ((VBSlotMask)1 << StreamIndex);
|
|
}
|
|
else
|
|
{
|
|
FMemory::Memzero(&CurrentView, sizeof(CurrentView));
|
|
PipelineState.Graphics.VBCache.CurrentVertexBufferResources[StreamIndex] = nullptr;
|
|
PipelineState.Graphics.VBCache.Resources[StreamIndex] = nullptr;
|
|
|
|
PipelineState.Graphics.VBCache.BoundVBMask &= ~((VBSlotMask)1 << StreamIndex);
|
|
}
|
|
|
|
if (PipelineState.Graphics.VBCache.BoundVBMask)
|
|
{
|
|
PipelineState.Graphics.VBCache.MaxBoundVertexBufferIndex = FMath::FloorLog2(PipelineState.Graphics.VBCache.BoundVBMask);
|
|
}
|
|
else
|
|
{
|
|
PipelineState.Graphics.VBCache.MaxBoundVertexBufferIndex = INDEX_NONE;
|
|
}
|
|
}
|
|
}
|
|
|
|
void FD3D12StateCache::SetShaderResourceView(EShaderFrequency ShaderFrequency, FD3D12ShaderResourceView* SRV, uint32 ResourceIndex)
|
|
{
|
|
//SCOPE_CYCLE_COUNTER(STAT_D3D12SetShaderResourceViewTime);
|
|
|
|
check(ResourceIndex < MAX_SRVS);
|
|
FD3D12ShaderResourceViewCache& Cache = PipelineState.Common.SRVCache;
|
|
auto& CurrentShaderResourceViews = Cache.Views[ShaderFrequency];
|
|
|
|
if ((CurrentShaderResourceViews[ResourceIndex] != SRV) || GD3D12SkipStateCaching)
|
|
{
|
|
if (SRV != nullptr)
|
|
{
|
|
// Mark the SRVs as not cleared
|
|
bSRVSCleared = false;
|
|
|
|
Cache.BoundMask[ShaderFrequency] |= ((SRVSlotMask)1 << ResourceIndex);
|
|
Cache.Resources[ShaderFrequency][ResourceIndex] = SRV->GetResource();
|
|
}
|
|
else
|
|
{
|
|
Cache.BoundMask[ShaderFrequency] &= ~((SRVSlotMask)1 << ResourceIndex);
|
|
Cache.Resources[ShaderFrequency][ResourceIndex] = nullptr;
|
|
}
|
|
|
|
// Find the highest set SRV
|
|
Cache.MaxBoundIndex[ShaderFrequency] =
|
|
(Cache.BoundMask[ShaderFrequency] == 0)? INDEX_NONE :
|
|
#if MAX_SRVS > 32
|
|
FMath::FloorLog2_64(Cache.BoundMask[ShaderFrequency]);
|
|
#else
|
|
FMath::FloorLog2(Cache.BoundMask[ShaderFrequency]);
|
|
#endif
|
|
|
|
CurrentShaderResourceViews[ResourceIndex] = SRV;
|
|
FD3D12ShaderResourceViewCache::DirtySlot(Cache.DirtySlotMask[ShaderFrequency], ResourceIndex);
|
|
}
|
|
}
|
|
|
|
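// Caches the depth-stencil target and render target views, setting bNeedSetRTs only when a binding
// actually changes. CurrentNumberOfRenderTargets is recomputed as the count of non-null RTVs passed in.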
void FD3D12StateCache::SetRenderTargets(uint32 NumSimultaneousRenderTargets, FD3D12RenderTargetView** RTArray, FD3D12DepthStencilView* DSTarget)
|
|
{
|
|
if (PipelineState.Graphics.CurrentDepthStencilTarget != DSTarget)
|
|
{
|
|
PipelineState.Graphics.CurrentDepthStencilTarget = DSTarget;
|
|
bNeedSetRTs = true;
|
|
}
|
|
|
|
// Update the render targets
|
|
PipelineState.Graphics.CurrentNumberOfRenderTargets = 0;
|
|
for (uint32 Index = 0; Index < UE_ARRAY_COUNT(PipelineState.Graphics.RenderTargetArray); ++Index)
|
|
{
|
|
FD3D12RenderTargetView* RTV = Index < NumSimultaneousRenderTargets
|
|
? RTArray[Index]
|
|
: nullptr;
|
|
|
|
if (RTV)
|
|
{
|
|
PipelineState.Graphics.CurrentNumberOfRenderTargets++;
|
|
}
|
|
|
|
if (PipelineState.Graphics.RenderTargetArray[Index] != RTV)
|
|
{
|
|
PipelineState.Graphics.RenderTargetArray[Index] = RTV;
|
|
bNeedSetRTs = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
void FD3D12StateCache::SetNewBindlessResourcesHeap(FD3D12DescriptorHeap* InHeap)
|
|
{
|
|
DescriptorCache.SetBindlessResourcesHeap(InHeap);
|
|
BindlessHeapsInvalidated();
|
|
}
|
|
|
|
void FD3D12StateCache::SetNewBindlessSamplerHeap(FD3D12DescriptorHeap* InHeap)
|
|
{
|
|
DescriptorCache.SetBindlessSamplersHeap(InHeap);
|
|
BindlessHeapsInvalidated();
|
|
}
|
|
#endif // PLATFORM_SUPPORTS_BINDLESS_RENDERING
|