1823 lines
59 KiB
C++
1823 lines
59 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
D3D11Commands.cpp: D3D RHI commands implementation.
|
|
=============================================================================*/
|
|
|
|
#include "D3D11RHIPrivate.h"
|
|
#include "Windows/D3D11RHIPrivateUtil.h"
|
|
#include "StaticBoundShaderState.h"
|
|
#include "GlobalShader.h"
|
|
#include "OneColorShader.h"
|
|
#include "RHICommandList.h"
|
|
#include "RHIStaticStates.h"
|
|
#include "ShaderParameterUtils.h"
|
|
#include "SceneUtils.h"
|
|
#include "EngineGlobals.h"
|
|
#include "RHIShaderParametersShared.h"
|
|
#include "RHIUniformBufferUtilities.h"
|
|
|
|
// For Depth Bounds Test interface
|
|
#include "Windows/AllowWindowsPlatformTypes.h"
|
|
#if WITH_NVAPI
|
|
#include "nvapi.h"
|
|
#endif
|
|
#if WITH_AMD_AGS
|
|
#include "amd_ags.h"
|
|
#endif
|
|
#include "Windows/HideWindowsPlatformTypes.h"
|
|
|
|
#define DECLARE_ISBOUNDSHADER(ShaderType) inline void ValidateBoundShader(FD3D11StateCache& InStateCache, FRHI##ShaderType* ShaderType##RHI) \
|
|
{ \
|
|
ID3D11##ShaderType* CachedShader; \
|
|
InStateCache.Get##ShaderType(&CachedShader); \
|
|
FD3D11##ShaderType* ShaderType = FD3D11DynamicRHI::ResourceCast(ShaderType##RHI); \
|
|
ensureMsgf(CachedShader == ShaderType->Resource, TEXT("Parameters are being set for a %s which is not currently bound"), TEXT( #ShaderType )); \
|
|
if (CachedShader) { CachedShader->Release(); } \
|
|
}
|
|
|
|
DECLARE_ISBOUNDSHADER(VertexShader)
|
|
DECLARE_ISBOUNDSHADER(PixelShader)
|
|
DECLARE_ISBOUNDSHADER(GeometryShader)
|
|
DECLARE_ISBOUNDSHADER(ComputeShader)
|
|
|
|
|
|
// Bound-shader validation is only compiled in when DO_GUARD_SLOW is enabled; otherwise the macro expands to nothing.
#if !DO_GUARD_SLOW
	#define VALIDATE_BOUND_SHADER(s)
#else
	#define VALIDATE_BOUND_SHADER(s) ValidateBoundShader(StateCache, s)
#endif
|
|
|
|
static int32 GUnbindResourcesBetweenDrawsInDX11 = UE_BUILD_DEBUG;
|
|
static FAutoConsoleVariableRef CVarUnbindResourcesBetweenDrawsInDX11(
|
|
TEXT("r.UnbindResourcesBetweenDrawsInDX11"),
|
|
GUnbindResourcesBetweenDrawsInDX11,
|
|
TEXT("Unbind resources between material changes in DX11."),
|
|
ECVF_Default
|
|
);
|
|
|
|
|
|
int32 GDX11ReduceRTVRebinds = 1;
|
|
static FAutoConsoleVariableRef CVarDX11ReduceRTVRebinds(
|
|
TEXT("r.DX11.ReduceRTVRebinds"),
|
|
GDX11ReduceRTVRebinds,
|
|
TEXT("Reduce # of SetRenderTargetCalls."),
|
|
ECVF_ReadOnly
|
|
);
|
|
|
|
#if !UE_BUILD_SHIPPING && !UE_BUILD_TEST
|
|
int32 GLogDX11RTRebinds = 0;
|
|
static FAutoConsoleVariableRef CVarLogDx11RTRebinds(
|
|
TEXT("r.DX11.LogRTRebinds"),
|
|
GLogDX11RTRebinds,
|
|
TEXT("Log # of rebinds of RTs per frame"),
|
|
ECVF_Default
|
|
);
|
|
FThreadSafeCounter GDX11RTRebind;
|
|
FThreadSafeCounter GDX11CommitGraphicsResourceTables;
|
|
#endif
|
|
|
|
// r.D3D11.AutoFlushUAV: render-thread-safe toggle, initial value 1.
// The help text is assembled from adjacent string literals, so each line must carry its own
// terminating "\n"; the first sentence was missing it and ran into the " 1: on" line.
static TAutoConsoleVariable<int32> CVarAllowUAVFlushExt(
	TEXT("r.D3D11.AutoFlushUAV"),
	1,
	TEXT("If enabled, use NVAPI (Nvidia), AGS (AMD) or Intel Extensions (Intel) to not flush between dispatches/draw calls\n")
	TEXT(" 1: on (default)\n")
	TEXT(" 0: off"),
	ECVF_RenderThreadSafe);
|
|
|
|
// Vertex state.
|
|
/**
 * Binds a vertex buffer to a stream slot through the state cache.
 * @param StreamIndex - stream slot to bind to
 * @param VertexBufferRHI - buffer to bind; when it casts to null, a null D3D buffer is set on the slot
 * @param Offset - offset forwarded unchanged to the state cache
 */
void FD3D11DynamicRHI::RHISetStreamSource(uint32 StreamIndex, FRHIBuffer* VertexBufferRHI, uint32 Offset)
{
	FD3D11Buffer* const Buffer = ResourceCast(VertexBufferRHI);

	ID3D11Buffer* NativeBuffer = nullptr;
	if (Buffer)
	{
		NativeBuffer = Buffer->Resource.GetReference();
	}

	TrackResourceBoundAsVB(Buffer, StreamIndex);
	StateCache.SetStreamSource(NativeBuffer, StreamIndex, Offset);
}
|
|
|
|
// Rasterizer state.
|
|
/**
 * Forwards the D3D resource of the given rasterizer state to the state cache.
 * @param NewStateRHI - rasterizer state to apply; dereferenced without a null check
 */
void FD3D11DynamicRHI::RHISetRasterizerState(FRHIRasterizerState* NewStateRHI)
{
	FD3D11RasterizerState* const RasterizerState = ResourceCast(NewStateRHI);
	StateCache.SetRasterizerState(RasterizerState->Resource);
}
|
|
|
|
template<EShaderFrequency ShaderFrequency>
|
|
void FD3D11DynamicRHI::BindUniformBuffer(uint32 BufferIndex, FRHIUniformBuffer* BufferRHI)
|
|
{
|
|
check(BufferRHI && BufferRHI->GetLayout().GetHash());
|
|
|
|
FD3D11UniformBuffer* Buffer = ResourceCast(BufferRHI);
|
|
|
|
ID3D11Buffer* ConstantBuffer = Buffer ? Buffer->Resource.GetReference() : nullptr;
|
|
StateCache.SetConstantBuffer<ShaderFrequency>(ConstantBuffer, BufferIndex);
|
|
|
|
BoundUniformBuffers[ShaderFrequency][BufferIndex] = BufferRHI;
|
|
DirtyUniformBuffers[ShaderFrequency] |= (1 << BufferIndex);
|
|
}
|
|
|
|
template <typename TRHIShader>
|
|
void FD3D11DynamicRHI::ApplyStaticUniformBuffers(TRHIShader* Shader)
|
|
{
|
|
if (Shader)
|
|
{
|
|
UE::RHICore::ApplyStaticUniformBuffers(Shader, StaticUniformBuffers,
|
|
[this](int32 BufferIndex, FRHIUniformBuffer* Buffer)
|
|
{
|
|
BindUniformBuffer<static_cast<EShaderFrequency>(TRHIShader::StaticFrequency)>(BufferIndex, Buffer);
|
|
});
|
|
}
|
|
}
|
|
|
|
/**
 * Applies a fallback graphics pipeline state.
 * Delegates to the PSO-fallback context first, then optionally applies the static uniform buffers to
 * the vertex, geometry and pixel shaders of the PSO, and finally stores the PSO's primitive type.
 * @param GraphicsState - pipeline state; treated as an FRHIGraphicsPipelineStateFallBack
 * @param StencilRef - forwarded to the fallback implementation
 * @param bApplyAdditionalState - when true, static uniform buffers are applied to the PSO's shaders
 */
void FD3D11DynamicRHI::RHISetGraphicsPipelineState(FRHIGraphicsPipelineState* GraphicsState, uint32 StencilRef, bool bApplyAdditionalState)
{
	IRHICommandContextPSOFallback::RHISetGraphicsPipelineState(GraphicsState, StencilRef, bApplyAdditionalState);

	const FGraphicsPipelineStateInitializer& Initializer = static_cast<FRHIGraphicsPipelineStateFallBack*>(GraphicsState)->Initializer;

	if (bApplyAdditionalState)
	{
		const auto& ShaderState = Initializer.BoundShaderState;
		ApplyStaticUniformBuffers(static_cast<FD3D11VertexShader*>(ShaderState.VertexShaderRHI));
		ApplyStaticUniformBuffers(static_cast<FD3D11GeometryShader*>(ShaderState.GetGeometryShader()));
		ApplyStaticUniformBuffers(static_cast<FD3D11PixelShader*>(ShaderState.PixelShaderRHI));
	}

	// The primitive type is stored last, after the fallback call above has set the bound shader state.
	PrimitiveType = Initializer.PrimitiveType;
}
|
|
|
|
/**
 * Records the compute shader used by subsequent dispatches and applies static uniform buffers to it.
 * When r.UnbindResourcesBetweenDrawsInDX11 is non-zero, all compute shader resources are cleared first.
 * @param ComputeShaderRHI - compute shader to make current
 */
void FD3D11DynamicRHI::RHISetComputeShader(FRHIComputeShader* ComputeShaderRHI)
{
	FD3D11ComputeShader* const D3D11Shader = ResourceCast(ComputeShaderRHI);
	SetCurrentComputeShader(ComputeShaderRHI);

	const bool bUnbindResources = (GUnbindResourcesBetweenDrawsInDX11 != 0);
	if (bUnbindResources)
	{
		ClearAllShaderResourcesForFrequency<SF_Compute>();
	}

	ApplyStaticUniformBuffers(D3D11Shader);
}
|
|
|
|
/**
 * Dispatches the current compute shader with explicit thread group counts.
 * Binds the shader, commits its constants (only if it needs the global constant buffer) and resource
 * tables, issues the dispatch, then unbinds the shader and calls EnableUAVOverlap().
 * @param ThreadGroupCountX - thread groups along X
 * @param ThreadGroupCountY - thread groups along Y
 * @param ThreadGroupCountZ - thread groups along Z
 */
void FD3D11DynamicRHI::RHIDispatchComputeShader(uint32 ThreadGroupCountX, uint32 ThreadGroupCountY, uint32 ThreadGroupCountZ)
{
	RHI_DISPATCH_CALL_INC();

	FD3D11ComputeShader* const CurrentShader = ResourceCast(GetCurrentComputeShader());
	StateCache.SetComputeShader(CurrentShader->Resource);

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUDispatch(FIntVector(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ));
#endif

	const bool bNeedsGlobalConstants = CurrentShader->bShaderNeedsGlobalConstantBuffer;
	if (bNeedsGlobalConstants)
	{
		CommitComputeShaderConstants();
	}
	CommitComputeResourceTables(CurrentShader);

	Direct3DDeviceIMContext->Dispatch(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ);

	// Leave no compute shader bound in the state cache after the dispatch.
	StateCache.SetComputeShader(nullptr);
	EnableUAVOverlap();
}
|
|
|
|
/**
 * Dispatches the current compute shader with thread group counts read from a GPU buffer.
 * Follows the same bind / commit / dispatch / unbind sequence as the direct dispatch; the legacy
 * GPU profiler is told about a (1,1,1) dispatch.
 * @param ArgumentBufferRHI - buffer holding the dispatch arguments; dereferenced without a null check
 * @param ArgumentOffset - offset passed to DispatchIndirect
 */
void FD3D11DynamicRHI::RHIDispatchIndirectComputeShader(FRHIBuffer* ArgumentBufferRHI, uint32 ArgumentOffset)
{
	RHI_DISPATCH_CALL_INC();

	FD3D11ComputeShader* const CurrentShader = ResourceCast(GetCurrentComputeShader());
	FD3D11Buffer* const IndirectArgs = ResourceCast(ArgumentBufferRHI);

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUDispatch(FIntVector(1, 1, 1));
#endif

	StateCache.SetComputeShader(CurrentShader->Resource);

	const bool bNeedsGlobalConstants = CurrentShader->bShaderNeedsGlobalConstantBuffer;
	if (bNeedsGlobalConstants)
	{
		CommitComputeShaderConstants();
	}
	CommitComputeResourceTables(CurrentShader);

	Direct3DDeviceIMContext->DispatchIndirect(IndirectArgs->Resource, ArgumentOffset);

	// Leave no compute shader bound in the state cache after the dispatch.
	StateCache.SetComputeShader(nullptr);
	EnableUAVOverlap();
}
|
|
|
|
void FD3D11DynamicRHI::RHISetViewport(float MinX, float MinY, float MinZ, float MaxX, float MaxY, float MaxZ)
|
|
{
|
|
// These are the maximum viewport extents for D3D11. Exceeding them leads to badness.
|
|
check(MinX <= (float)D3D11_VIEWPORT_BOUNDS_MAX);
|
|
check(MinY <= (float)D3D11_VIEWPORT_BOUNDS_MAX);
|
|
check(MaxX <= (float)D3D11_VIEWPORT_BOUNDS_MAX);
|
|
check(MaxY <= (float)D3D11_VIEWPORT_BOUNDS_MAX);
|
|
|
|
D3D11_VIEWPORT Viewport = { MinX, MinY, MaxX - MinX, MaxY - MinY, MinZ, MaxZ };
|
|
//avoid setting a 0 extent viewport, which the debug runtime doesn't like
|
|
if (Viewport.Width > 0 && Viewport.Height > 0)
|
|
{
|
|
StateCache.SetViewport(Viewport);
|
|
RHISetScissorRect(true, MinX, MinY, MaxX, MaxY);
|
|
}
|
|
}
|
|
|
|
static void ValidateScissorRect(const D3D11_VIEWPORT& Viewport, const D3D11_RECT& ScissorRect)
|
|
{
|
|
ensure(ScissorRect.left >= (LONG)Viewport.TopLeftX);
|
|
ensure(ScissorRect.top >= (LONG)Viewport.TopLeftY);
|
|
ensure(ScissorRect.right <= (LONG)Viewport.TopLeftX + (LONG)Viewport.Width);
|
|
ensure(ScissorRect.bottom <= (LONG)Viewport.TopLeftY + (LONG)Viewport.Height);
|
|
ensure(ScissorRect.left <= ScissorRect.right && ScissorRect.top <= ScissorRect.bottom);
|
|
}
|
|
|
|
void FD3D11DynamicRHI::RHISetStereoViewport(float LeftMinX, float RightMinX, float LeftMinY, float RightMinY, float MinZ, float LeftMaxX, float RightMaxX, float LeftMaxY, float RightMaxY, float MaxZ)
|
|
{
|
|
// Set up both viewports
|
|
D3D11_VIEWPORT StereoViewports[2] = {};
|
|
|
|
StereoViewports[0].TopLeftX = FMath::FloorToInt(LeftMinX);
|
|
StereoViewports[0].TopLeftY = FMath::FloorToInt(LeftMinY);
|
|
StereoViewports[0].Width = FMath::CeilToInt(LeftMaxX - LeftMinX);
|
|
StereoViewports[0].Height = FMath::CeilToInt(LeftMaxY - LeftMinY);
|
|
StereoViewports[0].MinDepth = MinZ;
|
|
StereoViewports[0].MaxDepth = MaxZ;
|
|
|
|
StereoViewports[1].TopLeftX = FMath::FloorToInt(RightMinX);
|
|
StereoViewports[1].TopLeftY = FMath::FloorToInt(RightMinY);
|
|
StereoViewports[1].Width = FMath::CeilToInt(RightMaxX - RightMinX);
|
|
StereoViewports[1].Height = FMath::CeilToInt(RightMaxY - RightMinY);
|
|
StereoViewports[1].MinDepth = MinZ;
|
|
StereoViewports[1].MaxDepth = MaxZ;
|
|
|
|
D3D11_RECT ScissorRects[2] =
|
|
{
|
|
{ StereoViewports[0].TopLeftX, StereoViewports[0].TopLeftY, StereoViewports[0].TopLeftX + StereoViewports[0].Width, StereoViewports[0].TopLeftY + StereoViewports[0].Height },
|
|
{ StereoViewports[1].TopLeftX, StereoViewports[1].TopLeftY, StereoViewports[1].TopLeftX + StereoViewports[1].Width, StereoViewports[1].TopLeftY + StereoViewports[1].Height }
|
|
};
|
|
|
|
ValidateScissorRect(StereoViewports[0], ScissorRects[0]);
|
|
ValidateScissorRect(StereoViewports[1], ScissorRects[1]);
|
|
|
|
StateCache.SetViewports(2, StereoViewports);
|
|
// Set the scissor rect appropriately.
|
|
Direct3DDeviceIMContext->RSSetScissorRects(2, ScissorRects);
|
|
}
|
|
|
|
void FD3D11DynamicRHI::RHISetScissorRect(bool bEnable,uint32 MinX,uint32 MinY,uint32 MaxX,uint32 MaxY)
|
|
{
|
|
D3D11_VIEWPORT Viewport;
|
|
StateCache.GetViewport(&Viewport);
|
|
|
|
D3D11_RECT ScissorRect;
|
|
if (bEnable)
|
|
{
|
|
ScissorRect.left = MinX;
|
|
ScissorRect.top = MinY;
|
|
ScissorRect.right = MaxX;
|
|
ScissorRect.bottom = MaxY;
|
|
}
|
|
else
|
|
{
|
|
ScissorRect.left = (LONG) Viewport.TopLeftX;
|
|
ScissorRect.top = (LONG) Viewport.TopLeftY;
|
|
ScissorRect.right = (LONG) Viewport.TopLeftX + (LONG) Viewport.Width;
|
|
ScissorRect.bottom = (LONG) Viewport.TopLeftY + (LONG) Viewport.Height;
|
|
}
|
|
|
|
ValidateScissorRect(Viewport, ScissorRect);
|
|
Direct3DDeviceIMContext->RSSetScissorRects(1, &ScissorRect);
|
|
}
|
|
|
|
/**
|
|
* Set bound shader state. This will set the vertex decl/shader, and pixel shader
|
|
* @param BoundShaderState - state resource
|
|
*/
|
|
void FD3D11DynamicRHI::RHISetBoundShaderState(FRHIBoundShaderState* BoundShaderStateRHI)
|
|
{
|
|
FD3D11BoundShaderState* BoundShaderState = ResourceCast(BoundShaderStateRHI);
|
|
|
|
StateCache.SetStreamStrides(BoundShaderState->StreamStrides);
|
|
StateCache.SetInputLayout(BoundShaderState->InputLayout);
|
|
StateCache.SetVertexShader(BoundShaderState->VertexShader);
|
|
StateCache.SetPixelShader(BoundShaderState->PixelShader);
|
|
|
|
StateCache.SetGeometryShader(BoundShaderState->GeometryShader);
|
|
|
|
// @TODO : really should only discard the constants if the shader state has actually changed.
|
|
bDiscardSharedConstants = true;
|
|
|
|
// Prevent transient bound shader states from being recreated for each use by keeping a history of the most recently used bound shader states.
|
|
// The history keeps them alive, and the bound shader state cache allows them to am be reused if needed.
|
|
BoundShaderStateHistory.Add(BoundShaderState);
|
|
|
|
// Shader changed so all resource tables are dirty
|
|
DirtyUniformBuffers[SF_Vertex] = 0xffff;
|
|
DirtyUniformBuffers[SF_Pixel] = 0xffff;
|
|
DirtyUniformBuffers[SF_Geometry] = 0xffff;
|
|
|
|
// Shader changed. All UB's must be reset by high level code to match other platforms anway.
|
|
// Clear to catch those bugs, and bugs with stale UB's causing layout mismatches.
|
|
// Release references to bound uniform buffers.
|
|
for (int32 Frequency = 0; Frequency < SF_NumStandardFrequencies; ++Frequency)
|
|
{
|
|
for (int32 BindIndex = 0; BindIndex < MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE; ++BindIndex)
|
|
{
|
|
BoundUniformBuffers[Frequency][BindIndex] = nullptr;
|
|
}
|
|
}
|
|
|
|
if (GUnbindResourcesBetweenDrawsInDX11 || GRHIGlobals.IsDebugLayerEnabled)
|
|
{
|
|
ClearAllShaderResources();
|
|
}
|
|
}
|
|
|
|
void FD3D11DynamicRHI::RHISetStaticUniformBuffers(const FUniformBufferStaticBindings& InUniformBuffers)
|
|
{
|
|
FMemory::Memzero(StaticUniformBuffers.GetData(), StaticUniformBuffers.Num() * sizeof(FRHIUniformBuffer*));
|
|
|
|
for (int32 Index = 0; Index < InUniformBuffers.GetUniformBufferCount(); ++Index)
|
|
{
|
|
StaticUniformBuffers[InUniformBuffers.GetSlot(Index)] = InUniformBuffers.GetUniformBuffer(Index);
|
|
}
|
|
}
|
|
|
|
/**
 * Stores a single static uniform buffer in its slot; other slots are left untouched.
 * @param InSlot - static slot to overwrite
 * @param InBuffer - buffer to store in that slot
 */
void FD3D11DynamicRHI::RHISetStaticUniformBuffer(FUniformBufferStaticSlot InSlot, FRHIUniformBuffer* InBuffer)
{
	StaticUniformBuffers[InSlot] = InBuffer;
}
|
|
|
|
template<EShaderFrequency ShaderFrequency>
|
|
struct FD3D11ResourceBinder
|
|
{
|
|
FD3D11DynamicRHI& RHI;
|
|
|
|
FD3D11ResourceBinder(FD3D11DynamicRHI& InRHI)
|
|
: RHI(InRHI)
|
|
{
|
|
}
|
|
|
|
void SetUAV(FRHIUnorderedAccessView* InUnorderedAccessView, uint8 Index)
|
|
{
|
|
if (ShaderFrequency == SF_Compute)
|
|
{
|
|
RHI.InternalSetUAVCS(Index, FD3D11DynamicRHI::ResourceCast(InUnorderedAccessView));
|
|
}
|
|
else if (ShaderFrequency == SF_Pixel || ShaderFrequency == SF_Vertex)
|
|
{
|
|
RHI.InternalSetUAVVSPS(Index, FD3D11DynamicRHI::ResourceCast(InUnorderedAccessView));
|
|
}
|
|
else
|
|
{
|
|
checkf(false, TEXT("UAVs are only supported in compute, pixel and vertex shaders."));
|
|
}
|
|
}
|
|
|
|
void SetSRV(FRHIShaderResourceView* InShaderResourceView, uint8 Index)
|
|
{
|
|
FD3D11ShaderResourceView* D3D11ShaderResourceView = FD3D11DynamicRHI::ResourceCast(InShaderResourceView);
|
|
FD3D11ViewableResource* D3D11ViewableResource = D3D11ShaderResourceView ? D3D11ShaderResourceView->GetBaseResource() : nullptr;
|
|
ID3D11ShaderResourceView* D3D11SRV = D3D11ShaderResourceView ? D3D11ShaderResourceView->View : nullptr;
|
|
|
|
RHI.SetShaderResourceView<ShaderFrequency>(
|
|
D3D11ViewableResource,
|
|
D3D11SRV,
|
|
Index
|
|
);
|
|
}
|
|
|
|
void SetTexture(FRHITexture* InTexture, uint8 Index)
|
|
{
|
|
FD3D11Texture* D3D11Texture = FD3D11DynamicRHI::ResourceCast(InTexture);
|
|
ID3D11ShaderResourceView* ShaderResourceView = D3D11Texture ? D3D11Texture->GetShaderResourceView() : nullptr;
|
|
|
|
RHI.SetShaderResourceView<ShaderFrequency>(
|
|
D3D11Texture,
|
|
ShaderResourceView,
|
|
Index
|
|
);
|
|
}
|
|
|
|
void SetSampler(FRHISamplerState* Sampler, uint8 Index)
|
|
{
|
|
RHI.GetStateCache().SetSamplerState<ShaderFrequency>(FD3D11DynamicRHI::ResourceCast(Sampler)->Resource, Index);
|
|
}
|
|
|
|
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
|
|
void SetResourceCollection(FRHIResourceCollection* ResourceCollection, uint32 Index)
|
|
{
|
|
checkNoEntry();
|
|
}
|
|
#endif
|
|
};
|
|
|
|
template<EShaderFrequency ShaderFrequency>
|
|
void FD3D11DynamicRHI::SetShaderParametersCommon(FD3D11ConstantBuffer* StageConstantBuffer, TConstArrayView<uint8> InParametersData, TConstArrayView<FRHIShaderParameter> InParameters, TConstArrayView<FRHIShaderParameterResource> InResourceParameters)
|
|
{
|
|
if (InParameters.Num())
|
|
{
|
|
for (const FRHIShaderParameter& Parameter : InParameters)
|
|
{
|
|
check(Parameter.BufferIndex == 0);
|
|
StageConstantBuffer->UpdateConstant(&InParametersData[Parameter.ByteOffset], Parameter.BaseIndex, Parameter.ByteSize);
|
|
}
|
|
}
|
|
|
|
FD3D11ResourceBinder<ShaderFrequency> Binder(*this);
|
|
|
|
for (const FRHIShaderParameterResource& Parameter : InResourceParameters)
|
|
{
|
|
if (Parameter.Type == FRHIShaderParameterResource::EType::UnorderedAccessView)
|
|
{
|
|
Binder.SetUAV(static_cast<FRHIUnorderedAccessView*>(Parameter.Resource), Parameter.Index);
|
|
}
|
|
}
|
|
|
|
for (const FRHIShaderParameterResource& Parameter : InResourceParameters)
|
|
{
|
|
switch (Parameter.Type)
|
|
{
|
|
case FRHIShaderParameterResource::EType::Texture:
|
|
Binder.SetTexture(static_cast<FRHITexture*>(Parameter.Resource), Parameter.Index);
|
|
break;
|
|
case FRHIShaderParameterResource::EType::ResourceView:
|
|
Binder.SetSRV(static_cast<FRHIShaderResourceView*>(Parameter.Resource), Parameter.Index);
|
|
break;
|
|
case FRHIShaderParameterResource::EType::UnorderedAccessView:
|
|
break;
|
|
case FRHIShaderParameterResource::EType::Sampler:
|
|
Binder.SetSampler(static_cast<FRHISamplerState*>(Parameter.Resource), Parameter.Index);
|
|
break;
|
|
case FRHIShaderParameterResource::EType::UniformBuffer:
|
|
BindUniformBuffer<ShaderFrequency>(Parameter.Index, static_cast<FRHIUniformBuffer*>(Parameter.Resource));
|
|
break;
|
|
default:
|
|
checkf(false, TEXT("Unhandled resource type?"));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * Sets parameters for a compute shader using the compute-stage constant buffer.
 * @param Shader - not read by this implementation
 * @param InParametersData - raw bytes the loose parameters point into
 * @param InParameters - loose parameter descriptors
 * @param InResourceParameters - resource bindings
 * @param InBindlessParameters - not read by this implementation
 */
void FD3D11DynamicRHI::RHISetShaderParameters(FRHIComputeShader* Shader, TConstArrayView<uint8> InParametersData, TConstArrayView<FRHIShaderParameter> InParameters, TConstArrayView<FRHIShaderParameterResource> InResourceParameters, TConstArrayView<FRHIShaderParameterResource> InBindlessParameters)
{
	SetShaderParametersCommon<SF_Compute>(
		CSConstantBuffer,
		InParametersData,
		InParameters,
		InResourceParameters);
}
|
|
|
|
/**
 * Sets parameters for a graphics shader, dispatching on its frequency (vertex, geometry or pixel).
 * Each supported stage is validated as currently bound (when validation is compiled in) and then
 * served from that stage's constant buffer; any other frequency asserts.
 * @param Shader - graphics shader whose frequency selects the stage
 * @param InParametersData - raw bytes the loose parameters point into
 * @param InParameters - loose parameter descriptors
 * @param InResourceParameters - resource bindings
 * @param InBindlessParameters - not read by this implementation
 */
void FD3D11DynamicRHI::RHISetShaderParameters(FRHIGraphicsShader* Shader, TConstArrayView<uint8> InParametersData, TConstArrayView<FRHIShaderParameter> InParameters, TConstArrayView<FRHIShaderParameterResource> InResourceParameters, TConstArrayView<FRHIShaderParameterResource> InBindlessParameters)
{
	switch (Shader->GetFrequency())
	{
	case SF_Vertex:
	{
		VALIDATE_BOUND_SHADER(static_cast<FRHIVertexShader*>(Shader));
		SetShaderParametersCommon<SF_Vertex>(VSConstantBuffer, InParametersData, InParameters, InResourceParameters);
		break;
	}
	case SF_Geometry:
	{
		VALIDATE_BOUND_SHADER(static_cast<FRHIGeometryShader*>(Shader));
		SetShaderParametersCommon<SF_Geometry>(GSConstantBuffer, InParametersData, InParameters, InResourceParameters);
		break;
	}
	case SF_Pixel:
	{
		VALIDATE_BOUND_SHADER(static_cast<FRHIPixelShader*>(Shader));
		SetShaderParametersCommon<SF_Pixel>(PSConstantBuffer, InParametersData, InParameters, InResourceParameters);
		break;
	}
	default:
	{
		checkf(0, TEXT("Undefined FRHIGraphicsShader Type %d!"), (int32)Shader->GetFrequency());
	}
	}
}
|
|
|
|
template<EShaderFrequency ShaderFrequency>
|
|
void FD3D11DynamicRHI::SetShaderUnbindsCommon(TConstArrayView<FRHIShaderParameterUnbind> InUnbinds)
|
|
{
|
|
FD3D11ResourceBinder<ShaderFrequency> Binder(*this);
|
|
|
|
for (const FRHIShaderParameterUnbind& Unbind : InUnbinds)
|
|
{
|
|
switch (Unbind.Type)
|
|
{
|
|
case FRHIShaderParameterUnbind::EType::ResourceView:
|
|
Binder.SetSRV(nullptr, Unbind.Index);
|
|
break;
|
|
case FRHIShaderParameterUnbind::EType::UnorderedAccessView:
|
|
Binder.SetUAV(nullptr, Unbind.Index);
|
|
break;
|
|
default:
|
|
checkf(false, TEXT("Unhandled unbind resource type?"));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Unbinds the listed slots for the compute stage.
 * @param Shader - not read by this implementation
 * @param InUnbinds - slots to unbind
 */
void FD3D11DynamicRHI::RHISetShaderUnbinds(FRHIComputeShader* Shader, TConstArrayView<FRHIShaderParameterUnbind> InUnbinds)
{
	// Compute has a single stage, so no frequency dispatch is required.
	SetShaderUnbindsCommon<SF_Compute>(InUnbinds);
}
|
|
|
|
/**
 * Unbinds the listed slots for a graphics shader, dispatching on its frequency (vertex, geometry or pixel).
 * Each supported stage is validated as currently bound when validation is compiled in; any other
 * frequency asserts.
 * @param Shader - graphics shader whose frequency selects the stage
 * @param InUnbinds - slots to unbind
 */
void FD3D11DynamicRHI::RHISetShaderUnbinds(FRHIGraphicsShader* Shader, TConstArrayView<FRHIShaderParameterUnbind> InUnbinds)
{
	switch (Shader->GetFrequency())
	{
	case SF_Vertex:
	{
		VALIDATE_BOUND_SHADER(static_cast<FRHIVertexShader*>(Shader));
		SetShaderUnbindsCommon<SF_Vertex>(InUnbinds);
		break;
	}
	case SF_Geometry:
	{
		VALIDATE_BOUND_SHADER(static_cast<FRHIGeometryShader*>(Shader));
		SetShaderUnbindsCommon<SF_Geometry>(InUnbinds);
		break;
	}
	case SF_Pixel:
	{
		VALIDATE_BOUND_SHADER(static_cast<FRHIPixelShader*>(Shader));
		SetShaderUnbindsCommon<SF_Pixel>(InUnbinds);
		break;
	}
	default:
	{
		checkf(0, TEXT("Undefined FRHIGraphicsShader Type %d!"), (int32)Shader->GetFrequency());
	}
	}
}
|
|
|
|
/**
 * Raises ensures when a requested depth/stencil write access is not covered by the access type of the
 * currently set depth-stencil view. Read-only requests are not validated.
 * @param RequestedAccess - access the caller is about to rely on
 */
void FD3D11DynamicRHI::ValidateExclusiveDepthStencilAccess(FExclusiveDepthStencil RequestedAccess) const
{
	const bool bSrcDepthWrite = RequestedAccess.IsDepthWrite();
	const bool bSrcStencilWrite = RequestedAccess.IsStencilWrite();

	// Only write requests need a matching writable target.
	if (!(bSrcDepthWrite || bSrcStencilWrite))
	{
		return;
	}

	// New Rule: You have to call SetRenderTarget[s]() before
	ensure(CurrentDepthTexture);

	const bool bDstDepthWrite = CurrentDSVAccessType.IsDepthWrite();
	const bool bDstStencilWrite = CurrentDSVAccessType.IsStencilWrite();

	// If either ensure fires, the requested access is not possible: fix the EExclusiveDepthStencil
	// passed to SetRenderTarget or request a different access.
	ensureMsgf(
		!bSrcDepthWrite || bDstDepthWrite,
		TEXT("Expected: SrcDepthWrite := false or DstDepthWrite := true. Actual: SrcDepthWrite := %s or DstDepthWrite := %s"),
		(bSrcDepthWrite) ? TEXT("true") : TEXT("false"),
		(bDstDepthWrite) ? TEXT("true") : TEXT("false"));

	ensureMsgf(
		!bSrcStencilWrite || bDstStencilWrite,
		TEXT("Expected: SrcStencilWrite := false or DstStencilWrite := true. Actual: SrcStencilWrite := %s or DstStencilWrite := %s"),
		(bSrcStencilWrite) ? TEXT("true") : TEXT("false"),
		(bDstStencilWrite) ? TEXT("true") : TEXT("false"));
}
|
|
|
|
/**
 * Validates the access type of a depth-stencil state against the current target, then applies it.
 * @param NewStateRHI - depth-stencil state to apply; dereferenced without a null check
 * @param StencilRef - stencil reference value set together with the state
 */
void FD3D11DynamicRHI::RHISetDepthStencilState(FRHIDepthStencilState* NewStateRHI,uint32 StencilRef)
{
	FD3D11DepthStencilState* const DepthStencilState = ResourceCast(NewStateRHI);

	// Validation happens before the state reaches the cache.
	ValidateExclusiveDepthStencilAccess(DepthStencilState->AccessType);
	StateCache.SetDepthStencilState(DepthStencilState->Resource, StencilRef);
}
|
|
|
|
/**
 * Updates only the stencil reference value in the state cache.
 * @param StencilRef - new stencil reference value
 */
void FD3D11DynamicRHI::RHISetStencilRef(uint32 StencilRef)
{
	StateCache.SetStencilRef(StencilRef);
}
|
|
|
|
/**
 * Applies a blend state together with a blend factor, using a sample mask of 0xffffffff.
 * @param NewStateRHI - blend state to apply; dereferenced without a null check
 * @param BlendFactor - colour whose storage is passed on as a float array
 */
void FD3D11DynamicRHI::RHISetBlendState(FRHIBlendState* NewStateRHI,const FLinearColor& BlendFactor)
{
	FD3D11BlendState* const BlendState = ResourceCast(NewStateRHI);
	const float* const BlendFactorFloats = (const float*)&BlendFactor;

	StateCache.SetBlendState(BlendState->Resource, BlendFactorFloats, 0xffffffff);
}
|
|
|
|
void FD3D11DynamicRHI::RHISetBlendFactor(const FLinearColor& BlendFactor)
|
|
{
|
|
StateCache.SetBlendFactor((const float*)&BlendFactor, 0xffffffff);
|
|
}
|
|
void FD3D11DynamicRHI::CommitRenderTargetsAndUAVs()
|
|
{
|
|
CommitRenderTargets(false);
|
|
FMemory::Memset(UAVBound, 0); //force to be rebound if any is set
|
|
UAVSChanged = 1;
|
|
CommitUAVs();
|
|
|
|
}
|
|
void FD3D11DynamicRHI::CommitRenderTargets(bool bClearUAVs)
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_D3D11RenderTargetCommits);
|
|
#if !UE_BUILD_SHIPPING && !UE_BUILD_TEST
|
|
GDX11RTRebind.Increment();
|
|
#endif
|
|
ID3D11RenderTargetView* RTArray[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
|
|
for (uint32 RenderTargetIndex = 0; RenderTargetIndex < NumSimultaneousRenderTargets; ++RenderTargetIndex)
|
|
{
|
|
RTArray[RenderTargetIndex] = CurrentRenderTargets[RenderTargetIndex];
|
|
}
|
|
|
|
|
|
Direct3DDeviceIMContext->OMSetRenderTargets(
|
|
NumSimultaneousRenderTargets,
|
|
RTArray,
|
|
CurrentDepthStencilTarget
|
|
);
|
|
|
|
if(bClearUAVs)
|
|
{
|
|
for(uint32 i = 0; i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i)
|
|
{
|
|
CurrentUAVs[i] = nullptr;
|
|
UAVBound[i] = nullptr;
|
|
}
|
|
UAVBindFirst = 0;
|
|
UAVBindCount = 0;
|
|
UAVSChanged = 0;
|
|
}
|
|
}
|
|
|
|
/**
 * Binds (or, with null, unbinds) a single compute-stage UAV directly on the immediate context.
 * @param BindIndex - UAV slot
 * @param UnorderedAccessViewRHI - view to bind, or null to clear the slot
 */
void FD3D11DynamicRHI::InternalSetUAVCS(uint32 BindIndex, FD3D11UnorderedAccessView* UnorderedAccessViewRHI)
{
	ID3D11UnorderedAccessView* NativeUAV = nullptr;
	if (UnorderedAccessViewRHI)
	{
		// Clear conflicting shader bindings of the underlying resource before it is bound as a UAV.
		ConditionalClearShaderResource(UnorderedAccessViewRHI->GetBaseResource(), true);
		NativeUAV = UnorderedAccessViewRHI->View;
	}

	// -1 converts to an all-bits-set initial count.
	uint32 InitialCount = -1;
	Direct3DDeviceIMContext->CSSetUnorderedAccessViews(BindIndex, 1, &NativeUAV, &InitialCount);
}
|
|
|
|
/**
 * Records a UAV for the graphics stages; the actual D3D call is made later by CommitUAVs().
 * A non-null view is removed from every other slot so that it is recorded only once.
 * @param BindIndex - UAV slot, must be below D3D11_PS_CS_UAV_REGISTER_COUNT
 * @param UnorderedAccessViewRHI - view to record, or null to clear the slot
 */
void FD3D11DynamicRHI::InternalSetUAVVSPS(uint32 BindIndex, FD3D11UnorderedAccessView* UnorderedAccessViewRHI)
{
	check(BindIndex < D3D11_PS_CS_UAV_REGISTER_COUNT);

	const bool bSlotChanged = (CurrentUAVs[BindIndex] != UnorderedAccessViewRHI);
	if (bSlotChanged)
	{
		CurrentUAVs[BindIndex] = UnorderedAccessViewRHI;
		UAVSChanged = 1;
	}

	if (!UnorderedAccessViewRHI)
	{
		return;
	}

	ConditionalClearShaderResource(UnorderedAccessViewRHI->GetBaseResource(), true);

	// Drop the same view from any other slot.
	for (uint32 OtherIndex = 0; OtherIndex < D3D11_PS_CS_UAV_REGISTER_COUNT; OtherIndex++)
	{
		if (OtherIndex == BindIndex)
		{
			continue;
		}
		if (CurrentUAVs[OtherIndex] == UnorderedAccessViewRHI)
		{
			CurrentUAVs[OtherIndex] = nullptr;
		}
	}
}
|
|
|
|
void FD3D11DynamicRHI::CommitUAVs()
|
|
{
|
|
if (!UAVSChanged)
|
|
{
|
|
return;
|
|
}
|
|
int32 First = -1;
|
|
int32 Count = 0;
|
|
for (int32 i = 0; i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i)
|
|
{
|
|
if (CurrentUAVs[i] != nullptr)
|
|
{
|
|
First = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (First != -1)
|
|
{
|
|
FD3D11UnorderedAccessView* RHIUAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
|
|
ID3D11UnorderedAccessView* UAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
|
|
FMemory::Memset(UAVs, 0);
|
|
|
|
for (int32 i = First; i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i)
|
|
{
|
|
if (CurrentUAVs[i] == nullptr)
|
|
break;
|
|
RHIUAVs[i] = CurrentUAVs[i].GetReference();
|
|
UAVs[i] = RHIUAVs[i]->View;
|
|
Count++;
|
|
}
|
|
|
|
if (First != UAVBindFirst || Count != UAVBindCount || 0 != FMemory::Memcmp(&UAVs[First], &UAVBound[First], sizeof(UAVs[0]) * Count))
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_D3D11RenderTargetCommitsUAV);
|
|
for (int32 i = First; i < First + Count; ++i)
|
|
{
|
|
if (UAVs[i] != UAVBound[i])
|
|
{
|
|
FD3D11UnorderedAccessView* RHIUAV = RHIUAVs[i];
|
|
ID3D11UnorderedAccessView* UAV = UAVs[i];
|
|
|
|
// Unbind any shader views of the UAV's resource.
|
|
ConditionalClearShaderResource(RHIUAV->GetBaseResource(), true);
|
|
UAVBound[i] = UAV;
|
|
}
|
|
}
|
|
static const uint32 UAVInitialCountArray[D3D11_PS_CS_UAV_REGISTER_COUNT] = { ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u };
|
|
Direct3DDeviceIMContext->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, 0, 0, First, Count, &UAVs[First], &UAVInitialCountArray[0]);
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
if (First != UAVBindFirst)
|
|
{
|
|
Direct3DDeviceIMContext->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, 0, 0, 0, 0, nullptr, nullptr);
|
|
}
|
|
}
|
|
|
|
UAVBindFirst = First;
|
|
UAVBindCount = Count;
|
|
UAVSChanged = 0;
|
|
}
|
|
|
|
struct FRTVDesc
|
|
{
|
|
uint32 Width;
|
|
uint32 Height;
|
|
DXGI_SAMPLE_DESC SampleDesc;
|
|
};
|
|
|
|
// Return an FRTVDesc structure whose
|
|
// Width and height dimensions are adjusted for the RTV's miplevel.
|
|
FRTVDesc GetRenderTargetViewDesc(ID3D11RenderTargetView* RenderTargetView)
|
|
{
|
|
D3D11_RENDER_TARGET_VIEW_DESC TargetDesc;
|
|
RenderTargetView->GetDesc(&TargetDesc);
|
|
|
|
TRefCountPtr<ID3D11Resource> BaseResource;
|
|
RenderTargetView->GetResource((ID3D11Resource**)BaseResource.GetInitReference());
|
|
uint32 MipIndex = 0;
|
|
FRTVDesc ret;
|
|
memset(&ret, 0, sizeof(ret));
|
|
|
|
switch (TargetDesc.ViewDimension)
|
|
{
|
|
case D3D11_RTV_DIMENSION_TEXTURE2D:
|
|
case D3D11_RTV_DIMENSION_TEXTURE2DMS:
|
|
case D3D11_RTV_DIMENSION_TEXTURE2DARRAY:
|
|
case D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY:
|
|
{
|
|
D3D11_TEXTURE2D_DESC Desc;
|
|
((ID3D11Texture2D*)(BaseResource.GetReference()))->GetDesc(&Desc);
|
|
ret.Width = Desc.Width;
|
|
ret.Height = Desc.Height;
|
|
ret.SampleDesc = Desc.SampleDesc;
|
|
if (TargetDesc.ViewDimension == D3D11_RTV_DIMENSION_TEXTURE2D || TargetDesc.ViewDimension == D3D11_RTV_DIMENSION_TEXTURE2DARRAY)
|
|
{
|
|
// All the non-multisampled texture types have their mip-slice in the same position.
|
|
MipIndex = TargetDesc.Texture2D.MipSlice;
|
|
}
|
|
break;
|
|
}
|
|
case D3D11_RTV_DIMENSION_TEXTURE3D:
|
|
{
|
|
D3D11_TEXTURE3D_DESC Desc;
|
|
((ID3D11Texture3D*)(BaseResource.GetReference()))->GetDesc(&Desc);
|
|
ret.Width = Desc.Width;
|
|
ret.Height = Desc.Height;
|
|
ret.SampleDesc.Count = 1;
|
|
ret.SampleDesc.Quality = 0;
|
|
MipIndex = TargetDesc.Texture3D.MipSlice;
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
// not expecting 1D targets.
|
|
checkNoEntry();
|
|
}
|
|
}
|
|
ret.Width >>= MipIndex;
|
|
ret.Height >>= MipIndex;
|
|
return ret;
|
|
}
|
|
|
|
void FD3D11DynamicRHI::SetRenderTargets(
|
|
uint32 NewNumSimultaneousRenderTargets,
|
|
const FRHIRenderTargetView* NewRenderTargetsRHI,
|
|
const FRHIDepthRenderTargetView* NewDepthStencilTargetRHI)
|
|
{
|
|
FD3D11Texture* NewDepthStencilTarget = ResourceCast(NewDepthStencilTargetRHI ? NewDepthStencilTargetRHI->Texture : nullptr);
|
|
|
|
check(NewNumSimultaneousRenderTargets <= MaxSimultaneousRenderTargets);
|
|
|
|
bool bTargetChanged = false;
|
|
|
|
// Set the appropriate depth stencil view depending on whether depth writes are enabled or not
|
|
ID3D11DepthStencilView* DepthStencilView = NULL;
|
|
if(NewDepthStencilTarget)
|
|
{
|
|
check(NewDepthStencilTargetRHI);
|
|
CurrentDSVAccessType = NewDepthStencilTargetRHI->GetDepthStencilAccess();
|
|
DepthStencilView = NewDepthStencilTarget->GetDepthStencilView(CurrentDSVAccessType);
|
|
|
|
// Unbind any shader views of the depth stencil target that are bound.
|
|
ConditionalClearShaderResource(NewDepthStencilTarget, false);
|
|
}
|
|
|
|
// Check if the depth stencil target is different from the old state.
|
|
if(CurrentDepthStencilTarget != DepthStencilView)
|
|
{
|
|
CurrentDepthTexture = NewDepthStencilTarget;
|
|
CurrentDepthStencilTarget = DepthStencilView;
|
|
bTargetChanged = true;
|
|
}
|
|
|
|
// Gather the render target views for the new render targets.
|
|
ID3D11RenderTargetView* NewRenderTargetViews[MaxSimultaneousRenderTargets];
|
|
for(uint32 RenderTargetIndex = 0;RenderTargetIndex < MaxSimultaneousRenderTargets;++RenderTargetIndex)
|
|
{
|
|
ID3D11RenderTargetView* RenderTargetView = NULL;
|
|
if(RenderTargetIndex < NewNumSimultaneousRenderTargets && NewRenderTargetsRHI[RenderTargetIndex].Texture != nullptr)
|
|
{
|
|
int32 RTMipIndex = NewRenderTargetsRHI[RenderTargetIndex].MipIndex;
|
|
int32 RTSliceIndex = NewRenderTargetsRHI[RenderTargetIndex].ArraySliceIndex;
|
|
|
|
FD3D11Texture* NewRenderTarget = ResourceCast(NewRenderTargetsRHI[RenderTargetIndex].Texture);
|
|
RenderTargetView = NewRenderTarget ? NewRenderTarget->GetRenderTargetView(RTMipIndex, RTSliceIndex) : nullptr;
|
|
|
|
ensureMsgf(RenderTargetView, TEXT("Texture being set as render target has no RTV"));
|
|
|
|
// Unbind any shader views of the render target that are bound.
|
|
ConditionalClearShaderResource(NewRenderTarget, false);
|
|
|
|
#if UE_BUILD_DEBUG
|
|
// A check to allow you to pinpoint what is using mismatching targets
|
|
// We filter our d3ddebug spew that checks for this as the d3d runtime's check is wrong.
|
|
// For filter code, see D3D11Device.cpp look for "OMSETRENDERTARGETS_INVALIDVIEW"
|
|
if(RenderTargetView && DepthStencilView)
|
|
{
|
|
FRTVDesc RTTDesc = GetRenderTargetViewDesc(RenderTargetView);
|
|
|
|
TRefCountPtr<ID3D11Texture2D> DepthTargetTexture;
|
|
DepthStencilView->GetResource((ID3D11Resource**)DepthTargetTexture.GetInitReference());
|
|
|
|
D3D11_TEXTURE2D_DESC DTTDesc;
|
|
DepthTargetTexture->GetDesc(&DTTDesc);
|
|
|
|
// enforce color target is <= depth and MSAA settings match
|
|
if(RTTDesc.Width > DTTDesc.Width || RTTDesc.Height > DTTDesc.Height ||
|
|
RTTDesc.SampleDesc.Count != DTTDesc.SampleDesc.Count ||
|
|
RTTDesc.SampleDesc.Quality != DTTDesc.SampleDesc.Quality)
|
|
{
|
|
UE_LOG(LogD3D11RHI, Fatal,TEXT("RTV(%i,%i c=%i,q=%i) and DSV(%i,%i c=%i,q=%i) have mismatching dimensions and/or MSAA levels!"),
|
|
RTTDesc.Width,RTTDesc.Height,RTTDesc.SampleDesc.Count,RTTDesc.SampleDesc.Quality,
|
|
DTTDesc.Width,DTTDesc.Height,DTTDesc.SampleDesc.Count,DTTDesc.SampleDesc.Quality);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
NewRenderTargetViews[RenderTargetIndex] = RenderTargetView;
|
|
|
|
// Check if the render target is different from the old state.
|
|
if(CurrentRenderTargets[RenderTargetIndex] != RenderTargetView)
|
|
{
|
|
CurrentRenderTargets[RenderTargetIndex] = RenderTargetView;
|
|
bTargetChanged = true;
|
|
}
|
|
}
|
|
if(NumSimultaneousRenderTargets != NewNumSimultaneousRenderTargets)
|
|
{
|
|
NumSimultaneousRenderTargets = NewNumSimultaneousRenderTargets;
|
|
uint32 Bit = 1;
|
|
uint32 Mask = 0;
|
|
for (uint32 Index = 0; Index < NumSimultaneousRenderTargets; ++Index)
|
|
{
|
|
Mask |= Bit;
|
|
Bit <<= 1;
|
|
}
|
|
CurrentRTVOverlapMask = Mask;
|
|
bTargetChanged = true;
|
|
}
|
|
|
|
// Only make the D3D call to change render targets if something actually changed.
|
|
if(bTargetChanged)
|
|
{
|
|
CommitRenderTargets(true);
|
|
CurrentUAVMask = 0;
|
|
}
|
|
|
|
// Set the viewport to the full size of render target 0.
|
|
if (NewRenderTargetViews[0])
|
|
{
|
|
// check target 0 is valid
|
|
check(0 < NewNumSimultaneousRenderTargets && NewRenderTargetsRHI[0].Texture != nullptr);
|
|
FRTVDesc RTTDesc = GetRenderTargetViewDesc(NewRenderTargetViews[0]);
|
|
RHISetViewport(0.0f, 0.0f, 0.0f, (float)RTTDesc.Width, (float)RTTDesc.Height, 1.0f);
|
|
}
|
|
else if( DepthStencilView )
|
|
{
|
|
TRefCountPtr<ID3D11Texture2D> DepthTargetTexture;
|
|
DepthStencilView->GetResource((ID3D11Resource**)DepthTargetTexture.GetInitReference());
|
|
|
|
D3D11_TEXTURE2D_DESC DTTDesc;
|
|
DepthTargetTexture->GetDesc(&DTTDesc);
|
|
RHISetViewport(0.0f, 0.0f, 0.0f, (float)DTTDesc.Width, (float)DTTDesc.Height, 1.0f);
|
|
}
|
|
}
|
|
|
|
void FD3D11DynamicRHI::SetRenderTargetsAndClear(const FRHISetRenderTargetsInfo& RenderTargetsInfo)
|
|
{
|
|
this->SetRenderTargets(RenderTargetsInfo.NumColorRenderTargets,
|
|
RenderTargetsInfo.ColorRenderTarget,
|
|
&RenderTargetsInfo.DepthStencilRenderTarget);
|
|
|
|
if (RenderTargetsInfo.bClearColor || RenderTargetsInfo.bClearStencil || RenderTargetsInfo.bClearDepth)
|
|
{
|
|
FLinearColor ClearColors[MaxSimultaneousRenderTargets];
|
|
bool bClearColorArray[MaxSimultaneousRenderTargets];
|
|
float DepthClear = 0.0;
|
|
uint32 StencilClear = 0;
|
|
|
|
if (RenderTargetsInfo.bClearColor)
|
|
{
|
|
for (int32 i = 0; i < RenderTargetsInfo.NumColorRenderTargets; ++i)
|
|
{
|
|
bClearColorArray[i] = RenderTargetsInfo.ColorRenderTarget[i].LoadAction == ERenderTargetLoadAction::EClear;
|
|
|
|
if (bClearColorArray[i] && RenderTargetsInfo.ColorRenderTarget[i].Texture != nullptr)
|
|
{
|
|
const FClearValueBinding& ClearValue = RenderTargetsInfo.ColorRenderTarget[i].Texture->GetClearBinding();
|
|
checkf(ClearValue.ColorBinding == EClearBinding::EColorBound, TEXT("Texture: %s does not have a color bound for fast clears"), *RenderTargetsInfo.ColorRenderTarget[i].Texture->GetName().GetPlainNameString());
|
|
ClearColors[i] = ClearValue.GetClearColor();
|
|
}
|
|
}
|
|
}
|
|
if (RenderTargetsInfo.bClearDepth || RenderTargetsInfo.bClearStencil)
|
|
{
|
|
const FClearValueBinding& ClearValue = RenderTargetsInfo.DepthStencilRenderTarget.Texture->GetClearBinding();
|
|
checkf(ClearValue.ColorBinding == EClearBinding::EDepthStencilBound, TEXT("Texture: %s does not have a DS value bound for fast clears"), *RenderTargetsInfo.DepthStencilRenderTarget.Texture->GetName().GetPlainNameString());
|
|
ClearValue.GetDepthStencil(DepthClear, StencilClear);
|
|
}
|
|
|
|
this->RHIClearMRTImpl(RenderTargetsInfo.bClearColor ? bClearColorArray : nullptr, RenderTargetsInfo.NumColorRenderTargets, ClearColors, RenderTargetsInfo.bClearDepth, DepthClear, RenderTargetsInfo.bClearStencil, StencilClear);
|
|
}
|
|
}
|
|
|
|
// Primitive drawing.
|
|
|
|
// Maps an RHI primitive type to the matching D3D11 primitive topology.
// Unknown types are logged at Fatal verbosity; the fallback value is a triangle list.
static D3D11_PRIMITIVE_TOPOLOGY GetD3D11PrimitiveType(EPrimitiveType PrimitiveType)
{
	D3D11_PRIMITIVE_TOPOLOGY Topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;

	switch (PrimitiveType)
	{
	case PT_TriangleList:
		break;

	case PT_TriangleStrip:
		Topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
		break;

	case PT_LineList:
		Topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
		break;

	case PT_PointList:
		Topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
		break;

	default:
		UE_LOG(LogD3D11RHI, Fatal,TEXT("Unknown primitive type: %u"),PrimitiveType);
		break;
	}

	return Topology;
}
|
|
|
|
namespace FD3DRHIUtil
|
|
{
|
|
template <EShaderFrequency ShaderFrequencyT>
|
|
inline void CommitConstants(FD3D11ConstantBuffer* InConstantBuffer, FD3D11StateCache& StateCache, bool bDiscardSharedConstants)
|
|
{
|
|
FWinD3D11ConstantBuffer* ConstantBuffer = static_cast<FWinD3D11ConstantBuffer*>(InConstantBuffer);
|
|
// Array may contain NULL entries to pad out to proper
|
|
if (ConstantBuffer && ConstantBuffer->CommitConstantsToDevice(bDiscardSharedConstants))
|
|
{
|
|
ID3D11Buffer* DeviceBuffer = ConstantBuffer->GetConstantBuffer();
|
|
StateCache.SetConstantBuffer<ShaderFrequencyT>(DeviceBuffer, GLOBAL_CONSTANT_BUFFER_INDEX);
|
|
}
|
|
}
|
|
};
|
|
|
|
void FD3D11DynamicRHI::CommitNonComputeShaderConstants()
|
|
{
|
|
FD3D11BoundShaderState* CurrentBoundShaderState = (FD3D11BoundShaderState*)BoundShaderStateHistory.GetLast();
|
|
check(CurrentBoundShaderState);
|
|
|
|
// Only set the constant buffer if this shader needs the global constant buffer bound
|
|
// Otherwise we will overwrite a different constant buffer
|
|
if (CurrentBoundShaderState->bShaderNeedsGlobalConstantBuffer[SF_Vertex])
|
|
{
|
|
// Commit and bind vertex shader constants
|
|
FD3DRHIUtil::CommitConstants<SF_Vertex>(VSConstantBuffer, StateCache, bDiscardSharedConstants);
|
|
}
|
|
|
|
if (CurrentBoundShaderState->bShaderNeedsGlobalConstantBuffer[SF_Geometry])
|
|
{
|
|
// Commit and bind geometry shader constants
|
|
FD3DRHIUtil::CommitConstants<SF_Geometry>(GSConstantBuffer, StateCache, bDiscardSharedConstants);
|
|
}
|
|
|
|
if (CurrentBoundShaderState->bShaderNeedsGlobalConstantBuffer[SF_Pixel])
|
|
{
|
|
// Commit and bind pixel shader constants
|
|
FD3DRHIUtil::CommitConstants<SF_Pixel>(PSConstantBuffer, StateCache, bDiscardSharedConstants);
|
|
}
|
|
|
|
bDiscardSharedConstants = false;
|
|
}
|
|
|
|
void FD3D11DynamicRHI::CommitComputeShaderConstants()
|
|
{
|
|
// Commit and bind compute shader constants
|
|
FD3DRHIUtil::CommitConstants<SF_Compute>(CSConstantBuffer, StateCache, bDiscardSharedConstants);
|
|
}
|
|
|
|
template <class ShaderType>
|
|
void FD3D11DynamicRHI::SetResourcesFromTables(const ShaderType* RESTRICT Shader)
|
|
{
|
|
checkSlow(Shader);
|
|
static constexpr EShaderFrequency Frequency = static_cast<EShaderFrequency>(ShaderType::StaticFrequency);
|
|
|
|
UE::RHI::Private::SetUniformBufferResourcesFromTables(
|
|
FD3D11ResourceBinder<Frequency> { *this }
|
|
, *Shader
|
|
, DirtyUniformBuffers[Frequency]
|
|
, BoundUniformBuffers[Frequency]
|
|
#if ENABLE_RHI_VALIDATION
|
|
, Tracker
|
|
#endif
|
|
);
|
|
}
|
|
|
|
void FD3D11DynamicRHI::CommitGraphicsResourceTables()
|
|
{
|
|
#if !UE_BUILD_SHIPPING && !UE_BUILD_TEST
|
|
GDX11CommitGraphicsResourceTables.Increment();
|
|
#endif
|
|
|
|
FD3D11BoundShaderState* RESTRICT CurrentBoundShaderState = (FD3D11BoundShaderState*)BoundShaderStateHistory.GetLast();
|
|
check(CurrentBoundShaderState);
|
|
|
|
bool bRTVInvalidate = false;
|
|
uint32 UAVMask = 0;
|
|
|
|
if (auto* Shader = CurrentBoundShaderState->GetPixelShader())
|
|
{
|
|
UAVMask |= Shader->UAVMask & CurrentRTVOverlapMask;
|
|
SetResourcesFromTables(Shader);
|
|
}
|
|
|
|
if (auto* Shader = CurrentBoundShaderState->GetVertexShader())
|
|
{
|
|
UAVMask |= Shader->UAVMask & CurrentRTVOverlapMask;
|
|
SetResourcesFromTables(Shader);
|
|
}
|
|
if (auto* Shader = CurrentBoundShaderState->GetGeometryShader())
|
|
{
|
|
UAVMask |= Shader->UAVMask & CurrentRTVOverlapMask;
|
|
SetResourcesFromTables(Shader);
|
|
}
|
|
|
|
// Because d3d11 binding uses the same slots for UAVs and RTVs, we have to rebind when two shaders with different sets of rendertargets are bound,
|
|
// as they can potentially be used by UAVs, which can cause them to unbind RTVs used by subsequent shaders.
|
|
if (GDX11ReduceRTVRebinds &&
|
|
(0 != ((~CurrentUAVMask) & UAVMask) && CurrentUAVMask == (CurrentUAVMask & UAVMask)))
|
|
{
|
|
//if the mask only -adds- uav binds, no RTs will be missing so we just grow the mask
|
|
CurrentUAVMask = UAVMask;
|
|
}
|
|
else if (CurrentUAVMask != UAVMask)
|
|
{
|
|
bRTVInvalidate = true;
|
|
CurrentUAVMask = UAVMask;
|
|
}
|
|
|
|
if (bRTVInvalidate)
|
|
{
|
|
CommitRenderTargets(true);
|
|
DirtyUniformBuffers[SF_Pixel] = -1;
|
|
DirtyUniformBuffers[SF_Vertex] = -1;
|
|
DirtyUniformBuffers[SF_Geometry] = -1;
|
|
}
|
|
|
|
if (UAVSChanged)
|
|
{
|
|
CommitUAVs();
|
|
}
|
|
}
|
|
|
|
// Applies the resource tables of the given compute shader, which must not be null.
void FD3D11DynamicRHI::CommitComputeResourceTables(FD3D11ComputeShader* InComputeShader)
{
	FD3D11ComputeShader* RESTRICT ComputeShader = InComputeShader;

	check(ComputeShader);

	SetResourcesFromTables(ComputeShader);
}
|
|
|
|
// Issues a non-indexed draw of NumPrimitives primitives starting at BaseVertexIndex.
// The instanced D3D call is used only when more than one instance is requested.
void FD3D11DynamicRHI::RHIDrawPrimitive(uint32 BaseVertexIndex,uint32 NumPrimitives,uint32 NumInstances)
{
	// Flush pending resource table and constant buffer state before drawing.
	CommitGraphicsResourceTables();
	CommitNonComputeShaderConstants();

	uint32 VertexCount = GetVertexCountForPrimitiveCount(NumPrimitives, PrimitiveType);

	RHI_DRAW_CALL_STATS(PrimitiveType, VertexCount, NumPrimitives, NumInstances);

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUWork(NumPrimitives * NumInstances, VertexCount * NumInstances);
#endif

	StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType));

	if (NumInstances <= 1)
	{
		Direct3DDeviceIMContext->Draw(VertexCount,BaseVertexIndex);
	}
	else
	{
		Direct3DDeviceIMContext->DrawInstanced(VertexCount,NumInstances,BaseVertexIndex,0);
	}

	// Re-enable UAV overlap in case a UAV barrier disabled it for this draw.
	EnableUAVOverlap();
}
|
|
|
|
// Issues a non-indexed instanced draw whose arguments are read from ArgumentBufferRHI
// at byte offset ArgumentOffset.
void FD3D11DynamicRHI::RHIDrawPrimitiveIndirect(FRHIBuffer* ArgumentBufferRHI, uint32 ArgumentOffset)
{
	FD3D11Buffer* const IndirectArgs = ResourceCast(ArgumentBufferRHI);

	RHI_DRAW_CALL_INC();

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUWork(0);
#endif

	// Flush pending resource table and constant buffer state before drawing.
	CommitGraphicsResourceTables();
	CommitNonComputeShaderConstants();

	StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType));
	Direct3DDeviceIMContext->DrawInstancedIndirect(IndirectArgs->Resource,ArgumentOffset);

	// Re-enable UAV overlap in case a UAV barrier disabled it for this draw.
	EnableUAVOverlap();
}
|
|
|
|
// Issues an indexed instanced draw whose arguments are the DrawArgumentsIndex-th record
// (five uint32 values each) inside ArgumentsBufferRHI. NumInstances is not used here.
void FD3D11DynamicRHI::RHIDrawIndexedIndirect(FRHIBuffer* IndexBufferRHI, FRHIBuffer* ArgumentsBufferRHI, int32 DrawArgumentsIndex, uint32 NumInstances)
{
	FD3D11Buffer* const Indices = ResourceCast(IndexBufferRHI);
	FD3D11Buffer* const IndirectArgs = ResourceCast(ArgumentsBufferRHI);

	RHI_DRAW_CALL_INC();

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUWork(1);
#endif

	// Flush pending resource table and constant buffer state before drawing.
	CommitGraphicsResourceTables();
	CommitNonComputeShaderConstants();

	// A stride of two bytes means 16-bit indices; anything else is treated as 32-bit.
	const bool bUses16BitIndices = (Indices->GetStride() == sizeof(uint16));
	const DXGI_FORMAT IndexFormat = bUses16BitIndices ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;

	TrackResourceBoundAsIB(Indices);
	StateCache.SetIndexBuffer(Indices->Resource, IndexFormat, 0);
	StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType));

	// Each argument record occupies five 32-bit values.
	Direct3DDeviceIMContext->DrawIndexedInstancedIndirect(IndirectArgs->Resource, DrawArgumentsIndex * 5 * sizeof(uint32));

	// Re-enable UAV overlap in case a UAV barrier disabled it for this draw.
	EnableUAVOverlap();
}
|
|
|
|
// Issues an indexed draw of NumPrimitives primitives using IndexBufferRHI.
//
// IndexBufferRHI   Index buffer to bind; its stride selects 16-bit vs 32-bit indices.
// BaseVertexIndex  Value added to each index before reading a vertex.
// FirstInstance    First instance to draw; a non-zero value forces the instanced D3D call.
// NumVertices      Used for draw-call statistics and GPU work registration only.
// StartIndex       First index to read from the index buffer.
// NumPrimitives    Number of primitives to draw; expected to be greater than zero.
// NumInstances     Number of instances to draw.
void FD3D11DynamicRHI::RHIDrawIndexedPrimitive(FRHIBuffer* IndexBufferRHI, int32 BaseVertexIndex, uint32 FirstInstance, uint32 NumVertices, uint32 StartIndex, uint32 NumPrimitives, uint32 NumInstances)
{
	RHI_DRAW_CALL_STATS(PrimitiveType, NumVertices, NumPrimitives, NumInstances);

	FD3D11Buffer* IndexBuffer = ResourceCast(IndexBufferRHI);

	// The caller should make sure the input is valid; this avoids hidden bugs.
	ensure(NumPrimitives > 0);

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUWork(NumPrimitives * NumInstances, NumVertices * NumInstances);
#endif

	CommitGraphicsResourceTables();
	CommitNonComputeShaderConstants();

	// Determine 16-bit vs 32-bit indices from the buffer stride.
	const DXGI_FORMAT Format = (IndexBuffer->GetStride() == sizeof(uint16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT);

	const uint32 IndexCount = GetVertexCountForPrimitiveCount(NumPrimitives,PrimitiveType);

	// Verify that we are not trying to read outside the index buffer range.
	// The test is a division-free version of: StartIndex + IndexCount <= IndexBuffer->GetSize() / IndexBuffer->GetStride().
	// It is evaluated in 64 bits so that large start/count values cannot wrap around and pass the check by accident.
	checkf((static_cast<uint64>(StartIndex) + static_cast<uint64>(IndexCount)) * static_cast<uint64>(IndexBuffer->GetStride()) <= static_cast<uint64>(IndexBuffer->GetSize()),
		TEXT("Start %u, Count %u, Type %u, Buffer Size %u, Buffer stride %u"), StartIndex, IndexCount, PrimitiveType, IndexBuffer->GetSize(), IndexBuffer->GetStride());

	TrackResourceBoundAsIB(IndexBuffer);
	StateCache.SetIndexBuffer(IndexBuffer->Resource, Format, 0);
	StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType));

	if (NumInstances > 1 || FirstInstance != 0)
	{
		const uint64 TotalIndexCount = (uint64)NumInstances * (uint64)IndexCount + (uint64)StartIndex;

		// Every %llu argument is widened to 64 bits explicitly: passing a 32-bit value for a
		// %llu specifier through a variadic call is a size mismatch and prints garbage.
		checkf(TotalIndexCount <= (uint64)0xFFFFFFFF, TEXT("Instanced Index Draw exceeds maximum d3d11 limit: Total: %llu, NumInstances: %llu, IndexCount: %llu, StartIndex: %llu, FirstInstance: %llu"),
			TotalIndexCount, static_cast<uint64>(NumInstances), static_cast<uint64>(IndexCount), static_cast<uint64>(StartIndex), static_cast<uint64>(FirstInstance));

		Direct3DDeviceIMContext->DrawIndexedInstanced(IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance);
	}
	else
	{
		Direct3DDeviceIMContext->DrawIndexed(IndexCount,StartIndex,BaseVertexIndex);
	}

	// Re-enable UAV overlap in case a UAV barrier disabled it for this draw.
	EnableUAVOverlap();
}
|
|
|
|
// Issues an indexed instanced draw whose arguments are read from ArgumentBufferRHI
// at byte offset ArgumentOffset, using IndexBufferRHI as the index buffer.
void FD3D11DynamicRHI::RHIDrawIndexedPrimitiveIndirect(FRHIBuffer* IndexBufferRHI, FRHIBuffer* ArgumentBufferRHI, uint32 ArgumentOffset)
{
	FD3D11Buffer* IndexBuffer = ResourceCast(IndexBufferRHI);
	FD3D11Buffer* ArgumentBuffer = ResourceCast(ArgumentBufferRHI);

	RHI_DRAW_CALL_INC();

#if (RHI_NEW_GPU_PROFILER == 0)
	RegisterGPUWork(0);
#endif

	CommitGraphicsResourceTables();
	CommitNonComputeShaderConstants();

	// Set the index buffer; a stride of two bytes means 16-bit indices, anything else 32-bit.
	const DXGI_FORMAT Format = (IndexBuffer->GetStride() == sizeof(uint16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT);
	TrackResourceBoundAsIB(IndexBuffer);
	StateCache.SetIndexBuffer(IndexBuffer->Resource, Format, 0);
	StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType));
	Direct3DDeviceIMContext->DrawIndexedInstancedIndirect(ArgumentBuffer->Resource,ArgumentOffset);

	// Re-enable UAV overlap in case a UAV barrier disabled it for this draw.
	EnableUAVOverlap();
}
|
|
|
|
void FD3D11DynamicRHI::RHIClearMRTImpl(const bool* bClearColorArray, int32 NumClearColors, const FLinearColor* ClearColorArray, bool bClearDepth, float Depth, bool bClearStencil, uint32 Stencil)
|
|
{
|
|
FD3D11BoundRenderTargets BoundRenderTargets(Direct3DDeviceIMContext);
|
|
|
|
// Must specify enough clear colors for all active RTs
|
|
check(!bClearColorArray || NumClearColors >= BoundRenderTargets.GetNumActiveTargets());
|
|
|
|
// If we're clearing depth or stencil and we have a readonly depth/stencil view bound, we need to use a writable depth/stencil view
|
|
if (CurrentDepthTexture)
|
|
{
|
|
FExclusiveDepthStencil RequestedAccess;
|
|
|
|
RequestedAccess.SetDepthStencilWrite(bClearDepth, bClearStencil);
|
|
|
|
ensure(RequestedAccess.IsValid(CurrentDSVAccessType));
|
|
}
|
|
|
|
ID3D11DepthStencilView* DepthStencilView = BoundRenderTargets.GetDepthStencilView();
|
|
|
|
if (bClearColorArray && BoundRenderTargets.GetNumActiveTargets() > 0)
|
|
{
|
|
for (int32 TargetIndex = 0; TargetIndex < BoundRenderTargets.GetNumActiveTargets(); TargetIndex++)
|
|
{
|
|
if (bClearColorArray[TargetIndex])
|
|
{
|
|
ID3D11RenderTargetView* RenderTargetView = BoundRenderTargets.GetRenderTargetView(TargetIndex);
|
|
if (RenderTargetView != nullptr)
|
|
{
|
|
Direct3DDeviceIMContext->ClearRenderTargetView(RenderTargetView, (float*)&ClearColorArray[TargetIndex]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((bClearDepth || bClearStencil) && DepthStencilView)
|
|
{
|
|
uint32 ClearFlags = 0;
|
|
if (bClearDepth)
|
|
{
|
|
ClearFlags |= D3D11_CLEAR_DEPTH;
|
|
}
|
|
if (bClearStencil)
|
|
{
|
|
ClearFlags |= D3D11_CLEAR_STENCIL;
|
|
}
|
|
Direct3DDeviceIMContext->ClearDepthStencilView(DepthStencilView,ClearFlags,Depth,Stencil);
|
|
}
|
|
|
|
#if (RHI_NEW_GPU_PROFILER == 0)
|
|
RegisterGPUWork(0);
|
|
#endif
|
|
}
|
|
|
|
// Blocks the CPU until the GPU catches up and goes idle.
|
|
void FD3D11DynamicRHI::RHIBlockUntilGPUIdle()
|
|
{
|
|
D3D11_QUERY_DESC Desc = {};
|
|
Desc.Query = D3D11_QUERY_EVENT;
|
|
|
|
TRefCountPtr<ID3D11Query> Query;
|
|
VERIFYD3D11RESULT_EX(Direct3DDevice->CreateQuery(&Desc, Query.GetInitReference()), Direct3DDevice);
|
|
|
|
Direct3DDeviceIMContext->End(Query.GetReference());
|
|
Direct3DDeviceIMContext->Flush();
|
|
|
|
for(;;)
|
|
{
|
|
BOOL EventComplete = false;
|
|
Direct3DDeviceIMContext->GetData(Query.GetReference(), &EventComplete, sizeof(EventComplete), 0);
|
|
if (EventComplete)
|
|
{
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
FPlatformProcess::Sleep(0.005f);
|
|
}
|
|
}
|
|
}
|
|
|
|
// NVIDIA Depth Bounds Test interface
|
|
void FD3D11DynamicRHI::EnableDepthBoundsTest(bool bEnable,float MinDepth,float MaxDepth)
|
|
{
|
|
#if PLATFORM_DESKTOP
|
|
if(MinDepth > MaxDepth)
|
|
{
|
|
UE_LOG(LogD3D11RHI, Error,TEXT("RHIEnableDepthBoundsTest(%i,%f, %f) MinDepth > MaxDepth, cannot set DBT."),bEnable,MinDepth,MaxDepth);
|
|
return;
|
|
}
|
|
|
|
if( MinDepth < 0.f || MaxDepth > 1.f)
|
|
{
|
|
UE_LOG(LogD3D11RHI, Verbose,TEXT("RHIEnableDepthBoundsTest(%i,%f, %f) depths out of range, will clamp."),bEnable,MinDepth,MaxDepth);
|
|
}
|
|
|
|
MinDepth = FMath::Clamp(MinDepth, 0.0f, 1.0f);
|
|
MaxDepth = FMath::Clamp(MaxDepth, 0.0f, 1.0f);
|
|
|
|
#if WITH_NVAPI
|
|
if (IsRHIDeviceNVIDIA())
|
|
{
|
|
auto Result = NvAPI_D3D11_SetDepthBoundsTest( Direct3DDevice, bEnable, MinDepth, MaxDepth );
|
|
if (Result != NVAPI_OK)
|
|
{
|
|
static bool bOnce = false;
|
|
if (!bOnce)
|
|
{
|
|
bOnce = true;
|
|
if (bRenderDoc)
|
|
{
|
|
if (FApp::IsUnattended())
|
|
{
|
|
UE_LOG(LogD3D11RHI, Display, TEXT("NvAPI is not available under RenderDoc"));
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogD3D11RHI, Warning, TEXT("NvAPI is not available under RenderDoc"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogD3D11RHI, Error, TEXT("NvAPI_D3D11_SetDepthBoundsTest(%i,%f, %f) returned error code %i. **********PLEASE UPDATE YOUR VIDEO DRIVERS*********"), bEnable, MinDepth, MaxDepth, (unsigned int)Result);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
#if WITH_AMD_AGS
|
|
if (IsRHIDeviceAMD())
|
|
{
|
|
auto Result = agsDriverExtensionsDX11_SetDepthBounds(AmdAgsContext, Direct3DDeviceIMContext, bEnable, MinDepth, MaxDepth);
|
|
if(Result != AGS_SUCCESS)
|
|
{
|
|
static bool bOnce = false;
|
|
if (!bOnce)
|
|
{
|
|
bOnce = true;
|
|
if (bRenderDoc)
|
|
{
|
|
if (FApp::IsUnattended())
|
|
{
|
|
UE_LOG(LogD3D11RHI, Display, TEXT("AGS is not available under RenderDoc"));
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogD3D11RHI, Warning, TEXT("AGS is not available under RenderDoc"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogD3D11RHI, Error, TEXT("agsDriverExtensionsDX11_SetDepthBounds(%i,%f, %f) returned error code %i. **********PLEASE UPDATE YOUR VIDEO DRIVERS*********"), bEnable, MinDepth, MaxDepth, (unsigned int)Result);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
StateCache.bDepthBoundsEnabled = bEnable;
|
|
StateCache.DepthBoundsMin = MinDepth;
|
|
StateCache.DepthBoundsMax = MaxDepth;
|
|
}
|
|
|
|
// The dynamic RHI object itself serves as the default command context.
IRHICommandContext* FD3D11DynamicRHI::RHIGetDefaultContext()
{
	IRHICommandContext* const DefaultContext = this;
	return DefaultContext;
}
|
|
|
|
// Not supported on D3D11: logs a Fatal error whenever it is called.
// The null return only exists to satisfy the signature.
IRHIComputeContext* FD3D11DynamicRHI::RHIGetCommandContext(ERHIPipeline Pipeline, FRHIGPUMask GPUMask)
{
	UE_LOG(LogRHI, Fatal, TEXT("FD3D11DynamicRHI::RHIGetCommandContext should never be called. D3D11 RHI does not implement parallel command list execution."));

	IRHIComputeContext* const NoContext = nullptr;
	return NoContext;
}
|
|
|
|
struct FD3D11PlatformCommandList : public IRHIPlatformCommandList
|
|
{
|
|
virtual ~FD3D11PlatformCommandList() = default;
|
|
};
|
|
|
|
// Finalizes the given contexts (each must be this RHI) and resets the cached
// uniform buffer binding state for every standard shader frequency.
void FD3D11DynamicRHI::RHIFinalizeContext(FRHIFinalizeContextArgs&& Args, TRHIPipelineArray<IRHIPlatformCommandList*>& Output)
{
#if RHI_NEW_GPU_PROFILER
	FlushProfilerStats();
#endif

	// Parallel execution is not implemented, so every entry is expected to be the default context.
	for (IRHIComputeContext* Context : Args.Contexts)
	{
		check(Context == this);

#if RHI_NEW_GPU_PROFILER && WITH_RHI_BREADCRUMBS
		// We need platform command lists to contain the breadcrumb allocators
		Output[Context->GetPipeline()] = new FD3D11PlatformCommandList;
#endif
	}

	// Reset some context state: clear the dirty masks and forget all bound uniform buffers.
	for (int32 FrequencyIndex = 0; FrequencyIndex < SF_NumStandardFrequencies; ++FrequencyIndex)
	{
		DirtyUniformBuffers[FrequencyIndex] = 0;

		for (int32 SlotIndex = 0; SlotIndex < MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE; ++SlotIndex)
		{
			BoundUniformBuffers[FrequencyIndex][SlotIndex] = nullptr;
		}
	}
}
|
|
|
|
// Polls outstanding queries and fences and, when breadcrumbs are enabled with the new GPU
// profiler, hands each command list's breadcrumb allocators to the profiler before deleting it.
void FD3D11DynamicRHI::RHISubmitCommandLists(FRHISubmitCommandListsArgs&& Args)
{
	// Attempt to readback completed queries and fences
	PollQueryResults();
	FD3D11GPUFence::PollFences();

#if RHI_NEW_GPU_PROFILER && WITH_RHI_BREADCRUMBS
	for (IRHIPlatformCommandList* PlatformCmdList : Args.CommandLists)
	{
		FD3D11PlatformCommandList* const OwnedCmdList = static_cast<FD3D11PlatformCommandList*>(PlatformCmdList);

		// Keep the breadcrumb allocators referenced from the profiler frame so they
		// stay alive until that frame's data has been processed.
		for (auto const& BreadcrumbAllocator : OwnedCmdList->BreadcrumbAllocators)
		{
			Profiler.Current.BreadcrumbAllocators.AddUnique(&BreadcrumbAllocator.Get());
		}

		// The command list was allocated with new in RHIFinalizeContext.
		delete OwnedCmdList;
	}
#endif
}
|
|
|
|
void FD3D11DynamicRHI::EnableUAVOverlap()
|
|
{
|
|
// This function is called after every draw or dispatch to turn overlap back on if it was turned off by a UAV barrier. This way, the next
|
|
// draw/dispatch after the barrier executes after everything before it has completed and the caches were flushed, and any subsequent
|
|
// submissions are allowed to overlap until the next UAV barrier.
|
|
|
|
if (bUAVOverlapEnabled || !CVarAllowUAVFlushExt.GetValueOnRenderThread())
|
|
{
|
|
return;
|
|
}
|
|
|
|
bUAVOverlapEnabled = true;
|
|
|
|
if (IsRHIDeviceNVIDIA())
|
|
{
|
|
#if WITH_NVAPI
|
|
NvAPI_D3D11_BeginUAVOverlap(Direct3DDevice);
|
|
#endif
|
|
}
|
|
else if (IsRHIDeviceAMD())
|
|
{
|
|
#if WITH_AMD_AGS
|
|
agsDriverExtensionsDX11_BeginUAVOverlap(AmdAgsContext, Direct3DDeviceIMContext);
|
|
#endif
|
|
}
|
|
else if (IsRHIDeviceIntel())
|
|
{
|
|
#if INTEL_EXTENSIONS
|
|
if (bIntelSupportsUAVOverlap)
|
|
{
|
|
INTC_D3D11_BeginUAVOverlap(IntelExtensionContext);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void FD3D11DynamicRHI::DisableUAVOverlap()
|
|
{
|
|
// This is called when a transition to UAVCompute or UAVGraphics is executed. It disables overlapping for the next draw/dispatch, so we get the same
|
|
// behavior as with a UAV barrier in APIs with explicit barriers. Overlapping will be turned back on automatically after the draw/dispatch.
|
|
if (!bUAVOverlapEnabled)
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (IsRHIDeviceNVIDIA())
|
|
{
|
|
#if WITH_NVAPI
|
|
NvAPI_D3D11_EndUAVOverlap(Direct3DDevice);
|
|
#endif
|
|
}
|
|
else if (IsRHIDeviceAMD())
|
|
{
|
|
#if WITH_AMD_AGS
|
|
agsDriverExtensionsDX11_EndUAVOverlap(AmdAgsContext, Direct3DDeviceIMContext);
|
|
#endif
|
|
}
|
|
else if (IsRHIDeviceIntel())
|
|
{
|
|
#if INTEL_EXTENSIONS
|
|
if (bIntelSupportsUAVOverlap)
|
|
{
|
|
INTC_D3D11_EndUAVOverlap(IntelExtensionContext);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
bUAVOverlapEnabled = false;
|
|
}
|
|
|
|
// Constructs the D3D11 private data of a transition in place and records whether any of its
// resource transitions ends in a UAV access state.
void FD3D11DynamicRHI::RHICreateTransition(FRHITransition* Transition, const FRHITransitionCreateInfo& CreateInfo)
{
	checkf(FMath::IsPowerOfTwo(uint32(CreateInfo.SrcPipelines)) && FMath::IsPowerOfTwo(uint32(CreateInfo.DstPipelines)), TEXT("Support for multi-pipe resources is not yet implemented."));

	// Any transition to UAVCompute or UAVGraphics means the current overlap group has to be broken up.
	bool bHasUAVDestination = false;
	for (const FRHITransitionInfo& TransitionInfo : CreateInfo.TransitionInfos)
	{
		const bool bTargetsUAV = TransitionInfo.Resource && EnumHasAnyFlags(TransitionInfo.AccessAfter, ERHIAccess::UAVMask);
		if (bTargetsUAV)
		{
			bHasUAVDestination = true;
			break;
		}
	}

	// Placement-construct the private data inside the storage owned by the transition.
	FD3D11TransitionData* PrivateData = new (Transition->GetPrivateData<FD3D11TransitionData>()) FD3D11TransitionData;
	PrivateData->bUAVBarrier = bHasUAVDestination;
}
|
|
|
|
// Destroys the D3D11 private data of a transition. Only the destructor is run;
// the storage itself belongs to the transition object.
void FD3D11DynamicRHI::RHIReleaseTransition(FRHITransition* Transition)
{
	FD3D11TransitionData* const PrivateData = Transition->GetPrivateData<FD3D11TransitionData>();
	PrivateData->~FD3D11TransitionData();
}
|
|
|
|
// Intentionally empty: this implementation does no work at the start of a transition.
void FD3D11DynamicRHI::RHIBeginTransitions(TArrayView<const FRHITransition*> Transitions)
{
	// Nothing to do.
}
|
|
|
|
// Ends a batch of transitions. The only effect in D3D11 is to break up the current UAV
// overlap group when at least one of the transitions carries a UAV barrier.
void FD3D11DynamicRHI::RHIEndTransitions(TArrayView<const FRHITransition*> Transitions)
{
	// If overlap is already off, there is nothing to break up.
	if (bUAVOverlapEnabled)
	{
		for (const FRHITransition* PendingTransition : Transitions)
		{
			const FD3D11TransitionData* PrivateData = PendingTransition->GetPrivateData<FD3D11TransitionData>();
			if (!PrivateData->bUAVBarrier)
			{
				continue;
			}

			// One UAV barrier is enough; overlap only needs to be disabled once.
			DisableUAVOverlap();
			break;
		}
	}
}
|
|
|
|
// Intentionally empty: overlap is always on, and the current overlap group is broken up
// when a transition to UAVCompute or UAVGraphics is seen.
void FD3D11DynamicRHI::RHIBeginUAVOverlap()
{
	// Nothing to do.
}
|
|
|
|
// Intentionally empty: overlap is always on, and the current overlap group is broken up
// when a transition to UAVCompute or UAVGraphics is seen, so there is nothing to end here.
void FD3D11DynamicRHI::RHIEndUAVOverlap()
{
	// Nothing to do.
}
|
|
|
|
//*********************** StagingBuffer Implementation ***********************//
|
|
|
|
// Creates an empty staging buffer object. No D3D buffer is allocated here;
// RHICopyToStagingBuffer allocates one when it is first needed.
FStagingBufferRHIRef FD3D11DynamicRHI::RHICreateStagingBuffer()
{
	FStagingBufferRHIRef NewStagingBuffer = new FD3D11StagingBuffer();
	return NewStagingBuffer;
}
|
|
|
|
// Releases the staging D3D buffer, if one was ever allocated.
FD3D11StagingBuffer::~FD3D11StagingBuffer()
{
	// SafeRelease on an empty reference is a no-op, so no separate null test is needed.
	StagedRead.SafeRelease();
}
|
|
|
|
// Marks the buffer as locked and maps the staging buffer for CPU reads.
// Returns a pointer Offset bytes into the mapped memory, or null when no staging buffer has
// been allocated yet. NumBytes is not used by this implementation.
// The buffer counts as locked even when null is returned, so Unlock must still be called.
void* FD3D11StagingBuffer::Lock(uint32 Offset, uint32 NumBytes)
{
	check(!bIsLocked);
	bIsLocked = true;

	if (!StagedRead)
	{
		return nullptr;
	}

	// Map the staging buffer's memory for reading.
	D3D11_MAPPED_SUBRESOURCE MappedSubresource;
	VERIFYD3D11RESULT(Context->Map(StagedRead, 0, D3D11_MAP_READ, 0, &MappedSubresource));

	uint8* const MappedBytes = (uint8*)MappedSubresource.pData;
	return (void*)(MappedBytes + Offset);
}
|
|
|
|
void FD3D11StagingBuffer::Unlock()
|
|
{
|
|
check(bIsLocked);
|
|
bIsLocked = false;
|
|
if (StagedRead)
|
|
{
|
|
Context->Unmap(StagedRead, 0);
|
|
}
|
|
}
|
|
|
|
// Copies NumBytes bytes starting at Offset from SourceBufferRHI into the staging buffer.
// The staging D3D buffer is (re)allocated when it does not exist yet or is smaller than NumBytes.
// Does nothing when either buffer is null.
void FD3D11DynamicRHI::RHICopyToStagingBuffer(FRHIBuffer* SourceBufferRHI, FRHIStagingBuffer* StagingBufferRHI, uint32 Offset, uint32 NumBytes)
{
	FD3D11Buffer* SourceBuffer = ResourceCast(SourceBufferRHI);
	FD3D11StagingBuffer* StagingBuffer = ResourceCast(StagingBufferRHI);
	if (!StagingBuffer)
	{
		return;
	}

	ensureMsgf(!StagingBuffer->bIsLocked, TEXT("Attempting to Copy to a locked staging buffer. This may have undefined behavior"));

	if (!SourceBuffer)
	{
		return;
	}

	if (!StagingBuffer->StagedRead || StagingBuffer->ShadowBufferSize < NumBytes)
	{
		// Free previously allocated buffer.
		if (StagingBuffer->StagedRead)
		{
			StagingBuffer->StagedRead.SafeRelease();
		}

		// Allocate a new one with enough space.
		// @todo-mattc I feel like we should allocate more than NumBytes to handle small reads without blowing tons of space. Need to pool this.
		D3D11_BUFFER_DESC StagedReadDesc;
		ZeroMemory(&StagedReadDesc, sizeof(D3D11_BUFFER_DESC));
		StagedReadDesc.ByteWidth = NumBytes;
		StagedReadDesc.Usage = D3D11_USAGE_STAGING;
		StagedReadDesc.BindFlags = 0;
		StagedReadDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
		StagedReadDesc.MiscFlags = 0;
		VERIFYD3D11RESULT_EX(Direct3DDevice->CreateBuffer(&StagedReadDesc, NULL, StagingBuffer->StagedRead.GetInitReference()), Direct3DDevice);

		// Remember the allocated size and the context that Lock/Unlock will map with.
		StagingBuffer->ShadowBufferSize = NumBytes;
		StagingBuffer->Context = Direct3DDeviceIMContext;
	}

	// Copy the requested byte range of the source buffer to the start of the staging buffer.
	D3D11_BOX SourceBox;
	SourceBox.left = Offset;
	SourceBox.right = Offset + NumBytes;
	SourceBox.top = SourceBox.front = 0;
	SourceBox.bottom = SourceBox.back = 1;
	Direct3DDeviceIMContext->CopySubresourceRegion(StagingBuffer->StagedRead, 0, 0, 0, 0, SourceBuffer->Resource, 0, &SourceBox);
}
|
|
|
|
// Locks the given staging buffer (which must not be null) and returns the pointer produced by
// its Lock call. The Fence argument is not used by this implementation.
void* FD3D11DynamicRHI::RHILockStagingBuffer(FRHIStagingBuffer* StagingBufferRHI, FRHIGPUFence* Fence, uint32 Offset, uint32 SizeRHI)
{
	check(StagingBufferRHI);

	FD3D11StagingBuffer* const D3DStagingBuffer = ResourceCast(StagingBufferRHI);
	void* const MappedData = D3DStagingBuffer->Lock(Offset, SizeRHI);
	return MappedData;
}
|
|
|
|
// Unlocks a staging buffer previously locked through RHILockStagingBuffer.
void FD3D11DynamicRHI::RHIUnlockStagingBuffer(FRHIStagingBuffer* StagingBufferRHI)
{
	FD3D11StagingBuffer* const D3DStagingBuffer = ResourceCast(StagingBufferRHI);
	D3DStagingBuffer->Unlock();
}
|
|
|
|
|
|
// Queue of outstanding syncs (a graph event paired with a D3D11 event query).
// Entries are enqueued by WriteGPUFence_TopOfPipe and popped by PollFencesUntil once their query has completed.
TQueue<FD3D11GPUFence::FD3D11Sync, EQueueMode::SingleThreaded> FD3D11GPUFence::ActiveSyncs;
|
|
|
|
// Constructs a fence with the given name; no event exists until the fence is written.
FD3D11GPUFence::FD3D11GPUFence(FName InName)
	: FRHIGPUFence(InName)
{}
|
|
|
|
void FD3D11GPUFence::Clear()
|
|
{
|
|
Event = nullptr;
|
|
}
|
|
|
|
bool FD3D11GPUFence::Poll() const
|
|
{
|
|
return Event && Event->IsComplete();
|
|
}
|
|
|
|
// Blocks the calling thread until the fence's event has completed.
// Returns immediately when the fence has no event or the event is already complete.
void FD3D11GPUFence::Wait(FRHICommandListImmediate& RHICmdList, FRHIGPUMask GPUMask) const
{
	if (!Event || Event->IsComplete())
	{
		return;
	}

	// An earlier RHI command polling the fences might signal this one, but that is not
	// guaranteed: the GPU may finish after the RHI thread has gone idle, and then the fence
	// would never be seen completing.
	//
	// So a command is enqueued that blocks on the fence if it still has not signalled by the
	// time the RHI thread has finished all prior commands.
	RHICmdList.EnqueueLambda([Event = Event](FRHICommandListImmediate&)
	{
		if (Event->IsComplete())
		{
			return;
		}

		PollFencesUntil(Event);
	});
	RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);

	Event->Wait();
}
|
|
|
|
// Walks the queue of active syncs from the front, signalling and removing each one whose query
// has completed. Syncs other than Target are only polled, and polling stops at the first one
// that is not done. When the front sync is Target, its query is re-read until it completes and
// the function returns after signalling it. A non-null Target that is never found in the queue
// triggers the final checkf.
void FD3D11GPUFence::PollFencesUntil(FGraphEvent* Target)
{
	FD3D11DynamicRHI& RHI = FD3D11DynamicRHI::Get();

	while (FD3D11Sync* Sync = ActiveSyncs.Peek())
	{
		const bool bIsTarget = Sync->Event == Target;

		// The specific fence being waited on is queried without the do-not-flush flag;
		// every other fence is just polled.
		const UINT GetDataFlags = bIsTarget ? 0 : D3D11_ASYNC_GETDATA_DONOTFLUSH;

		HRESULT Result = S_OK;
		bool bStillPending = false;
		do
		{
			BOOL Value = 0;
			Result = RHI.GetDeviceContext()->GetData(Sync->Query, &Value, sizeof(Value), GetDataFlags);

			// Not done when the data is not available yet, or is available but reads false.
			bStillPending = (Result == S_FALSE) || (Result == S_OK && !Value);
		}
		// When waiting for a specific fence, spin until it passes.
		while (bStillPending && bIsTarget);

		if (bStillPending)
		{
			// A fence that is merely being polled is not done; later ones are not checked.
			return;
		}

		VERIFYD3D11RESULT(Result);

		// The fence has completed. Signal the graph event and remove the node.
		Sync->Event->DispatchSubsequents();
		ActiveSyncs.Pop();

		if (bIsTarget)
		{
			// We found the fence we wanted. Stop polling.
			return;
		}
	}

	checkf(!Target, TEXT("Attempt to poll for a specific fence, but it was not found in the queue."));
}
|
|
|
|
// Creates a fresh graph event for this fence and enqueues a command that inserts a D3D11 event
// query on the device context and registers the (event, query) pair for later polling.
void FD3D11GPUFence::WriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList)
{
	Event = FGraphEvent::CreateGraphEvent();

	// The lambda takes its own reference to the event; it is mutable so the captured
	// reference can be moved into the sync entry.
	auto InsertEventQuery = [Event = Event](FRHICommandListBase&) mutable
	{
		FD3D11DynamicRHI& RHI = FD3D11DynamicRHI::Get();

		// Insert an event query on the device context
		D3D11_QUERY_DESC Desc {};
		Desc.Query = D3D11_QUERY_EVENT;

		TRefCountPtr<ID3D11Query> Query;
		VERIFYD3D11RESULT(RHI.GetDevice()->CreateQuery(&Desc, Query.GetInitReference()));
		RHI.GetDeviceContext()->End(Query);

		// Store the query in the list of queries to poll
		ActiveSyncs.Enqueue(FD3D11Sync(MoveTemp(Event), MoveTemp(Query)));
	};

	RHICmdList.EnqueueLambda(MoveTemp(InsertEventQuery));
}
|
|
|
|
// Forwards the fence write to the D3D11 fence object behind FenceRHI.
void FD3D11DynamicRHI::RHIWriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIGPUFence* FenceRHI)
{
	auto* const D3DFence = ResourceCast(FenceRHI);
	D3DFence->WriteGPUFence_TopOfPipe(RHICmdList);
}
|
|
|
|
// Not expected to be reached on D3D11: the body only asserts via checkNoEntry.
// Fence writes go through RHIWriteGPUFence_TopOfPipe.
void FD3D11DynamicRHI::RHIWriteGPUFence(FRHIGPUFence* FenceRHI)
{
	// Should never be called
	checkNoEntry();
}
|
|
|
|
// Creates a new D3D11 GPU fence with the given name.
FGPUFenceRHIRef FD3D11DynamicRHI::RHICreateGPUFence(const FName& Name)
{
	FGPUFenceRHIRef NewFence = new FD3D11GPUFence(Name);
	return NewFence;
}
|