// Copyright Epic Games, Inc. All Rights Reserved. /*============================================================================= D3D11Commands.cpp: D3D RHI commands implementation. =============================================================================*/ #include "D3D11RHIPrivate.h" #include "Windows/D3D11RHIPrivateUtil.h" #include "StaticBoundShaderState.h" #include "GlobalShader.h" #include "OneColorShader.h" #include "RHICommandList.h" #include "RHIStaticStates.h" #include "ShaderParameterUtils.h" #include "SceneUtils.h" #include "EngineGlobals.h" #include "RHIShaderParametersShared.h" #include "RHIUniformBufferUtilities.h" // For Depth Bounds Test interface #include "Windows/AllowWindowsPlatformTypes.h" #if WITH_NVAPI #include "nvapi.h" #endif #if WITH_AMD_AGS #include "amd_ags.h" #endif #include "Windows/HideWindowsPlatformTypes.h" #define DECLARE_ISBOUNDSHADER(ShaderType) inline void ValidateBoundShader(FD3D11StateCache& InStateCache, FRHI##ShaderType* ShaderType##RHI) \ { \ ID3D11##ShaderType* CachedShader; \ InStateCache.Get##ShaderType(&CachedShader); \ FD3D11##ShaderType* ShaderType = FD3D11DynamicRHI::ResourceCast(ShaderType##RHI); \ ensureMsgf(CachedShader == ShaderType->Resource, TEXT("Parameters are being set for a %s which is not currently bound"), TEXT( #ShaderType )); \ if (CachedShader) { CachedShader->Release(); } \ } DECLARE_ISBOUNDSHADER(VertexShader) DECLARE_ISBOUNDSHADER(PixelShader) DECLARE_ISBOUNDSHADER(GeometryShader) DECLARE_ISBOUNDSHADER(ComputeShader) #if DO_GUARD_SLOW #define VALIDATE_BOUND_SHADER(s) ValidateBoundShader(StateCache, s) #else #define VALIDATE_BOUND_SHADER(s) #endif static int32 GUnbindResourcesBetweenDrawsInDX11 = UE_BUILD_DEBUG; static FAutoConsoleVariableRef CVarUnbindResourcesBetweenDrawsInDX11( TEXT("r.UnbindResourcesBetweenDrawsInDX11"), GUnbindResourcesBetweenDrawsInDX11, TEXT("Unbind resources between material changes in DX11."), ECVF_Default ); int32 GDX11ReduceRTVRebinds = 1; static 
FAutoConsoleVariableRef CVarDX11ReduceRTVRebinds( TEXT("r.DX11.ReduceRTVRebinds"), GDX11ReduceRTVRebinds, TEXT("Reduce # of SetRenderTargetCalls."), ECVF_ReadOnly ); #if !UE_BUILD_SHIPPING && !UE_BUILD_TEST int32 GLogDX11RTRebinds = 0; static FAutoConsoleVariableRef CVarLogDx11RTRebinds( TEXT("r.DX11.LogRTRebinds"), GLogDX11RTRebinds, TEXT("Log # of rebinds of RTs per frame"), ECVF_Default ); FThreadSafeCounter GDX11RTRebind; FThreadSafeCounter GDX11CommitGraphicsResourceTables; #endif static TAutoConsoleVariable CVarAllowUAVFlushExt( TEXT("r.D3D11.AutoFlushUAV"), 1, TEXT("If enabled, use NVAPI (Nvidia), AGS (AMD) or Intel Extensions (Intel) to not flush between dispatches/draw calls") TEXT(" 1: on (default)\n") TEXT(" 0: off"), ECVF_RenderThreadSafe); // Vertex state. void FD3D11DynamicRHI::RHISetStreamSource(uint32 StreamIndex, FRHIBuffer* VertexBufferRHI, uint32 Offset) { FD3D11Buffer* VertexBuffer = ResourceCast(VertexBufferRHI); ID3D11Buffer* D3DBuffer = VertexBuffer ? VertexBuffer->Resource.GetReference() : nullptr; TrackResourceBoundAsVB(VertexBuffer, StreamIndex); StateCache.SetStreamSource(D3DBuffer, StreamIndex, Offset); } // Rasterizer state. void FD3D11DynamicRHI::RHISetRasterizerState(FRHIRasterizerState* NewStateRHI) { FD3D11RasterizerState* NewState = ResourceCast(NewStateRHI); StateCache.SetRasterizerState(NewState->Resource); } template void FD3D11DynamicRHI::BindUniformBuffer(uint32 BufferIndex, FRHIUniformBuffer* BufferRHI) { check(BufferRHI && BufferRHI->GetLayout().GetHash()); FD3D11UniformBuffer* Buffer = ResourceCast(BufferRHI); ID3D11Buffer* ConstantBuffer = Buffer ? 
Buffer->Resource.GetReference() : nullptr; StateCache.SetConstantBuffer(ConstantBuffer, BufferIndex); BoundUniformBuffers[ShaderFrequency][BufferIndex] = BufferRHI; DirtyUniformBuffers[ShaderFrequency] |= (1 << BufferIndex); } template void FD3D11DynamicRHI::ApplyStaticUniformBuffers(TRHIShader* Shader) { if (Shader) { UE::RHICore::ApplyStaticUniformBuffers(Shader, StaticUniformBuffers, [this](int32 BufferIndex, FRHIUniformBuffer* Buffer) { BindUniformBuffer(TRHIShader::StaticFrequency)>(BufferIndex, Buffer); }); } } void FD3D11DynamicRHI::RHISetGraphicsPipelineState(FRHIGraphicsPipelineState* GraphicsState, uint32 StencilRef, bool bApplyAdditionalState) { FRHIGraphicsPipelineStateFallBack* FallbackGraphicsState = static_cast(GraphicsState); IRHICommandContextPSOFallback::RHISetGraphicsPipelineState(GraphicsState, StencilRef, bApplyAdditionalState); const FGraphicsPipelineStateInitializer& PsoInit = FallbackGraphicsState->Initializer; if (bApplyAdditionalState) { ApplyStaticUniformBuffers(static_cast(PsoInit.BoundShaderState.VertexShaderRHI)); ApplyStaticUniformBuffers(static_cast(PsoInit.BoundShaderState.GetGeometryShader())); ApplyStaticUniformBuffers(static_cast(PsoInit.BoundShaderState.PixelShaderRHI)); } // Store the PSO's primitive (after since IRHICommandContext::RHISetGraphicsPipelineState sets the BSS) PrimitiveType = PsoInit.PrimitiveType; } void FD3D11DynamicRHI::RHISetComputeShader(FRHIComputeShader* ComputeShaderRHI) { FD3D11ComputeShader* ComputeShader = ResourceCast(ComputeShaderRHI); SetCurrentComputeShader(ComputeShaderRHI); if (GUnbindResourcesBetweenDrawsInDX11) { ClearAllShaderResourcesForFrequency(); } ApplyStaticUniformBuffers(ComputeShader); } void FD3D11DynamicRHI::RHIDispatchComputeShader(uint32 ThreadGroupCountX, uint32 ThreadGroupCountY, uint32 ThreadGroupCountZ) { RHI_DISPATCH_CALL_INC(); FRHIComputeShader* ComputeShaderRHI = GetCurrentComputeShader(); FD3D11ComputeShader* ComputeShader = ResourceCast(ComputeShaderRHI); 
StateCache.SetComputeShader(ComputeShader->Resource); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUDispatch(FIntVector(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ)); #endif if (ComputeShader->bShaderNeedsGlobalConstantBuffer) { CommitComputeShaderConstants(); } CommitComputeResourceTables(ComputeShader); Direct3DDeviceIMContext->Dispatch(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ); StateCache.SetComputeShader(nullptr); EnableUAVOverlap(); } void FD3D11DynamicRHI::RHIDispatchIndirectComputeShader(FRHIBuffer* ArgumentBufferRHI, uint32 ArgumentOffset) { RHI_DISPATCH_CALL_INC(); FRHIComputeShader* ComputeShaderRHI = GetCurrentComputeShader(); FD3D11ComputeShader* ComputeShader = ResourceCast(ComputeShaderRHI); FD3D11Buffer* ArgumentBuffer = ResourceCast(ArgumentBufferRHI); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUDispatch(FIntVector(1, 1, 1)); #endif StateCache.SetComputeShader(ComputeShader->Resource); if (ComputeShader->bShaderNeedsGlobalConstantBuffer) { CommitComputeShaderConstants(); } CommitComputeResourceTables(ComputeShader); Direct3DDeviceIMContext->DispatchIndirect(ArgumentBuffer->Resource,ArgumentOffset); StateCache.SetComputeShader(nullptr); EnableUAVOverlap(); } void FD3D11DynamicRHI::RHISetViewport(float MinX, float MinY, float MinZ, float MaxX, float MaxY, float MaxZ) { // These are the maximum viewport extents for D3D11. Exceeding them leads to badness. 
check(MinX <= (float)D3D11_VIEWPORT_BOUNDS_MAX); check(MinY <= (float)D3D11_VIEWPORT_BOUNDS_MAX); check(MaxX <= (float)D3D11_VIEWPORT_BOUNDS_MAX); check(MaxY <= (float)D3D11_VIEWPORT_BOUNDS_MAX); D3D11_VIEWPORT Viewport = { MinX, MinY, MaxX - MinX, MaxY - MinY, MinZ, MaxZ }; //avoid setting a 0 extent viewport, which the debug runtime doesn't like if (Viewport.Width > 0 && Viewport.Height > 0) { StateCache.SetViewport(Viewport); RHISetScissorRect(true, MinX, MinY, MaxX, MaxY); } } static void ValidateScissorRect(const D3D11_VIEWPORT& Viewport, const D3D11_RECT& ScissorRect) { ensure(ScissorRect.left >= (LONG)Viewport.TopLeftX); ensure(ScissorRect.top >= (LONG)Viewport.TopLeftY); ensure(ScissorRect.right <= (LONG)Viewport.TopLeftX + (LONG)Viewport.Width); ensure(ScissorRect.bottom <= (LONG)Viewport.TopLeftY + (LONG)Viewport.Height); ensure(ScissorRect.left <= ScissorRect.right && ScissorRect.top <= ScissorRect.bottom); } void FD3D11DynamicRHI::RHISetStereoViewport(float LeftMinX, float RightMinX, float LeftMinY, float RightMinY, float MinZ, float LeftMaxX, float RightMaxX, float LeftMaxY, float RightMaxY, float MaxZ) { // Set up both viewports D3D11_VIEWPORT StereoViewports[2] = {}; StereoViewports[0].TopLeftX = FMath::FloorToInt(LeftMinX); StereoViewports[0].TopLeftY = FMath::FloorToInt(LeftMinY); StereoViewports[0].Width = FMath::CeilToInt(LeftMaxX - LeftMinX); StereoViewports[0].Height = FMath::CeilToInt(LeftMaxY - LeftMinY); StereoViewports[0].MinDepth = MinZ; StereoViewports[0].MaxDepth = MaxZ; StereoViewports[1].TopLeftX = FMath::FloorToInt(RightMinX); StereoViewports[1].TopLeftY = FMath::FloorToInt(RightMinY); StereoViewports[1].Width = FMath::CeilToInt(RightMaxX - RightMinX); StereoViewports[1].Height = FMath::CeilToInt(RightMaxY - RightMinY); StereoViewports[1].MinDepth = MinZ; StereoViewports[1].MaxDepth = MaxZ; D3D11_RECT ScissorRects[2] = { { StereoViewports[0].TopLeftX, StereoViewports[0].TopLeftY, StereoViewports[0].TopLeftX + StereoViewports[0].Width, 
StereoViewports[0].TopLeftY + StereoViewports[0].Height }, { StereoViewports[1].TopLeftX, StereoViewports[1].TopLeftY, StereoViewports[1].TopLeftX + StereoViewports[1].Width, StereoViewports[1].TopLeftY + StereoViewports[1].Height } }; ValidateScissorRect(StereoViewports[0], ScissorRects[0]); ValidateScissorRect(StereoViewports[1], ScissorRects[1]); StateCache.SetViewports(2, StereoViewports); // Set the scissor rect appropriately. Direct3DDeviceIMContext->RSSetScissorRects(2, ScissorRects); } void FD3D11DynamicRHI::RHISetScissorRect(bool bEnable,uint32 MinX,uint32 MinY,uint32 MaxX,uint32 MaxY) { D3D11_VIEWPORT Viewport; StateCache.GetViewport(&Viewport); D3D11_RECT ScissorRect; if (bEnable) { ScissorRect.left = MinX; ScissorRect.top = MinY; ScissorRect.right = MaxX; ScissorRect.bottom = MaxY; } else { ScissorRect.left = (LONG) Viewport.TopLeftX; ScissorRect.top = (LONG) Viewport.TopLeftY; ScissorRect.right = (LONG) Viewport.TopLeftX + (LONG) Viewport.Width; ScissorRect.bottom = (LONG) Viewport.TopLeftY + (LONG) Viewport.Height; } ValidateScissorRect(Viewport, ScissorRect); Direct3DDeviceIMContext->RSSetScissorRects(1, &ScissorRect); } /** * Set bound shader state. This will set the vertex decl/shader, and pixel shader * @param BoundShaderState - state resource */ void FD3D11DynamicRHI::RHISetBoundShaderState(FRHIBoundShaderState* BoundShaderStateRHI) { FD3D11BoundShaderState* BoundShaderState = ResourceCast(BoundShaderStateRHI); StateCache.SetStreamStrides(BoundShaderState->StreamStrides); StateCache.SetInputLayout(BoundShaderState->InputLayout); StateCache.SetVertexShader(BoundShaderState->VertexShader); StateCache.SetPixelShader(BoundShaderState->PixelShader); StateCache.SetGeometryShader(BoundShaderState->GeometryShader); // @TODO : really should only discard the constants if the shader state has actually changed. 
bDiscardSharedConstants = true; // Prevent transient bound shader states from being recreated for each use by keeping a history of the most recently used bound shader states. // The history keeps them alive, and the bound shader state cache allows them to am be reused if needed. BoundShaderStateHistory.Add(BoundShaderState); // Shader changed so all resource tables are dirty DirtyUniformBuffers[SF_Vertex] = 0xffff; DirtyUniformBuffers[SF_Pixel] = 0xffff; DirtyUniformBuffers[SF_Geometry] = 0xffff; // Shader changed. All UB's must be reset by high level code to match other platforms anway. // Clear to catch those bugs, and bugs with stale UB's causing layout mismatches. // Release references to bound uniform buffers. for (int32 Frequency = 0; Frequency < SF_NumStandardFrequencies; ++Frequency) { for (int32 BindIndex = 0; BindIndex < MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE; ++BindIndex) { BoundUniformBuffers[Frequency][BindIndex] = nullptr; } } if (GUnbindResourcesBetweenDrawsInDX11 || GRHIGlobals.IsDebugLayerEnabled) { ClearAllShaderResources(); } } void FD3D11DynamicRHI::RHISetStaticUniformBuffers(const FUniformBufferStaticBindings& InUniformBuffers) { FMemory::Memzero(StaticUniformBuffers.GetData(), StaticUniformBuffers.Num() * sizeof(FRHIUniformBuffer*)); for (int32 Index = 0; Index < InUniformBuffers.GetUniformBufferCount(); ++Index) { StaticUniformBuffers[InUniformBuffers.GetSlot(Index)] = InUniformBuffers.GetUniformBuffer(Index); } } void FD3D11DynamicRHI::RHISetStaticUniformBuffer(FUniformBufferStaticSlot InSlot, FRHIUniformBuffer* InBuffer) { StaticUniformBuffers[InSlot] = InBuffer; } template struct FD3D11ResourceBinder { FD3D11DynamicRHI& RHI; FD3D11ResourceBinder(FD3D11DynamicRHI& InRHI) : RHI(InRHI) { } void SetUAV(FRHIUnorderedAccessView* InUnorderedAccessView, uint8 Index) { if (ShaderFrequency == SF_Compute) { RHI.InternalSetUAVCS(Index, FD3D11DynamicRHI::ResourceCast(InUnorderedAccessView)); } else if (ShaderFrequency == SF_Pixel || ShaderFrequency == 
SF_Vertex) { RHI.InternalSetUAVVSPS(Index, FD3D11DynamicRHI::ResourceCast(InUnorderedAccessView)); } else { checkf(false, TEXT("UAVs are only supported in compute, pixel and vertex shaders.")); } } void SetSRV(FRHIShaderResourceView* InShaderResourceView, uint8 Index) { FD3D11ShaderResourceView* D3D11ShaderResourceView = FD3D11DynamicRHI::ResourceCast(InShaderResourceView); FD3D11ViewableResource* D3D11ViewableResource = D3D11ShaderResourceView ? D3D11ShaderResourceView->GetBaseResource() : nullptr; ID3D11ShaderResourceView* D3D11SRV = D3D11ShaderResourceView ? D3D11ShaderResourceView->View : nullptr; RHI.SetShaderResourceView( D3D11ViewableResource, D3D11SRV, Index ); } void SetTexture(FRHITexture* InTexture, uint8 Index) { FD3D11Texture* D3D11Texture = FD3D11DynamicRHI::ResourceCast(InTexture); ID3D11ShaderResourceView* ShaderResourceView = D3D11Texture ? D3D11Texture->GetShaderResourceView() : nullptr; RHI.SetShaderResourceView( D3D11Texture, ShaderResourceView, Index ); } void SetSampler(FRHISamplerState* Sampler, uint8 Index) { RHI.GetStateCache().SetSamplerState(FD3D11DynamicRHI::ResourceCast(Sampler)->Resource, Index); } #if PLATFORM_SUPPORTS_BINDLESS_RENDERING void SetResourceCollection(FRHIResourceCollection* ResourceCollection, uint32 Index) { checkNoEntry(); } #endif }; template void FD3D11DynamicRHI::SetShaderParametersCommon(FD3D11ConstantBuffer* StageConstantBuffer, TConstArrayView InParametersData, TConstArrayView InParameters, TConstArrayView InResourceParameters) { if (InParameters.Num()) { for (const FRHIShaderParameter& Parameter : InParameters) { check(Parameter.BufferIndex == 0); StageConstantBuffer->UpdateConstant(&InParametersData[Parameter.ByteOffset], Parameter.BaseIndex, Parameter.ByteSize); } } FD3D11ResourceBinder Binder(*this); for (const FRHIShaderParameterResource& Parameter : InResourceParameters) { if (Parameter.Type == FRHIShaderParameterResource::EType::UnorderedAccessView) { Binder.SetUAV(static_cast(Parameter.Resource), 
Parameter.Index); } } for (const FRHIShaderParameterResource& Parameter : InResourceParameters) { switch (Parameter.Type) { case FRHIShaderParameterResource::EType::Texture: Binder.SetTexture(static_cast(Parameter.Resource), Parameter.Index); break; case FRHIShaderParameterResource::EType::ResourceView: Binder.SetSRV(static_cast(Parameter.Resource), Parameter.Index); break; case FRHIShaderParameterResource::EType::UnorderedAccessView: break; case FRHIShaderParameterResource::EType::Sampler: Binder.SetSampler(static_cast(Parameter.Resource), Parameter.Index); break; case FRHIShaderParameterResource::EType::UniformBuffer: BindUniformBuffer(Parameter.Index, static_cast(Parameter.Resource)); break; default: checkf(false, TEXT("Unhandled resource type?")); break; } } } void FD3D11DynamicRHI::RHISetShaderParameters(FRHIComputeShader* Shader, TConstArrayView InParametersData, TConstArrayView InParameters, TConstArrayView InResourceParameters, TConstArrayView InBindlessParameters) { SetShaderParametersCommon(CSConstantBuffer, InParametersData, InParameters, InResourceParameters); } void FD3D11DynamicRHI::RHISetShaderParameters(FRHIGraphicsShader* Shader, TConstArrayView InParametersData, TConstArrayView InParameters, TConstArrayView InResourceParameters, TConstArrayView InBindlessParameters) { switch (Shader->GetFrequency()) { case SF_Vertex: VALIDATE_BOUND_SHADER(static_cast(Shader)); SetShaderParametersCommon(VSConstantBuffer, InParametersData, InParameters, InResourceParameters); break; case SF_Geometry: VALIDATE_BOUND_SHADER(static_cast(Shader)); SetShaderParametersCommon(GSConstantBuffer, InParametersData, InParameters, InResourceParameters); break; case SF_Pixel: VALIDATE_BOUND_SHADER(static_cast(Shader)); SetShaderParametersCommon(PSConstantBuffer, InParametersData, InParameters, InResourceParameters); break; default: checkf(0, TEXT("Undefined FRHIGraphicsShader Type %d!"), (int32)Shader->GetFrequency()); } } template void 
FD3D11DynamicRHI::SetShaderUnbindsCommon(TConstArrayView InUnbinds) { FD3D11ResourceBinder Binder(*this); for (const FRHIShaderParameterUnbind& Unbind : InUnbinds) { switch (Unbind.Type) { case FRHIShaderParameterUnbind::EType::ResourceView: Binder.SetSRV(nullptr, Unbind.Index); break; case FRHIShaderParameterUnbind::EType::UnorderedAccessView: Binder.SetUAV(nullptr, Unbind.Index); break; default: checkf(false, TEXT("Unhandled unbind resource type?")); break; } } } void FD3D11DynamicRHI::RHISetShaderUnbinds(FRHIComputeShader* Shader, TConstArrayView InUnbinds) { SetShaderUnbindsCommon(InUnbinds); } void FD3D11DynamicRHI::RHISetShaderUnbinds(FRHIGraphicsShader* Shader, TConstArrayView InUnbinds) { switch (Shader->GetFrequency()) { case SF_Vertex: VALIDATE_BOUND_SHADER(static_cast(Shader)); SetShaderUnbindsCommon(InUnbinds); break; case SF_Geometry: VALIDATE_BOUND_SHADER(static_cast(Shader)); SetShaderUnbindsCommon(InUnbinds); break; case SF_Pixel: VALIDATE_BOUND_SHADER(static_cast(Shader)); SetShaderUnbindsCommon(InUnbinds); break; default: checkf(0, TEXT("Undefined FRHIGraphicsShader Type %d!"), (int32)Shader->GetFrequency()); } } void FD3D11DynamicRHI::ValidateExclusiveDepthStencilAccess(FExclusiveDepthStencil RequestedAccess) const { const bool bSrcDepthWrite = RequestedAccess.IsDepthWrite(); const bool bSrcStencilWrite = RequestedAccess.IsStencilWrite(); if (bSrcDepthWrite || bSrcStencilWrite) { // New Rule: You have to call SetRenderTarget[s]() before ensure(CurrentDepthTexture); const bool bDstDepthWrite = CurrentDSVAccessType.IsDepthWrite(); const bool bDstStencilWrite = CurrentDSVAccessType.IsStencilWrite(); // requested access is not possible, fix SetRenderTarget EExclusiveDepthStencil or request a different one ensureMsgf( !bSrcDepthWrite || bDstDepthWrite, TEXT("Expected: SrcDepthWrite := false or DstDepthWrite := true. Actual: SrcDepthWrite := %s or DstDepthWrite := %s"), (bSrcDepthWrite) ? TEXT("true") : TEXT("false"), (bDstDepthWrite) ? 
TEXT("true") : TEXT("false")
		);

		ensureMsgf(
			!bSrcStencilWrite || bDstStencilWrite,
			TEXT("Expected: SrcStencilWrite := false or DstStencilWrite := true. Actual: SrcStencilWrite := %s or DstStencilWrite := %s"),
			(bSrcStencilWrite) ? TEXT("true") : TEXT("false"),
			(bDstStencilWrite) ? TEXT("true") : TEXT("false")
		);
	}
}

/**
 * Sets the depth stencil state and stencil reference value.
 * The state's access type is validated against the currently bound depth target first.
 */
void FD3D11DynamicRHI::RHISetDepthStencilState(FRHIDepthStencilState* NewStateRHI,uint32 StencilRef)
{
	FD3D11DepthStencilState* NewState = ResourceCast(NewStateRHI);

	ValidateExclusiveDepthStencilAccess(NewState->AccessType);

	StateCache.SetDepthStencilState(NewState->Resource, StencilRef);
}

/** Changes only the stencil reference value. */
void FD3D11DynamicRHI::RHISetStencilRef(uint32 StencilRef)
{
	StateCache.SetStencilRef(StencilRef);
}

/** Sets the blend state and blend factor; the sample mask passed is always 0xffffffff. */
void FD3D11DynamicRHI::RHISetBlendState(FRHIBlendState* NewStateRHI,const FLinearColor& BlendFactor)
{
	FD3D11BlendState* NewState = ResourceCast(NewStateRHI);
	// FLinearColor is handed to the state cache as a raw float array.
	StateCache.SetBlendState(NewState->Resource, (const float*)&BlendFactor, 0xffffffff);
}

/** Changes only the blend factor; the sample mask passed is always 0xffffffff. */
void FD3D11DynamicRHI::RHISetBlendFactor(const FLinearColor& BlendFactor)
{
	StateCache.SetBlendFactor((const float*)&BlendFactor, 0xffffffff);
}

/**
 * Re-issues the current render targets (keeping the tracked UAVs) and then forces
 * the graphics UAVs to be committed again.
 */
void FD3D11DynamicRHI::CommitRenderTargetsAndUAVs()
{
	CommitRenderTargets(false);

	FMemory::Memset(UAVBound, 0); //force to be rebound if any is set
	UAVSChanged = 1;

	CommitUAVs();
}

/**
 * Pushes the tracked render targets and depth stencil target to the device context.
 * @param bClearUAVs - when true, the tracked UAV bindings and their bookkeeping
 *                     (UAVBindFirst / UAVBindCount / UAVSChanged) are reset as well
 */
void FD3D11DynamicRHI::CommitRenderTargets(bool bClearUAVs)
{
	SCOPE_CYCLE_COUNTER(STAT_D3D11RenderTargetCommits);
#if !UE_BUILD_SHIPPING && !UE_BUILD_TEST
	GDX11RTRebind.Increment();
#endif

	// Only the first NumSimultaneousRenderTargets entries are filled in and handed to D3D.
	ID3D11RenderTargetView* RTArray[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];

	for (uint32 RenderTargetIndex = 0; RenderTargetIndex < NumSimultaneousRenderTargets; ++RenderTargetIndex)
	{
		RTArray[RenderTargetIndex] = CurrentRenderTargets[RenderTargetIndex];
	}

	Direct3DDeviceIMContext->OMSetRenderTargets(
		NumSimultaneousRenderTargets,
		RTArray,
		CurrentDepthStencilTarget
	);

	if(bClearUAVs)
	{
		for(uint32 i = 0; i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i)
		{
			CurrentUAVs[i] = nullptr;
			UAVBound[i] = nullptr;
		}
		UAVBindFirst = 0;
		UAVBindCount = 0;
		UAVSChanged = 0;
	}
}

/**
 * Binds (or, with null, unbinds) a UAV on the compute stage immediately.
 * A non-null UAV first has conflicting bindings of its base resource cleared.
 */
void FD3D11DynamicRHI::InternalSetUAVCS(uint32 BindIndex, FD3D11UnorderedAccessView* UnorderedAccessViewRHI)
{
	if (UnorderedAccessViewRHI)
	{
		ConditionalClearShaderResource(UnorderedAccessViewRHI->GetBaseResource(), true);
	}

	ID3D11UnorderedAccessView* D3D11UAV = UnorderedAccessViewRHI ? UnorderedAccessViewRHI->View : nullptr;

	// -1 converts to 0xffffffff, passed as the initial count for the slot.
	uint32 InitialCount = -1;
	Direct3DDeviceIMContext->CSSetUnorderedAccessViews(BindIndex, 1, &D3D11UAV, &InitialCount);
}

/**
 * Records a UAV for the graphics pipeline in CurrentUAVs; nothing is sent to the
 * device here. UAVSChanged is raised when the slot's contents change.
 */
void FD3D11DynamicRHI::InternalSetUAVVSPS(uint32 BindIndex, FD3D11UnorderedAccessView* UnorderedAccessViewRHI)
{
	check(BindIndex < D3D11_PS_CS_UAV_REGISTER_COUNT);
	if (CurrentUAVs[BindIndex] != UnorderedAccessViewRHI)
	{
		CurrentUAVs[BindIndex] = UnorderedAccessViewRHI;
		UAVSChanged = 1;
	}

	if (UnorderedAccessViewRHI)
	{
		ConditionalClearShaderResource(UnorderedAccessViewRHI->GetBaseResource(), true);

		// Drop the same view from every other slot so it is tracked in one slot only.
		for (uint32 i = 0; i < D3D11_PS_CS_UAV_REGISTER_COUNT; i++)
		{
			if (i != BindIndex && CurrentUAVs[i] == UnorderedAccessViewRHI)
			{
				CurrentUAVs[i] = nullptr;
			}
		}
	}
}

/**
 * Sends the tracked graphics UAVs to the device if they changed since the last commit.
 * Only the first contiguous run of non-null entries in CurrentUAVs is bound.
 */
void FD3D11DynamicRHI::CommitUAVs()
{
	if (!UAVSChanged)
	{
		return;
	}

	// Find the first occupied slot (-1 when there is none).
	int32 First = -1;
	int32 Count = 0;
	for (int32 i = 0; i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i)
	{
		if (CurrentUAVs[i] != nullptr)
		{
			First = i;
			break;
		}
	}

	if (First != -1)
	{
		// RHIUAVs is only written (and later read) inside [First, First + Count).
		FD3D11UnorderedAccessView* RHIUAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
		ID3D11UnorderedAccessView* UAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
		FMemory::Memset(UAVs, 0);

		// Collect the contiguous run starting at First; stop at the first empty slot.
		for (int32 i = First; i < D3D11_PS_CS_UAV_REGISTER_COUNT; ++i)
		{
			if (CurrentUAVs[i] == nullptr)
				break;
			RHIUAVs[i] = CurrentUAVs[i].GetReference();
			UAVs[i] = RHIUAVs[i]->View;
			Count++;
		}

		// Skip the device call when the range and its contents match what was bound last time.
		if (First != UAVBindFirst || Count != UAVBindCount || 0 != FMemory::Memcmp(&UAVs[First], &UAVBound[First], sizeof(UAVs[0]) * Count))
		{
			SCOPE_CYCLE_COUNTER(STAT_D3D11RenderTargetCommitsUAV);
			for (int32 i = First; i < First + Count; ++i)
			{
				if (UAVs[i] != UAVBound[i])
				{
					FD3D11UnorderedAccessView* RHIUAV = RHIUAVs[i];
					ID3D11UnorderedAccessView* UAV = UAVs[i];

					// Unbind any shader
views of the UAV's resource. ConditionalClearShaderResource(RHIUAV->GetBaseResource(), true); UAVBound[i] = UAV; } } static const uint32 UAVInitialCountArray[D3D11_PS_CS_UAV_REGISTER_COUNT] = { ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u }; Direct3DDeviceIMContext->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, 0, 0, First, Count, &UAVs[First], &UAVInitialCountArray[0]); } } else { if (First != UAVBindFirst) { Direct3DDeviceIMContext->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, 0, 0, 0, 0, nullptr, nullptr); } } UAVBindFirst = First; UAVBindCount = Count; UAVSChanged = 0; } struct FRTVDesc { uint32 Width; uint32 Height; DXGI_SAMPLE_DESC SampleDesc; }; // Return an FRTVDesc structure whose // Width and height dimensions are adjusted for the RTV's miplevel. FRTVDesc GetRenderTargetViewDesc(ID3D11RenderTargetView* RenderTargetView) { D3D11_RENDER_TARGET_VIEW_DESC TargetDesc; RenderTargetView->GetDesc(&TargetDesc); TRefCountPtr BaseResource; RenderTargetView->GetResource((ID3D11Resource**)BaseResource.GetInitReference()); uint32 MipIndex = 0; FRTVDesc ret; memset(&ret, 0, sizeof(ret)); switch (TargetDesc.ViewDimension) { case D3D11_RTV_DIMENSION_TEXTURE2D: case D3D11_RTV_DIMENSION_TEXTURE2DMS: case D3D11_RTV_DIMENSION_TEXTURE2DARRAY: case D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY: { D3D11_TEXTURE2D_DESC Desc; ((ID3D11Texture2D*)(BaseResource.GetReference()))->GetDesc(&Desc); ret.Width = Desc.Width; ret.Height = Desc.Height; ret.SampleDesc = Desc.SampleDesc; if (TargetDesc.ViewDimension == D3D11_RTV_DIMENSION_TEXTURE2D || TargetDesc.ViewDimension == D3D11_RTV_DIMENSION_TEXTURE2DARRAY) { // All the non-multisampled texture types have their mip-slice in the same position. 
MipIndex = TargetDesc.Texture2D.MipSlice; } break; } case D3D11_RTV_DIMENSION_TEXTURE3D: { D3D11_TEXTURE3D_DESC Desc; ((ID3D11Texture3D*)(BaseResource.GetReference()))->GetDesc(&Desc); ret.Width = Desc.Width; ret.Height = Desc.Height; ret.SampleDesc.Count = 1; ret.SampleDesc.Quality = 0; MipIndex = TargetDesc.Texture3D.MipSlice; break; } default: { // not expecting 1D targets. checkNoEntry(); } } ret.Width >>= MipIndex; ret.Height >>= MipIndex; return ret; } void FD3D11DynamicRHI::SetRenderTargets( uint32 NewNumSimultaneousRenderTargets, const FRHIRenderTargetView* NewRenderTargetsRHI, const FRHIDepthRenderTargetView* NewDepthStencilTargetRHI) { FD3D11Texture* NewDepthStencilTarget = ResourceCast(NewDepthStencilTargetRHI ? NewDepthStencilTargetRHI->Texture : nullptr); check(NewNumSimultaneousRenderTargets <= MaxSimultaneousRenderTargets); bool bTargetChanged = false; // Set the appropriate depth stencil view depending on whether depth writes are enabled or not ID3D11DepthStencilView* DepthStencilView = NULL; if(NewDepthStencilTarget) { check(NewDepthStencilTargetRHI); CurrentDSVAccessType = NewDepthStencilTargetRHI->GetDepthStencilAccess(); DepthStencilView = NewDepthStencilTarget->GetDepthStencilView(CurrentDSVAccessType); // Unbind any shader views of the depth stencil target that are bound. ConditionalClearShaderResource(NewDepthStencilTarget, false); } // Check if the depth stencil target is different from the old state. if(CurrentDepthStencilTarget != DepthStencilView) { CurrentDepthTexture = NewDepthStencilTarget; CurrentDepthStencilTarget = DepthStencilView; bTargetChanged = true; } // Gather the render target views for the new render targets. 
ID3D11RenderTargetView* NewRenderTargetViews[MaxSimultaneousRenderTargets]; for(uint32 RenderTargetIndex = 0;RenderTargetIndex < MaxSimultaneousRenderTargets;++RenderTargetIndex) { ID3D11RenderTargetView* RenderTargetView = NULL; if(RenderTargetIndex < NewNumSimultaneousRenderTargets && NewRenderTargetsRHI[RenderTargetIndex].Texture != nullptr) { int32 RTMipIndex = NewRenderTargetsRHI[RenderTargetIndex].MipIndex; int32 RTSliceIndex = NewRenderTargetsRHI[RenderTargetIndex].ArraySliceIndex; FD3D11Texture* NewRenderTarget = ResourceCast(NewRenderTargetsRHI[RenderTargetIndex].Texture); RenderTargetView = NewRenderTarget ? NewRenderTarget->GetRenderTargetView(RTMipIndex, RTSliceIndex) : nullptr; ensureMsgf(RenderTargetView, TEXT("Texture being set as render target has no RTV")); // Unbind any shader views of the render target that are bound. ConditionalClearShaderResource(NewRenderTarget, false); #if UE_BUILD_DEBUG // A check to allow you to pinpoint what is using mismatching targets // We filter our d3ddebug spew that checks for this as the d3d runtime's check is wrong. 
// For filter code, see D3D11Device.cpp look for "OMSETRENDERTARGETS_INVALIDVIEW" if(RenderTargetView && DepthStencilView) { FRTVDesc RTTDesc = GetRenderTargetViewDesc(RenderTargetView); TRefCountPtr DepthTargetTexture; DepthStencilView->GetResource((ID3D11Resource**)DepthTargetTexture.GetInitReference()); D3D11_TEXTURE2D_DESC DTTDesc; DepthTargetTexture->GetDesc(&DTTDesc); // enforce color target is <= depth and MSAA settings match if(RTTDesc.Width > DTTDesc.Width || RTTDesc.Height > DTTDesc.Height || RTTDesc.SampleDesc.Count != DTTDesc.SampleDesc.Count || RTTDesc.SampleDesc.Quality != DTTDesc.SampleDesc.Quality) { UE_LOG(LogD3D11RHI, Fatal,TEXT("RTV(%i,%i c=%i,q=%i) and DSV(%i,%i c=%i,q=%i) have mismatching dimensions and/or MSAA levels!"), RTTDesc.Width,RTTDesc.Height,RTTDesc.SampleDesc.Count,RTTDesc.SampleDesc.Quality, DTTDesc.Width,DTTDesc.Height,DTTDesc.SampleDesc.Count,DTTDesc.SampleDesc.Quality); } } #endif } NewRenderTargetViews[RenderTargetIndex] = RenderTargetView; // Check if the render target is different from the old state. if(CurrentRenderTargets[RenderTargetIndex] != RenderTargetView) { CurrentRenderTargets[RenderTargetIndex] = RenderTargetView; bTargetChanged = true; } } if(NumSimultaneousRenderTargets != NewNumSimultaneousRenderTargets) { NumSimultaneousRenderTargets = NewNumSimultaneousRenderTargets; uint32 Bit = 1; uint32 Mask = 0; for (uint32 Index = 0; Index < NumSimultaneousRenderTargets; ++Index) { Mask |= Bit; Bit <<= 1; } CurrentRTVOverlapMask = Mask; bTargetChanged = true; } // Only make the D3D call to change render targets if something actually changed. if(bTargetChanged) { CommitRenderTargets(true); CurrentUAVMask = 0; } // Set the viewport to the full size of render target 0. 
if (NewRenderTargetViews[0]) { // check target 0 is valid check(0 < NewNumSimultaneousRenderTargets && NewRenderTargetsRHI[0].Texture != nullptr); FRTVDesc RTTDesc = GetRenderTargetViewDesc(NewRenderTargetViews[0]); RHISetViewport(0.0f, 0.0f, 0.0f, (float)RTTDesc.Width, (float)RTTDesc.Height, 1.0f); } else if( DepthStencilView ) { TRefCountPtr DepthTargetTexture; DepthStencilView->GetResource((ID3D11Resource**)DepthTargetTexture.GetInitReference()); D3D11_TEXTURE2D_DESC DTTDesc; DepthTargetTexture->GetDesc(&DTTDesc); RHISetViewport(0.0f, 0.0f, 0.0f, (float)DTTDesc.Width, (float)DTTDesc.Height, 1.0f); } } void FD3D11DynamicRHI::SetRenderTargetsAndClear(const FRHISetRenderTargetsInfo& RenderTargetsInfo) { this->SetRenderTargets(RenderTargetsInfo.NumColorRenderTargets, RenderTargetsInfo.ColorRenderTarget, &RenderTargetsInfo.DepthStencilRenderTarget); if (RenderTargetsInfo.bClearColor || RenderTargetsInfo.bClearStencil || RenderTargetsInfo.bClearDepth) { FLinearColor ClearColors[MaxSimultaneousRenderTargets]; bool bClearColorArray[MaxSimultaneousRenderTargets]; float DepthClear = 0.0; uint32 StencilClear = 0; if (RenderTargetsInfo.bClearColor) { for (int32 i = 0; i < RenderTargetsInfo.NumColorRenderTargets; ++i) { bClearColorArray[i] = RenderTargetsInfo.ColorRenderTarget[i].LoadAction == ERenderTargetLoadAction::EClear; if (bClearColorArray[i] && RenderTargetsInfo.ColorRenderTarget[i].Texture != nullptr) { const FClearValueBinding& ClearValue = RenderTargetsInfo.ColorRenderTarget[i].Texture->GetClearBinding(); checkf(ClearValue.ColorBinding == EClearBinding::EColorBound, TEXT("Texture: %s does not have a color bound for fast clears"), *RenderTargetsInfo.ColorRenderTarget[i].Texture->GetName().GetPlainNameString()); ClearColors[i] = ClearValue.GetClearColor(); } } } if (RenderTargetsInfo.bClearDepth || RenderTargetsInfo.bClearStencil) { const FClearValueBinding& ClearValue = RenderTargetsInfo.DepthStencilRenderTarget.Texture->GetClearBinding(); 
checkf(ClearValue.ColorBinding == EClearBinding::EDepthStencilBound, TEXT("Texture: %s does not have a DS value bound for fast clears"), *RenderTargetsInfo.DepthStencilRenderTarget.Texture->GetName().GetPlainNameString()); ClearValue.GetDepthStencil(DepthClear, StencilClear); } this->RHIClearMRTImpl(RenderTargetsInfo.bClearColor ? bClearColorArray : nullptr, RenderTargetsInfo.NumColorRenderTargets, ClearColors, RenderTargetsInfo.bClearDepth, DepthClear, RenderTargetsInfo.bClearStencil, StencilClear); } } // Primitive drawing. static D3D11_PRIMITIVE_TOPOLOGY GetD3D11PrimitiveType(EPrimitiveType PrimitiveType) { switch(PrimitiveType) { case PT_TriangleList: return D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; case PT_TriangleStrip: return D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; case PT_LineList: return D3D11_PRIMITIVE_TOPOLOGY_LINELIST; case PT_PointList: return D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; default: UE_LOG(LogD3D11RHI, Fatal,TEXT("Unknown primitive type: %u"),PrimitiveType); }; return D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; } namespace FD3DRHIUtil { template inline void CommitConstants(FD3D11ConstantBuffer* InConstantBuffer, FD3D11StateCache& StateCache, bool bDiscardSharedConstants) { FWinD3D11ConstantBuffer* ConstantBuffer = static_cast(InConstantBuffer); // Array may contain NULL entries to pad out to proper if (ConstantBuffer && ConstantBuffer->CommitConstantsToDevice(bDiscardSharedConstants)) { ID3D11Buffer* DeviceBuffer = ConstantBuffer->GetConstantBuffer(); StateCache.SetConstantBuffer(DeviceBuffer, GLOBAL_CONSTANT_BUFFER_INDEX); } } }; void FD3D11DynamicRHI::CommitNonComputeShaderConstants() { FD3D11BoundShaderState* CurrentBoundShaderState = (FD3D11BoundShaderState*)BoundShaderStateHistory.GetLast(); check(CurrentBoundShaderState); // Only set the constant buffer if this shader needs the global constant buffer bound // Otherwise we will overwrite a different constant buffer if (CurrentBoundShaderState->bShaderNeedsGlobalConstantBuffer[SF_Vertex]) { // Commit 
and bind vertex shader constants FD3DRHIUtil::CommitConstants(VSConstantBuffer, StateCache, bDiscardSharedConstants); } if (CurrentBoundShaderState->bShaderNeedsGlobalConstantBuffer[SF_Geometry]) { // Commit and bind geometry shader constants FD3DRHIUtil::CommitConstants(GSConstantBuffer, StateCache, bDiscardSharedConstants); } if (CurrentBoundShaderState->bShaderNeedsGlobalConstantBuffer[SF_Pixel]) { // Commit and bind pixel shader constants FD3DRHIUtil::CommitConstants(PSConstantBuffer, StateCache, bDiscardSharedConstants); } bDiscardSharedConstants = false; } void FD3D11DynamicRHI::CommitComputeShaderConstants() { // Commit and bind compute shader constants FD3DRHIUtil::CommitConstants(CSConstantBuffer, StateCache, bDiscardSharedConstants); } template void FD3D11DynamicRHI::SetResourcesFromTables(const ShaderType* RESTRICT Shader) { checkSlow(Shader); static constexpr EShaderFrequency Frequency = static_cast(ShaderType::StaticFrequency); UE::RHI::Private::SetUniformBufferResourcesFromTables( FD3D11ResourceBinder { *this } , *Shader , DirtyUniformBuffers[Frequency] , BoundUniformBuffers[Frequency] #if ENABLE_RHI_VALIDATION , Tracker #endif ); } void FD3D11DynamicRHI::CommitGraphicsResourceTables() { #if !UE_BUILD_SHIPPING && !UE_BUILD_TEST GDX11CommitGraphicsResourceTables.Increment(); #endif FD3D11BoundShaderState* RESTRICT CurrentBoundShaderState = (FD3D11BoundShaderState*)BoundShaderStateHistory.GetLast(); check(CurrentBoundShaderState); bool bRTVInvalidate = false; uint32 UAVMask = 0; if (auto* Shader = CurrentBoundShaderState->GetPixelShader()) { UAVMask |= Shader->UAVMask & CurrentRTVOverlapMask; SetResourcesFromTables(Shader); } if (auto* Shader = CurrentBoundShaderState->GetVertexShader()) { UAVMask |= Shader->UAVMask & CurrentRTVOverlapMask; SetResourcesFromTables(Shader); } if (auto* Shader = CurrentBoundShaderState->GetGeometryShader()) { UAVMask |= Shader->UAVMask & CurrentRTVOverlapMask; SetResourcesFromTables(Shader); } // Because d3d11 binding uses 
the same slots for UAVs and RTVs, we have to rebind when two shaders with different sets of rendertargets are bound, // as they can potentially be used by UAVs, which can cause them to unbind RTVs used by subsequent shaders. if (GDX11ReduceRTVRebinds && (0 != ((~CurrentUAVMask) & UAVMask) && CurrentUAVMask == (CurrentUAVMask & UAVMask))) { //if the mask only -adds- uav binds, no RTs will be missing so we just grow the mask CurrentUAVMask = UAVMask; } else if (CurrentUAVMask != UAVMask) { bRTVInvalidate = true; CurrentUAVMask = UAVMask; } if (bRTVInvalidate) { CommitRenderTargets(true); DirtyUniformBuffers[SF_Pixel] = -1; DirtyUniformBuffers[SF_Vertex] = -1; DirtyUniformBuffers[SF_Geometry] = -1; } if (UAVSChanged) { CommitUAVs(); } } void FD3D11DynamicRHI::CommitComputeResourceTables(FD3D11ComputeShader* InComputeShader) { FD3D11ComputeShader* RESTRICT ComputeShader = InComputeShader; check(ComputeShader); SetResourcesFromTables(ComputeShader); } void FD3D11DynamicRHI::RHIDrawPrimitive(uint32 BaseVertexIndex,uint32 NumPrimitives,uint32 NumInstances) { CommitGraphicsResourceTables(); CommitNonComputeShaderConstants(); uint32 VertexCount = GetVertexCountForPrimitiveCount(NumPrimitives, PrimitiveType); RHI_DRAW_CALL_STATS(PrimitiveType, VertexCount, NumPrimitives, NumInstances); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUWork(NumPrimitives * NumInstances, VertexCount * NumInstances); #endif StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType)); if(NumInstances > 1) { Direct3DDeviceIMContext->DrawInstanced(VertexCount,NumInstances,BaseVertexIndex,0); } else { Direct3DDeviceIMContext->Draw(VertexCount,BaseVertexIndex); } EnableUAVOverlap(); } void FD3D11DynamicRHI::RHIDrawPrimitiveIndirect(FRHIBuffer* ArgumentBufferRHI, uint32 ArgumentOffset) { FD3D11Buffer* ArgumentBuffer = ResourceCast(ArgumentBufferRHI); RHI_DRAW_CALL_INC(); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUWork(0); #endif CommitGraphicsResourceTables(); CommitNonComputeShaderConstants(); 
StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType)); Direct3DDeviceIMContext->DrawInstancedIndirect(ArgumentBuffer->Resource,ArgumentOffset); EnableUAVOverlap(); } void FD3D11DynamicRHI::RHIDrawIndexedIndirect(FRHIBuffer* IndexBufferRHI, FRHIBuffer* ArgumentsBufferRHI, int32 DrawArgumentsIndex, uint32 NumInstances) { FD3D11Buffer* IndexBuffer = ResourceCast(IndexBufferRHI); FD3D11Buffer* ArgumentsBuffer = ResourceCast(ArgumentsBufferRHI); RHI_DRAW_CALL_INC(); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUWork(1); #endif CommitGraphicsResourceTables(); CommitNonComputeShaderConstants(); // determine 16bit vs 32bit indices const DXGI_FORMAT Format = (IndexBuffer->GetStride() == sizeof(uint16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT); TrackResourceBoundAsIB(IndexBuffer); StateCache.SetIndexBuffer(IndexBuffer->Resource, Format, 0); StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType)); Direct3DDeviceIMContext->DrawIndexedInstancedIndirect(ArgumentsBuffer->Resource, DrawArgumentsIndex * 5 * sizeof(uint32)); EnableUAVOverlap(); } void FD3D11DynamicRHI::RHIDrawIndexedPrimitive(FRHIBuffer* IndexBufferRHI, int32 BaseVertexIndex, uint32 FirstInstance, uint32 NumVertices, uint32 StartIndex, uint32 NumPrimitives, uint32 NumInstances) { RHI_DRAW_CALL_STATS(PrimitiveType, NumVertices, NumPrimitives, NumInstances); FD3D11Buffer* IndexBuffer = ResourceCast(IndexBufferRHI); // called should make sure the input is valid, this avoid hidden bugs ensure(NumPrimitives > 0); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUWork(NumPrimitives * NumInstances, NumVertices * NumInstances); #endif CommitGraphicsResourceTables(); CommitNonComputeShaderConstants(); // determine 16bit vs 32bit indices const DXGI_FORMAT Format = (IndexBuffer->GetStride() == sizeof(uint16) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT); uint32 IndexCount = GetVertexCountForPrimitiveCount(NumPrimitives,PrimitiveType); // Verify that we are not trying to read outside the index buffer range // test is an optimized version of: StartIndex + IndexCount <= IndexBuffer->GetSize() / IndexBuffer->GetStride() checkf((StartIndex + IndexCount) * IndexBuffer->GetStride() <= IndexBuffer->GetSize(), TEXT("Start %u, Count %u, Type %u, Buffer Size %u, Buffer stride %u"), StartIndex, IndexCount, PrimitiveType, IndexBuffer->GetSize(), IndexBuffer->GetStride()); TrackResourceBoundAsIB(IndexBuffer); StateCache.SetIndexBuffer(IndexBuffer->Resource, Format, 0); StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType)); if (NumInstances > 1 || FirstInstance != 0) { const uint64 TotalIndexCount = (uint64)NumInstances * (uint64)IndexCount + (uint64)StartIndex; checkf(TotalIndexCount <= (uint64)0xFFFFFFFF, TEXT("Instanced Index Draw exceeds maximum d3d11 limit: Total: %llu, NumInstances: %llu, IndexCount: %llu, StartIndex: %llu, FirstInstance: %llu"), TotalIndexCount, NumInstances, IndexCount, StartIndex, FirstInstance); Direct3DDeviceIMContext->DrawIndexedInstanced(IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance); } else { Direct3DDeviceIMContext->DrawIndexed(IndexCount,StartIndex,BaseVertexIndex); } EnableUAVOverlap(); } void FD3D11DynamicRHI::RHIDrawIndexedPrimitiveIndirect(FRHIBuffer* IndexBufferRHI, FRHIBuffer* ArgumentBufferRHI, uint32 ArgumentOffset) { FD3D11Buffer* IndexBuffer = ResourceCast(IndexBufferRHI); FD3D11Buffer* ArgumentBuffer = ResourceCast(ArgumentBufferRHI); RHI_DRAW_CALL_INC(); #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUWork(0); #endif CommitGraphicsResourceTables(); CommitNonComputeShaderConstants(); // Set the index buffer. const uint32 SizeFormat = sizeof(DXGI_FORMAT); const DXGI_FORMAT Format = (IndexBuffer->GetStride() == sizeof(uint16) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT); TrackResourceBoundAsIB(IndexBuffer); StateCache.SetIndexBuffer(IndexBuffer->Resource, Format, 0); StateCache.SetPrimitiveTopology(GetD3D11PrimitiveType(PrimitiveType)); Direct3DDeviceIMContext->DrawIndexedInstancedIndirect(ArgumentBuffer->Resource,ArgumentOffset); EnableUAVOverlap(); } void FD3D11DynamicRHI::RHIClearMRTImpl(const bool* bClearColorArray, int32 NumClearColors, const FLinearColor* ClearColorArray, bool bClearDepth, float Depth, bool bClearStencil, uint32 Stencil) { FD3D11BoundRenderTargets BoundRenderTargets(Direct3DDeviceIMContext); // Must specify enough clear colors for all active RTs check(!bClearColorArray || NumClearColors >= BoundRenderTargets.GetNumActiveTargets()); // If we're clearing depth or stencil and we have a readonly depth/stencil view bound, we need to use a writable depth/stencil view if (CurrentDepthTexture) { FExclusiveDepthStencil RequestedAccess; RequestedAccess.SetDepthStencilWrite(bClearDepth, bClearStencil); ensure(RequestedAccess.IsValid(CurrentDSVAccessType)); } ID3D11DepthStencilView* DepthStencilView = BoundRenderTargets.GetDepthStencilView(); if (bClearColorArray && BoundRenderTargets.GetNumActiveTargets() > 0) { for (int32 TargetIndex = 0; TargetIndex < BoundRenderTargets.GetNumActiveTargets(); TargetIndex++) { if (bClearColorArray[TargetIndex]) { ID3D11RenderTargetView* RenderTargetView = BoundRenderTargets.GetRenderTargetView(TargetIndex); if (RenderTargetView != nullptr) { Direct3DDeviceIMContext->ClearRenderTargetView(RenderTargetView, (float*)&ClearColorArray[TargetIndex]); } } } } if ((bClearDepth || bClearStencil) && DepthStencilView) { uint32 ClearFlags = 0; if (bClearDepth) { ClearFlags |= D3D11_CLEAR_DEPTH; } if (bClearStencil) { ClearFlags |= D3D11_CLEAR_STENCIL; } Direct3DDeviceIMContext->ClearDepthStencilView(DepthStencilView,ClearFlags,Depth,Stencil); } #if (RHI_NEW_GPU_PROFILER == 0) RegisterGPUWork(0); #endif } // Blocks the CPU until the GPU catches up and 
goes idle. void FD3D11DynamicRHI::RHIBlockUntilGPUIdle() { D3D11_QUERY_DESC Desc = {}; Desc.Query = D3D11_QUERY_EVENT; TRefCountPtr Query; VERIFYD3D11RESULT_EX(Direct3DDevice->CreateQuery(&Desc, Query.GetInitReference()), Direct3DDevice); Direct3DDeviceIMContext->End(Query.GetReference()); Direct3DDeviceIMContext->Flush(); for(;;) { BOOL EventComplete = false; Direct3DDeviceIMContext->GetData(Query.GetReference(), &EventComplete, sizeof(EventComplete), 0); if (EventComplete) { break; } else { FPlatformProcess::Sleep(0.005f); } } } // NVIDIA Depth Bounds Test interface void FD3D11DynamicRHI::EnableDepthBoundsTest(bool bEnable,float MinDepth,float MaxDepth) { #if PLATFORM_DESKTOP if(MinDepth > MaxDepth) { UE_LOG(LogD3D11RHI, Error,TEXT("RHIEnableDepthBoundsTest(%i,%f, %f) MinDepth > MaxDepth, cannot set DBT."),bEnable,MinDepth,MaxDepth); return; } if( MinDepth < 0.f || MaxDepth > 1.f) { UE_LOG(LogD3D11RHI, Verbose,TEXT("RHIEnableDepthBoundsTest(%i,%f, %f) depths out of range, will clamp."),bEnable,MinDepth,MaxDepth); } MinDepth = FMath::Clamp(MinDepth, 0.0f, 1.0f); MaxDepth = FMath::Clamp(MaxDepth, 0.0f, 1.0f); #if WITH_NVAPI if (IsRHIDeviceNVIDIA()) { auto Result = NvAPI_D3D11_SetDepthBoundsTest( Direct3DDevice, bEnable, MinDepth, MaxDepth ); if (Result != NVAPI_OK) { static bool bOnce = false; if (!bOnce) { bOnce = true; if (bRenderDoc) { if (FApp::IsUnattended()) { UE_LOG(LogD3D11RHI, Display, TEXT("NvAPI is not available under RenderDoc")); } else { UE_LOG(LogD3D11RHI, Warning, TEXT("NvAPI is not available under RenderDoc")); } } else { UE_LOG(LogD3D11RHI, Error, TEXT("NvAPI_D3D11_SetDepthBoundsTest(%i,%f, %f) returned error code %i. 
**********PLEASE UPDATE YOUR VIDEO DRIVERS*********"), bEnable, MinDepth, MaxDepth, (unsigned int)Result); } } } } #endif #if WITH_AMD_AGS if (IsRHIDeviceAMD()) { auto Result = agsDriverExtensionsDX11_SetDepthBounds(AmdAgsContext, Direct3DDeviceIMContext, bEnable, MinDepth, MaxDepth); if(Result != AGS_SUCCESS) { static bool bOnce = false; if (!bOnce) { bOnce = true; if (bRenderDoc) { if (FApp::IsUnattended()) { UE_LOG(LogD3D11RHI, Display, TEXT("AGS is not available under RenderDoc")); } else { UE_LOG(LogD3D11RHI, Warning, TEXT("AGS is not available under RenderDoc")); } } else { UE_LOG(LogD3D11RHI, Error, TEXT("agsDriverExtensionsDX11_SetDepthBounds(%i,%f, %f) returned error code %i. **********PLEASE UPDATE YOUR VIDEO DRIVERS*********"), bEnable, MinDepth, MaxDepth, (unsigned int)Result); } } } } #endif #endif StateCache.bDepthBoundsEnabled = bEnable; StateCache.DepthBoundsMin = MinDepth; StateCache.DepthBoundsMax = MaxDepth; } IRHICommandContext* FD3D11DynamicRHI::RHIGetDefaultContext() { return this; } IRHIComputeContext* FD3D11DynamicRHI::RHIGetCommandContext(ERHIPipeline Pipeline, FRHIGPUMask GPUMask) { UE_LOG(LogRHI, Fatal, TEXT("FD3D11DynamicRHI::RHIGetCommandContext should never be called. D3D11 RHI does not implement parallel command list execution.")); return nullptr; } struct FD3D11PlatformCommandList : public IRHIPlatformCommandList { virtual ~FD3D11PlatformCommandList() = default; }; void FD3D11DynamicRHI::RHIFinalizeContext(FRHIFinalizeContextArgs&& Args, TRHIPipelineArray& Output) { #if RHI_NEW_GPU_PROFILER FlushProfilerStats(); #endif // "Context" will always be the default context, since we don't implement parallel execution. for (IRHIComputeContext* Context : Args.Contexts) { // "Context" will always be the default context, since we don't implement parallel execution. 
check(Context == this); #if RHI_NEW_GPU_PROFILER && WITH_RHI_BREADCRUMBS // We need platform command lists to contain the breadcrumb allocators Output[Context->GetPipeline()] = new FD3D11PlatformCommandList; #endif } // Reset some context state for (int32 Frequency = 0; Frequency < SF_NumStandardFrequencies; ++Frequency) { DirtyUniformBuffers[Frequency] = 0; for (int32 BindIndex = 0; BindIndex < MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE; ++BindIndex) { BoundUniformBuffers[Frequency][BindIndex] = nullptr; } } } void FD3D11DynamicRHI::RHISubmitCommandLists(FRHISubmitCommandListsArgs&& Args) { // Attempt to readback completed queries and fences PollQueryResults(); FD3D11GPUFence::PollFences(); #if RHI_NEW_GPU_PROFILER && WITH_RHI_BREADCRUMBS for (IRHIPlatformCommandList* CmdList : Args.CommandLists) { FD3D11PlatformCommandList* D3DCmdList = static_cast(CmdList); // Preserve the breadcrumb allocators in the profiler frame // so they are kept alive until the frame's data is processed. for (auto const& Allocator : D3DCmdList->BreadcrumbAllocators) { Profiler.Current.BreadcrumbAllocators.AddUnique(&Allocator.Get()); } delete D3DCmdList; } #endif } void FD3D11DynamicRHI::EnableUAVOverlap() { // This function is called after every draw or dispatch to turn overlap back on if it was turned off by a UAV barrier. This way, the next // draw/dispatch after the barrier executes after everything before it has completed and the caches were flushed, and any subsequent // submissions are allowed to overlap until the next UAV barrier. 
if (bUAVOverlapEnabled || !CVarAllowUAVFlushExt.GetValueOnRenderThread()) { return; } bUAVOverlapEnabled = true; if (IsRHIDeviceNVIDIA()) { #if WITH_NVAPI NvAPI_D3D11_BeginUAVOverlap(Direct3DDevice); #endif } else if (IsRHIDeviceAMD()) { #if WITH_AMD_AGS agsDriverExtensionsDX11_BeginUAVOverlap(AmdAgsContext, Direct3DDeviceIMContext); #endif } else if (IsRHIDeviceIntel()) { #if INTEL_EXTENSIONS if (bIntelSupportsUAVOverlap) { INTC_D3D11_BeginUAVOverlap(IntelExtensionContext); } #endif } } void FD3D11DynamicRHI::DisableUAVOverlap() { // This is called when a transition to UAVCompute or UAVGraphics is executed. It disables overlapping for the next draw/dispatch, so we get the same // behavior as with a UAV barrier in APIs with explicit barriers. Overlapping will be turned back on automatically after the draw/dispatch. if (!bUAVOverlapEnabled) { return; } if (IsRHIDeviceNVIDIA()) { #if WITH_NVAPI NvAPI_D3D11_EndUAVOverlap(Direct3DDevice); #endif } else if (IsRHIDeviceAMD()) { #if WITH_AMD_AGS agsDriverExtensionsDX11_EndUAVOverlap(AmdAgsContext, Direct3DDeviceIMContext); #endif } else if (IsRHIDeviceIntel()) { #if INTEL_EXTENSIONS if (bIntelSupportsUAVOverlap) { INTC_D3D11_EndUAVOverlap(IntelExtensionContext); } #endif } bUAVOverlapEnabled = false; } void FD3D11DynamicRHI::RHICreateTransition(FRHITransition* Transition, const FRHITransitionCreateInfo& CreateInfo) { checkf(FMath::IsPowerOfTwo(uint32(CreateInfo.SrcPipelines)) && FMath::IsPowerOfTwo(uint32(CreateInfo.DstPipelines)), TEXT("Support for multi-pipe resources is not yet implemented.")); FD3D11TransitionData* Data = new (Transition->GetPrivateData()) FD3D11TransitionData; Data->bUAVBarrier = false; // If we have any transitions to UAVCompute or UAVGraphics, we need to break up the current overlap group. 
for (const FRHITransitionInfo& Info : CreateInfo.TransitionInfos) { if (Info.Resource && EnumHasAnyFlags(Info.AccessAfter, ERHIAccess::UAVMask)) { Data->bUAVBarrier = true; break; } } } void FD3D11DynamicRHI::RHIReleaseTransition(FRHITransition* Transition) { Transition->GetPrivateData()->~FD3D11TransitionData(); } void FD3D11DynamicRHI::RHIBeginTransitions(TArrayView Transitions) { } void FD3D11DynamicRHI::RHIEndTransitions(TArrayView Transitions) { // The only thing we care about in D3D11 is breaking up the current overlap group if we have a UAV barrier. If overlap is already off, there's nothing to do. if (!bUAVOverlapEnabled) { return; } for (const FRHITransition* Transition : Transitions) { const FD3D11TransitionData* Data = Transition->GetPrivateData(); if (Data->bUAVBarrier) { DisableUAVOverlap(); break; } } } void FD3D11DynamicRHI::RHIBeginUAVOverlap() { // No need to do anything here. Overlap is always on and the current group is broken up when we see a transition to UAVCompute or UAVGraphics. } void FD3D11DynamicRHI::RHIEndUAVOverlap() { // Same as above. } //*********************** StagingBuffer Implementation ***********************// FStagingBufferRHIRef FD3D11DynamicRHI::RHICreateStagingBuffer() { return new FD3D11StagingBuffer(); } FD3D11StagingBuffer::~FD3D11StagingBuffer() { if (StagedRead) { StagedRead.SafeRelease(); } } void* FD3D11StagingBuffer::Lock(uint32 Offset, uint32 NumBytes) { check(!bIsLocked); bIsLocked = true; if (StagedRead) { // Map the staging buffer's memory for reading. 
D3D11_MAPPED_SUBRESOURCE MappedSubresource; VERIFYD3D11RESULT(Context->Map(StagedRead, 0, D3D11_MAP_READ, 0, &MappedSubresource)); return (void*)((uint8*)MappedSubresource.pData + Offset); } else { return nullptr; } } void FD3D11StagingBuffer::Unlock() { check(bIsLocked); bIsLocked = false; if (StagedRead) { Context->Unmap(StagedRead, 0); } } void FD3D11DynamicRHI::RHICopyToStagingBuffer(FRHIBuffer* SourceBufferRHI, FRHIStagingBuffer* StagingBufferRHI, uint32 Offset, uint32 NumBytes) { FD3D11Buffer* SourceBuffer = ResourceCast(SourceBufferRHI); FD3D11StagingBuffer* StagingBuffer = ResourceCast(StagingBufferRHI); if (StagingBuffer) { ensureMsgf(!StagingBuffer->bIsLocked, TEXT("Attempting to Copy to a locked staging buffer. This may have undefined behavior")); if (SourceBuffer) { if (!StagingBuffer->StagedRead || StagingBuffer->ShadowBufferSize < NumBytes) { // Free previously allocated buffer. if (StagingBuffer->StagedRead) { StagingBuffer->StagedRead.SafeRelease(); } // Allocate a new one with enough space. // @todo-mattc I feel like we should allocate more than NumBytes to handle small reads without blowing tons of space. Need to pool this. D3D11_BUFFER_DESC StagedReadDesc; ZeroMemory(&StagedReadDesc, sizeof(D3D11_BUFFER_DESC)); StagedReadDesc.ByteWidth = NumBytes; StagedReadDesc.Usage = D3D11_USAGE_STAGING; StagedReadDesc.BindFlags = 0; StagedReadDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; StagedReadDesc.MiscFlags = 0; TRefCountPtr StagingVertexBuffer; VERIFYD3D11RESULT_EX(Direct3DDevice->CreateBuffer(&StagedReadDesc, NULL, StagingBuffer->StagedRead.GetInitReference()), Direct3DDevice); StagingBuffer->ShadowBufferSize = NumBytes; StagingBuffer->Context = Direct3DDeviceIMContext; } // Copy the contents of the vertex buffer to the staging buffer. 
D3D11_BOX SourceBox; SourceBox.left = Offset; SourceBox.right = Offset + NumBytes; SourceBox.top = SourceBox.front = 0; SourceBox.bottom = SourceBox.back = 1; Direct3DDeviceIMContext->CopySubresourceRegion(StagingBuffer->StagedRead, 0, 0, 0, 0, SourceBuffer->Resource, 0, &SourceBox); } } } void* FD3D11DynamicRHI::RHILockStagingBuffer(FRHIStagingBuffer* StagingBufferRHI, FRHIGPUFence* Fence, uint32 Offset, uint32 SizeRHI) { check(StagingBufferRHI); FD3D11StagingBuffer* StagingBuffer = ResourceCast(StagingBufferRHI); return StagingBuffer->Lock(Offset, SizeRHI); } void FD3D11DynamicRHI::RHIUnlockStagingBuffer(FRHIStagingBuffer* StagingBufferRHI) { FD3D11StagingBuffer* StagingBuffer = ResourceCast(StagingBufferRHI); StagingBuffer->Unlock(); } TQueue FD3D11GPUFence::ActiveSyncs; FD3D11GPUFence::FD3D11GPUFence(FName InName) : FRHIGPUFence(InName) { } void FD3D11GPUFence::Clear() { Event = nullptr; } bool FD3D11GPUFence::Poll() const { return Event && Event->IsComplete(); } void FD3D11GPUFence::Wait(FRHICommandListImmediate& RHICmdList, FRHIGPUMask GPUMask) const { if (Event && !Event->IsComplete()) { // // The fence might get signalled by an earlier RHI command polling them, but we can't be sure that will happen. // The GPU might finish work after the RHI thread has gone idle, and then we'll never see the fence complete. // // Enqueue a command here that will block and wait for the fence if it still hasn't signalled by the time // the RHI thread is done with all prior commands. 
// RHICmdList.EnqueueLambda([Event = Event](FRHICommandListImmediate&) { if (!Event->IsComplete()) { PollFencesUntil(Event); } }); RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); Event->Wait(); } } void FD3D11GPUFence::PollFencesUntil(FGraphEvent* Target) { FD3D11DynamicRHI& RHI = FD3D11DynamicRHI::Get(); while (FD3D11Sync* Sync = ActiveSyncs.Peek()) { bool const bTarget = Sync->Event == Target; // Wait forever if this fence is the one we're looking for specifically, otherwise just poll. UINT const Flags = bTarget ? 0 : D3D11_ASYNC_GETDATA_DONOTFLUSH; Retry: BOOL Value = 0; HRESULT Result = RHI.GetDeviceContext()->GetData(Sync->Query, &Value, sizeof(Value), Flags); if (Result == S_FALSE || (Result == S_OK && !Value)) { // Fence is not done if (bTarget) { // We're waiting for a specific fence. Spin until it passes. goto Retry; } else { return; } } VERIFYD3D11RESULT(Result); // The fence has completed. Signal the graph event and remove the node. Sync->Event->DispatchSubsequents(); ActiveSyncs.Pop(); if (bTarget) { // We found the fence we wanted. Stop polling. 
return; } } checkf(!Target, TEXT("Attempt to poll for a specific fence, but it was not found in the queue.")); } void FD3D11GPUFence::WriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList) { Event = FGraphEvent::CreateGraphEvent(); RHICmdList.EnqueueLambda([Event = Event](FRHICommandListBase&) mutable { FD3D11DynamicRHI& RHI = FD3D11DynamicRHI::Get(); // Insert an event query on the device context D3D11_QUERY_DESC Desc {}; Desc.Query = D3D11_QUERY_EVENT; TRefCountPtr Query; VERIFYD3D11RESULT(RHI.GetDevice()->CreateQuery(&Desc, Query.GetInitReference())); RHI.GetDeviceContext()->End(Query); // Store the query in the list of queries to poll ActiveSyncs.Enqueue(FD3D11Sync(MoveTemp(Event), MoveTemp(Query))); }); } void FD3D11DynamicRHI::RHIWriteGPUFence_TopOfPipe(FRHICommandListBase& RHICmdList, FRHIGPUFence* FenceRHI) { ResourceCast(FenceRHI)->WriteGPUFence_TopOfPipe(RHICmdList); } void FD3D11DynamicRHI::RHIWriteGPUFence(FRHIGPUFence* FenceRHI) { checkNoEntry(); // Should never be called } FGPUFenceRHIRef FD3D11DynamicRHI::RHICreateGPUFence(const FName& Name) { return new FD3D11GPUFence(Name); }