388 lines
14 KiB
C++
388 lines
14 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "RendererUtils.h"
|
|
#include "RendererPrivateUtils.h"
|
|
#include "Nanite/NaniteRayTracing.h"
|
|
#include "RenderTargetPool.h"
|
|
#include "RHIDefinitions.h"
|
|
#include "DataDrivenShaderPlatformInfo.h"
|
|
#include "VisualizeTexture.h"
|
|
#include "ScenePrivate.h"
|
|
#include "SystemTextures.h"
|
|
#include "UnifiedBuffer.h"
|
|
#include "ComponentRecreateRenderStateContext.h"
|
|
|
|
static int32 GSkipNaniteLPIs = 1;
|
|
static FAutoConsoleVariableRef CVarSkipNaniteLPIs(
|
|
TEXT("r.SkipNaniteLPIs"),
|
|
GSkipNaniteLPIs,
|
|
TEXT("Skip Nanite primitives in the light-primitive interactions & the primitive octree as they perform GPU-driven culling separately.\n")
|
|
TEXT(" Values:")
|
|
TEXT(" 1 - (auto, default) Skipping is auto-disabled if r.AllowStaticLighting is enabled for the project as it breaks some associated editor features otherwise.")
|
|
TEXT(" 2 - (forced) Skipping is always enabled regardless of r.AllowStaticLighting. May cause issues with static lighting. Use with care."),
|
|
FConsoleVariableDelegate::CreateLambda([](IConsoleVariable* InVariable)
|
|
{
|
|
// Needed because the primitives need to be re-added to the scene to be removed from the octree and to have existing LPIs cleaned up. And vice versa.
|
|
// The cvar is not expected to be changed during runtime outside of testing.
|
|
FGlobalComponentRecreateRenderStateContext Context;
|
|
}),
|
|
ECVF_RenderThreadSafe);
|
|
|
|
bool ShouldSkipNaniteLPIs(EShaderPlatform ShaderPlatform)
|
|
{
|
|
return (GSkipNaniteLPIs > 1
|
|
|| ( GSkipNaniteLPIs == 1 && !IsStaticLightingAllowed()))
|
|
&& UseNanite(ShaderPlatform);
|
|
}
|
|
|
|
class FRTWriteMaskDecodeCS : public FGlobalShader
|
|
{
|
|
public:
|
|
DECLARE_GLOBAL_SHADER(FRTWriteMaskDecodeCS);
|
|
|
|
static const uint32 MaxRenderTargetCount = 4;
|
|
static const uint32 ThreadGroupSizeX = 8;
|
|
static const uint32 ThreadGroupSizeY = 8;
|
|
|
|
class FNumRenderTargets : SHADER_PERMUTATION_RANGE_INT("NUM_RENDER_TARGETS", 1, MaxRenderTargetCount);
|
|
using FPermutationDomain = TShaderPermutationDomain<FNumRenderTargets>;
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
|
|
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, ReferenceInput)
|
|
SHADER_PARAMETER_RDG_TEXTURE_SRV_ARRAY(TextureMetadata, RTWriteMaskInputs, [MaxRenderTargetCount])
|
|
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D<uint>, OutCombinedRTWriteMask)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
|
|
static bool IsSupported(uint32 NumRenderTargets)
|
|
{
|
|
return NumRenderTargets == 1 || NumRenderTargets == 3;
|
|
}
|
|
|
|
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
|
|
{
|
|
const FPermutationDomain PermutationVector(Parameters.PermutationId);
|
|
const uint32 NumRenderTargets = PermutationVector.Get<FNumRenderTargets>();
|
|
|
|
if (!IsSupported(NumRenderTargets))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return RHISupportsRenderTargetWriteMask(Parameters.Platform);
|
|
}
|
|
|
|
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
|
|
{
|
|
OutEnvironment.SetDefine(TEXT("THREADGROUP_SIZEX"), ThreadGroupSizeX);
|
|
OutEnvironment.SetDefine(TEXT("THREADGROUP_SIZEY"), ThreadGroupSizeY);
|
|
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
|
|
}
|
|
|
|
FRTWriteMaskDecodeCS() = default;
|
|
FRTWriteMaskDecodeCS(const ShaderMetaType::CompiledShaderInitializerType & Initializer)
|
|
: FGlobalShader(Initializer)
|
|
{
|
|
PlatformDataParam.Bind(Initializer.ParameterMap, TEXT("PlatformData"), SPF_Mandatory);
|
|
BindForLegacyShaderParameters<FParameters>(this, Initializer.PermutationId, Initializer.ParameterMap);
|
|
}
|
|
|
|
// Shader parameter structs don't have a way to push variable sized data yet. So the we use the old shader parameter API.
|
|
void SetParameters(FRHIBatchedShaderParameters& BatchedParameters, const void* PlatformDataPtr, uint32 PlatformDataSize)
|
|
{
|
|
BatchedParameters.SetShaderParameter(PlatformDataParam.GetBufferIndex(), PlatformDataParam.GetBaseIndex(), PlatformDataSize, PlatformDataPtr);
|
|
}
|
|
|
|
private:
|
|
LAYOUT_FIELD(FShaderParameter, PlatformDataParam);
|
|
};
|
|
|
|
IMPLEMENT_GLOBAL_SHADER(FRTWriteMaskDecodeCS, "/Engine/Private/RTWriteMaskDecode.usf", "RTWriteMaskDecodeMain", SF_Compute);
|
|
|
|
void FRenderTargetWriteMask::Decode(
|
|
FRHICommandListImmediate& RHICmdList,
|
|
FGlobalShaderMap* ShaderMap,
|
|
TArrayView<IPooledRenderTarget* const> InRenderTargets,
|
|
TRefCountPtr<IPooledRenderTarget>& OutRTWriteMask,
|
|
ETextureCreateFlags RTWriteMaskFastVRamConfig,
|
|
const TCHAR* RTWriteMaskDebugName)
|
|
{
|
|
FRDGBuilder GraphBuilder(RHICmdList);
|
|
|
|
TArray<FRDGTextureRef, SceneRenderingAllocator> InputTextures;
|
|
InputTextures.Reserve(InRenderTargets.Num());
|
|
for (IPooledRenderTarget* RenderTarget : InRenderTargets)
|
|
{
|
|
InputTextures.Add(GraphBuilder.RegisterExternalTexture(RenderTarget));
|
|
}
|
|
|
|
FRDGTextureRef OutputTexture = nullptr;
|
|
Decode(GraphBuilder, ShaderMap, InputTextures, OutputTexture, RTWriteMaskFastVRamConfig, RTWriteMaskDebugName);
|
|
|
|
GraphBuilder.QueueTextureExtraction(OutputTexture, &OutRTWriteMask);
|
|
GraphBuilder.Execute();
|
|
}
|
|
|
|
void FRenderTargetWriteMask::Decode(
|
|
FRDGBuilder& GraphBuilder,
|
|
FGlobalShaderMap* ShaderMap,
|
|
TArrayView<FRDGTextureRef const> RenderTargets,
|
|
FRDGTextureRef& OutRTWriteMask,
|
|
ETextureCreateFlags RTWriteMaskFastVRamConfig,
|
|
const TCHAR* RTWriteMaskDebugName)
|
|
{
|
|
const uint32 NumRenderTargets = RenderTargets.Num();
|
|
|
|
check(RHISupportsRenderTargetWriteMask(GMaxRHIShaderPlatform));
|
|
checkf(FRTWriteMaskDecodeCS::IsSupported(NumRenderTargets), TEXT("FRenderTargetWriteMask::Decode does not currently support decoding %d render targets."), RenderTargets.Num());
|
|
|
|
FRDGTextureRef Texture0 = RenderTargets[0];
|
|
|
|
const FIntPoint RTWriteMaskDims(
|
|
FMath::DivideAndRoundUp(Texture0->Desc.Extent.X, (int32)FRTWriteMaskDecodeCS::ThreadGroupSizeX),
|
|
FMath::DivideAndRoundUp(Texture0->Desc.Extent.Y, (int32)FRTWriteMaskDecodeCS::ThreadGroupSizeY));
|
|
|
|
// Allocate the Mask from the render target pool.
|
|
const FRDGTextureDesc MaskDesc = FRDGTextureDesc::Create2D(
|
|
RTWriteMaskDims,
|
|
NumRenderTargets <= 2 ? PF_R8_UINT : PF_R16_UINT,
|
|
FClearValueBinding::None,
|
|
RTWriteMaskFastVRamConfig | TexCreate_UAV | TexCreate_RenderTargetable | TexCreate_ShaderResource);
|
|
|
|
OutRTWriteMask = GraphBuilder.CreateTexture(MaskDesc, RTWriteMaskDebugName);
|
|
|
|
auto* PassParameters = GraphBuilder.AllocParameters<FRTWriteMaskDecodeCS::FParameters>();
|
|
PassParameters->ReferenceInput = Texture0;
|
|
PassParameters->OutCombinedRTWriteMask = GraphBuilder.CreateUAV(OutRTWriteMask);
|
|
|
|
for (uint32 Index = 0; Index < NumRenderTargets; ++Index)
|
|
{
|
|
PassParameters->RTWriteMaskInputs[Index] = GraphBuilder.CreateSRV(FRDGTextureSRVDesc::CreateForMetaData(RenderTargets[Index], ERDGTextureMetaDataAccess::CMask));
|
|
}
|
|
|
|
FRTWriteMaskDecodeCS::FPermutationDomain PermutationVector;
|
|
PermutationVector.Set<FRTWriteMaskDecodeCS::FNumRenderTargets>(NumRenderTargets);
|
|
TShaderMapRef<FRTWriteMaskDecodeCS> DecodeCS(ShaderMap, PermutationVector);
|
|
|
|
GraphBuilder.AddPass(
|
|
RDG_EVENT_NAME("DecodeWriteMask[%d]", NumRenderTargets),
|
|
PassParameters,
|
|
ERDGPassFlags::Compute,
|
|
[DecodeCS, PassParameters, ShaderMap, RTWriteMaskDims](FRDGAsyncTask, FRHIComputeCommandList& RHICmdList)
|
|
{
|
|
FRHITexture* Texture0RHI = PassParameters->ReferenceInput->GetRHI();
|
|
|
|
// Retrieve the platform specific data that the decode shader needs.
|
|
void* PlatformDataPtr = nullptr;
|
|
uint32 PlatformDataSize = 0;
|
|
Texture0RHI->GetWriteMaskProperties(PlatformDataPtr, PlatformDataSize);
|
|
check(PlatformDataSize > 0);
|
|
|
|
if (PlatformDataPtr == nullptr)
|
|
{
|
|
// If the returned pointer was null, the platform RHI wants us to allocate the memory instead.
|
|
PlatformDataPtr = alloca(PlatformDataSize);
|
|
Texture0RHI->GetWriteMaskProperties(PlatformDataPtr, PlatformDataSize);
|
|
}
|
|
|
|
SetComputePipelineState(RHICmdList, DecodeCS.GetComputeShader());
|
|
|
|
SetShaderParametersMixedCS(RHICmdList, DecodeCS, *PassParameters, PlatformDataPtr, PlatformDataSize);
|
|
|
|
RHICmdList.DispatchComputeShader(
|
|
FMath::DivideAndRoundUp((uint32)RTWriteMaskDims.X, FRTWriteMaskDecodeCS::ThreadGroupSizeX),
|
|
FMath::DivideAndRoundUp((uint32)RTWriteMaskDims.Y, FRTWriteMaskDecodeCS::ThreadGroupSizeY),
|
|
1);
|
|
});
|
|
}
|
|
|
|
FDepthBounds::FDepthBoundsValues FDepthBounds::CalculateNearFarDepthExcludingSky()
|
|
{
|
|
FDepthBounds::FDepthBoundsValues Values;
|
|
|
|
if (bool(ERHIZBuffer::IsInverted))
|
|
{
|
|
float SmallestFloatAbove0;
|
|
|
|
if (GPixelFormats[PF_DepthStencil].bIs24BitUnormDepthStencil)
|
|
{
|
|
SmallestFloatAbove0 = 1.0f / 16777215.0f; // 24bit norm depth
|
|
}
|
|
else
|
|
{
|
|
SmallestFloatAbove0 = 1.1754943508e-38; // 32bit float depth
|
|
}
|
|
|
|
Values.MinDepth = SmallestFloatAbove0;
|
|
Values.MaxDepth = float(ERHIZBuffer::NearPlane);
|
|
}
|
|
else
|
|
{
|
|
float SmallestFloatBelow1;
|
|
|
|
if (GPixelFormats[PF_DepthStencil].bIs24BitUnormDepthStencil)
|
|
{
|
|
SmallestFloatBelow1 = 16777214.0f / 16777215.0f; // 24bit norm depth
|
|
}
|
|
else
|
|
{
|
|
SmallestFloatBelow1 = 0.9999999404; // 32bit float depth
|
|
}
|
|
|
|
Values.MinDepth = float(ERHIZBuffer::NearPlane);
|
|
Values.MaxDepth = SmallestFloatBelow1;
|
|
}
|
|
|
|
return Values;
|
|
}
|
|
|
|
IMPLEMENT_GLOBAL_SHADER_PARAMETER_STRUCT(FSubstratePublicGlobalUniformParameters, "SubstratePublic");
|
|
|
|
namespace Substrate
|
|
{
|
|
void PreInitViews(FScene& Scene)
|
|
{
|
|
FSubstrateSceneData& SubstrateScene = Scene.SubstrateSceneData;
|
|
SubstrateScene.SubstratePublicGlobalUniformParameters = nullptr;
|
|
}
|
|
|
|
void PostRender(FScene& Scene)
|
|
{
|
|
FSubstrateSceneData& SubstrateScene = Scene.SubstrateSceneData;
|
|
SubstrateScene.SubstratePublicGlobalUniformParameters = nullptr;
|
|
}
|
|
|
|
TRDGUniformBufferRef<FSubstratePublicGlobalUniformParameters> GetPublicGlobalUniformBuffer(FRDGBuilder& GraphBuilder, FScene& Scene)
|
|
{
|
|
if(::Substrate::IsSubstrateEnabled())
|
|
{
|
|
FSubstrateSceneData& SubstrateScene = Scene.SubstrateSceneData;
|
|
|
|
if(SubstrateScene.SubstratePublicGlobalUniformParameters == nullptr)
|
|
{
|
|
return CreatePublicGlobalUniformBuffer(GraphBuilder, nullptr);//We are creating a dummy here so pass in null for the scene data.
|
|
}
|
|
return SubstrateScene.SubstratePublicGlobalUniformParameters;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
namespace Nanite
|
|
{
|
|
TRDGUniformBufferRef<FNaniteRayTracingUniformParameters> GetPublicGlobalRayTracingUniformBuffer()
|
|
{
|
|
#if RHI_RAYTRACING
|
|
return Nanite::GRayTracingManager.GetUniformBuffer();
|
|
#else
|
|
return nullptr;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void FBufferScatterUploader::UploadTo(FRDGBuilder& GraphBuilder, FRDGBuffer *DestBuffer, FRDGBuffer *ScatterOffsets, FRDGBuffer *Values, uint32 NumScatters, uint32 NumBytesPerElement, int32 NumValuesPerScatter)
|
|
{
|
|
FScatterCopyParams ScatterCopyParams { NumScatters, NumBytesPerElement, NumValuesPerScatter };
|
|
ScatterCopyResource(GraphBuilder, DestBuffer, GraphBuilder.CreateSRV(ScatterOffsets), GraphBuilder.CreateSRV(Values), ScatterCopyParams);
|
|
}
|
|
|
|
void FBufferScatterUploader::UploadTo(FRDGBuilder& GraphBuilder, FRDGBuffer* DestBuffer, FRDGBuffer* ScatterOffsets, FRDGBuffer* Values, TFunction<uint64()>&& GetNumScatters, uint32 NumBytesPerElement, int32 NumValuesPerScatter)
|
|
{
|
|
FAsyncScatterCopyParams ScatterCopyParams{ MoveTemp(GetNumScatters), NumBytesPerElement, NumValuesPerScatter };
|
|
ScatterCopyResource(GraphBuilder, DestBuffer, GraphBuilder.CreateSRV(ScatterOffsets), GraphBuilder.CreateSRV(Values), ScatterCopyParams);
|
|
}
|
|
|
|
namespace UE::RendererPrivateUtils::Implementation
|
|
{
|
|
|
|
FPersistentBuffer::FPersistentBuffer(int32 InMinimumNumElementsReserved, const TCHAR *InName, bool bInRoundUpToPOT)
|
|
: MinimumNumElementsReserved(InMinimumNumElementsReserved)
|
|
, Name(InName)
|
|
, bRoundUpToPOT(bInRoundUpToPOT)
|
|
{
|
|
}
|
|
|
|
FRDGBuffer* FPersistentBuffer::Register(FRDGBuilder& GraphBuilder) const
|
|
{
|
|
return GraphBuilder.RegisterExternalBuffer(PooledBuffer);
|
|
}
|
|
|
|
void FPersistentBuffer::Empty()
|
|
{
|
|
PooledBuffer.SafeRelease();
|
|
}
|
|
|
|
FRDGBuffer* FPersistentBuffer::ResizeBufferIfNeeded(FRDGBuilder& GraphBuilder, const FRDGBufferDesc& BufferDesc)
|
|
{
|
|
return ::ResizeBufferIfNeeded(GraphBuilder, PooledBuffer, BufferDesc, Name);
|
|
}
|
|
|
|
FRDGBuffer* FPersistentBuffer::ResizeAndClearBufferIfNeeded(FRDGBuilder& GraphBuilder, const FRDGBufferDesc& BufferDesc)
|
|
{
|
|
uint32 PrevNumElements = PooledBuffer ? PooledBuffer->Desc.NumElements : 0u;
|
|
FRDGBuffer* NewBuffer = ::ResizeBufferIfNeeded(GraphBuilder, PooledBuffer, BufferDesc, Name);
|
|
if (NewBuffer->Desc.NumElements > PrevNumElements)
|
|
{
|
|
::MemsetResource(GraphBuilder, NewBuffer, FMemsetResourceParams { 0u, NewBuffer->Desc.NumElements - PrevNumElements, PrevNumElements });
|
|
}
|
|
return NewBuffer;
|
|
}
|
|
|
|
}
|
|
|
|
TGlobalResource<FTileTexCoordVertexBuffer> GOneTileQuadVertexBuffer(1);
|
|
TGlobalResource<FTileIndexBuffer> GOneTileQuadIndexBuffer(1);
|
|
|
|
RENDERER_API FBufferRHIRef& GetOneTileQuadVertexBuffer()
|
|
{
|
|
return GOneTileQuadVertexBuffer.VertexBufferRHI;
|
|
}
|
|
|
|
RENDERER_API FBufferRHIRef& GetOneTileQuadIndexBuffer()
|
|
{
|
|
return GOneTileQuadIndexBuffer.IndexBufferRHI;
|
|
}
|
|
|
|
class FClearIndirectDispatchArgsCS : public FGlobalShader
|
|
{
|
|
DECLARE_GLOBAL_SHADER(FClearIndirectDispatchArgsCS);
|
|
SHADER_USE_PARAMETER_STRUCT(FClearIndirectDispatchArgsCS, FGlobalShader)
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
|
|
SHADER_PARAMETER(uint32, NumIndirectArgs)
|
|
SHADER_PARAMETER(uint32, IndirectArgStride)
|
|
SHADER_PARAMETER(FIntVector3, DimClearValue)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer<uint>, OutIndirectArgsBuffer)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
};
|
|
IMPLEMENT_GLOBAL_SHADER(FClearIndirectDispatchArgsCS, "/Engine/Private/RendererUtils.usf", "ClearIndirectDispatchArgsCS", SF_Compute);
|
|
|
|
void AddClearIndirectDispatchArgsPass(FRDGBuilder& GraphBuilder, ERHIFeatureLevel::Type FeatureLevel, FRDGBufferRef IndirectArgsRDG, const FIntVector3 &DimClearValue, uint32 NumIndirectArgs, uint32 IndirectArgStride)
|
|
{
|
|
// Need room for XYZ dims at least.
|
|
check(IndirectArgStride >= 3);
|
|
|
|
FClearIndirectDispatchArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FClearIndirectDispatchArgsCS::FParameters>();
|
|
PassParameters->NumIndirectArgs = NumIndirectArgs;
|
|
PassParameters->IndirectArgStride = IndirectArgStride;
|
|
PassParameters->DimClearValue = DimClearValue;
|
|
PassParameters->OutIndirectArgsBuffer = GraphBuilder.CreateUAV(IndirectArgsRDG);
|
|
|
|
auto ComputeShader = GetGlobalShaderMap(FeatureLevel)->GetShader<FClearIndirectDispatchArgsCS>();
|
|
|
|
FComputeShaderUtils::AddPass(
|
|
GraphBuilder,
|
|
RDG_EVENT_NAME("ClearIndirectDispatchArgs"),
|
|
ComputeShader,
|
|
PassParameters,
|
|
FComputeShaderUtils::GetGroupCount(NumIndirectArgs, 64)
|
|
);
|
|
}
|
|
|
|
FRDGBufferRef CreateAndClearIndirectDispatchArgs(FRDGBuilder& GraphBuilder, ERHIFeatureLevel::Type FeatureLevel, const TCHAR* Name, const FIntVector3& DimClearValue, uint32 NumIndirectArgs, uint32 IndirectArgStride)
|
|
{
|
|
FRDGBufferRef IndirectArgsRDG = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc(NumIndirectArgs * IndirectArgStride), Name);
|
|
AddClearIndirectDispatchArgsPass(GraphBuilder, FeatureLevel, IndirectArgsRDG, DimClearValue, NumIndirectArgs, IndirectArgStride);
|
|
return IndirectArgsRDG;
|
|
}
|