2675 lines
101 KiB
C++
2675 lines
101 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "NaniteShading.h"
|
|
#include "NaniteShared.h"
|
|
#include "NaniteVertexFactory.h"
|
|
#include "NaniteRayTracing.h"
|
|
#include "NaniteVisualizationData.h"
|
|
#include "NaniteComposition.h"
|
|
#include "Rendering/NaniteResources.h"
|
|
#include "Rendering/NaniteStreamingManager.h"
|
|
#include "Lumen/LumenSceneCardCapture.h"
|
|
#include "ComponentRecreateRenderStateContext.h"
|
|
#include "VariableRateShadingImageManager.h"
|
|
#include "SystemTextures.h"
|
|
#include "SceneUtils.h"
|
|
#include "ScenePrivate.h"
|
|
#include "RHI.h"
|
|
#include "BasePassRendering.h"
|
|
#include "Async/ParallelFor.h"
|
|
#include "Materials/Material.h"
|
|
#include "Materials/MaterialRenderProxy.h"
|
|
#include "MeshPassUtils.h"
|
|
#include "PSOPrecacheMaterial.h"
|
|
#include "PSOPrecacheValidation.h"
|
|
#include "Nanite/NaniteMaterialsSceneExtension.h"
|
|
#include "RenderGraphResources.h"
|
|
|
|
extern TAutoConsoleVariable<int32> CVarNaniteShowDrawEvents;
|
|
extern TAutoConsoleVariable<int32> CVarRHICmdMinDrawsPerParallelCmdList;
|
|
|
|
extern int32 GSkipDrawOnPSOPrecaching;
|
|
extern int32 GNaniteShowStats;
|
|
|
|
#if WANTS_DRAW_MESH_EVENTS
|
|
static FORCEINLINE const FString& GetShadingMaterialName(const FMaterialRenderProxy* InShadingMaterial)
|
|
{
|
|
if (InShadingMaterial == nullptr)
|
|
{
|
|
static FString Invalid = TEXT("<Invalid>");
|
|
return Invalid;
|
|
}
|
|
|
|
return InShadingMaterial->GetMaterialName();
|
|
}
|
|
#endif
|
|
|
|
TAutoConsoleVariable<int32> CVarParallelBasePassBuild(
|
|
TEXT("r.Nanite.ParallelBasePassBuild"),
|
|
1,
|
|
TEXT(""),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteFastTileClear = 1;
|
|
static FAutoConsoleVariableRef CVarNaniteFastTileClear(
|
|
TEXT("r.Nanite.FastTileClear"),
|
|
GNaniteFastTileClear,
|
|
TEXT("Whether to enable Nanite fast tile clearing"),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteFastTileClearSubTiles = 1;
|
|
static FAutoConsoleVariableRef CVarNaniteFastTileClearSubTiles(
|
|
TEXT("r.Nanite.FastTileClear.SubTiles"),
|
|
GNaniteFastTileClearSubTiles,
|
|
TEXT("Whether to enable Nanite fast tile clearing (for 4x4 sub tiles)"),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteFastTileVis = INDEX_NONE;
|
|
static FAutoConsoleVariableRef CVarNaniteFastTileVis(
|
|
TEXT("r.Nanite.FastTileVis"),
|
|
GNaniteFastTileVis,
|
|
TEXT("Allows for just showing a single target in the visualization, or -1 to show all accumulated"),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
TAutoConsoleVariable<int32> CVarNaniteBundleEmulation(
|
|
TEXT("r.Nanite.Bundle.Emulation"),
|
|
0,
|
|
TEXT("Whether to force shader bundle dispatch emulation"),
|
|
FConsoleVariableDelegate::CreateLambda([](IConsoleVariable* InVariable)
|
|
{
|
|
// We need to recreate scene proxies so that BuildShadingCommands can be re-evaluated.
|
|
FGlobalComponentRecreateRenderStateContext Context;
|
|
}),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteBundleShading = 0;
|
|
static FAutoConsoleVariableRef CVarNaniteBundleShading(
|
|
TEXT("r.Nanite.Bundle.Shading"),
|
|
GNaniteBundleShading,
|
|
TEXT("Whether to enable Nanite shader bundle dispatch for shading"),
|
|
FConsoleVariableDelegate::CreateLambda([](IConsoleVariable* InVariable)
|
|
{
|
|
// We need to recreate scene proxies so that BuildShadingCommands can be re-evaluated.
|
|
FGlobalComponentRecreateRenderStateContext Context;
|
|
}),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteComputeMaterialsSort = 1;
|
|
static FAutoConsoleVariableRef CVarNaniteComputeMaterialsSort(
|
|
TEXT("r.Nanite.ComputeMaterials.Sort"),
|
|
GNaniteComputeMaterialsSort,
|
|
TEXT(""),
|
|
FConsoleVariableDelegate::CreateLambda([](IConsoleVariable* InVariable)
|
|
{
|
|
// We need to recreate scene proxies so that BuildShadingCommands can be re-evaluated.
|
|
FGlobalComponentRecreateRenderStateContext Context;
|
|
}),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GBinningTechnique = 0;
|
|
static FAutoConsoleVariableRef CVarNaniteBinningTechnique(
|
|
TEXT("r.Nanite.BinningTechnique"),
|
|
GBinningTechnique,
|
|
TEXT(""),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteShadeBinningMode = 0;
|
|
static FAutoConsoleVariableRef CVarNaniteShadeBinningMode(
|
|
TEXT("r.Nanite.ShadeBinningMode"),
|
|
GNaniteShadeBinningMode,
|
|
TEXT("0: Auto\n")
|
|
TEXT("1: Force to Pixel Mode\n")
|
|
TEXT("2: Force to Quad Mode\n"),
|
|
FConsoleVariableDelegate::CreateLambda([](IConsoleVariable* InVariable)
|
|
{
|
|
// We need to recreate scene proxies so that BuildShadingCommands can be re-evaluated.
|
|
FGlobalComponentRecreateRenderStateContext Context;
|
|
}),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteSoftwareVRS = 1;
|
|
static FAutoConsoleVariableRef CVarNaniteSoftwareVRS(
|
|
TEXT("r.Nanite.SoftwareVRS"),
|
|
GNaniteSoftwareVRS,
|
|
TEXT("Whether to enable Nanite software variable rate shading in compute."),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
int32 GNaniteValidateShadeBinning = 0;
|
|
static FAutoConsoleVariableRef CVarNaniteValidateShadeBinning(
|
|
TEXT("r.Nanite.Debug.ValidateShadeBinning"),
|
|
GNaniteValidateShadeBinning,
|
|
TEXT(""),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static int32 GNaniteCacheRelevanceParallel = 1;
|
|
static FAutoConsoleVariableRef CVarNaniteCacheRelevanceParallel(
|
|
TEXT("r.Nanite.CacheRelevanceParallel"),
|
|
GNaniteCacheRelevanceParallel,
|
|
TEXT("Enable parallel caching of Nanite material relevance. 0=disabled, 1=enabled (default)"),
|
|
ECVF_RenderThreadSafe
|
|
);
|
|
|
|
inline bool UsingHighPrecisionGBuffer()
|
|
{
|
|
static const auto CVarFormat = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.GBufferFormat"));
|
|
static const int32 EGBufferFormat_Force16BitsPerChannel = 5; // TODO: Refactor GBufferInfo.cpp to cleanly expose this
|
|
const bool bHighPrecisionGBuffer = CVarFormat && CVarFormat->GetValueOnRenderThread() >= EGBufferFormat_Force16BitsPerChannel;
|
|
return bHighPrecisionGBuffer;
|
|
}
|
|
|
|
bool CanUseShaderBundleWorkGraph(EShaderPlatform Platform)
|
|
{
|
|
static bool bNaniteBundleSupportWorkGraphs = NaniteWorkGraphMaterialsSupported();
|
|
return bNaniteBundleSupportWorkGraphs && !!GRHISupportsShaderBundleWorkGraphDispatch && RHISupportsWorkGraphs(Platform);
|
|
}
|
|
|
|
static bool UseWorkGraphForShadingBundles(EShaderPlatform Platform)
|
|
{
|
|
return GNaniteBundleShading != 0 && CanUseShaderBundleWorkGraph(Platform) && CVarNaniteBundleEmulation.GetValueOnRenderThread() == 0;
|
|
}
|
|
|
|
static bool UseShadingShaderBundle(EShaderPlatform Platform)
|
|
{
|
|
return GNaniteBundleShading != 0 && (!!GRHISupportsShaderBundleDispatch || CanUseShaderBundleWorkGraph(Platform));
|
|
}
|
|
|
|
static uint32 GetShadingRateTileSizeBits()
|
|
{
|
|
uint32 TileSizeBits = 0;
|
|
|
|
// Temporarily disable this on Intel until the shader is fixed to
|
|
// correctly handle a wave size of 16.
|
|
if (GNaniteSoftwareVRS != 0 && !IsRHIDeviceIntel() && GVRSImageManager.IsVRSEnabledForFrame() /* HW or SW VRS enabled? */)
|
|
{
|
|
bool bUseSoftwareImage = GVRSImageManager.IsSoftwareVRSEnabledForFrame();
|
|
if (!bUseSoftwareImage)
|
|
{
|
|
// Technically these could be different, but currently never in practice
|
|
// 8x8, 16x16, or 32x32 for DX12 Tier2 HW VRS
|
|
ensure
|
|
(
|
|
GRHIVariableRateShadingImageTileMinWidth == GRHIVariableRateShadingImageTileMinHeight &&
|
|
GRHIVariableRateShadingImageTileMinWidth == GRHIVariableRateShadingImageTileMaxWidth &&
|
|
GRHIVariableRateShadingImageTileMinWidth == GRHIVariableRateShadingImageTileMaxHeight &&
|
|
FMath::IsPowerOfTwo(GRHIVariableRateShadingImageTileMinWidth)
|
|
);
|
|
}
|
|
|
|
uint32 TileSize = GVRSImageManager.GetSRITileSize(bUseSoftwareImage).X;
|
|
TileSizeBits = FMath::FloorLog2(TileSize);
|
|
}
|
|
|
|
return TileSizeBits;
|
|
}
|
|
|
|
static FRDGTextureRef GetShadingRateImage(FRDGBuilder& GraphBuilder, const FViewInfo& ViewInfo)
|
|
{
|
|
FRDGTextureRef ShadingRateImage = nullptr;
|
|
|
|
if (GetShadingRateTileSizeBits() != 0)
|
|
{
|
|
bool bUseSoftwareImage = GVRSImageManager.IsSoftwareVRSEnabledForFrame();
|
|
ShadingRateImage = GVRSImageManager.GetVariableRateShadingImage(GraphBuilder, ViewInfo, FVariableRateShadingImageManager::EVRSPassType::NaniteEmitGBufferPass, bUseSoftwareImage);
|
|
}
|
|
|
|
if (ShadingRateImage == nullptr)
|
|
{
|
|
const FRDGSystemTextures& SystemTextures = FRDGSystemTextures::Get(GraphBuilder);
|
|
ShadingRateImage = SystemTextures.Black;
|
|
}
|
|
|
|
return ShadingRateImage;
|
|
}
|
|
|
|
class FVisualizeClearTilesCS : public FNaniteGlobalShader
|
|
{
|
|
public:
|
|
DECLARE_GLOBAL_SHADER(FVisualizeClearTilesCS);
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
|
|
SHADER_PARAMETER(FUint32Vector4, ViewRect)
|
|
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTextureMetadata, OutCMaskBuffer)
|
|
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D<uint>, OutVisualized)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
|
|
FVisualizeClearTilesCS() = default;
|
|
FVisualizeClearTilesCS(const ShaderMetaType::CompiledShaderInitializerType& Initializer)
|
|
: FNaniteGlobalShader(Initializer)
|
|
{
|
|
PlatformDataParam.Bind(Initializer.ParameterMap, TEXT("PlatformData"), SPF_Mandatory);
|
|
BindForLegacyShaderParameters<FParameters>(this, Initializer.PermutationId, Initializer.ParameterMap);
|
|
}
|
|
|
|
// Shader parameter structs don't have a way to push variable sized data yet. So the we use the old shader parameter API.
|
|
void SetParameters(FRHIBatchedShaderParameters& BatchedParameters, const void* PlatformDataPtr, uint32 PlatformDataSize)
|
|
{
|
|
BatchedParameters.SetShaderParameter(PlatformDataParam.GetBufferIndex(), PlatformDataParam.GetBaseIndex(), PlatformDataSize, PlatformDataPtr);
|
|
}
|
|
|
|
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
|
|
{
|
|
return RHISupportsRenderTargetWriteMask(Parameters.Platform) && DoesPlatformSupportNanite(Parameters.Platform);
|
|
}
|
|
|
|
private:
|
|
LAYOUT_FIELD(FShaderParameter, PlatformDataParam);
|
|
};
|
|
IMPLEMENT_GLOBAL_SHADER(FVisualizeClearTilesCS, "/Engine/Private/Nanite/NaniteFastClear.usf", "VisualizeClearTilesCS", SF_Compute);
|
|
|
|
class FShadingBinBuildCS : public FNaniteGlobalShader
|
|
{
|
|
DECLARE_GLOBAL_SHADER(FShadingBinBuildCS);
|
|
|
|
class FBuildPassDim : SHADER_PERMUTATION_SPARSE_INT("SHADING_BIN_PASS", NANITE_SHADING_BIN_COUNT, NANITE_SHADING_BIN_SCATTER);
|
|
class FTechniqueDim : SHADER_PERMUTATION_INT("BINNING_TECHNIQUE", 2);
|
|
class FGatherStatsDim : SHADER_PERMUTATION_BOOL("GATHER_STATS");
|
|
class FVariableRateDim : SHADER_PERMUTATION_BOOL("VARIABLE_SHADING_RATE");
|
|
class FOptimizeWriteMaskDim : SHADER_PERMUTATION_BOOL("OPTIMIZE_WRITE_MASK");
|
|
class FNumExports : SHADER_PERMUTATION_RANGE_INT("NUM_EXPORTS", 1, MaxSimultaneousRenderTargets);
|
|
using FPermutationDomain = TShaderPermutationDomain<FBuildPassDim, FTechniqueDim, FGatherStatsDim, FVariableRateDim, FOptimizeWriteMaskDim, FNumExports>;
|
|
|
|
FShadingBinBuildCS() = default;
|
|
FShadingBinBuildCS(const ShaderMetaType::CompiledShaderInitializerType & Initializer)
|
|
: FNaniteGlobalShader(Initializer)
|
|
{
|
|
PlatformDataParam.Bind(Initializer.ParameterMap, TEXT("PlatformData"), SPF_Optional);
|
|
SubTileMatchParam.Bind(Initializer.ParameterMap, TEXT("SubTileMatch"), SPF_Optional);
|
|
BindForLegacyShaderParameters<FParameters>(this, Initializer.PermutationId, Initializer.ParameterMap);
|
|
}
|
|
|
|
// Shader parameter structs don't have a way to push variable sized data yet. So the we use the old shader parameter API.
|
|
void SetParameters(FRHIBatchedShaderParameters& BatchedParameters, const void* PlatformDataPtr, uint32 PlatformDataSize, bool bSubTileMatch)
|
|
{
|
|
BatchedParameters.SetShaderParameter(PlatformDataParam.GetBufferIndex(), PlatformDataParam.GetBaseIndex(), PlatformDataSize, PlatformDataPtr);
|
|
|
|
uint32 SubTileMatch = bSubTileMatch ? 1u : 0u;
|
|
BatchedParameters.SetShaderParameter(SubTileMatchParam.GetBufferIndex(), SubTileMatchParam.GetBaseIndex(), sizeof(SubTileMatch), &SubTileMatch);
|
|
}
|
|
|
|
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
|
|
{
|
|
if (!DoesPlatformSupportNanite(Parameters.Platform))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
FPermutationDomain PermutationVector(Parameters.PermutationId);
|
|
if (PermutationVector.Get<FOptimizeWriteMaskDim>() && !RHISupportsRenderTargetWriteMask(Parameters.Platform))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (PermutationVector.Get<FOptimizeWriteMaskDim>() && PermutationVector.Get<FBuildPassDim>() != NANITE_SHADING_BIN_COUNT)
|
|
{
|
|
// We only want one of the build passes to export out cmask, so we choose the
|
|
// counting pass because it touches less memory already than scatter.
|
|
return false;
|
|
}
|
|
|
|
if (!PermutationVector.Get<FOptimizeWriteMaskDim>() && PermutationVector.Get<FNumExports>() > 1)
|
|
{
|
|
// The NUM_EXPORTS perm is only valid when optimizing the write mask.
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
|
|
{
|
|
FNaniteGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
|
|
}
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
|
|
SHADER_PARAMETER(FUint32Vector4, ViewRect)
|
|
SHADER_PARAMETER(uint32, ValidWriteMask)
|
|
SHADER_PARAMETER(FUint32Vector2, DispatchOffsetTL)
|
|
SHADER_PARAMETER(uint32, ShadingBinCount)
|
|
SHADER_PARAMETER(uint32, ShadingBinDataByteOffset)
|
|
SHADER_PARAMETER(uint32, ShadingRateTileSizeBits)
|
|
SHADER_PARAMETER(uint32, DummyZero)
|
|
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<uint>, ShadingRateImage)
|
|
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<uint>, ShadingMask)
|
|
SHADER_PARAMETER_SAMPLER(SamplerState, ShadingMaskSampler)
|
|
SHADER_PARAMETER_RDG_TEXTURE_UAV_ARRAY(RWTextureMetadata, OutCMaskBuffer, [MaxSimultaneousRenderTargets])
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<FNaniteShadingBinStats>, OutShadingBinStats)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, OutShadingBinData)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, OutShadingBinArgs)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<FNaniteShadingBinScatterMeta>, OutShadingBinScatterMeta)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
|
|
private:
|
|
LAYOUT_FIELD(FShaderParameter, PlatformDataParam);
|
|
LAYOUT_FIELD(FShaderParameter, SubTileMatchParam);
|
|
};
|
|
IMPLEMENT_GLOBAL_SHADER(FShadingBinBuildCS, "/Engine/Private/Nanite/NaniteShadeBinning.usf", "ShadingBinBuildCS", SF_Compute);
|
|
|
|
class FShadingBinReserveCS : public FNaniteGlobalShader
|
|
{
|
|
DECLARE_GLOBAL_SHADER(FShadingBinReserveCS);
|
|
SHADER_USE_PARAMETER_STRUCT(FShadingBinReserveCS, FNaniteGlobalShader);
|
|
|
|
class FGatherStatsDim : SHADER_PERMUTATION_BOOL("GATHER_STATS");
|
|
using FPermutationDomain = TShaderPermutationDomain<FGatherStatsDim>;
|
|
|
|
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
|
|
{
|
|
return DoesPlatformSupportNanite(Parameters.Platform);
|
|
}
|
|
|
|
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
|
|
{
|
|
FNaniteGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
|
|
OutEnvironment.SetDefine(TEXT("SHADING_BIN_PASS"), NANITE_SHADING_BIN_RESERVE);
|
|
}
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
|
|
SHADER_PARAMETER(uint32, ShadingBinCount)
|
|
SHADER_PARAMETER(uint32, ShadingBinDataByteOffset)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<FNaniteShadingBinStats>, OutShadingBinStats)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, OutShadingBinData)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<uint>, OutShadingBinAllocator)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, OutShadingBinArgs)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<FNaniteShadingBinScatterMeta>, OutShadingBinScatterMeta)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
};
|
|
IMPLEMENT_GLOBAL_SHADER(FShadingBinReserveCS, "/Engine/Private/Nanite/NaniteShadeBinning.usf", "ShadingBinReserveCS", SF_Compute);
|
|
|
|
class FShadingBinValidateCS : public FNaniteGlobalShader
|
|
{
|
|
DECLARE_GLOBAL_SHADER(FShadingBinValidateCS);
|
|
SHADER_USE_PARAMETER_STRUCT(FShadingBinValidateCS, FNaniteGlobalShader);
|
|
|
|
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
|
|
{
|
|
return DoesPlatformSupportNanite(Parameters.Platform);
|
|
}
|
|
|
|
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
|
|
{
|
|
FNaniteGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
|
|
OutEnvironment.SetDefine(TEXT("SHADING_BIN_PASS"), NANITE_SHADING_BIN_VALIDATE);
|
|
}
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
|
|
SHADER_PARAMETER(uint32, ShadingBinCount)
|
|
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, OutShadingBinData)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
};
|
|
IMPLEMENT_GLOBAL_SHADER(FShadingBinValidateCS, "/Engine/Private/Nanite/NaniteShadeBinning.usf", "ShadingBinValidateCS", SF_Compute);
|
|
|
|
IMPLEMENT_UNIFORM_BUFFER_STRUCT_EX(FComputeShadingOutputs, "ComputeShadingOutputs", FShaderParametersMetadata::EUsageFlags::NeedsReflectedMembers|FShaderParametersMetadata::EUsageFlags::ManuallyBoundByPass);
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FNaniteShadingPassParameters, )
|
|
RDG_BUFFER_ACCESS(ShadingBinArgs, ERHIAccess::IndirectArgs)
|
|
|
|
SHADER_PARAMETER_STRUCT_INCLUDE(FViewShaderParameters, View) // To access VTFeedbackBuffer
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FSceneUniformParameters, Scene)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FNaniteRasterUniformParameters, NaniteRaster)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FNaniteShadingUniformParameters, NaniteShading)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FOpaqueBasePassUniformParameters, BasePass)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FLumenCardPassUniformParameters, CardPass)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FComputeShadingOutputs, ComputeShadingOutputs)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
|
|
namespace Nanite
|
|
{
|
|
|
|
bool HasNoDerivativeOps(FRHIComputeShader* ComputeShaderRHI)
|
|
{
|
|
if (GNaniteShadeBinningMode == 1)
|
|
{
|
|
return true;
|
|
}
|
|
else if (GNaniteShadeBinningMode == 2)
|
|
{
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
return ComputeShaderRHI ? ComputeShaderRHI->HasNoDerivativeOps() : false;
|
|
}
|
|
}
|
|
|
|
void BuildShadingCommands(FRDGBuilder& GraphBuilder, FScene& Scene, ENaniteMeshPass::Type MeshPass, FNaniteShadingCommands& ShadingCommands, EBuildShadingCommandsMode Mode)
|
|
{
|
|
FNaniteShadingPipelines& ShadingPipelines = Scene.NaniteShadingPipelines[MeshPass];
|
|
if (ShadingPipelines.bBuildCommands || Mode == EBuildShadingCommandsMode::Custom)
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::BuildShadingCommands);
|
|
const auto& Pipelines = ShadingPipelines.GetShadingPipelineMap();
|
|
const EShaderPlatform ShaderPlatform = Scene.GetShaderPlatform();
|
|
|
|
ShadingCommands.SetupTask = GraphBuilder.AddSetupTask([&ShadingCommands, &Pipelines, ShaderPlatform]
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::BuildShadingCommandsMetadata);
|
|
ShadingCommands.MaxShadingBin = 0u;
|
|
ShadingCommands.BoundTargetMask = 0x0u;
|
|
ShadingCommands.NumCommands = Pipelines.Num();
|
|
|
|
for (const auto& Iter : Pipelines)
|
|
{
|
|
const FNaniteShadingEntry& Entry = Iter.Value;
|
|
ShadingCommands.MaxShadingBin = FMath::Max<uint32>(ShadingCommands.MaxShadingBin, uint32(Entry.BinIndex));
|
|
ShadingCommands.BoundTargetMask |= Entry.ShadingPipeline->BoundTargetMask;
|
|
}
|
|
|
|
ShadingCommands.MetaBufferData.SetNumZeroed(ShadingCommands.MaxShadingBin + 1u);
|
|
|
|
for (const auto& Iter : Pipelines)
|
|
{
|
|
const FNaniteShadingEntry& Entry = Iter.Value;
|
|
FUintVector4& MetaEntry = ShadingCommands.MetaBufferData[Entry.BinIndex];
|
|
// Note: .XYZ are populated by the GPU during shade binning
|
|
MetaEntry.W = Entry.ShadingPipeline->MaterialBitFlags;
|
|
}
|
|
|
|
// Create Shader Bundle
|
|
if (UseShadingShaderBundle(ShaderPlatform) && ShadingCommands.NumCommands > 0)
|
|
{
|
|
FShaderBundleCreateInfo CreateInfo;
|
|
CreateInfo.ArgOffset = 0u;
|
|
CreateInfo.ArgStride = 16u;
|
|
CreateInfo.NumRecords = ShadingCommands.MaxShadingBin + 1u;
|
|
CreateInfo.Mode = ERHIShaderBundleMode::CS;
|
|
ShadingCommands.ShaderBundle = RHICreateShaderBundle(CreateInfo);
|
|
check(ShadingCommands.ShaderBundle != nullptr);
|
|
}
|
|
else
|
|
{
|
|
ShadingCommands.ShaderBundle = nullptr;
|
|
}
|
|
});
|
|
|
|
ShadingCommands.BuildCommandsTask = GraphBuilder.AddSetupTask([&Pipelines, &Commands = ShadingCommands.Commands, &CommandLookup = ShadingCommands.CommandLookup]
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::BuildShadingCommandsTask);
|
|
Commands.Reset();
|
|
Commands.Reserve(Pipelines.Num());
|
|
|
|
uint32 MaxShadingBin = 0;
|
|
|
|
for (const auto& Iter : Pipelines)
|
|
{
|
|
FNaniteShadingCommand& ShadingCommand = Commands.AddDefaulted_GetRef();
|
|
const FNaniteShadingEntry& Entry = Iter.Value;
|
|
ShadingCommand.Pipeline = Entry.ShadingPipeline;
|
|
ShadingCommand.ShadingBin = Entry.BinIndex;
|
|
|
|
MaxShadingBin = FMath::Max<uint32>(MaxShadingBin, uint32(Entry.BinIndex));
|
|
}
|
|
|
|
CommandLookup.SetNumZeroed(MaxShadingBin + 1);
|
|
|
|
if (GNaniteComputeMaterialsSort != 0)
|
|
{
|
|
Commands.Sort([](auto& A, auto& B)
|
|
{
|
|
const FNaniteShadingPipeline& PipelineA = *A.Pipeline.Get();
|
|
const FNaniteShadingPipeline& PipelineB = *B.Pipeline.Get();
|
|
|
|
// First group all shaders with the same bound target mask (UAV exports)
|
|
if (PipelineA.BoundTargetMask != PipelineB.BoundTargetMask)
|
|
{
|
|
return PipelineA.BoundTargetMask < PipelineB.BoundTargetMask;
|
|
}
|
|
|
|
// Then group up all shading bins using same shader but different bindings
|
|
if (PipelineA.ComputeShader != PipelineB.ComputeShader)
|
|
{
|
|
return PipelineA.ComputeShader < PipelineB.ComputeShader;
|
|
}
|
|
|
|
// Sort indirect arg memory location in ascending order to help minimize cache misses on the indirect args
|
|
return A.ShadingBin < B.ShadingBin;
|
|
});
|
|
}
|
|
|
|
for (int32 CommandIndex = 0; CommandIndex < Commands.Num(); ++CommandIndex)
|
|
{
|
|
const FNaniteShadingCommand& ShadingCommand = Commands[CommandIndex];
|
|
CommandLookup[ShadingCommand.ShadingBin] = CommandIndex;
|
|
}
|
|
|
|
}, ShadingCommands.SetupTask);
|
|
|
|
if (Mode == EBuildShadingCommandsMode::Default)
|
|
{
|
|
ShadingPipelines.bBuildCommands = false;
|
|
|
|
if (auto MaterialsExtension = Scene.GetExtensionPtr<Nanite::FMaterialsSceneExtension>())
|
|
{
|
|
MaterialsExtension->PostBuildNaniteShadingCommands(GraphBuilder, ShadingCommands.BuildCommandsTask, MeshPass);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
uint32 PackMaterialBitFlags(const FMaterial& Material, uint32 BoundTargetMask, bool bNoDerivativeOps)
|
|
{
|
|
const bool bMaterialHasProgrammableVertexUVs = Material.HasVertexInterpolator() || Material.GetNumCustomizedUVs() > 0;
|
|
|
|
FNaniteMaterialFlags Flags = { 0 };
|
|
Flags.bPixelDiscard = Material.IsMasked();
|
|
Flags.bPixelDepthOffset = Material.MaterialUsesPixelDepthOffset_RenderThread();
|
|
Flags.bWorldPositionOffset = Material.MaterialUsesWorldPositionOffset_RenderThread();
|
|
Flags.bAllowVRS = Material.IsVariableRateShadingAllowed();
|
|
Flags.bDisplacement = UseNaniteTessellation() && Material.MaterialUsesDisplacement_RenderThread();
|
|
Flags.bNoDerivativeOps = bNoDerivativeOps;
|
|
Flags.bTwoSided = Material.IsTwoSided();
|
|
|
|
const bool bPixelProgrammable = IsNaniteMaterialPixelProgrammable(Flags);
|
|
Flags.bVertexUVs = bMaterialHasProgrammableVertexUVs && bPixelProgrammable;
|
|
|
|
const uint32 PackedFlags = PackNaniteMaterialBitFlags(Flags);
|
|
return ((BoundTargetMask & 0xFFu) << 24u) | (PackedFlags & 0x00FFFFFFu);
|
|
}
|
|
|
|
bool LoadBasePassPipeline(
|
|
const FScene& Scene,
|
|
FSceneProxyBase* SceneProxy,
|
|
FSceneProxyBase::FMaterialSection& Section,
|
|
FNaniteShadingPipeline& ShadingPipeline
|
|
)
|
|
{
|
|
static const bool bAllowStaticLighting = IsStaticLightingAllowed();
|
|
|
|
const ERHIFeatureLevel::Type FeatureLevel = Scene.GetFeatureLevel();
|
|
|
|
FNaniteVertexFactory* NaniteVertexFactory = Nanite::GVertexFactoryResource.GetVertexFactory();
|
|
FVertexFactoryType* NaniteVertexFactoryType = NaniteVertexFactory->GetType();
|
|
|
|
const FMaterialRenderProxy* MaterialProxy = Section.ShadingMaterialProxy;
|
|
while (MaterialProxy)
|
|
{
|
|
const FMaterial* Material = MaterialProxy->GetMaterialNoFallback(FeatureLevel);
|
|
if (Material)
|
|
{
|
|
break;
|
|
}
|
|
MaterialProxy = MaterialProxy->GetFallback(FeatureLevel);
|
|
}
|
|
|
|
check(MaterialProxy);
|
|
|
|
ELightMapPolicyType LightMapPolicyType = ELightMapPolicyType::LMP_NO_LIGHTMAP;
|
|
|
|
FLightCacheInterface* LightCacheInterface = nullptr;
|
|
if (bAllowStaticLighting)
|
|
{
|
|
FPrimitiveSceneProxy::FLCIArray LCIs;
|
|
SceneProxy->GetLCIs(LCIs);
|
|
|
|
// We expect a Nanite scene proxy can only ever have a single LCI, or none in cases like skeletal meshes
|
|
check(LCIs.Num() <= 1u);
|
|
if (LCIs.Num() == 1u)
|
|
{
|
|
LightCacheInterface = LCIs[0];
|
|
}
|
|
}
|
|
|
|
bool bRenderSkylight = false;
|
|
|
|
const bool bUseWorkGraphShaders = UseWorkGraphForShadingBundles(Scene.GetShaderPlatform());
|
|
TShaderRef<TBasePassComputeShaderPolicyParamType<FUniformLightMapPolicy>> BasePassShader;
|
|
|
|
auto LoadShadingMaterial = [&](const FMaterialRenderProxy* MaterialProxyPtr)
|
|
{
|
|
const FMaterial& ShadingMaterial = MaterialProxy->GetIncompleteMaterialWithFallback(FeatureLevel);
|
|
check(Nanite::IsSupportedMaterialDomain(ShadingMaterial.GetMaterialDomain()));
|
|
check(Nanite::IsSupportedBlendMode(ShadingMaterial));
|
|
|
|
const FMaterialShadingModelField ShadingModels = ShadingMaterial.GetShadingModels();
|
|
bRenderSkylight = Scene.ShouldRenderSkylightInBasePass(IsTranslucentBlendMode(ShadingMaterial.GetBlendMode())) && ShadingModels != MSM_Unlit;
|
|
|
|
if (LightCacheInterface)
|
|
{
|
|
LightMapPolicyType = FBasePassMeshProcessor::GetUniformLightMapPolicyType(FeatureLevel, &Scene, LightCacheInterface, SceneProxy, ShadingMaterial);
|
|
}
|
|
|
|
bool bShadersValid = GetBasePassShader<FUniformLightMapPolicy>(
|
|
ShadingMaterial,
|
|
NaniteVertexFactoryType,
|
|
FUniformLightMapPolicy(LightMapPolicyType),
|
|
FeatureLevel,
|
|
bRenderSkylight,
|
|
Scene.RequiresDebugMaterials(),
|
|
bUseWorkGraphShaders ? SF_WorkGraphComputeNode : SF_Compute,
|
|
&BasePassShader
|
|
);
|
|
|
|
return bShadersValid;
|
|
};
|
|
|
|
bool bLoaded = LoadShadingMaterial(MaterialProxy);
|
|
if (!bLoaded)
|
|
{
|
|
MaterialProxy = UMaterial::GetDefaultMaterial(MD_Surface)->GetRenderProxy();
|
|
bLoaded = LoadShadingMaterial(MaterialProxy);
|
|
}
|
|
|
|
if (bLoaded)
|
|
{
|
|
ShadingPipeline.MaterialProxy = MaterialProxy;
|
|
ShadingPipeline.Material = MaterialProxy->GetMaterialNoFallback(FeatureLevel);
|
|
ShadingPipeline.BoundTargetMask = BasePassShader->GetBoundTargetMask();
|
|
ShadingPipeline.ComputeShader = bUseWorkGraphShaders ? nullptr : BasePassShader.GetComputeShader();
|
|
ShadingPipeline.WorkGraphShader = bUseWorkGraphShaders ? BasePassShader.GetWorkGraphShader() : nullptr;
|
|
ShadingPipeline.bIsTwoSided = !!Section.MaterialRelevance.bTwoSided;
|
|
ShadingPipeline.bIsMasked = !!Section.MaterialRelevance.bMasked;
|
|
ShadingPipeline.bNoDerivativeOps = HasNoDerivativeOps(ShadingPipeline.ComputeShader);
|
|
ShadingPipeline.MaterialBitFlags = PackMaterialBitFlags(*ShadingPipeline.Material, ShadingPipeline.BoundTargetMask, ShadingPipeline.bNoDerivativeOps);
|
|
|
|
ShadingPipeline.BasePassData = MakePimpl<FNaniteBasePassData, EPimplPtrMode::DeepCopy>();
|
|
ShadingPipeline.BasePassData->TypedShader = BasePassShader;
|
|
|
|
#if WITH_DEBUG_VIEW_MODES
|
|
ShadingPipeline.InstructionCount = BasePassShader->GetNumInstructions();
|
|
ShadingPipeline.LWCComplexity = 0;
|
|
#if WITH_EDITOR
|
|
FMaterialShaderMap* MaterialShaderMap = ShadingPipeline.Material->GetRenderingThreadShaderMap();
|
|
if (ensure(MaterialShaderMap))
|
|
{
|
|
uint32 LWCComplexityVS = 0;
|
|
uint32 LWCComplexityPS = 0;
|
|
uint32 LWCComplexityCS = 0;
|
|
|
|
MaterialShaderMap->GetEstimatedLWCFuncUsageComplexity(LWCComplexityVS, LWCComplexityPS, LWCComplexityCS);
|
|
|
|
// Set minimum complexity to 1, to differentiate between 0 cost and missing data
|
|
ShadingPipeline.LWCComplexity = static_cast<uint16>(FMath::Clamp(LWCComplexityCS++, 1, TNumericLimits<uint16>::Max()));
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
TBasePassShaderElementData<FUniformLightMapPolicy> ShaderElementData(LightCacheInterface);
|
|
ShaderElementData.InitializeMeshMaterialData();
|
|
|
|
ShadingPipeline.ShaderBindings = MakePimpl<FMeshDrawShaderBindings, EPimplPtrMode::DeepCopy>();
|
|
|
|
UE::MeshPassUtils::SetupComputeBindings(BasePassShader, &Scene, FeatureLevel, SceneProxy, *MaterialProxy, *ShadingPipeline.Material, ShaderElementData, *ShadingPipeline.ShaderBindings);
|
|
|
|
ShadingPipeline.ShaderBindingsHash = ShadingPipeline.ShaderBindings->GetDynamicInstancingHash();
|
|
}
|
|
|
|
return bLoaded;
|
|
}
|
|
|
|
struct FShadingConfig
|
|
{
|
|
uint8 bBundleShading : 1;
|
|
uint8 bBundleEmulation : 1;
|
|
uint8 bHighPrecision : 1;
|
|
uint8 bShowDrawEvents : 1;
|
|
};
|
|
|
|
inline void RecordShadingParameters(
|
|
FRHIBatchedShaderParameters& BatchedParameters,
|
|
FNaniteShadingCommand& ShadingCommand,
|
|
const FShadingConfig& ShadingConfig,
|
|
const uint32 DataByteOffset,
|
|
const FUint32Vector4& ViewRect,
|
|
TUniformBufferRef<FComputeShadingOutputs> OutputTargetsBuffer
|
|
)
|
|
{
|
|
const bool bNoDerivativeOps = !!ShadingCommand.Pipeline->bNoDerivativeOps;
|
|
|
|
ShadingCommand.PassData.X = ShadingCommand.ShadingBin; // Active Shading Bin
|
|
ShadingCommand.PassData.Y = bNoDerivativeOps ? 0 /* Pixel Binning */ : 1 /* Quad Binning */;
|
|
ShadingCommand.PassData.Z = ShadingConfig.bHighPrecision ? 1 : 0;
|
|
ShadingCommand.PassData.W = DataByteOffset;
|
|
|
|
ShadingCommand.Pipeline->ShaderBindings->SetParameters(BatchedParameters);
|
|
|
|
if (ShadingCommand.Pipeline->ComputeShader || ShadingCommand.Pipeline->WorkGraphShader)
|
|
{
|
|
ShadingCommand.Pipeline->BasePassData->TypedShader->SetPassParameters(
|
|
BatchedParameters,
|
|
ViewRect,
|
|
ShadingCommand.PassData,
|
|
OutputTargetsBuffer.GetReference()
|
|
);
|
|
}
|
|
}
|
|
|
|
inline void RecordShadingCommand(
|
|
FRHIComputeCommandList& RHICmdList,
|
|
FRHIBuffer* IndirectArgsBuffer,
|
|
const uint32 IndirectArgStride,
|
|
const FShadingConfig& ShadingConfig,
|
|
FRHIBatchedShaderParameters& ShadingParameters,
|
|
FNaniteShadingCommand& ShadingCommand
|
|
)
|
|
{
|
|
#if WANTS_DRAW_MESH_EVENTS
|
|
SCOPED_CONDITIONAL_DRAW_EVENTF(RHICmdList, SWShading, !!ShadingConfig.bShowDrawEvents, TEXT("%s"), GetShadingMaterialName(ShadingCommand.Pipeline->MaterialProxy));
|
|
#endif
|
|
|
|
const uint32 IndirectOffset = (ShadingCommand.ShadingBin * IndirectArgStride);
|
|
|
|
FRHIComputeShader* ComputeShaderRHI = ShadingCommand.Pipeline->ComputeShader;
|
|
SetComputePipelineState(RHICmdList, ComputeShaderRHI);
|
|
|
|
if (GRHISupportsShaderRootConstants)
|
|
{
|
|
RHICmdList.SetShaderRootConstants(ShadingCommand.PassData);
|
|
}
|
|
|
|
RHICmdList.SetBatchedShaderParameters(ComputeShaderRHI, ShadingParameters);
|
|
RHICmdList.DispatchIndirectComputeShader(IndirectArgsBuffer, IndirectOffset);
|
|
}
|
|
|
|
inline bool PrepareShadingCommand(FNaniteShadingCommand& ShadingCommand)
|
|
{
|
|
if (!PipelineStateCache::IsPSOPrecachingEnabled())
|
|
{
|
|
ShadingCommand.PSOPrecacheState = EPSOPrecacheResult::Unknown;
|
|
return true;
|
|
}
|
|
|
|
EPSOPrecacheResult PSOPrecacheResult = ShadingCommand.PSOPrecacheState;
|
|
bool bShouldCheckPrecacheResult = false;
|
|
|
|
// If PSO precache validation is on, we need to check the state for stats tracking purposes.
|
|
#if PSO_PRECACHING_VALIDATE
|
|
if (PSOCollectorStats::IsPrecachingValidationEnabled() && PSOPrecacheResult == EPSOPrecacheResult::Unknown)
|
|
{
|
|
bShouldCheckPrecacheResult = true;
|
|
}
|
|
#endif
|
|
|
|
// If we are skipping commands when the PSO is being precached but is not ready, we
|
|
// need to keep checking the state until it's not marked active anymore.
|
|
const bool bAllowSkip = true;
|
|
if (bAllowSkip && GSkipDrawOnPSOPrecaching)
|
|
{
|
|
if (PSOPrecacheResult == EPSOPrecacheResult::Unknown ||
|
|
PSOPrecacheResult == EPSOPrecacheResult::Active)
|
|
{
|
|
bShouldCheckPrecacheResult = true;
|
|
}
|
|
}
|
|
|
|
if (bShouldCheckPrecacheResult)
|
|
{
|
|
// Cache the state so that it's only checked again if necessary.
|
|
PSOPrecacheResult = PipelineStateCache::CheckPipelineStateInCache(ShadingCommand.Pipeline->ComputeShader);
|
|
ShadingCommand.PSOPrecacheState = PSOPrecacheResult;
|
|
}
|
|
|
|
#if PSO_PRECACHING_VALIDATE
|
|
static int32 PSOCollectorIndex = FPSOCollectorCreateManager::GetIndex(EShadingPath::Deferred, TEXT("NaniteShading"));
|
|
PSOCollectorStats::CheckComputePipelineStateInCache(*ShadingCommand.Pipeline->ComputeShader, PSOPrecacheResult, ShadingCommand.Pipeline->MaterialProxy, PSOCollectorIndex);
|
|
#endif
|
|
|
|
// Try and skip draw if the PSO is not precached yet.
|
|
const bool bSkipped = (bAllowSkip && GSkipDrawOnPSOPrecaching && PSOPrecacheResult == EPSOPrecacheResult::Active);
|
|
return !bSkipped;
|
|
}
|
|
|
|
struct FNaniteShadingPassIntermediates
|
|
{
|
|
TUniformBufferRef<FComputeShadingOutputs> ShadingOutputs;
|
|
TBitArray<SceneRenderingBitArrayAllocator> VisibilityData;
|
|
FRHIBuffer* IndirectArgsBuffer = nullptr;
|
|
FUint32Vector4 ViewRect;
|
|
};
|
|
|
|
static TSharedPtr<FNaniteShadingPassIntermediates> CreateNaniteShadingPassIntermediates(
|
|
const FNaniteShadingPassParameters* ShadingPassParameters,
|
|
const FNaniteShadingCommands& ShadingCommands,
|
|
const FNaniteVisibilityQuery* VisibilityQuery,
|
|
FIntRect ViewRect)
|
|
{
|
|
// This is processed within the RDG pass lambda, so the setup task should be complete by now.
|
|
check(ShadingCommands.BuildCommandsTask.IsCompleted());
|
|
|
|
TSharedPtr<FNaniteShadingPassIntermediates> Intermediates = MakeShared<FNaniteShadingPassIntermediates>();
|
|
|
|
ShadingPassParameters->ShadingBinArgs->MarkResourceAsUsed();
|
|
Intermediates->IndirectArgsBuffer = ShadingPassParameters->ShadingBinArgs->GetIndirectRHICallBuffer();
|
|
|
|
const auto GetOutputTargetRHI = [](const FRDGTextureUAVRef OutputTarget)
|
|
{
|
|
FRHIUnorderedAccessView* OutputTargetRHI = nullptr;
|
|
if (OutputTarget != nullptr)
|
|
{
|
|
OutputTarget->MarkResourceAsUsed();
|
|
OutputTargetRHI = OutputTarget->GetRHI();
|
|
}
|
|
return OutputTargetRHI;
|
|
};
|
|
|
|
const FNaniteVisibilityResults* VisibilityResults = Nanite::GetVisibilityResults(VisibilityQuery);
|
|
|
|
TSharedPtr<TBitArray<SceneRenderingBitArrayAllocator>> VisibilityData;
|
|
if (VisibilityResults && VisibilityResults->IsShadingTestValid())
|
|
{
|
|
Intermediates->VisibilityData = VisibilityResults->GetShadingBinVisibility();
|
|
}
|
|
|
|
TRDGUniformBufferRef<FComputeShadingOutputs> ShadingOutputs = ShadingPassParameters->ComputeShadingOutputs.GetUniformBuffer();
|
|
Intermediates->ShadingOutputs = ShadingOutputs->GetRHIRef();
|
|
|
|
Intermediates->ViewRect = FUint32Vector4(
|
|
(uint32)ViewRect.Min.X,
|
|
(uint32)ViewRect.Min.Y,
|
|
(uint32)ViewRect.Max.X,
|
|
(uint32)ViewRect.Max.Y
|
|
);
|
|
|
|
return Intermediates;
|
|
};
|
|
|
|
static void DispatchComputeShaderBundle(
|
|
FRHIComputeCommandList& RHICmdList,
|
|
FNaniteShadingCommands& ShadingCommands,
|
|
const FShadingConfig& ShadingConfig,
|
|
const FShaderBundleRHIRef& ShaderBundle,
|
|
const FNaniteShadingPassIntermediates& Intermediates,
|
|
uint32 DataByteOffset,
|
|
EParallelForFlags ParallelForFlags = EParallelForFlags::None)
|
|
{
|
|
RHICmdList.DispatchComputeShaderBundle([&](FRHICommandDispatchComputeShaderBundle& Command)
|
|
{
|
|
Command.ShaderBundle = ShaderBundle;
|
|
Command.bEmulated = ShadingConfig.bBundleEmulation;
|
|
Command.RecordArgBuffer = Intermediates.IndirectArgsBuffer;
|
|
Command.Dispatches.SetNum(ShaderBundle->NumRecords);
|
|
|
|
std::atomic<uint32> PendingPSOs{ 0u };
|
|
|
|
TArray<FRHIBatchedShaderParametersAllocator*, SceneRenderingAllocator> Allocators;
|
|
|
|
ParallelForWithTaskContext(TEXT("RecordShadingCommands"), Allocators, ShadingCommands.Commands.Num(), 1,
|
|
[&] (int32, int32)
|
|
{
|
|
// Use the large page size for the allocator to reduce allocations
|
|
return RHICmdList.CreateBatchedShaderParameterAllocator(ERHIBatchedShaderParameterAllocatorPageSize::Large);
|
|
},
|
|
[&](FRHIBatchedShaderParametersAllocator* ParameterAllocator, int32 CommandIndex)
|
|
{
|
|
FNaniteShadingCommand& ShadingCommand = ShadingCommands.Commands[CommandIndex];
|
|
ShadingCommand.bVisible = Intermediates.VisibilityData.IsEmpty() || Intermediates.VisibilityData.AccessCorrespondingBit(FRelativeBitReference(ShadingCommand.ShadingBin));
|
|
|
|
if (ShadingCommand.bVisible && PrepareShadingCommand(ShadingCommand))
|
|
{
|
|
FRHIShaderBundleComputeDispatch& Dispatch = Command.Dispatches[ShadingCommand.ShadingBin];
|
|
|
|
Dispatch.RecordIndex = ShadingCommand.ShadingBin;
|
|
Dispatch.Parameters.Emplace(*ParameterAllocator);
|
|
RecordShadingParameters(*Dispatch.Parameters, ShadingCommand, ShadingConfig, DataByteOffset, Intermediates.ViewRect, Intermediates.ShadingOutputs);
|
|
Dispatch.Parameters->Finish();
|
|
Dispatch.Shader = ShadingCommand.Pipeline->ComputeShader;
|
|
Dispatch.WorkGraphShader = ShadingCommand.Pipeline->WorkGraphShader;
|
|
Dispatch.Constants = ShadingCommand.PassData;
|
|
Dispatch.PipelineState = Dispatch.Shader ? FindComputePipelineState(Dispatch.Shader) : nullptr;
|
|
|
|
if (Dispatch.Shader)
|
|
{
|
|
PendingPSOs.fetch_add(1u, std::memory_order_relaxed);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// TODO: Optimization: Send partial dispatch lists, but for now we'll leave the record index invalid so bundle dispatch skips it
|
|
Command.Dispatches[ShadingCommand.ShadingBin].RecordIndex = ~uint32(0u);
|
|
}
|
|
}
|
|
);
|
|
|
|
// Resolve invalid pipeline states
|
|
if (PendingPSOs.load(std::memory_order_relaxed) > 0)
|
|
{
|
|
for (FRHIShaderBundleComputeDispatch& Dispatch : Command.Dispatches)
|
|
{
|
|
if (!Dispatch.IsValid() || Dispatch.PipelineState != nullptr)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
// If we don't have precaching, then GetComputePipelineState() might return a PipelineState that isn't ready.
|
|
const bool bSkipDraw = !PipelineStateCache::IsPSOPrecachingEnabled();
|
|
|
|
// This cache lookup cannot be parallelized due to the possibility of a fence insertion into the command list during a miss.
|
|
Dispatch.PipelineState = GetComputePipelineState(RHICmdList, Dispatch.Shader, !bSkipDraw);
|
|
|
|
if (bSkipDraw)
|
|
{
|
|
Dispatch.RecordIndex = ~uint32(0u);
|
|
continue;
|
|
}
|
|
|
|
if (Dispatch.Shader && RHICmdList.Bypass())
|
|
{
|
|
Dispatch.RHIPipeline = ExecuteSetComputePipelineState(Dispatch.PipelineState);
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
FNaniteShadingPassParameters CreateNaniteShadingPassParams(
|
|
FRDGBuilder& GraphBuilder,
|
|
const FSceneRenderer& SceneRenderer,
|
|
const FSceneTextures& SceneTextures,
|
|
const FDBufferTextures& DBufferTextures,
|
|
const FViewInfo& View,
|
|
const FIntRect ViewRect,
|
|
const FRasterResults& RasterResults,
|
|
FRDGTextureRef ShadingMask,
|
|
FRDGTextureRef VisBuffer64,
|
|
FRDGTextureRef DbgBuffer64,
|
|
FRDGTextureRef DbgBuffer32,
|
|
FRDGBufferRef VisibleClustersSWHW,
|
|
FRDGBufferRef MultiViewIndices,
|
|
FRDGBufferRef MultiViewRectScaleOffsets,
|
|
FRDGBufferRef ViewsBuffer,
|
|
const FRenderTargetBindingSlots& BasePassRenderTargets,
|
|
const uint32 BoundTargetMask,
|
|
const FShadeBinning& ShadeBinning
|
|
)
|
|
{
|
|
FNaniteShadingPassParameters Result;
|
|
|
|
Result.ShadingBinArgs = ShadeBinning.ShadingBinArgs;
|
|
|
|
// NaniteRaster Uniform Buffer
|
|
{
|
|
FNaniteRasterUniformParameters* UniformParameters = GraphBuilder.AllocParameters<FNaniteRasterUniformParameters>();
|
|
UniformParameters->PageConstants = RasterResults.PageConstants;
|
|
UniformParameters->MaxNodes = RasterResults.MaxNodes;
|
|
UniformParameters->MaxVisibleClusters = RasterResults.MaxVisibleClusters;
|
|
UniformParameters->MaxCandidatePatches = RasterResults.MaxCandidatePatches;
|
|
UniformParameters->MaxPatchesPerGroup = RasterResults.MaxPatchesPerGroup;
|
|
UniformParameters->MeshPass = RasterResults.MeshPass;
|
|
UniformParameters->InvDiceRate = RasterResults.InvDiceRate;
|
|
UniformParameters->RenderFlags = RasterResults.RenderFlags;
|
|
UniformParameters->DebugFlags = RasterResults.DebugFlags;
|
|
Result.NaniteRaster = GraphBuilder.CreateUniformBuffer(UniformParameters);
|
|
}
|
|
|
|
// NaniteShading Uniform Buffer
|
|
{
|
|
FNaniteShadingUniformParameters* UniformParameters = GraphBuilder.AllocParameters<FNaniteShadingUniformParameters>();
|
|
|
|
UniformParameters->ClusterPageData = Nanite::GStreamingManager.GetClusterPageDataSRV(GraphBuilder);
|
|
UniformParameters->HierarchyBuffer = Nanite::GStreamingManager.GetHierarchySRV(GraphBuilder);
|
|
UniformParameters->VisibleClustersSWHW = GraphBuilder.CreateSRV(VisibleClustersSWHW);
|
|
|
|
UniformParameters->VisBuffer64 = VisBuffer64;
|
|
UniformParameters->DbgBuffer64 = DbgBuffer64;
|
|
UniformParameters->DbgBuffer32 = DbgBuffer32;
|
|
|
|
UniformParameters->ShadingMask = ShadingMask;
|
|
|
|
UniformParameters->MultiViewEnabled = 0;
|
|
UniformParameters->MultiViewIndices = GraphBuilder.CreateSRV(MultiViewIndices);
|
|
UniformParameters->MultiViewRectScaleOffsets = GraphBuilder.CreateSRV(MultiViewRectScaleOffsets);
|
|
UniformParameters->InViews = GraphBuilder.CreateSRV(ViewsBuffer);
|
|
|
|
UniformParameters->ShadingBinData = GraphBuilder.CreateSRV(ShadeBinning.ShadingBinData);
|
|
|
|
Result.NaniteShading = GraphBuilder.CreateUniformBuffer(UniformParameters);
|
|
}
|
|
|
|
Result.View = View.GetShaderParameters(); // To get VTFeedbackBuffer
|
|
Result.Scene = View.GetSceneUniforms().GetBuffer(GraphBuilder);
|
|
const bool bLumenGIEnabled = SceneRenderer.IsLumenGIEnabled(View);
|
|
Result.BasePass = CreateOpaqueBasePassUniformBuffer(GraphBuilder, View, 0, {}, DBufferTextures, bLumenGIEnabled);
|
|
|
|
FComputeShadingOutputs* ShadingOutputs = GraphBuilder.AllocParameters<FComputeShadingOutputs>();
|
|
|
|
// No possibility of read/write hazard due to fully resolved vbuffer/materials
|
|
const ERDGUnorderedAccessViewFlags OutTargetFlags = ERDGUnorderedAccessViewFlags::SkipBarrier;
|
|
|
|
FRDGTextureUAVRef DummyUAV{};
|
|
auto GetDummyUAV = [&DummyUAV, &GraphBuilder, OutTargetFlags]()
|
|
{
|
|
if (!DummyUAV)
|
|
{
|
|
FRDGTextureDesc DummyDesc = FRDGTextureDesc::Create2D(
|
|
FIntPoint(1u, 1u),
|
|
PF_R32_UINT,
|
|
FClearValueBinding::Transparent,
|
|
TexCreate_ShaderResource | TexCreate_UAV
|
|
);
|
|
|
|
DummyUAV = GraphBuilder.CreateUAV(GraphBuilder.CreateTexture(DummyDesc, TEXT("Nanite.TargetDummy")), OutTargetFlags);
|
|
}
|
|
return DummyUAV;
|
|
};
|
|
|
|
if (Substrate::IsSubstrateEnabled())
|
|
{
|
|
ShadingOutputs->OutTargets = GraphBuilder.CreateUAV(SceneRenderer.Scene->SubstrateSceneData.MaterialTextureArray, OutTargetFlags);
|
|
ShadingOutputs->OutTopLayerTarget = GraphBuilder.CreateUAV(SceneRenderer.Scene->SubstrateSceneData.TopLayerTexture, OutTargetFlags);
|
|
}
|
|
else
|
|
{
|
|
ShadingOutputs->OutTargets = GetDummyUAV();
|
|
ShadingOutputs->OutTopLayerTarget = GetDummyUAV();
|
|
}
|
|
|
|
const bool bMaintainCompression = (GNaniteFastTileClear == 2) && RHISupportsRenderTargetWriteMask(GMaxRHIShaderPlatform);
|
|
|
|
FRDGTextureUAVRef* OutTargets[MaxSimultaneousRenderTargets] =
|
|
{
|
|
&ShadingOutputs->OutTarget0,
|
|
&ShadingOutputs->OutTarget1,
|
|
&ShadingOutputs->OutTarget2,
|
|
&ShadingOutputs->OutTarget3,
|
|
&ShadingOutputs->OutTarget4,
|
|
&ShadingOutputs->OutTarget5,
|
|
&ShadingOutputs->OutTarget6,
|
|
&ShadingOutputs->OutTarget7
|
|
};
|
|
|
|
for (uint32 TargetIndex = 0; TargetIndex < MaxSimultaneousRenderTargets; ++TargetIndex)
|
|
{
|
|
if (FRDGTexture* TargetTexture = BasePassRenderTargets.Output[TargetIndex].GetTexture())
|
|
{
|
|
if ((BoundTargetMask & (1u << TargetIndex)) == 0u)
|
|
{
|
|
*OutTargets[TargetIndex] = GetDummyUAV();
|
|
}
|
|
else if (bMaintainCompression)
|
|
{
|
|
*OutTargets[TargetIndex] = GraphBuilder.CreateUAV(FRDGTextureUAVDesc::CreateForMetaData(TargetTexture, ERDGTextureMetaDataAccess::PrimaryCompressed), OutTargetFlags);
|
|
}
|
|
else
|
|
{
|
|
*OutTargets[TargetIndex] = GraphBuilder.CreateUAV(TargetTexture, OutTargetFlags);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*OutTargets[TargetIndex] = GetDummyUAV();
|
|
}
|
|
}
|
|
|
|
Result.ComputeShadingOutputs = GraphBuilder.CreateUniformBuffer(ShadingOutputs);
|
|
|
|
return Result;
|
|
}
|
|
|
|
void DispatchBasePass(
|
|
FRDGBuilder& GraphBuilder,
|
|
FNaniteShadingCommands& ShadingCommands,
|
|
const FSceneRenderer& SceneRenderer,
|
|
const FSceneTextures& SceneTextures,
|
|
const FRenderTargetBindingSlots& BasePassRenderTargets,
|
|
const FDBufferTextures& DBufferTextures,
|
|
const FScene& Scene,
|
|
const FViewInfo& View,
|
|
const uint32 ViewIndex,
|
|
const FRasterResults& RasterResults
|
|
)
|
|
{
|
|
checkSlow(DoesPlatformSupportNanite(GMaxRHIShaderPlatform));
|
|
|
|
LLM_SCOPE_BYTAG(Nanite);
|
|
RDG_EVENT_SCOPE(GraphBuilder, "Nanite::BasePass");
|
|
SCOPED_NAMED_EVENT(DispatchBasePass, FColor::Emerald);
|
|
|
|
ShadingCommands.SetupTask.Wait();
|
|
|
|
const uint32 ShadingBinCount = ShadingCommands.NumCommands;
|
|
if (ShadingBinCount == 0u)
|
|
{
|
|
return;
|
|
}
|
|
|
|
FShaderBundleRHIRef ShaderBundle = ShadingCommands.ShaderBundle;
|
|
|
|
const bool bDrawSceneViewsInOneNanitePass = ShouldDrawSceneViewsInOneNanitePass(View);
|
|
FIntRect ViewRect = bDrawSceneViewsInOneNanitePass ? View.GetFamilyViewRect() : View.ViewRect;
|
|
|
|
const int32 ViewWidth = ViewRect.Max.X - ViewRect.Min.X;
|
|
const int32 ViewHeight = ViewRect.Max.Y - ViewRect.Min.Y;
|
|
const FIntPoint ViewSize = FIntPoint(ViewWidth, ViewHeight);
|
|
|
|
const FRDGSystemTextures& SystemTextures = FRDGSystemTextures::Get(GraphBuilder);
|
|
|
|
FRDGTextureRef VisBuffer64 = RasterResults.VisBuffer64 ? RasterResults.VisBuffer64 : SystemTextures.Black;
|
|
FRDGTextureRef DbgBuffer64 = RasterResults.DbgBuffer64 ? RasterResults.DbgBuffer64 : SystemTextures.Black;
|
|
FRDGTextureRef DbgBuffer32 = RasterResults.DbgBuffer32 ? RasterResults.DbgBuffer32 : SystemTextures.Black;
|
|
|
|
FRDGBufferRef VisibleClustersSWHW = RasterResults.VisibleClustersSWHW;
|
|
|
|
const uint32 IndirectArgsStride = sizeof(FUint32Vector4);
|
|
|
|
FRDGBufferRef MultiViewIndices = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), 1), TEXT("Nanite.DummyMultiViewIndices"));
|
|
FRDGBufferRef MultiViewRectScaleOffsets = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FVector4f), 1), TEXT("Nanite.DummyMultiViewRectScaleOffsets"));
|
|
FRDGBufferRef ViewsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FVector4f), 1), TEXT("Nanite.PackedViews"));
|
|
|
|
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(MultiViewIndices), 0);
|
|
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(MultiViewRectScaleOffsets), 0);
|
|
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(ViewsBuffer), 0);
|
|
|
|
const FNaniteVisibilityQuery* VisibilityQuery = RasterResults.VisibilityQuery;
|
|
|
|
TStaticArray<FTextureRenderTargetBinding, MaxSimultaneousRenderTargets> BasePassTextures;
|
|
|
|
// NOTE: Always use a GBuffer layout with velocity output (It won't be written to unless the material has WPO or IsUsingBasePassVelocity())
|
|
uint32 BasePassTextureCount = SceneTextures.GetGBufferRenderTargets(BasePassTextures, GBL_ForceVelocity);
|
|
|
|
// We don't want to have Substrate MRTs appended to the list, except for the top layer data
|
|
if (Substrate::IsSubstrateEnabled() && SceneRenderer.Scene)
|
|
{
|
|
// Add another MRT for Substrate top layer information. We want to follow the usual clear process which can leverage fast clear.
|
|
{
|
|
BasePassTextures[BasePassTextureCount] = FTextureRenderTargetBinding(SceneRenderer.Scene->SubstrateSceneData.TopLayerTexture);
|
|
BasePassTextureCount++;
|
|
};
|
|
}
|
|
|
|
TArrayView<FTextureRenderTargetBinding> BasePassTexturesView = MakeArrayView(BasePassTextures.GetData(), BasePassTextureCount);
|
|
|
|
// Render targets bindings should remain constant at this point.
|
|
FRenderTargetBindingSlots BasePassBindings = GetRenderTargetBindings(ERenderTargetLoadAction::ELoad, BasePassTexturesView);
|
|
BasePassBindings.DepthStencil = BasePassRenderTargets.DepthStencil;
|
|
|
|
TArray<FRDGTextureRef, TInlineAllocator<MaxSimultaneousRenderTargets>> ClearTargetList;
|
|
|
|
// Fast tile clear prior to fast clear eliminate
|
|
const bool bFastTileClear = GNaniteFastTileClear != 0 && RHISupportsRenderTargetWriteMask(GMaxRHIShaderPlatform);
|
|
if (bFastTileClear)
|
|
{
|
|
for (uint32 TargetIndex = 0; TargetIndex < MaxSimultaneousRenderTargets; ++TargetIndex)
|
|
{
|
|
if (FRDGTexture* TargetTexture = BasePassRenderTargets.Output[TargetIndex].GetTexture())
|
|
{
|
|
if (!EnumHasAnyFlags(TargetTexture->Desc.Flags, TexCreate_DisableDCC))
|
|
{
|
|
// Skip any targets that do not explicitly disable DCC, as this clear would not work correctly for DCC
|
|
ClearTargetList.Add(nullptr);
|
|
continue;
|
|
}
|
|
|
|
if (EnumHasAnyFlags(TargetTexture->Desc.Flags, TexCreate_NoFastClear))
|
|
{
|
|
// Skip any targets that explicitly disable fast clear optimization
|
|
ClearTargetList.Add(nullptr);
|
|
continue;
|
|
}
|
|
|
|
if ((ShadingCommands.BoundTargetMask & (1u << TargetIndex)) == 0u)
|
|
{
|
|
// Skip any targets that are not written by at least one shading command
|
|
ClearTargetList.Add(nullptr);
|
|
continue;
|
|
}
|
|
|
|
ClearTargetList.Add(TargetTexture);
|
|
}
|
|
}
|
|
}
|
|
|
|
FShadeBinning Binning = ShadeBinning(GraphBuilder, Scene, View, ViewRect, ShadingCommands, RasterResults, ClearTargetList);
|
|
|
|
FNaniteShadingPassParameters* ShadingPassParameters = GraphBuilder.AllocParameters<FNaniteShadingPassParameters>();
|
|
*ShadingPassParameters = CreateNaniteShadingPassParams(
|
|
GraphBuilder,
|
|
SceneRenderer,
|
|
SceneTextures,
|
|
DBufferTextures,
|
|
View,
|
|
ViewRect,
|
|
RasterResults,
|
|
RasterResults.ShadingMask,
|
|
VisBuffer64,
|
|
DbgBuffer64,
|
|
DbgBuffer32,
|
|
VisibleClustersSWHW,
|
|
MultiViewIndices,
|
|
MultiViewRectScaleOffsets,
|
|
ViewsBuffer,
|
|
BasePassBindings,
|
|
ShadingCommands.BoundTargetMask,
|
|
Binning
|
|
);
|
|
|
|
FShadingConfig ShadingConfig{ 0 };
|
|
|
|
ShadingConfig.bHighPrecision = UsingHighPrecisionGBuffer();
|
|
ShadingConfig.bBundleShading = ShaderBundle != nullptr && UseShadingShaderBundle(Scene.GetShaderPlatform());
|
|
ShadingConfig.bBundleEmulation = ShadingConfig.bBundleShading && CVarNaniteBundleEmulation.GetValueOnRenderThread() != 0;
|
|
ShadingConfig.bShowDrawEvents = GShowMaterialDrawEvents != 0;
|
|
|
|
const bool bParallelDispatch = GRHICommandList.UseParallelAlgorithms() && CVarParallelBasePassBuild.GetValueOnRenderThread() != 0 &&
|
|
FParallelMeshDrawCommandPass::IsOnDemandShaderCreationEnabled();
|
|
if (bParallelDispatch)
|
|
{
|
|
GraphBuilder.AddDispatchPass(
|
|
RDG_EVENT_NAME("ShadeGBufferCS"),
|
|
ShadingPassParameters,
|
|
ERDGPassFlags::Compute,
|
|
[ShadingPassParameters, &ShadingCommands, ShadingConfig, ShaderBundle, IndirectArgsStride, DataByteOffset = Binning.DataByteOffset, VisibilityQuery, &View, ViewRect]
|
|
(FRDGDispatchPassBuilder& DispatchPassBuilder)
|
|
{
|
|
TSharedPtr<FNaniteShadingPassIntermediates> Intermediates = CreateNaniteShadingPassIntermediates(ShadingPassParameters, ShadingCommands, VisibilityQuery, ViewRect);
|
|
|
|
if (ShadingConfig.bBundleShading)
|
|
{
|
|
FRHICommandList* RHICmdListTask = DispatchPassBuilder.CreateCommandList();
|
|
|
|
UE::Tasks::Launch(UE_SOURCE_LOCATION, [RHICmdListTask, Intermediates = MoveTemp(Intermediates), &ShadingCommands, ShaderBundle, ViewRect, DataByteOffset, ShadingConfig]
|
|
{
|
|
FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(RecordBundleShadingCommandsTask);
|
|
DispatchComputeShaderBundle(*RHICmdListTask, ShadingCommands, ShadingConfig, ShaderBundle, *Intermediates, DataByteOffset);
|
|
RHICmdListTask->FinishRecording();
|
|
});
|
|
}
|
|
else
|
|
{
|
|
// Distribute work evenly to the available task graph workers based on NumPassCommands.
|
|
const int32 NumPassCommands = ShadingCommands.Commands.Num();
|
|
const int32 NumThreads = FMath::Min<int32>(FTaskGraphInterface::Get().GetNumWorkerThreads(), CVarRHICmdWidth.GetValueOnRenderThread());
|
|
const int32 NumTasks = FMath::Min<int32>(NumThreads, FMath::DivideAndRoundUp(NumPassCommands, CVarRHICmdMinDrawsPerParallelCmdList.GetValueOnRenderThread()));
|
|
const int32 NumCommandsPerTask = FMath::DivideAndRoundUp(NumPassCommands, NumTasks);
|
|
|
|
for (int32 TaskIndex = 0; TaskIndex < NumTasks; TaskIndex++)
|
|
{
|
|
const int32 StartIndex = TaskIndex * NumCommandsPerTask;
|
|
const int32 NumCommands = FMath::Min(NumCommandsPerTask, NumPassCommands - StartIndex);
|
|
checkSlow(NumCommands > 0);
|
|
|
|
FRHICommandList* RHICmdListTask = DispatchPassBuilder.CreateCommandList();
|
|
|
|
UE::Tasks::Launch(UE_SOURCE_LOCATION, [RHICmdListTask, &ShadingCommands, Intermediates = Intermediates, IndirectArgsStride, DataByteOffset, StartIndex, NumCommands, ShadingConfig]
|
|
{
|
|
FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(RecordShadingCommandsTask);
|
|
|
|
for (int32 CommandIndex = 0; CommandIndex < NumCommands; ++CommandIndex)
|
|
{
|
|
FNaniteShadingCommand& ShadingCommand = ShadingCommands.Commands[StartIndex + CommandIndex];
|
|
ShadingCommand.bVisible = Intermediates->VisibilityData.IsEmpty() || Intermediates->VisibilityData.AccessCorrespondingBit(FRelativeBitReference(ShadingCommand.ShadingBin));
|
|
if (ShadingCommand.bVisible && PrepareShadingCommand(ShadingCommand))
|
|
{
|
|
FRHIBatchedShaderParameters& ShadingParameters = RHICmdListTask->GetScratchShaderParameters();
|
|
|
|
RecordShadingParameters(
|
|
ShadingParameters,
|
|
ShadingCommand,
|
|
ShadingConfig,
|
|
DataByteOffset,
|
|
Intermediates->ViewRect,
|
|
Intermediates->ShadingOutputs
|
|
);
|
|
|
|
RecordShadingCommand(
|
|
*RHICmdListTask,
|
|
Intermediates->IndirectArgsBuffer,
|
|
IndirectArgsStride,
|
|
ShadingConfig,
|
|
ShadingParameters,
|
|
ShadingCommand
|
|
);
|
|
}
|
|
}
|
|
|
|
RHICmdListTask->FinishRecording();
|
|
});
|
|
}
|
|
}
|
|
});
|
|
}
|
|
else
|
|
{
|
|
GraphBuilder.AddPass(
|
|
RDG_EVENT_NAME("ShadeGBufferCS"),
|
|
ShadingPassParameters,
|
|
ERDGPassFlags::Compute,
|
|
[ShadingPassParameters, &ShadingCommands, ShadingConfig, ShaderBundle, IndirectArgsStride, DataByteOffset = Binning.DataByteOffset, VisibilityQuery, &View, ViewRect]
|
|
(FRDGAsyncTask, FRHIComputeCommandList& RHICmdList)
|
|
{
|
|
TSharedPtr<FNaniteShadingPassIntermediates> Intermediates = CreateNaniteShadingPassIntermediates(ShadingPassParameters, ShadingCommands, VisibilityQuery, ViewRect);
|
|
|
|
if (ShadingConfig.bBundleShading)
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(RecordBundleShadingCommands);
|
|
DispatchComputeShaderBundle(RHICmdList, ShadingCommands, ShadingConfig, ShaderBundle, *Intermediates, DataByteOffset, EParallelForFlags::ForceSingleThread);
|
|
}
|
|
else
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(RecordShadingCommands);
|
|
for (FNaniteShadingCommand& ShadingCommand : ShadingCommands.Commands)
|
|
{
|
|
ShadingCommand.bVisible = Intermediates->VisibilityData.IsEmpty() || Intermediates->VisibilityData.AccessCorrespondingBit(FRelativeBitReference(ShadingCommand.ShadingBin));
|
|
if (ShadingCommand.bVisible && PrepareShadingCommand(ShadingCommand))
|
|
{
|
|
FRHIBatchedShaderParameters& ShadingParameters = RHICmdList.GetScratchShaderParameters();
|
|
RecordShadingParameters(ShadingParameters, ShadingCommand, ShadingConfig, DataByteOffset, Intermediates->ViewRect, Intermediates->ShadingOutputs);
|
|
RecordShadingCommand(RHICmdList, Intermediates->IndirectArgsBuffer, IndirectArgsStride, ShadingConfig, ShadingParameters, ShadingCommand);
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
ExtractShadingDebug(GraphBuilder, View, Binning, ShadingBinCount);
|
|
}
|
|
|
|
FShadeBinning ShadeBinning(
|
|
FRDGBuilder& GraphBuilder,
|
|
const FScene& Scene,
|
|
const FViewInfo& View,
|
|
const FIntRect InViewRect,
|
|
const FNaniteShadingCommands& ShadingCommands,
|
|
const FRasterResults& RasterResults,
|
|
const TConstArrayView<FRDGTextureRef> ClearTargets
|
|
)
|
|
{
|
|
FShadeBinning Binning = {};
|
|
|
|
LLM_SCOPE_BYTAG(Nanite);
|
|
RDG_EVENT_SCOPE(GraphBuilder, "Nanite::ShadeBinning");
|
|
|
|
const FSceneTexturesConfig& Config = View.GetSceneTexturesConfig();
|
|
const EShaderPlatform ShaderPlatform = View.GetShaderPlatform();
|
|
|
|
if (!ShadingCommands.NumCommands)
|
|
{
|
|
return Binning;
|
|
}
|
|
|
|
const FNaniteShadingCommands::FMetaBufferArray& MetaBufferData = ShadingCommands.MetaBufferData;
|
|
|
|
TArray<FRDGTextureRef, TInlineAllocator<MaxSimultaneousRenderTargets>> ValidClearTargets;
|
|
|
|
uint32 ValidWriteMask = 0x0u;
|
|
if (ClearTargets.Num() > 0)
|
|
{
|
|
for (int32 TargetIndex = 0; TargetIndex < ClearTargets.Num(); ++TargetIndex)
|
|
{
|
|
if (ClearTargets[TargetIndex] != nullptr)
|
|
{
|
|
// Compute a mask containing only set bits for MRT targets that are suitable for meta data optimization.
|
|
ValidWriteMask |= (1u << uint32(TargetIndex));
|
|
ValidClearTargets.Add(ClearTargets[TargetIndex]);
|
|
}
|
|
}
|
|
}
|
|
|
|
const uint32 ShadingBinCount = ShadingCommands.MaxShadingBin + 1u;
|
|
const uint32 ShadingBinCountPow2 = FMath::RoundUpToPowerOfTwo(ShadingBinCount);
|
|
|
|
const bool bGatherStats = GNaniteShowStats != 0;
|
|
|
|
const FUintVector4 ViewRect = FUintVector4(uint32(InViewRect.Min.X), uint32(InViewRect.Min.Y), uint32(InViewRect.Max.X), uint32(InViewRect.Max.Y));
|
|
|
|
const uint32 PixelCount = InViewRect.Width() * InViewRect.Height();
|
|
|
|
const int32 QuadWidth = FMath::DivideAndRoundUp(InViewRect.Width(), 2);
|
|
const int32 QuadHeight = FMath::DivideAndRoundUp(InViewRect.Height(), 2);
|
|
|
|
const FIntPoint GroupDim = GBinningTechnique == 0 ? FIntPoint(8u, 8u) : FIntPoint(32u, 32u);
|
|
const FIntVector QuadDispatchDim = FComputeShaderUtils::GetGroupCount(FIntPoint(QuadWidth, QuadHeight), GroupDim);
|
|
const FIntVector BinDispatchDim = FComputeShaderUtils::GetGroupCount(ShadingBinCount, 64u);
|
|
|
|
const FUint32Vector2 DispatchOffsetTL = FUint32Vector2(InViewRect.Min.X, InViewRect.Min.Y);
|
|
|
|
const uint32 NumBytes_Meta = sizeof(FNaniteShadingBinMeta) * ShadingBinCountPow2;
|
|
const uint32 NumBytes_Data = PixelCount * 8;
|
|
|
|
FRDGBufferRef ShadingBinMeta = CreateStructuredBuffer(
|
|
GraphBuilder,
|
|
TEXT("Nanite.ShadingBinMeta"),
|
|
sizeof(FNaniteShadingBinMeta),
|
|
ShadingBinCountPow2,
|
|
MetaBufferData.GetData(),
|
|
sizeof(FNaniteShadingBinMeta) * MetaBufferData.Num()
|
|
);
|
|
|
|
Binning.DataByteOffset = NumBytes_Meta;
|
|
Binning.ShadingBinData = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateByteAddressDesc(NumBytes_Meta + NumBytes_Data), TEXT("Nanite.ShadingBinData"));
|
|
|
|
AddCopyBufferPass(GraphBuilder, Binning.ShadingBinData, 0, ShadingBinMeta, 0, NumBytes_Meta);
|
|
|
|
Binning.ShadingBinArgs = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateRawIndirectDesc(sizeof(FUint32Vector4) * ShadingBinCountPow2), TEXT("Nanite.ShadingBinArgs"));
|
|
Binning.ShadingBinStats = bGatherStats ? GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FNaniteShadingBinStats), 1u), TEXT("Nanite.ShadingBinStats")) : nullptr;
|
|
|
|
FRDGBufferUAVRef ShadingBinArgsUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(Binning.ShadingBinArgs, PF_R32_UINT));
|
|
FRDGBufferUAVRef ShadingBinDataUAV = GraphBuilder.CreateUAV(Binning.ShadingBinData);
|
|
FRDGBufferUAVRef ShadingBinStatsUAV = bGatherStats ? GraphBuilder.CreateUAV(Binning.ShadingBinStats) : nullptr;
|
|
|
|
FRDGBufferRef ShadingBinScatterMetaBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FNaniteShadingBinScatterMeta), ShadingBinCountPow2), TEXT("Nanite.ShadingBinScatterMeta"));
|
|
FRDGBufferUAVRef ShadingBinScatterMetaUAV = GraphBuilder.CreateUAV(ShadingBinScatterMetaBuffer);
|
|
|
|
if (bGatherStats)
|
|
{
|
|
AddClearUAVPass(GraphBuilder, ShadingBinStatsUAV, 0);
|
|
}
|
|
|
|
const bool bOptimizeWriteMask = (ValidClearTargets.Num() > 0);
|
|
|
|
const uint32 ShadingRateTileSizeBits = GetShadingRateTileSizeBits();
|
|
const bool bVariableRateShading = (ShadingRateTileSizeBits != 0);
|
|
|
|
const uint32 TargetAlignment = bOptimizeWriteMask ? 8 : // 8x8 for optimized write mask
|
|
bVariableRateShading ? 4 : // 4x4 for VRS
|
|
2; // 2x2 for just quad processing
|
|
const uint32 TargetAlignmentMask = ~(TargetAlignment - 1u);
|
|
|
|
const FUint32Vector2 AlignedDispatchOffsetTL = FUint32Vector2(InViewRect.Min.X & TargetAlignmentMask, InViewRect.Min.Y & TargetAlignmentMask);
|
|
const FIntVector AlignedDispatchDim = FComputeShaderUtils::GetGroupCount(FIntPoint(InViewRect.Max.X - AlignedDispatchOffsetTL.X, InViewRect.Max.Y - AlignedDispatchOffsetTL.Y), GroupDim * 2);
|
|
|
|
check(QuadDispatchDim.X == AlignedDispatchDim.X);
|
|
check(QuadDispatchDim.Y == AlignedDispatchDim.Y);
|
|
|
|
// Shading Bin Count
|
|
{
|
|
FShadingBinBuildCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FShadingBinBuildCS::FParameters>();
|
|
PassParameters->ViewRect = ViewRect;
|
|
PassParameters->ValidWriteMask = ValidWriteMask;
|
|
PassParameters->DispatchOffsetTL = bOptimizeWriteMask ? AlignedDispatchOffsetTL : DispatchOffsetTL;
|
|
PassParameters->ShadingBinCount = ShadingBinCount;
|
|
PassParameters->ShadingBinDataByteOffset = Binning.DataByteOffset;
|
|
PassParameters->ShadingRateTileSizeBits = GetShadingRateTileSizeBits();
|
|
PassParameters->DummyZero = 0;
|
|
PassParameters->ShadingRateImage = GetShadingRateImage(GraphBuilder, View);
|
|
PassParameters->ShadingMaskSampler = TStaticSamplerState<SF_Point>::GetRHI();
|
|
PassParameters->ShadingMask = RasterResults.ShadingMask;
|
|
PassParameters->OutShadingBinData = ShadingBinDataUAV;
|
|
PassParameters->OutShadingBinArgs = ShadingBinArgsUAV;
|
|
|
|
FShadingBinBuildCS::FPermutationDomain PermutationVector;
|
|
PermutationVector.Set<FShadingBinBuildCS::FBuildPassDim>(NANITE_SHADING_BIN_COUNT);
|
|
PermutationVector.Set<FShadingBinBuildCS::FTechniqueDim>(FMath::Clamp<int32>(GBinningTechnique, 0, 1));
|
|
PermutationVector.Set<FShadingBinBuildCS::FGatherStatsDim>(bGatherStats);
|
|
PermutationVector.Set<FShadingBinBuildCS::FVariableRateDim>(bVariableRateShading);
|
|
PermutationVector.Set<FShadingBinBuildCS::FOptimizeWriteMaskDim>(bOptimizeWriteMask);
|
|
PermutationVector.Set<FShadingBinBuildCS::FNumExports>(FMath::Max(1, ValidClearTargets.Num()));
|
|
auto ComputeShader = View.ShaderMap->GetShader<FShadingBinBuildCS>(PermutationVector);
|
|
|
|
if (bOptimizeWriteMask)
|
|
{
|
|
for (int32 TargetIndex = 0; TargetIndex < ValidClearTargets.Num(); ++TargetIndex)
|
|
{
|
|
PassParameters->OutCMaskBuffer[TargetIndex] = GraphBuilder.CreateUAV(FRDGTextureUAVDesc::CreateForMetaData(ValidClearTargets[TargetIndex], ERDGTextureMetaDataAccess::CMask));
|
|
}
|
|
|
|
const bool bWriteSubTiles = GNaniteFastTileClearSubTiles != 0u;
|
|
|
|
GraphBuilder.AddPass(
|
|
RDG_EVENT_NAME("ShadingCount"),
|
|
PassParameters,
|
|
ERDGPassFlags::Compute,
|
|
[AlignedDispatchDim, ComputeShader, PassParameters, TargetCount = ValidClearTargets.Num(), bWriteSubTiles](FRDGAsyncTask, FRHIComputeCommandList& RHICmdList)
|
|
{
|
|
void* PlatformDataPtr = nullptr;
|
|
uint32 PlatformDataSize = 0;
|
|
|
|
// Note: Assumes all targets match in resolution (which they should)
|
|
if (PassParameters->OutCMaskBuffer[0] != nullptr)
|
|
{
|
|
FRHITexture* TargetTextureRHI = PassParameters->OutCMaskBuffer[0]->GetParentRHI();
|
|
|
|
// Retrieve the platform specific data that the decode shader needs.
|
|
TargetTextureRHI->GetWriteMaskProperties(PlatformDataPtr, PlatformDataSize);
|
|
check(PlatformDataSize > 0);
|
|
|
|
if (PlatformDataPtr == nullptr)
|
|
{
|
|
// If the returned pointer was null, the platform RHI wants us to allocate the memory instead.
|
|
PlatformDataPtr = alloca(PlatformDataSize);
|
|
TargetTextureRHI->GetWriteMaskProperties(PlatformDataPtr, PlatformDataSize);
|
|
}
|
|
}
|
|
|
|
check(PlatformDataPtr != nullptr && PlatformDataSize > 0);
|
|
|
|
bool bSubTileMatch = bWriteSubTiles;
|
|
|
|
// If we want to write 4x4 subtiles, ensure platform specific data matches across all MRTs (tile modes, etc..)
|
|
if (bWriteSubTiles)
|
|
{
|
|
TArray<uint8, TInlineAllocator<8>> Scratch;
|
|
|
|
for (int32 TargetIndex = 1; TargetIndex < TargetCount; ++TargetIndex)
|
|
{
|
|
void* TestPlatformDataPtr = nullptr;
|
|
uint32 TestPlatformDataSize = 0;
|
|
|
|
// We want to enforce that the platform metadata is bit exact across all MRTs
|
|
if (PassParameters->OutCMaskBuffer[TargetIndex] != nullptr)
|
|
{
|
|
FRHITexture* TargetTextureRHI = PassParameters->OutCMaskBuffer[TargetIndex]->GetParentRHI();
|
|
|
|
TargetTextureRHI->GetWriteMaskProperties(TestPlatformDataPtr, TestPlatformDataSize);
|
|
check(TestPlatformDataSize > 0);
|
|
|
|
if (TestPlatformDataPtr == nullptr)
|
|
{
|
|
// If the returned pointer was null, the platform RHI wants us to allocate the memory instead.
|
|
Scratch.SetNumZeroed(TestPlatformDataSize);
|
|
TestPlatformDataPtr = Scratch.GetData();
|
|
TargetTextureRHI->GetWriteMaskProperties(TestPlatformDataPtr, TestPlatformDataSize);
|
|
}
|
|
|
|
check(TestPlatformDataPtr != nullptr && TestPlatformDataSize == PlatformDataSize);
|
|
|
|
if (FMemory::Memcmp(PlatformDataPtr, TestPlatformDataPtr, PlatformDataSize) != 0)
|
|
{
|
|
bSubTileMatch = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
SetComputePipelineState(RHICmdList, ComputeShader.GetComputeShader());
|
|
SetShaderParametersMixedCS(RHICmdList, ComputeShader, *PassParameters, PlatformDataPtr, PlatformDataSize, bSubTileMatch);
|
|
|
|
RHICmdList.DispatchComputeShader(AlignedDispatchDim.X, AlignedDispatchDim.Y, AlignedDispatchDim.Z);
|
|
}
|
|
);
|
|
}
|
|
else
|
|
{
|
|
FComputeShaderUtils::AddPass(GraphBuilder, RDG_EVENT_NAME("ShadingCount"), ComputeShader, PassParameters, AlignedDispatchDim);
|
|
}
|
|
}
|
|
|
|
// Shading Bin Reserve
|
|
{
|
|
FRDGBufferRef ShadingBinAllocator = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), 1), TEXT("Nanite.ShadingBinAllocator"));
|
|
FRDGBufferUAVRef ShadingBinAllocatorUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ShadingBinAllocator, PF_R32_UINT));
|
|
AddClearUAVPass(GraphBuilder, ShadingBinAllocatorUAV, 0);
|
|
|
|
FShadingBinReserveCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FShadingBinReserveCS::FParameters>();
|
|
PassParameters->ShadingBinCount = ShadingBinCount;
|
|
PassParameters->ShadingBinDataByteOffset = Binning.DataByteOffset;
|
|
PassParameters->OutShadingBinStats = ShadingBinStatsUAV;
|
|
PassParameters->OutShadingBinData = ShadingBinDataUAV;
|
|
PassParameters->OutShadingBinAllocator = ShadingBinAllocatorUAV;
|
|
PassParameters->OutShadingBinArgs = ShadingBinArgsUAV;
|
|
PassParameters->OutShadingBinStats = ShadingBinStatsUAV;
|
|
PassParameters->OutShadingBinScatterMeta = ShadingBinScatterMetaUAV;
|
|
|
|
FShadingBinReserveCS::FPermutationDomain PermutationVector;
|
|
PermutationVector.Set<FShadingBinReserveCS::FGatherStatsDim>(bGatherStats);
|
|
auto ComputeShader = View.ShaderMap->GetShader<FShadingBinReserveCS>(PermutationVector);
|
|
|
|
FComputeShaderUtils::AddPass(GraphBuilder, RDG_EVENT_NAME("ShadingReserve"), ComputeShader, PassParameters, BinDispatchDim);
|
|
}
|
|
|
|
// Shading Bin Scatter
|
|
{
|
|
FShadingBinBuildCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FShadingBinBuildCS::FParameters>();
|
|
PassParameters->ViewRect = ViewRect;
|
|
PassParameters->DispatchOffsetTL = AlignedDispatchOffsetTL;
|
|
PassParameters->ShadingBinCount = ShadingBinCount;
|
|
PassParameters->ShadingBinDataByteOffset = Binning.DataByteOffset;
|
|
PassParameters->ShadingRateTileSizeBits = GetShadingRateTileSizeBits();
|
|
PassParameters->DummyZero = 0;
|
|
PassParameters->ShadingRateImage = GetShadingRateImage(GraphBuilder, View);
|
|
PassParameters->ShadingMaskSampler = TStaticSamplerState<SF_Point>::GetRHI();
|
|
PassParameters->ShadingMask = RasterResults.ShadingMask;
|
|
PassParameters->OutShadingBinStats = ShadingBinStatsUAV;
|
|
PassParameters->OutShadingBinData = ShadingBinDataUAV;
|
|
PassParameters->OutShadingBinArgs = nullptr;
|
|
PassParameters->OutShadingBinScatterMeta = ShadingBinScatterMetaUAV;
|
|
|
|
FShadingBinBuildCS::FPermutationDomain PermutationVector;
|
|
PermutationVector.Set<FShadingBinBuildCS::FBuildPassDim>(NANITE_SHADING_BIN_SCATTER);
|
|
PermutationVector.Set<FShadingBinBuildCS::FTechniqueDim>(FMath::Clamp<int32>(GBinningTechnique, 0, 1));
|
|
PermutationVector.Set<FShadingBinBuildCS::FGatherStatsDim>(bGatherStats);
|
|
PermutationVector.Set<FShadingBinBuildCS::FVariableRateDim>(bVariableRateShading);
|
|
PermutationVector.Set<FShadingBinBuildCS::FOptimizeWriteMaskDim>(false);
|
|
PermutationVector.Set<FShadingBinBuildCS::FNumExports>(1);
|
|
auto ComputeShader = View.ShaderMap->GetShader<FShadingBinBuildCS>(PermutationVector);
|
|
|
|
FComputeShaderUtils::AddPass(GraphBuilder, RDG_EVENT_NAME("ShadingScatter"), ComputeShader, PassParameters, AlignedDispatchDim);
|
|
}
|
|
|
|
// Shading Bin Validate
|
|
if (GNaniteValidateShadeBinning)
|
|
{
|
|
FShadingBinValidateCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FShadingBinValidateCS::FParameters>();
|
|
PassParameters->ShadingBinCount = ShadingBinCount;
|
|
PassParameters->OutShadingBinData = ShadingBinDataUAV;
|
|
|
|
auto ComputeShader = View.ShaderMap->GetShader<FShadingBinValidateCS>();
|
|
FComputeShaderUtils::AddPass(GraphBuilder, RDG_EVENT_NAME("ShadingValidate"), ERDGPassFlags::Compute | ERDGPassFlags::NeverCull, ComputeShader, PassParameters, BinDispatchDim);
|
|
}
|
|
|
|
const FNaniteVisualizationData& VisualizationData = GetNaniteVisualizationData();
|
|
if (bOptimizeWriteMask && VisualizationData.IsActive())
|
|
{
|
|
auto ComputeShader = View.ShaderMap->GetShader<FVisualizeClearTilesCS>();
|
|
|
|
FRDGTextureDesc VisClearMaskDesc = FRDGTextureDesc::Create2D(
|
|
FIntPoint(InViewRect.Width(), InViewRect.Height()),
|
|
PF_R32_UINT,
|
|
FClearValueBinding::Transparent,
|
|
TexCreate_ShaderResource | TexCreate_UAV
|
|
);
|
|
|
|
Binning.FastClearVisualize = GraphBuilder.CreateTexture(VisClearMaskDesc, TEXT("Nanite.VisClearMask"));
|
|
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(Binning.FastClearVisualize), FUintVector4(ForceInitToZero));
|
|
|
|
for (int32 TargetIndex = 0; TargetIndex < ValidClearTargets.Num(); ++TargetIndex)
|
|
{
|
|
if (TargetIndex != GNaniteFastTileVis && GNaniteFastTileVis != INDEX_NONE)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FVisualizeClearTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FVisualizeClearTilesCS::FParameters>();
|
|
PassParameters->ViewRect = ViewRect;
|
|
PassParameters->OutCMaskBuffer = GraphBuilder.CreateUAV(FRDGTextureUAVDesc::CreateForMetaData(ValidClearTargets[TargetIndex], ERDGTextureMetaDataAccess::CMask));
|
|
PassParameters->OutVisualized = GraphBuilder.CreateUAV(Binning.FastClearVisualize);
|
|
|
|
GraphBuilder.AddPass(
|
|
RDG_EVENT_NAME("VisualizeFastClear"),
|
|
PassParameters,
|
|
ERDGPassFlags::Compute,
|
|
[InViewRect, ComputeShader, PassParameters](FRDGAsyncTask, FRHIComputeCommandList& RHICmdList)
|
|
{
|
|
void* PlatformDataPtr = nullptr;
|
|
uint32 PlatformDataSize = 0;
|
|
|
|
if (PassParameters->OutCMaskBuffer != nullptr)
|
|
{
|
|
FRHITexture* TargetTextureRHI = PassParameters->OutCMaskBuffer->GetParentRHI();
|
|
|
|
// Retrieve the platform specific data that the decode shader needs.
|
|
TargetTextureRHI->GetWriteMaskProperties(PlatformDataPtr, PlatformDataSize);
|
|
check(PlatformDataSize > 0);
|
|
|
|
if (PlatformDataPtr == nullptr)
|
|
{
|
|
// If the returned pointer was null, the platform RHI wants us to allocate the memory instead.
|
|
PlatformDataPtr = alloca(PlatformDataSize);
|
|
TargetTextureRHI->GetWriteMaskProperties(PlatformDataPtr, PlatformDataSize);
|
|
}
|
|
}
|
|
|
|
SetComputePipelineState(RHICmdList, ComputeShader.GetComputeShader());
|
|
SetShaderParametersMixedCS(RHICmdList, ComputeShader, *PassParameters, PlatformDataPtr, PlatformDataSize);
|
|
|
|
const FIntVector DispatchDim = FComputeShaderUtils::GetGroupCount(FIntPoint(InViewRect.Width(), InViewRect.Height()), FIntPoint(8u, 8u));
|
|
RHICmdList.DispatchComputeShader(DispatchDim.X, DispatchDim.Y, DispatchDim.Z);
|
|
}
|
|
);
|
|
}
|
|
}
|
|
|
|
return Binning;
|
|
}
|
|
|
|
void CollectBasePassShadingPSOInitializers(
|
|
const FSceneTexturesConfig& SceneTexturesConfig,
|
|
const FPSOPrecacheVertexFactoryData& VertexFactoryData,
|
|
const FMaterial& Material,
|
|
const FPSOPrecacheParams& PreCacheParams,
|
|
ERHIFeatureLevel::Type FeatureLevel,
|
|
EShaderPlatform ShaderPlatform,
|
|
int32 PSOCollectorIndex,
|
|
TArray<FPSOPrecacheData>& PSOInitializers)
|
|
{
|
|
TArray<ELightMapPolicyType, TInlineAllocator<2>> UniformLightMapPolicyTypes = FBasePassMeshProcessor::GetUniformLightMapPolicyTypeForPSOCollection(FeatureLevel, Material);
|
|
|
|
auto CollectBasePass = [&](bool bRenderSkyLight)
|
|
{
|
|
for (ELightMapPolicyType UniformLightMapPolicyType : UniformLightMapPolicyTypes)
|
|
{
|
|
TShaderRef<TBasePassComputeShaderPolicyParamType<FUniformLightMapPolicy>> BasePassComputeShader;
|
|
|
|
bool bShadersValid = GetBasePassShader<FUniformLightMapPolicy>(
|
|
Material,
|
|
VertexFactoryData.VertexFactoryType,
|
|
FUniformLightMapPolicy(UniformLightMapPolicyType),
|
|
FeatureLevel,
|
|
bRenderSkyLight,
|
|
false, // bIsDebug
|
|
SF_Compute,
|
|
&BasePassComputeShader
|
|
);
|
|
|
|
if (!bShadersValid)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FPSOPrecacheData ComputePSOPrecacheData;
|
|
ComputePSOPrecacheData.Type = FPSOPrecacheData::EType::Compute;
|
|
ComputePSOPrecacheData.SetComputeShader(BasePassComputeShader);
|
|
#if PSO_PRECACHING_VALIDATE
|
|
ComputePSOPrecacheData.PSOCollectorIndex = PSOCollectorIndex;
|
|
ComputePSOPrecacheData.VertexFactoryType = VertexFactoryData.VertexFactoryType;
|
|
if (PSOCollectorStats::IsFullPrecachingValidationEnabled())
|
|
{
|
|
ComputePSOPrecacheData.bDefaultMaterial = Material.IsDefaultMaterial();
|
|
ConditionalBreakOnPSOPrecacheShader(ComputePSOPrecacheData.ComputeShader);
|
|
}
|
|
#endif // PSO_PRECACHING_VALIDATE
|
|
PSOInitializers.Add(MoveTemp(ComputePSOPrecacheData));
|
|
}
|
|
};
|
|
|
|
CollectBasePass(true);
|
|
CollectBasePass(false);
|
|
}
|
|
|
|
} // Nanite
|
|
|
|
FNaniteRasterPipeline FNaniteRasterPipeline::GetFixedFunctionPipeline(uint8 BinMask)
|
|
{
|
|
FNaniteRasterPipeline Pipeline;
|
|
Pipeline.RasterMaterial = UMaterial::GetDefaultMaterial(MD_Surface)->GetRenderProxy();
|
|
Pipeline.bIsTwoSided = (BinMask & NANITE_FIXED_FUNCTION_BIN_TWOSIDED) != 0;
|
|
Pipeline.bWPOEnabled = false;
|
|
Pipeline.bDisplacementEnabled = false;
|
|
Pipeline.bPerPixelEval = false;
|
|
Pipeline.bVoxel = (BinMask & NANITE_FIXED_FUNCTION_BIN_VOXEL) != 0;
|
|
Pipeline.bSplineMesh = (BinMask & NANITE_FIXED_FUNCTION_BIN_SPLINE) != 0;
|
|
Pipeline.bSkinnedMesh = (BinMask & NANITE_FIXED_FUNCTION_BIN_SKINNED) != 0;
|
|
Pipeline.bHasWPODistance = false;
|
|
Pipeline.bHasPixelDistance = false;
|
|
Pipeline.bHasDisplacementFadeOut = false;
|
|
Pipeline.bCastShadow = (BinMask & NANITE_FIXED_FUNCTION_BIN_CAST_SHADOW) != 0;
|
|
Pipeline.bVertexUVs = false;
|
|
return Pipeline;
|
|
}
|
|
|
|
uint32 FNaniteRasterPipeline::GetPipelineHash() const
|
|
{
|
|
struct FHashKey
|
|
{
|
|
uint32 MaterialFlags;
|
|
uint32 MaterialHash;
|
|
|
|
FDisplacementScaling DisplacementScaling;
|
|
FDisplacementFadeRange DisplacementFadeRange;
|
|
|
|
static inline uint32 PointerHash(const void* Key)
|
|
{
|
|
#if PLATFORM_64BITS
|
|
// Ignoring the lower 4 bits since they are likely zero anyway.
|
|
// Higher bits are more significant in 64 bit builds.
|
|
return reinterpret_cast<UPTRINT>(Key) >> 4;
|
|
#else
|
|
return reinterpret_cast<UPTRINT>(Key);
|
|
#endif
|
|
};
|
|
|
|
} HashKey;
|
|
|
|
FMemory::Memzero(HashKey);
|
|
|
|
HashKey.MaterialFlags = 0;
|
|
HashKey.MaterialFlags |= bIsTwoSided ? 0x1u : 0x0u;
|
|
HashKey.MaterialFlags |= bWPOEnabled ? 0x2u : 0x0u;
|
|
HashKey.MaterialFlags |= bDisplacementEnabled ? 0x4u : 0x0u;
|
|
HashKey.MaterialFlags |= bPerPixelEval ? 0x8u : 0x0u;
|
|
HashKey.MaterialFlags |= bSplineMesh ? 0x10u : 0x0u;
|
|
HashKey.MaterialFlags |= bSkinnedMesh ? 0x20u : 0x0u;
|
|
HashKey.MaterialFlags |= bCastShadow ? 0x40u : 0x0u;
|
|
HashKey.MaterialFlags |= bFixedDisplacementFallback ? 0x80u : 0x0u;
|
|
HashKey.MaterialFlags |= bVertexUVs ? 0x100u : 0x0u;
|
|
HashKey.MaterialFlags |= bVoxel ? 0x200u : 0x0u;
|
|
HashKey.MaterialHash = FHashKey::PointerHash(RasterMaterial);
|
|
|
|
if (bDisplacementEnabled)
|
|
{
|
|
HashKey.DisplacementScaling = DisplacementScaling;
|
|
if (bHasDisplacementFadeOut)
|
|
{
|
|
HashKey.DisplacementFadeRange = DisplacementFadeRange;
|
|
}
|
|
}
|
|
|
|
const uint64 PipelineHash = CityHash64((char*)&HashKey, sizeof(FHashKey));
|
|
return HashCombineFast(uint32(PipelineHash & 0xFFFFFFFF), uint32((PipelineHash >> 32) & 0xFFFFFFFF));
|
|
}
|
|
|
|
bool FNaniteRasterPipeline::GetFallbackPipeline(FNaniteRasterPipeline& OutFallback) const
|
|
{
|
|
// Get a mask of the required fixed function features for this pipeline to fall back to a fixed function bin.
|
|
const uint32 FixedBinMask =
|
|
(bIsTwoSided ? NANITE_FIXED_FUNCTION_BIN_TWOSIDED : 0) |
|
|
(bSplineMesh ? NANITE_FIXED_FUNCTION_BIN_SPLINE : 0) |
|
|
(bSkinnedMesh ? NANITE_FIXED_FUNCTION_BIN_SKINNED : 0) |
|
|
(bCastShadow ? NANITE_FIXED_FUNCTION_BIN_CAST_SHADOW : 0) |
|
|
(bVoxel ? NANITE_FIXED_FUNCTION_BIN_VOXEL : 0);
|
|
|
|
// NOTE: Ordering matters here. We don't want to have to create many bins to handle enabled/disabled state of
|
|
// pixel programmable, WPO, and displacement, so when we have overlap, WPO disabled clusters rely on branching
|
|
// rather than using simpler shaders until either pixel programmable distance or displacement fade-out occurs,
|
|
// and when either pixel programmable or displacement is disabled, both are.
|
|
if ((bPerPixelEval && bHasPixelDistance) || (bDisplacementEnabled && bHasDisplacementFadeOut))
|
|
{
|
|
if (bWPOEnabled)
|
|
{
|
|
// The fallback bin must still be a programmable bin, but with pixel programmable and displacement disabled
|
|
OutFallback = *this;
|
|
OutFallback.bHasWPODistance = false;
|
|
OutFallback.bHasPixelDistance = false;
|
|
OutFallback.bHasDisplacementFadeOut = false;
|
|
OutFallback.bPerPixelEval = false;
|
|
OutFallback.bDisplacementEnabled = false;
|
|
OutFallback.bVertexUVs = false;
|
|
}
|
|
else
|
|
{
|
|
// The fallback bin can be a non-programmable, fixed-function bin
|
|
OutFallback = GetFixedFunctionPipeline(FixedBinMask);
|
|
}
|
|
|
|
if (bDisplacementEnabled)
|
|
{
|
|
// NOTE: We do something special for displacement fallback bins. The displacement scaling still has to be unique
|
|
// per bin, so it can't strictly be a "fixed function bin", though it does use default material permutations if
|
|
// the fallback does not have WPO (and is therefore not itself programmable in any way).
|
|
OutFallback.bFixedDisplacementFallback = !bWPOEnabled;
|
|
OutFallback.DisplacementScaling = DisplacementScaling;
|
|
OutFallback.DisplacementFadeRange = FDisplacementFadeRange::Invalid();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
else if (bHasWPODistance)
|
|
{
|
|
if (bPerPixelEval || bDisplacementEnabled)
|
|
{
|
|
// The fallback bin must still be a programmable bin, but with WPO force disabled.
|
|
OutFallback = *this;
|
|
OutFallback.bHasWPODistance = false;
|
|
OutFallback.bWPOEnabled = false;
|
|
}
|
|
else
|
|
{
|
|
// The fallback bin can be a non-programmable, fixed-function bin
|
|
OutFallback = GetFixedFunctionPipeline(FixedBinMask);
|
|
}
|
|
|
|
if (bDisplacementEnabled)
|
|
{
|
|
// Make sure the fallback bin preserves the displacement scaling
|
|
OutFallback.DisplacementScaling = DisplacementScaling;
|
|
OutFallback.DisplacementFadeRange = FDisplacementFadeRange::Invalid();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
FNaniteRasterPipelines::FNaniteRasterPipelines()
|
|
{
|
|
PipelineBins.Reserve(256);
|
|
PerPixelEvalPipelineBins.Reserve(256);
|
|
PipelineMap.Reserve(256);
|
|
|
|
AllocateFixedFunctionBins();
|
|
}
|
|
|
|
FNaniteRasterPipelines::~FNaniteRasterPipelines()
|
|
{
|
|
ReleaseFixedFunctionBins();
|
|
|
|
PipelineBins.Reset();
|
|
PerPixelEvalPipelineBins.Reset();
|
|
PipelineMap.Empty();
|
|
}
|
|
|
|
void FNaniteRasterPipelines::AllocateFixedFunctionBins()
|
|
{
|
|
check(FixedFunctionBins.Num() == 0);
|
|
|
|
// Note: Invalid mutually exclusive permutation: NANITE_FIXED_FUNCTION_BIN_SKINNED | NANITE_FIXED_FUNCTION_BIN_SPLINE
|
|
// We let the registration succeed because permutations are not actually fetched for the fixed function material here.
|
|
// When caching the raster passes we remap skinned | spline => skinned permutation and also skip launching these bins.
|
|
|
|
for (uint32 BinMask = 0; BinMask <= NANITE_FIXED_FUNCTION_BIN_MASK; ++BinMask)
|
|
{
|
|
FFixedFunctionBin Bin;
|
|
FNaniteRasterPipeline Pipeline = FNaniteRasterPipeline::GetFixedFunctionPipeline(BinMask);
|
|
Bin.RasterBin = Register(Pipeline);
|
|
Bin.BinMask = BinMask;
|
|
check(Bin.RasterBin.BinIndex == BinMask);
|
|
|
|
FixedFunctionBins.Emplace(Bin);
|
|
}
|
|
}
|
|
|
|
void FNaniteRasterPipelines::ReleaseFixedFunctionBins()
|
|
{
|
|
for (const FFixedFunctionBin& FixedFunctionBin : FixedFunctionBins)
|
|
{
|
|
Unregister(FixedFunctionBin.RasterBin);
|
|
}
|
|
|
|
FixedFunctionBins.Reset();
|
|
}
|
|
|
|
void FNaniteRasterPipelines::ReloadFixedFunctionBins()
|
|
{
|
|
for (const FFixedFunctionBin& FixedFunctionBin : FixedFunctionBins)
|
|
{
|
|
FNaniteRasterPipeline Pipeline = FNaniteRasterPipeline::GetFixedFunctionPipeline(FixedFunctionBin.BinMask);
|
|
FNaniteRasterEntry* RasterEntry = PipelineMap.Find(Pipeline);
|
|
check(RasterEntry != nullptr);
|
|
RasterEntry->RasterPipeline = Pipeline;
|
|
}
|
|
|
|
// Reset the entire raster setup cache
|
|
for (const auto& Pair : PipelineMap)
|
|
{
|
|
Pair.Value.CacheMap.Reset();
|
|
}
|
|
}
|
|
|
|
uint16 FNaniteRasterPipelines::AllocateBin(bool bPerPixelEval)
|
|
{
|
|
TBitArray<>& BinUsageMask = bPerPixelEval ? PerPixelEvalPipelineBins : PipelineBins;
|
|
|
|
int32 BinIndex = BinUsageMask.FindAndSetFirstZeroBit();
|
|
if (BinIndex == INDEX_NONE)
|
|
{
|
|
BinIndex = BinUsageMask.Add(true);
|
|
}
|
|
|
|
check(int32(uint16(BinIndex)) == BinIndex && PipelineBins.Num() + PerPixelEvalPipelineBins.Num() <= int32(MAX_uint16));
|
|
return bPerPixelEval ? FNaniteRasterBinIndexTranslator::RevertBinIndex(BinIndex) : uint16(BinIndex);
|
|
}
|
|
|
|
void FNaniteRasterPipelines::ReleaseBin(uint16 BinIndex)
|
|
{
|
|
check(IsBinAllocated(BinIndex));
|
|
if (BinIndex < PipelineBins.Num())
|
|
{
|
|
PipelineBins[BinIndex] = false;
|
|
}
|
|
else
|
|
{
|
|
PerPixelEvalPipelineBins[FNaniteRasterBinIndexTranslator::RevertBinIndex(BinIndex)] = false;
|
|
}
|
|
}
|
|
|
|
bool FNaniteRasterPipelines::IsBinAllocated(uint16 BinIndex) const
|
|
{
|
|
return BinIndex < PipelineBins.Num() ? PipelineBins[BinIndex] : PerPixelEvalPipelineBins[FNaniteRasterBinIndexTranslator::RevertBinIndex(BinIndex)];
|
|
}
|
|
|
|
uint32 FNaniteRasterPipelines::GetRegularBinCount() const
|
|
{
|
|
return PipelineBins.FindLast(true) + 1;
|
|
}
|
|
|
|
uint32 FNaniteRasterPipelines::GetBinCount() const
|
|
{
|
|
return GetRegularBinCount() + PerPixelEvalPipelineBins.FindLast(true) + 1;
|
|
}
|
|
|
|
FNaniteRasterBin FNaniteRasterPipelines::Register(const FNaniteRasterPipeline& InRasterPipeline)
|
|
{
|
|
FNaniteRasterBin RasterBin;
|
|
|
|
const FRasterHash RasterPipelineHash = PipelineMap.ComputeHash(InRasterPipeline);
|
|
FRasterId RasterBinId = PipelineMap.FindOrAddIdByHash(RasterPipelineHash, InRasterPipeline, FNaniteRasterEntry());
|
|
RasterBin.BinId = RasterBinId.GetIndex();
|
|
|
|
FNaniteRasterEntry& RasterEntry = PipelineMap.GetByElementId(RasterBinId).Value;
|
|
if (RasterEntry.ReferenceCount == 0)
|
|
{
|
|
// First reference
|
|
RasterEntry.RasterPipeline = InRasterPipeline;
|
|
RasterEntry.BinIndex = AllocateBin(InRasterPipeline.bPerPixelEval);
|
|
}
|
|
|
|
++RasterEntry.ReferenceCount;
|
|
|
|
RasterBin.BinIndex = RasterEntry.BinIndex;
|
|
return RasterBin;
|
|
}
|
|
|
|
void FNaniteRasterPipelines::Unregister(const FNaniteRasterBin& InRasterBin)
|
|
{
|
|
FRasterId RasterBinId(InRasterBin.BinId);
|
|
check(RasterBinId.IsValid());
|
|
|
|
FNaniteRasterEntry& RasterEntry = PipelineMap.GetByElementId(RasterBinId).Value;
|
|
|
|
check(RasterEntry.ReferenceCount > 0);
|
|
--RasterEntry.ReferenceCount;
|
|
if (RasterEntry.ReferenceCount == 0)
|
|
{
|
|
checkf(!ShouldBinRenderInCustomPass(InRasterBin.BinIndex), TEXT("A raster bin has dangling references to Custom Pass on final release."));
|
|
ReleaseBin(RasterEntry.BinIndex);
|
|
PipelineMap.RemoveByElementId(RasterBinId);
|
|
}
|
|
}
|
|
|
|
void FNaniteRasterPipelines::RegisterBinForCustomPass(uint16 BinIndex)
|
|
{
|
|
check(IsBinAllocated(BinIndex));
|
|
|
|
const bool bPerPixelEval = BinIndex >= PipelineBins.Num();
|
|
TArray<uint32>& RefCounts = bPerPixelEval ? PerPixelEvalCustomPassRefCounts : CustomPassRefCounts;
|
|
const uint16 ArrayIndex = bPerPixelEval ? FNaniteRasterBinIndexTranslator::RevertBinIndex(BinIndex) : BinIndex;
|
|
|
|
if (RefCounts.Num() <= ArrayIndex)
|
|
{
|
|
RefCounts.AddZeroed(ArrayIndex - RefCounts.Num() + 1);
|
|
}
|
|
RefCounts[ArrayIndex]++;
|
|
}
|
|
|
|
void FNaniteRasterPipelines::UnregisterBinForCustomPass(uint16 BinIndex)
|
|
{
|
|
check(IsBinAllocated(BinIndex));
|
|
|
|
const bool bPerPixelEval = BinIndex >= PipelineBins.Num();
|
|
TArray<uint32>& RefCounts = bPerPixelEval ? PerPixelEvalCustomPassRefCounts : CustomPassRefCounts;
|
|
const uint16 ArrayIndex = bPerPixelEval ? FNaniteRasterBinIndexTranslator::RevertBinIndex(BinIndex) : BinIndex;
|
|
|
|
checkf(RefCounts.IsValidIndex(ArrayIndex), TEXT("Attempting to unregister a bin that was never registered for Custom Pass"));
|
|
checkf(RefCounts[ArrayIndex] > 0, TEXT("Mismatched calls to RegisterBinForCustomPass/UnregisterBinForCustomPass"));
|
|
|
|
RefCounts[ArrayIndex]--;
|
|
}
|
|
|
|
bool FNaniteRasterPipelines::ShouldBinRenderInCustomPass(uint16 BinIndex) const
|
|
{
|
|
check(IsBinAllocated(BinIndex));
|
|
|
|
const bool bPerPixelEval = BinIndex >= PipelineBins.Num();
|
|
const TArray<uint32>& RefCounts = bPerPixelEval ? PerPixelEvalCustomPassRefCounts : CustomPassRefCounts;
|
|
const uint16 ArrayIndex = bPerPixelEval ? FNaniteRasterBinIndexTranslator::RevertBinIndex(BinIndex) : BinIndex;
|
|
|
|
return RefCounts.IsValidIndex(ArrayIndex) ? RefCounts[ArrayIndex] > 0 : false;
|
|
}
|
|
|
|
FNaniteShadingPipelines::FNaniteShadingPipelines()
|
|
{
|
|
PipelineBins.Reserve(256);
|
|
PipelineMap.Reserve(256);
|
|
}
|
|
|
|
FNaniteShadingPipelines::~FNaniteShadingPipelines()
|
|
{
|
|
PipelineBins.Reset();
|
|
PipelineMap.Empty();
|
|
}
|
|
|
|
uint16 FNaniteShadingPipelines::AllocateBin()
|
|
{
|
|
TBitArray<>& BinUsageMask = PipelineBins;
|
|
int32 BinIndex = BinUsageMask.FindAndSetFirstZeroBit();
|
|
if (BinIndex == INDEX_NONE)
|
|
{
|
|
BinIndex = BinUsageMask.Add(true);
|
|
}
|
|
|
|
check(int32(uint16(BinIndex)) == BinIndex && PipelineBins.Num() <= int32(MAX_uint16));
|
|
return uint16(BinIndex);
|
|
}
|
|
|
|
void FNaniteShadingPipelines::ReleaseBin(uint16 BinIndex)
|
|
{
|
|
check(IsBinAllocated(BinIndex));
|
|
if (BinIndex < PipelineBins.Num())
|
|
{
|
|
PipelineBins[BinIndex] = false;
|
|
}
|
|
}
|
|
|
|
bool FNaniteShadingPipelines::IsBinAllocated(uint16 BinIndex) const
|
|
{
|
|
return BinIndex < PipelineBins.Num() ? PipelineBins[BinIndex] : false;
|
|
}
|
|
|
|
uint32 FNaniteShadingPipelines::GetBinCount() const
|
|
{
|
|
return PipelineBins.FindLast(true) + 1;
|
|
}
|
|
|
|
FNaniteShadingBin FNaniteShadingPipelines::Register(const FNaniteShadingPipeline& InShadingPipeline)
|
|
{
|
|
FNaniteShadingBin ShadingBin;
|
|
|
|
const FShadingHash ShadingPipelineHash = PipelineMap.ComputeHash(InShadingPipeline);
|
|
FShadingId ShadingBinId = PipelineMap.FindOrAddIdByHash(ShadingPipelineHash, InShadingPipeline, FNaniteShadingEntry());
|
|
ShadingBin.BinId = ShadingBinId.GetIndex();
|
|
|
|
FNaniteShadingEntry& ShadingEntry = PipelineMap.GetByElementId(ShadingBinId).Value;
|
|
if (ShadingEntry.ReferenceCount == 0)
|
|
{
|
|
// First reference
|
|
ShadingEntry.ShadingPipeline = MakeShared<FNaniteShadingPipeline>(InShadingPipeline);
|
|
ShadingEntry.BinIndex = AllocateBin();
|
|
bBuildIdList = true;
|
|
}
|
|
|
|
++ShadingEntry.ReferenceCount;
|
|
|
|
ShadingBin.BinIndex = ShadingEntry.BinIndex;
|
|
return ShadingBin;
|
|
}
|
|
|
|
void FNaniteShadingPipelines::Unregister(const FNaniteShadingBin& InShadingBin)
|
|
{
|
|
FShadingId ShadingBinId(InShadingBin.BinId);
|
|
check(ShadingBinId.IsValid());
|
|
|
|
FNaniteShadingEntry& ShadingEntry = PipelineMap.GetByElementId(ShadingBinId).Value;
|
|
|
|
check(ShadingEntry.ReferenceCount > 0);
|
|
--ShadingEntry.ReferenceCount;
|
|
if (ShadingEntry.ReferenceCount == 0)
|
|
{
|
|
ReleaseBin(ShadingEntry.BinIndex);
|
|
PipelineMap.RemoveByElementId(ShadingBinId);
|
|
bBuildIdList = true;
|
|
}
|
|
}
|
|
|
|
void FNaniteShadingPipelines::BuildIdList()
|
|
{
|
|
if (bBuildIdList)
|
|
{
|
|
ShadingIdList.Reset(PipelineMap.Num());
|
|
|
|
for (auto Iter = PipelineMap.begin(); Iter != PipelineMap.end(); ++Iter)
|
|
{
|
|
ShadingIdList.Add(Iter.GetElementId());
|
|
}
|
|
|
|
bBuildIdList = false;
|
|
}
|
|
}
|
|
|
|
const TConstArrayView<const FNaniteShadingPipelines::FShadingId> FNaniteShadingPipelines::GetIdList() const
|
|
{
|
|
check(!bBuildIdList);
|
|
return ShadingIdList;
|
|
}
|
|
|
|
static void ComputeMaterialRelevance_Thread(
|
|
const ERHIFeatureLevel::Type InFeatureLevel,
|
|
const FNaniteShadingPipelineMap& InPipelineMap,
|
|
const FNaniteShadingPipelines::FShadingId& InShadingId,
|
|
FMaterialRelevance& OutMaterialRelevance
|
|
)
|
|
{
|
|
const FNaniteShadingEntry& ShadingEntry = InPipelineMap.GetByElementId(InShadingId).Value;
|
|
|
|
if (ShadingEntry.ShadingPipeline.IsValid())
|
|
{
|
|
const FMaterialRenderProxy* MaterialProxy = ShadingEntry.ShadingPipeline->MaterialProxy;
|
|
const FMaterial* Material = ShadingEntry.ShadingPipeline->Material;
|
|
if (MaterialProxy && Material)
|
|
{
|
|
const UMaterialInterface* MaterialInterface = MaterialProxy->GetMaterialInterface();
|
|
if (MaterialInterface)
|
|
{
|
|
OutMaterialRelevance |= MaterialInterface->GetRelevance_Concurrent(InFeatureLevel);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void FNaniteShadingPipelines::ComputeRelevance(ERHIFeatureLevel::Type InFeatureLevel)
|
|
{
|
|
// Reset relevance
|
|
CombinedRelevance = FPrimitiveViewRelevance();
|
|
|
|
struct FRelevanceContext
|
|
{
|
|
FMaterialRelevance MaterialRelevance{};
|
|
};
|
|
|
|
TArray<FRelevanceContext, TInlineAllocator<8>> RelevanceContexts;
|
|
|
|
BuildIdList();
|
|
|
|
if (ShadingIdList.Num() > 0)
|
|
{
|
|
CombinedRelevance.bDrawRelevance = true;
|
|
CombinedRelevance.bStaticRelevance = true;
|
|
CombinedRelevance.bRenderInMainPass = true;
|
|
CombinedRelevance.bShadowRelevance = true;
|
|
|
|
// Nanite::GetSupportsCustomDepthRendering() && ShouldRenderCustomDepth();
|
|
CombinedRelevance.bRenderCustomDepth = false; // TODO: Unsupported in fast path
|
|
|
|
// GetLightingChannelMask() != GetDefaultLightingChannelMask();
|
|
CombinedRelevance.bUsesLightingChannels = false; // TODO: Unsupported in fast path
|
|
|
|
if (GNaniteCacheRelevanceParallel && FApp::ShouldUseThreadingForPerformance())
|
|
{
|
|
ParallelForWithTaskContext(
|
|
RelevanceContexts,
|
|
ShadingIdList.Num(),
|
|
[this, InFeatureLevel](FRelevanceContext& Context, int32 Index)
|
|
{
|
|
FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
|
|
const FNaniteShadingPipelines::FShadingId& ShadingId = ShadingIdList[Index];
|
|
ComputeMaterialRelevance_Thread(InFeatureLevel, PipelineMap, ShadingId, Context.MaterialRelevance);
|
|
}
|
|
);
|
|
|
|
for (int32 MergeIndex = 1; MergeIndex < RelevanceContexts.Num(); ++MergeIndex)
|
|
{
|
|
// Update combined material relevance
|
|
RelevanceContexts[0].MaterialRelevance |= RelevanceContexts[MergeIndex].MaterialRelevance;
|
|
}
|
|
|
|
// Apply combined material relevance to combined primitive view relevance
|
|
RelevanceContexts[0].MaterialRelevance.SetPrimitiveViewRelevance(CombinedRelevance);
|
|
}
|
|
else
|
|
{
|
|
FMaterialRelevance MaterialRelevance{};
|
|
|
|
for (const FNaniteShadingPipelines::FShadingId& ShadingId : ShadingIdList)
|
|
{
|
|
// Update combined material relevance
|
|
ComputeMaterialRelevance_Thread(InFeatureLevel, PipelineMap, ShadingId, MaterialRelevance);
|
|
}
|
|
|
|
// Apply combined material relevance to combined primitive view relevance
|
|
MaterialRelevance.SetPrimitiveViewRelevance(CombinedRelevance);
|
|
}
|
|
}
|
|
}
|
|
|
|
struct FLumenShadingBinEntry
|
|
{
|
|
FLumenShadingBinEntry(int32 InBuildIndex, const FNaniteShadingBin& InShadingBin)
|
|
: BuildIndex(InBuildIndex)
|
|
, ShadingBin(InShadingBin)
|
|
{
|
|
}
|
|
|
|
inline friend uint32 GetTypeHash(const FLumenShadingBinEntry& InEntry)
|
|
{
|
|
return uint32(InEntry.ShadingBin.BinId);
|
|
}
|
|
|
|
inline bool operator==(const FLumenShadingBinEntry& Other) const
|
|
{
|
|
return ShadingBin == Other.ShadingBin;
|
|
}
|
|
|
|
int32 BuildIndex = INDEX_NONE;
|
|
FNaniteShadingBin ShadingBin;
|
|
};
|
|
|
|
BEGIN_SHADER_PARAMETER_STRUCT(FLumenMeshCapturePassParameters, )
|
|
SHADER_PARAMETER_STRUCT_INCLUDE(FViewShaderParameters, View)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FSceneUniformParameters, Scene)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FNaniteRasterUniformParameters, NaniteRaster)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FNaniteShadingUniformParameters, NaniteShading)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FOpaqueBasePassUniformParameters, BasePass)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FLumenCardPassUniformParameters, CardPass)
|
|
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FLumenCardOutputs, LumenCardOutputs)
|
|
END_SHADER_PARAMETER_STRUCT()
|
|
|
|
void DispatchLumenMeshCapturePass(
|
|
FRDGBuilder& GraphBuilder,
|
|
FScene& Scene,
|
|
FViewInfo* SharedView,
|
|
TArrayView<const FCardPageRenderData> CardPagesToRender,
|
|
const Nanite::FRasterResults& RasterResults,
|
|
const Nanite::FRasterContext& RasterContext,
|
|
FLumenCardPassUniformParameters* PassUniformParameters,
|
|
FRDGBufferSRVRef RectMinMaxBufferSRV,
|
|
uint32 NumRects,
|
|
FIntPoint ViewportSize,
|
|
FRDGTextureRef AlbedoAtlasTexture,
|
|
FRDGTextureRef NormalAtlasTexture,
|
|
FRDGTextureRef EmissiveAtlasTexture,
|
|
FRDGTextureRef DepthAtlasTexture
|
|
)
|
|
{
|
|
checkSlow(DoesPlatformSupportNanite(GMaxRHIShaderPlatform));
|
|
checkSlow(DoesPlatformSupportLumenGI(GMaxRHIShaderPlatform));
|
|
|
|
LLM_SCOPE_BYTAG(Nanite);
|
|
RDG_EVENT_SCOPE(GraphBuilder, "Nanite::LumenMeshCapturePass");
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(Nanite_LumenMeshCapturePass);
|
|
|
|
const FRDGSystemTextures& SystemTextures = FRDGSystemTextures::Get(GraphBuilder);
|
|
|
|
FNaniteShadingCommands& ShadingCommands = Scene.NaniteShadingCommands[ENaniteMeshPass::LumenCardCapture];
|
|
ShadingCommands.SetupTask.Wait();
|
|
|
|
struct FLumenCaptureTile
|
|
{
|
|
// Top Left X: 8 bits (tile x in card atlas) - multiplied by 8 and added to card view rect min.x in shader
|
|
// Top Left Y: 8 bits (tile y in card atlas) - multiplied by 8 and added to card view rect min.y in shader
|
|
// Card Index: 16 bits
|
|
uint32 Packed;
|
|
};
|
|
|
|
struct FLumenCapturePass
|
|
{
|
|
FNaniteShadingBin ShadingBin;
|
|
TArray<uint16, TInlineAllocator<64>> ViewIndices;
|
|
uint32 TotalTileCount = 0;
|
|
|
|
bool operator<(const FLumenCapturePass& Other) const
|
|
{
|
|
return ShadingBin.BinIndex < Other.ShadingBin.BinIndex;
|
|
}
|
|
};
|
|
|
|
struct FLumenShadingBinMeta
|
|
{
|
|
uint32 DataByteOffset;
|
|
};
|
|
|
|
struct FLumenCaptureContext
|
|
{
|
|
uint32 TotalPassCount = 0;
|
|
uint32 TotalTileCount = 0;
|
|
|
|
TArray<FLumenCapturePass, SceneRenderingAllocator> Passes;
|
|
TArray<uint32, SceneRenderingAllocator> ViewIndices;
|
|
TArray<Nanite::FPackedView, SceneRenderingAllocator> PackedViews;
|
|
|
|
uint32 ShadingBinCount = 0;
|
|
|
|
uint32 NumBytes_Meta = 0;
|
|
uint32 NumBytes_Data = 0;
|
|
|
|
uint32 MaxShadingBin = 0u;
|
|
|
|
TArray<uint32, SceneRenderingAllocator> ShadingBinData;
|
|
};
|
|
|
|
FLumenCaptureContext& CaptureContext = *GraphBuilder.AllocObject<FLumenCaptureContext>();
|
|
|
|
GraphBuilder.AddSetupTask([&CaptureContext, CardPagesToRender, &Scene, ViewportSize]
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(BuildLumenMeshCaptureMaterialPasses);
|
|
|
|
CaptureContext.Passes.Reserve(CardPagesToRender.Num());
|
|
CaptureContext.PackedViews.Reserve(CardPagesToRender.Num());
|
|
CaptureContext.MaxShadingBin = 0u;
|
|
|
|
CaptureContext.TotalTileCount = 0;
|
|
|
|
// Determine unique list of shading bins
|
|
Experimental::TRobinHoodHashSet<FLumenShadingBinEntry> CapturePassSet;
|
|
|
|
for (int32 CardPageIndex = 0; CardPageIndex < CardPagesToRender.Num(); ++CardPageIndex)
|
|
{
|
|
const FCardPageRenderData& CardPageRenderData = CardPagesToRender[CardPageIndex];
|
|
check((CardPageRenderData.CardCaptureAtlasRect.Min.X & 7u) == 0 &&
|
|
(CardPageRenderData.CardCaptureAtlasRect.Min.Y & 7u) == 0);
|
|
|
|
if (!CardPageRenderData.NeedsRender())
|
|
{
|
|
continue;
|
|
}
|
|
|
|
const uint32 CardWidth = CardPageRenderData.CardCaptureAtlasRect.Width();
|
|
const uint32 CardHeight = CardPageRenderData.CardCaptureAtlasRect.Height();
|
|
check((CardWidth & 7u) == 0 && (CardHeight & 7u) == 0);
|
|
|
|
const uint32 TilesWide = CardWidth >> 3u;
|
|
const uint32 TilesTall = CardHeight >> 3u;
|
|
check(TilesWide <= 256 && TilesTall <= 256);
|
|
|
|
const uint32 TileCount = TilesWide * TilesTall;
|
|
|
|
for (const FNaniteShadingBin& ShadingBin : CardPageRenderData.NaniteShadingBins)
|
|
{
|
|
const FLumenShadingBinEntry& ShadingBinEntry = *CapturePassSet.FindOrAdd(FLumenShadingBinEntry(CaptureContext.Passes.Num(), ShadingBin));
|
|
|
|
if (ShadingBinEntry.BuildIndex >= CaptureContext.Passes.Num())
|
|
{
|
|
FLumenCapturePass CapturePass;
|
|
CapturePass.ShadingBin = ShadingBin;
|
|
CaptureContext.Passes.Emplace(CapturePass);
|
|
CaptureContext.MaxShadingBin = FMath::Max<uint32>(CaptureContext.MaxShadingBin, uint32(ShadingBin.BinIndex));
|
|
}
|
|
|
|
CaptureContext.Passes[ShadingBinEntry.BuildIndex].ViewIndices.Add(CardPageIndex);
|
|
CaptureContext.Passes[ShadingBinEntry.BuildIndex].TotalTileCount += TileCount;
|
|
|
|
CaptureContext.TotalTileCount += TileCount;
|
|
++CaptureContext.TotalPassCount;
|
|
}
|
|
|
|
//check(CaptureContext.Passes.Num() > 0);
|
|
}
|
|
|
|
if (CaptureContext.Passes.Num() > 0)
|
|
{
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(Sort);
|
|
CaptureContext.Passes.Sort();
|
|
}
|
|
|
|
CaptureContext.ShadingBinCount = CaptureContext.MaxShadingBin + 1u;
|
|
|
|
CaptureContext.NumBytes_Meta = CaptureContext.ShadingBinCount * sizeof(FLumenShadingBinMeta);
|
|
CaptureContext.NumBytes_Data = CaptureContext.TotalTileCount * sizeof(FLumenCaptureTile);
|
|
|
|
CaptureContext.ShadingBinData.SetNumUninitialized((CaptureContext.NumBytes_Meta + CaptureContext.NumBytes_Data) >> 2u);
|
|
uint8* ShadingBinDataPtr = reinterpret_cast<uint8*>(CaptureContext.ShadingBinData.GetData());
|
|
|
|
uint32 DataWriteOffset = CaptureContext.NumBytes_Meta;
|
|
|
|
// We only need to zero the shading bin meta data headers
|
|
FMemory::Memzero(ShadingBinDataPtr, CaptureContext.NumBytes_Meta);
|
|
|
|
for (FLumenCapturePass& CapturePass : CaptureContext.Passes)
|
|
{
|
|
FLumenShadingBinMeta& MetaEntry = reinterpret_cast<FLumenShadingBinMeta*>(ShadingBinDataPtr)[CapturePass.ShadingBin.BinIndex];
|
|
MetaEntry.DataByteOffset = DataWriteOffset;
|
|
|
|
DataWriteOffset += (sizeof(FLumenCaptureTile) * CapturePass.TotalTileCount);
|
|
|
|
FLumenCaptureTile* TileData = reinterpret_cast<FLumenCaptureTile*>(ShadingBinDataPtr + MetaEntry.DataByteOffset);
|
|
|
|
for (uint32 ViewIndex : CapturePass.ViewIndices)
|
|
{
|
|
const FCardPageRenderData& CardPageRenderData = CardPagesToRender[ViewIndex];
|
|
const uint32 TilesWide = CardPageRenderData.CardCaptureAtlasRect.Width() >> 3u;
|
|
const uint32 TilesTall = CardPageRenderData.CardCaptureAtlasRect.Height() >> 3u;
|
|
for (uint32 TileX = 0; TileX < TilesWide; ++TileX)
|
|
{
|
|
for (uint32 TileY = 0; TileY < TilesTall; ++TileY)
|
|
{
|
|
FLumenCaptureTile* Tile = new(TileData) FLumenCaptureTile;
|
|
Tile->Packed = (TileX & 0xFFu) | ((TileY & 0xFFu) << 8u) | ((ViewIndex & 0xFFFFu) << 16u);
|
|
++TileData;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (const FCardPageRenderData& CardPageRenderData : CardPagesToRender)
|
|
{
|
|
Nanite::FPackedViewParams Params;
|
|
Params.ViewMatrices = CardPageRenderData.ViewMatrices;
|
|
Params.PrevViewMatrices = CardPageRenderData.ViewMatrices;
|
|
Params.ViewRect = CardPageRenderData.CardCaptureAtlasRect;
|
|
Params.RasterContextSize = ViewportSize;
|
|
Params.MaxPixelsPerEdgeMultipler = 1.0f;
|
|
|
|
CaptureContext.PackedViews.Add(Nanite::CreatePackedView(Params));
|
|
}
|
|
});
|
|
|
|
FRDGBuffer* PackedViewBuffer = CreateStructuredBuffer(
|
|
GraphBuilder,
|
|
TEXT("Nanite.PackedViews"),
|
|
CaptureContext.PackedViews.GetTypeSize(),
|
|
[&PackedViews = CaptureContext.PackedViews] { return FMath::RoundUpToPowerOfTwo(PackedViews.Num()); },
|
|
[&PackedViews = CaptureContext.PackedViews] { return PackedViews.GetData(); },
|
|
[&PackedViews = CaptureContext.PackedViews] { return PackedViews.Num() * PackedViews.GetTypeSize(); }
|
|
);
|
|
|
|
FRDGBuffer* ShadingBinData = CreateByteAddressBuffer(
|
|
GraphBuilder,
|
|
TEXT("Nanite.ShadingBinData"),
|
|
[&BinData = CaptureContext.ShadingBinData]() -> auto& { return BinData; }
|
|
);
|
|
|
|
FLumenMeshCapturePassParameters* LumenCardPassParameters = GraphBuilder.AllocParameters<FLumenMeshCapturePassParameters>();
|
|
|
|
{
|
|
// NaniteRaster Uniform Buffer
|
|
{
|
|
FNaniteRasterUniformParameters* UniformParameters = GraphBuilder.AllocParameters<FNaniteRasterUniformParameters>();
|
|
|
|
UniformParameters->PageConstants = RasterResults.PageConstants;
|
|
UniformParameters->MaxNodes = Nanite::FGlobalResources::GetMaxNodes();
|
|
UniformParameters->MaxVisibleClusters = Nanite::FGlobalResources::GetMaxVisibleClusters();
|
|
UniformParameters->MaxCandidatePatches = Nanite::FGlobalResources::GetMaxCandidatePatches();
|
|
UniformParameters->MaxPatchesPerGroup = RasterResults.MaxPatchesPerGroup;
|
|
UniformParameters->MeshPass = RasterResults.MeshPass;
|
|
UniformParameters->InvDiceRate = RasterResults.InvDiceRate;
|
|
UniformParameters->RenderFlags = RasterResults.RenderFlags;
|
|
UniformParameters->DebugFlags = RasterResults.DebugFlags;
|
|
|
|
LumenCardPassParameters->NaniteRaster = GraphBuilder.CreateUniformBuffer(UniformParameters);
|
|
}
|
|
|
|
// NaniteShading Uniform Buffer
|
|
{
|
|
FNaniteShadingUniformParameters* UniformParameters = GraphBuilder.AllocParameters<FNaniteShadingUniformParameters>();
|
|
|
|
UniformParameters->ClusterPageData = Nanite::GStreamingManager.GetClusterPageDataSRV(GraphBuilder);
|
|
UniformParameters->HierarchyBuffer = Nanite::GStreamingManager.GetHierarchySRV(GraphBuilder);
|
|
UniformParameters->VisibleClustersSWHW = GraphBuilder.CreateSRV(RasterResults.VisibleClustersSWHW);
|
|
|
|
UniformParameters->VisBuffer64 = RasterContext.VisBuffer64;
|
|
UniformParameters->DbgBuffer64 = SystemTextures.Black;
|
|
UniformParameters->DbgBuffer32 = SystemTextures.Black;
|
|
UniformParameters->ShadingMask = SystemTextures.Black;
|
|
|
|
UniformParameters->ShadingBinData = GraphBuilder.CreateSRV(ShadingBinData);
|
|
|
|
UniformParameters->MultiViewEnabled = 1;
|
|
UniformParameters->MultiViewIndices = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultStructuredBuffer<uint32>(GraphBuilder));
|
|
UniformParameters->MultiViewRectScaleOffsets = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultStructuredBuffer<FVector4>(GraphBuilder));
|
|
UniformParameters->InViews = GraphBuilder.CreateSRV(PackedViewBuffer);
|
|
|
|
LumenCardPassParameters->NaniteShading = GraphBuilder.CreateUniformBuffer(UniformParameters);
|
|
}
|
|
}
|
|
|
|
CardPagesToRender[0].PatchView(&Scene, SharedView);
|
|
LumenCardPassParameters->View = SharedView->GetShaderParameters();
|
|
LumenCardPassParameters->Scene = SharedView->GetSceneUniforms().GetBuffer(GraphBuilder);
|
|
LumenCardPassParameters->CardPass = GraphBuilder.CreateUniformBuffer(PassUniformParameters);
|
|
|
|
{
|
|
FLumenCardOutputs* Outputs = GraphBuilder.AllocParameters<FLumenCardOutputs>();
|
|
|
|
// No possibility of read/write hazard due to fully resolved vbuffer/materials
|
|
const ERDGUnorderedAccessViewFlags OutTargetFlags = ERDGUnorderedAccessViewFlags::SkipBarrier;
|
|
|
|
Outputs->OutTarget0 = GraphBuilder.CreateUAV(AlbedoAtlasTexture, OutTargetFlags);
|
|
Outputs->OutTarget1 = GraphBuilder.CreateUAV(NormalAtlasTexture, OutTargetFlags);
|
|
Outputs->OutTarget2 = GraphBuilder.CreateUAV(EmissiveAtlasTexture, OutTargetFlags);
|
|
|
|
LumenCardPassParameters->LumenCardOutputs = GraphBuilder.CreateUniformBuffer(Outputs);
|
|
}
|
|
|
|
GraphBuilder.AddPass(
|
|
RDG_EVENT_NAME("LumenShadeCS"),
|
|
LumenCardPassParameters,
|
|
ERDGPassFlags::Compute,
|
|
[LumenCardPassParameters, SharedView, &ShadingCommands, &CapturePasses = CaptureContext.Passes]
|
|
(FRDGAsyncTask, FRHIComputeCommandList& RHICmdList)
|
|
{
|
|
// This is processed within the RDG pass lambda, so the setup task should be complete by now.
|
|
check(ShadingCommands.BuildCommandsTask.IsCompleted());
|
|
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(LumenEmitGBuffer);
|
|
SCOPED_DRAW_EVENTF(RHICmdList, LumenEmitGBuffer, TEXT("%d materials"), CapturePasses.Num());
|
|
|
|
FRHIBatchedShaderParameters& BatchedParameters = RHICmdList.GetScratchShaderParameters();
|
|
check(!BatchedParameters.HasParameters());
|
|
|
|
for (const FLumenCapturePass& CapturePass : CapturePasses)
|
|
{
|
|
const int32 CommandIndex = ShadingCommands.CommandLookup[CapturePass.ShadingBin.BinIndex];
|
|
FNaniteShadingCommand& ShadingCommand = ShadingCommands.Commands[CommandIndex];
|
|
check(ShadingCommand.ShadingBin == CapturePass.ShadingBin.BinIndex);
|
|
|
|
if (!Nanite::PrepareShadingCommand(ShadingCommand))
|
|
{
|
|
break;
|
|
}
|
|
|
|
#if WANTS_DRAW_MESH_EVENTS
|
|
SCOPED_CONDITIONAL_DRAW_EVENTF(RHICmdList, LumenCS, GShowMaterialDrawEvents != 0, TEXT("%s [%d tiles]"), GetShadingMaterialName(ShadingCommand.Pipeline->MaterialProxy), CapturePass.TotalTileCount);
|
|
#endif
|
|
|
|
TRDGUniformBufferRef<FLumenCardOutputs> LumenCardOutputs = LumenCardPassParameters->LumenCardOutputs.GetUniformBuffer();
|
|
|
|
// Record parameters
|
|
FRHIBatchedShaderParameters& ShadingParameters = RHICmdList.GetScratchShaderParameters();
|
|
Nanite::RecordLumenCardParameters(ShadingParameters, ShadingCommand, LumenCardPassParameters->LumenCardOutputs->GetRHIRef());
|
|
|
|
// Record dispatch
|
|
{
|
|
FRHIComputeShader* ComputeShaderRHI = ShadingCommand.Pipeline->ComputeShader;
|
|
SetComputePipelineState(RHICmdList, ComputeShaderRHI);
|
|
|
|
if (GRHISupportsShaderRootConstants)
|
|
{
|
|
RHICmdList.SetShaderRootConstants(ShadingCommand.PassData);
|
|
}
|
|
|
|
RHICmdList.SetBatchedShaderParameters(ComputeShaderRHI, ShadingParameters);
|
|
RHICmdList.DispatchComputeShader(CapturePass.TotalTileCount, 1, 1);
|
|
}
|
|
}
|
|
}
|
|
);
|
|
|
|
// Mark scene stencil for all Nanite pixels
|
|
{
|
|
MarkSceneStencilRects(
|
|
GraphBuilder,
|
|
RasterContext,
|
|
Scene,
|
|
SharedView,
|
|
ViewportSize,
|
|
NumRects,
|
|
RectMinMaxBufferSRV,
|
|
DepthAtlasTexture
|
|
);
|
|
}
|
|
|
|
// Emit scene depth values for all Nanite pixels
|
|
{
|
|
EmitSceneDepthRects(
|
|
GraphBuilder,
|
|
RasterContext,
|
|
Scene,
|
|
SharedView,
|
|
ViewportSize,
|
|
NumRects,
|
|
RectMinMaxBufferSRV,
|
|
DepthAtlasTexture
|
|
);
|
|
}
|
|
}
|