Files
UnrealEngine/Engine/Source/Runtime/Renderer/Private/Nanite/NaniteShared.h
2025-05-18 13:04:45 +08:00

939 lines
29 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "CoreMinimal.h"
#include "SceneView.h"
#include "ShaderParameterMacros.h"
#include "GlobalShader.h"
#include "UnifiedBuffer.h"
#include "RenderGraphResources.h"
#include "RenderGraphUtils.h"
#include "Rendering/NaniteResources.h"
#include "NaniteFeedback.h"
#include "MaterialDomain.h"
#include "MaterialShaderType.h"
#include "MaterialShader.h"
#include "Misc/ScopeRWLock.h"
#include "Experimental/Containers/RobinHoodHashTable.h"
#include "LightMapRendering.h" // TODO: Remove with later refactor (moving Nanite shading into its own files)
#include "RenderUtils.h"
#include "PrimitiveViewRelevance.h"
// Declares the LogNanite log category (the definition lives in another translation unit).
DECLARE_LOG_CATEGORY_EXTERN(LogNanite, Warning, All);
// Declares the GPU stat that is displayed under the name "Nanite Debug".
DECLARE_GPU_STAT_NAMED_EXTERN(NaniteDebug, TEXT("Nanite Debug"));
// Forward declarations: only the names are needed by this header.
struct FSceneTextures;
struct FDBufferTextures;
namespace Nanite
{
// Counterpart to FPackedNaniteView in NanitePackedNaniteView.ush
// NOTE(review): because this mirrors a shader-side struct, member order, types and the explicit
// padding members presumably have to stay in sync with the .ush definition - confirm before reordering.
struct FPackedView
{
// Transforms for the current frame.
FMatrix44f SVPositionToTranslatedWorld;
FMatrix44f ViewToTranslatedWorld;
FMatrix44f TranslatedWorldToView;
FMatrix44f TranslatedWorldToClip;
FMatrix44f ViewToClip;
FMatrix44f ClipToRelativeWorld;
// "Prev"-prefixed versions of the transforms above.
FMatrix44f PrevTranslatedWorldToView;
FMatrix44f PrevTranslatedWorldToClip;
FMatrix44f PrevViewToClip;
FMatrix44f PrevClipToRelativeWorld;
FIntVector4 ViewRect;
FVector4f ViewSizeAndInvSize;
FVector4f ClipSpaceScaleOffset;
FVector4f MaterialCacheUnwrapMinAndInvSize;
FVector4f MaterialCachePageAdvanceAndInvCount;
// The FVector3f members below are each followed by a 32-bit scalar; the three ViewOriginHigh
// components are stored individually in those scalar slots.
FVector3f PreViewTranslationHigh;
float ViewOriginHighX;
FVector3f PrevPreViewTranslationHigh;
float ViewOriginHighY;
FVector3f PrevPreViewTranslationLow;
float CullingViewMinRadiusTestFactorSq;
FVector3f ViewOriginLow;
float ViewOriginHighZ;
FVector3f CullingViewOriginTranslatedWorld;
float RangeBasedCullingDistance;
FVector3f ViewForward;
float NearPlane;
FVector4f TranslatedGlobalClipPlane;
FVector3f PreViewTranslationLow;
float CullingViewScreenMultipleSq;
FVector2f LODScales;
uint32 InstanceOcclusionQueryMask;
uint32 StreamingPriorityCategory_AndFlags;
FIntVector4 TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ;
FIntVector4 HZBTestViewRect; // In full resolution
FUintVector4 FirstPersonTransformRowsExceptRow2Z; // Packed into half floats
uint32 FirstPersonTransformRow2Z;
uint32 LightingChannelMask;
int32 SceneRendererPrimaryViewId; // The primary view ID either refers to this view itself, OR it refers (in the case of a secondary, mostly shadow, view) to the relevant primary view.
uint32 Padding1;
FVector2f DynamicDepthCullRange;
uint32 Padding2[2];
/**
* Calculates the LOD scales assuming view size and projection is already set up.
* TODO: perhaps more elegant/robust if this happened at construction time, and input was a non-packed NaniteView.
* Note: depends on the global 'GNaniteMaxPixelsPerEdge'.
* NOTE(review): the signature takes both pixels-per-edge values as explicit arguments; presumably the
* caller reads them from the global mentioned above - confirm against the implementation.
*/
void UpdateLODScales(const float NaniteMaxPixelsPerEdge, const float MinPixelsPerEdgeHW);
};
// Holds an array of FPackedView plus an optional task that fills the array.
// Instances are created through the static Create* functions; the constructor is private.
class FPackedViewArray
{
public:
using ArrayType = TArray<FPackedView, SceneRenderingAllocator>;
// Callback signature used by CreateWithSetupTask; it receives the array to fill.
using TaskLambdaType = TFunction<void(ArrayType&)>;
/** Creates a packed view array for a single element. */
static FPackedViewArray* Create(FRDGBuilder& GraphBuilder, const FPackedView& View);
/** Creates a packed view array for an existing array. */
static FPackedViewArray* Create(FRDGBuilder& GraphBuilder, uint32 NumViews, ArrayType&& Views);
/** Creates a packed view array by launching an RDG setup task. */
static FPackedViewArray* CreateWithSetupTask(FRDGBuilder& GraphBuilder, uint32 NumViews, TaskLambdaType&& TaskLambda, UE::Tasks::FPipe* Pipe = nullptr, bool bExecuteInTask = true);
/** Returns the view array, and will sync the setup task if one exists. */
const ArrayType& GetViews() const
{
// Wait on the setup task first, then verify the array holds exactly NumViews entries.
SetupTask.Wait();
check(Views.Num() == NumViews);
return Views;
}
// Expected number of views; fixed at construction.
const uint32 NumViews;
/** Returns a copy of the setup task handle. */
UE::Tasks::FTask GetSetupTask() const
{
return SetupTask;
}
private:
FPackedViewArray(uint32 InNumViews)
: NumViews(InNumViews)
{}
// Packed views containing all expanded mips.
ArrayType Views;
// The task that is generating the Views data array, if any.
// Declared mutable so the const GetViews() can call Wait() on it.
mutable UE::Tasks::FTask SetupTask;
RDG_FRIEND_ALLOCATOR_FRIEND(FPackedViewArray);
};
// Parameter bundle accepted by CreatePackedView() to produce an FPackedView.
// Every member except the view matrices, ViewRect and RasterContextSize carries an explicit default.
struct FPackedViewParams
{
FViewMatrices ViewMatrices;
FViewMatrices PrevViewMatrices;
FIntRect ViewRect;
FIntPoint RasterContextSize;
uint32 StreamingPriorityCategory = 0;
float MinBoundsRadius = 0.0f;
float LODScaleFactor = 1.0f;
float ViewLODDistanceFactor = 1.0f;
uint32 Flags = NANITE_VIEW_FLAG_NEAR_CLIP;
int32 TargetLayerIndex = INDEX_NONE;
int32 PrevTargetLayerIndex = INDEX_NONE;
int32 TargetMipLevel = 0;
int32 TargetMipCount = 1;
float RangeBasedCullingDistance = 0.0f; // not used unless the flag NANITE_VIEW_FLAG_DISTANCE_CULL is set
FIntRect HZBTestViewRect = {0, 0, 0, 0};
float MaxPixelsPerEdgeMultipler = 1.0f;
// Culling-view overrides; see SetCullingViewOverrides() declared after this struct.
bool bUseCullingViewOverrides = false;
FVector CullingViewOrigin = FVector::ZeroVector;
float CullingViewScreenMultipleSq = 0.0f;
float CullingViewMinRadiusTestFactorSq = 0.0f; // not used unless the flag NANITE_VIEW_MIN_SCREEN_RADIUS_CULL is set and support is compiled into the culling shader
FPlane GlobalClippingPlane = {0.0f, 0.0f, 0.0f, 0.0f};
// Identifies the bit in the GPUScene::InstanceVisibilityMaskBuffer associated with the current view.
// Visibility mask buffer may be used if this is non-zero.
uint32 InstanceOcclusionQueryMask = 0;
uint32 LightingChannelMask = 0b111; // All channels are visible by default
bool bUseLightingChannelMask = false;
int32 SceneRendererPrimaryViewId = -1;
// clip-space Far/Near extra culling range for dynamic geometry (for VSM)
// discards geometry that fails the culling test See FBoxCull::Frustum
// Defaults to (0.0f, FLT_MAX) which means no extra culling
FVector2f DynamicDepthCullRange = FVector2f(0.0f, FLT_MAX);
};
// Helper function to setup the overrides for a culling view.
// This is used for shadow views that have an associated "main" view that drives distance/screensize elements of the culling.
// InOutParams is taken by non-const reference and is the object that receives the overrides.
void SetCullingViewOverrides(FViewInfo const* InCullingView, Nanite::FPackedViewParams& InOutParams);
// Builds an FPackedView from an explicit parameter bundle.
FPackedView CreatePackedView(const FPackedViewParams& Params);
// Convenience function to pull relevant packed view parameters out of a FViewInfo
// All arguments after Flags are optional; InHZBTestViewRect may be left as nullptr.
FPackedView CreatePackedViewFromViewInfo(
const FViewInfo& View,
FIntPoint RasterContextSize,
uint32 Flags,
uint32 StreamingPriorityCategory = 0,
float MinBoundsRadius = 0.0f,
float MaxPixelsPerEdgeMultipler = 1.0f,
/** Note: this rect should be in HZB space. */
const FIntRect* InHZBTestViewRect = nullptr
);
/** Whether to draw multiple FSceneView in one Nanite pass (as opposed to view by view). */
bool ShouldDrawSceneViewsInOneNanitePass(const FViewInfo& View);
// Output record for one Nanite visualization mode.
// Every member has a default so that a default-constructed instance (`FVisualizeResult R;`)
// never exposes indeterminate values. The defaults are the zero values, i.e. exactly what
// value-initialization (`FVisualizeResult R = {};`) already produced, so existing callers
// observe no change.
struct FVisualizeResult
{
	// Texture produced for the mode; null until assigned.
	FRDGTextureRef ModeOutput = nullptr;
	// Name of the visualization mode.
	FName ModeName;
	// Numeric identifier of the visualization mode.
	int32 ModeID = 0;
	// Single-bit flags (bit-field default initializers, as already used elsewhere in this file).
	uint8 bCompositeScene : 1 = 0;
	uint8 bSkippedTile : 1 = 0;
};
// Bundle of the render-graph buffers associated with binning, plus the bin count.
// Defaults describe an empty result: zero bins and null buffer references.
struct FBinningData
{
uint32 BinCount = 0;
FRDGBufferRef DataBuffer = nullptr;
FRDGBufferRef MetaBuffer = nullptr;
FRDGBufferRef IndirectArgs = nullptr;
};
// A pooled buffer together with the node and cluster-batch counts recorded for it.
// NOTE(review): presumably the counts are the capacities the buffer was created with - confirm at the use site.
struct FNodesAndClusterBatchesBuffer
{
TRefCountPtr<FRDGPooledBuffer> Buffer;
uint32 NumNodes = 0;
uint32 NumClusterBatches = 0;
};
/*
* GPU side buffers containing Nanite resource data.
* Implemented as an FRenderResource (InitRHI/ReleaseRHI are overridden) and exposed through the
* GGlobalResources global declared right after the class.
*/
class FGlobalResources : public FRenderResource
{
public:
// Per-pass buffers; one instance exists for the main pass and one for the post pass (see the private members).
struct PassBuffers
{
// Used for statistics
TRefCountPtr<FRDGPooledBuffer> StatsRasterizeArgsSWHWBuffer;
};
// Used for statistics
uint32 StatsRenderFlags = 0;
uint32 StatsDebugFlags = 0;
// Picking readback state.
// NOTE(review): MaxPickingBuffers is presumably the upper bound for PickingBuffers - confirm in the .cpp.
const int32 MaxPickingBuffers = 4;
int32 PickingBufferWriteIndex = 0;
int32 PickingBufferNumPending = 0;
TArray<TUniquePtr<FRHIGPUBufferReadback>> PickingBuffers;
FNodesAndClusterBatchesBuffer MainAndPostNodesAndClusterBatchesBuffer;
public:
virtual void InitRHI(FRHICommandListBase& RHICmdList) override;
virtual void ReleaseRHI() override;
void Update(FRDGBuilder& GraphBuilder); // Called once per frame before any Nanite rendering has occurred.
// Static capacity queries; the values are computed in the implementation file.
static uint32 GetMaxCandidateClusters();
static uint32 GetMaxClusterBatches();
static uint32 GetMaxVisibleClusters();
static uint32 GetMaxNodes();
static uint32 GetMaxCandidatePatches();
static uint32 GetMaxVisiblePatches();
// Accessors returning mutable references to the privately held buffers.
inline PassBuffers& GetMainPassBuffers() { return MainPassBuffers; }
inline PassBuffers& GetPostPassBuffers() { return PostPassBuffers; }
TRefCountPtr<FRDGPooledBuffer>& GetStatsBufferRef() { return StatsBuffer; }
TRefCountPtr<FRDGPooledBuffer>& GetShadingBinDataBufferRef() { return ShadingBinDataBuffer; }
TRefCountPtr<IPooledRenderTarget>& GetFastClearTileVisRef() { return FastClearTileVis; }
#if !UE_BUILD_SHIPPING
// Only available in non-shipping builds; may return nullptr (the member defaults to nullptr).
FFeedbackManager* GetFeedbackManager() { return FeedbackManager; }
#endif
private:
PassBuffers MainPassBuffers;
PassBuffers PostPassBuffers;
// Used for statistics
TRefCountPtr<FRDGPooledBuffer> StatsBuffer;
// Used for visualizations
TRefCountPtr<FRDGPooledBuffer> ShadingBinDataBuffer;
TRefCountPtr<IPooledRenderTarget> FastClearTileVis;
#if !UE_BUILD_SHIPPING
// Raw pointer; NOTE(review): ownership/lifetime is handled outside this header - confirm in InitRHI/ReleaseRHI.
FFeedbackManager* FeedbackManager = nullptr;
#endif
};
// The single global instance of the Nanite resources (defined in another translation unit).
extern TGlobalResource< FGlobalResources > GGlobalResources;
} // namespace Nanite
// Uniform buffer layout for Nanite shading. The members are declared through the shader
// parameter macros; their order defines the generated struct, so it is left untouched.
BEGIN_GLOBAL_SHADER_PARAMETER_STRUCT(FNaniteShadingUniformParameters, )
// Raw byte-address buffers.
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, ClusterPageData)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, VisibleClustersSWHW)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, HierarchyBuffer)
// Textures.
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<uint>, ShadingMask)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<UlongType>, VisBuffer64)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<UlongType>, DbgBuffer64)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<uint>, DbgBuffer32)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, ShadingBinData)
// Multi view
SHADER_PARAMETER(uint32, MultiViewEnabled)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<uint>, MultiViewIndices)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, MultiViewRectScaleOffsets)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<FPackedNaniteView>, InViews)
END_SHADER_PARAMETER_STRUCT()
// Creates a shading uniform buffer for debug use; the contents are defined by the implementation.
extern TRDGUniformBufferRef<FNaniteShadingUniformParameters> CreateDebugNaniteShadingUniformBuffer(FRDGBuilder& GraphBuilder);
// Uniform buffer layout for Nanite rasterization: page constants, capacity limits and flag words.
// Member order defines the generated struct, so it is left untouched.
BEGIN_GLOBAL_SHADER_PARAMETER_STRUCT(FNaniteRasterUniformParameters, )
SHADER_PARAMETER(FIntVector4, PageConstants)
SHADER_PARAMETER(uint32, MaxNodes)
SHADER_PARAMETER(uint32, MaxVisibleClusters)
SHADER_PARAMETER(uint32, MaxCandidatePatches)
SHADER_PARAMETER(uint32, MaxPatchesPerGroup)
SHADER_PARAMETER(uint32, MeshPass)
SHADER_PARAMETER(float, InvDiceRate)
SHADER_PARAMETER(uint32, RenderFlags)
SHADER_PARAMETER(uint32, DebugFlags)
END_SHADER_PARAMETER_STRUCT()
// Creates a raster uniform buffer for debug use; the contents are defined by the implementation.
extern TRDGUniformBufferRef<FNaniteRasterUniformParameters> CreateDebugNaniteRasterUniformBuffer(FRDGBuilder& GraphBuilder);
// Uniform buffer layout for Nanite ray tracing. Unlike the two structs above, this one is
// declared with RENDERER_API as its second macro argument.
BEGIN_GLOBAL_SHADER_PARAMETER_STRUCT(FNaniteRayTracingUniformParameters, RENDERER_API)
SHADER_PARAMETER(FIntVector4, PageConstants)
SHADER_PARAMETER(uint32, MaxNodes)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, ClusterPageData)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, HierarchyBuffer)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<uint>, RayTracingDataBuffer)
END_SHADER_PARAMETER_STRUCT()
// Common base for Nanite global shaders: restricts compilation to platforms that support
// Nanite and applies the defines and compiler flags shared by those shaders.
class FNaniteGlobalShader : public FGlobalShader
{
public:
	FNaniteGlobalShader() = default;

	FNaniteGlobalShader(const ShaderMetaType::CompiledShaderInitializerType& Initializer)
		: FGlobalShader(Initializer)
	{}

	/** A permutation is compiled only when the target platform supports Nanite. */
	static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
	{
		const bool bPlatformHasNanite = DoesPlatformSupportNanite(Parameters.Platform);
		return bPlatformHasNanite;
	}

	/** Extends the base environment with the Nanite-specific defines and compiler flags. */
	static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
	{
		FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);

		OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1);

		// Prefer the spline mesh scene resources (texture) whenever the platform allows it, for performance.
		OutEnvironment.SetDefine(
			TEXT("USE_SPLINE_MESH_SCENE_RESOURCES"),
			UseSplineMeshSceneResources(Parameters.Platform));

		// Shader model 6.0+ is required, hence DXC and HLSL 2021; warnings are treated as errors.
		auto& Flags = OutEnvironment.CompilerFlags;
		Flags.Add(CFLAG_ForceDXC);
		Flags.Add(CFLAG_HLSL2021);
		Flags.Add(CFLAG_WarningsAsErrors);
	}
};
// Common base for Nanite material shaders. Hosts the predicates that decide which
// vertex / pixel / compute permutations get compiled, and the shared compilation environment.
class FNaniteMaterialShader : public FMaterialShader
{
public:
	FNaniteMaterialShader() = default;

	FNaniteMaterialShader(const ShaderMetaType::CompiledShaderInitializerType& Initializer)
		: FMaterialShader(Initializer)
	{}

	/**
	 * True when the material needs programmable vertex processing.
	 * @param MaterialParameters  Material attributes to inspect.
	 * @param bHWRasterShader     When true, tessellation is not taken into account.
	 */
	static bool IsVertexProgrammable(const FMaterialShaderParameters& MaterialParameters, bool bHWRasterShader)
	{
		// Position offset or material cache support is sufficient on its own.
		if (MaterialParameters.bHasVertexPositionOffsetConnected || MaterialParameters.bSupportsMaterialCache)
		{
			return true;
		}

		// Tessellation only counts outside of the HW raster path.
		if (!bHWRasterShader && MaterialParameters.bIsTessellationEnabled)
		{
			return true;
		}

		// Vertex interpolators / customized UVs only count when the material is also pixel programmable.
		const bool bUsesVertexUVs = MaterialParameters.bHasVertexInterpolator || MaterialParameters.NumCustomizedUVs > 0;
		return bUsesVertexUVs && IsPixelProgrammable(MaterialParameters);
	}

	/** Bit-flag variant: true when any vertex-programmable flag is set. */
	static bool IsVertexProgrammable(uint32 MaterialBitFlags)
	{
		return (MaterialBitFlags & NANITE_MATERIAL_VERTEX_PROGRAMMABLE_FLAGS) != 0u;
	}

	/** True when the material is masked or has pixel depth offset connected. */
	static bool IsPixelProgrammable(const FMaterialShaderParameters& MaterialParameters)
	{
		if (MaterialParameters.bIsMasked)
		{
			return true;
		}
		return MaterialParameters.bHasPixelDepthOffsetConnected;
	}

	/** Bit-flag variant: true when any pixel-programmable flag is set. */
	static bool IsPixelProgrammable(uint32 MaterialBitFlags)
	{
		return (MaterialBitFlags & NANITE_MATERIAL_PIXEL_PROGRAMMABLE_FLAGS) != 0u;
	}

	/**
	 * Decides whether a (vertex programmable, pixel programmable) permutation should be compiled.
	 * The default material compiles every permutation. Any other material compiles a permutation only if
	 * at least one of the two permutation bits is set and every set bit is backed by the material.
	 */
	static bool ShouldCompileProgrammablePermutation(
		const FMaterialShaderParameters& MaterialParameters,
		bool bPermutationVertexProgrammable,
		bool bPermutationPixelProgrammable,
		bool bHWRasterShader)
	{
		if (MaterialParameters.bIsDefaultMaterial)
		{
			return true;
		}

		// Custom materials should compile only the specific combination that is actually used
		// TODO: The status of material attributes on the FMaterialShaderParameters is determined without knowledge of any static
		// switches' values, and therefore when true could represent the set of materials that both enable them and do not. We could
		// isolate a narrower set of required shaders if FMaterialShaderParameters reflected the status after static switches are
		// applied. The exact-match form would then be:
		//   IsVertexProgrammable(MaterialParameters, bHWRasterShader) == bPermutationVertexProgrammable &&
		//   IsPixelProgrammable(MaterialParameters) == bPermutationPixelProgrammable

		// A permutation with neither bit set is never compiled for a custom material.
		if (!bPermutationVertexProgrammable && !bPermutationPixelProgrammable)
		{
			return false;
		}

		// Each requested capability has to be one the material can actually use.
		if (bPermutationVertexProgrammable && !IsVertexProgrammable(MaterialParameters, bHWRasterShader))
		{
			return false;
		}
		if (bPermutationPixelProgrammable && !IsPixelProgrammable(MaterialParameters))
		{
			return false;
		}
		return true;
	}

	/** Pixel shader filter: Nanite platform, surface domain, and default material or pixel programmable Nanite material. */
	static bool ShouldCompilePixelPermutation(const FMaterialShaderPermutationParameters& Parameters)
	{
		const FMaterialShaderParameters& Material = Parameters.MaterialParameters;

		if (!DoesPlatformSupportNanite(Parameters.Platform) || Material.MaterialDomain != MD_Surface)
		{
			return false;
		}

		// Always compile default material as the fast opaque "fixed function" raster path
		if (Material.bIsDefaultMaterial)
		{
			return true;
		}

		// Otherwise compile only if the material requires programmable raster
		return Material.bIsUsedWithNanite && FNaniteMaterialShader::IsPixelProgrammable(Material);
	}

	/** Vertex shader filter: Nanite platform, surface domain, and default material or vertex programmable Nanite material. */
	static bool ShouldCompileVertexPermutation(const FMaterialShaderPermutationParameters& Parameters)
	{
		// all vertex permutations are HWRaster
		constexpr bool bHWRasterShader = true;

		const FMaterialShaderParameters& Material = Parameters.MaterialParameters;

		if (!DoesPlatformSupportNanite(Parameters.Platform) || Material.MaterialDomain != MD_Surface)
		{
			return false;
		}

		// Always compile default material as the fast opaque "fixed function" raster path
		if (Material.bIsDefaultMaterial)
		{
			return true;
		}

		// Otherwise compile only if the material requires programmable raster
		return Material.bIsUsedWithNanite && FNaniteMaterialShader::IsVertexProgrammable(Material, bHWRasterShader);
	}

	/** Compute shader filter: Nanite platform, surface domain, and default material or any programmable Nanite material. */
	static bool ShouldCompileComputePermutation(const FMaterialShaderPermutationParameters& Parameters)
	{
		// all compute permutations are SWRaster
		constexpr bool bHWRasterShader = false;

		const FMaterialShaderParameters& Material = Parameters.MaterialParameters;

		if (!DoesPlatformSupportNanite(Parameters.Platform) || Material.MaterialDomain != MD_Surface)
		{
			return false;
		}

		// Always compile default material as the fast opaque "fixed function" raster path
		if (Material.bIsDefaultMaterial)
		{
			return true;
		}

		// Otherwise compile only if the material requires programmable raster (vertex or pixel)
		if (!Material.bIsUsedWithNanite)
		{
			return false;
		}
		return IsVertexProgrammable(Material, bHWRasterShader) || IsPixelProgrammable(Material);
	}

	/** Extends the base material shader environment with the Nanite raster flags and defines. */
	static void ModifyCompilationEnvironment(const FMaterialShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
	{
		FMaterialShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);

		// Force shader model 6.0+
		auto& Flags = OutEnvironment.CompilerFlags;
		Flags.Add(CFLAG_ForceDXC);
		Flags.Add(CFLAG_HLSL2021);
		Flags.Add(CFLAG_ShaderBundle);
		Flags.Add(CFLAG_RootConstants);
		Flags.Add(CFLAG_SupportsMinimalBindless);

		OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1);
		OutEnvironment.SetDefine(TEXT("NANITE_MATERIAL_SHADER"), 1);
		OutEnvironment.SetDefine(TEXT("IS_NANITE_RASTER_PASS"), 1);
		OutEnvironment.SetDefine(TEXT("IS_NANITE_PASS"), 1);

		// Only the raster uniform buffer is enabled here.
		OutEnvironment.SetDefine(TEXT("NANITE_USE_SHADING_UNIFORM_BUFFER"), 0);
		OutEnvironment.SetDefine(TEXT("NANITE_USE_RASTER_UNIFORM_BUFFER"), 1);
		OutEnvironment.SetDefine(TEXT("NANITE_USE_VIEW_UNIFORM_BUFFER"), 0);

		// Force definitions of GetObjectWorldPosition(), etc..
		OutEnvironment.SetDefine(TEXT("HAS_PRIMITIVE_UNIFORM_BUFFER"), 1);

		OutEnvironment.SetDefine(
			TEXT("ALWAYS_EVALUATE_WORLD_POSITION_OFFSET"),
			Parameters.MaterialParameters.bAlwaysEvaluateWorldPositionOffset ? 1 : 0);

		// Use the spline mesh texture when possible for performance
		OutEnvironment.SetDefine(
			TEXT("USE_SPLINE_MESH_SCENE_RESOURCES"),
			UseSplineMeshSceneResources(Parameters.Platform));
	}
};
// Forward declarations for types that the declarations below only use by pointer or as template arguments.
class FMaterialRenderProxy;
class FHWRasterizePS;
class FHWRasterizeVS;
class FHWRasterizeMS;
class FMicropolyRasterizeCS;
// Description of a Nanite raster pipeline: the raster material, displacement settings and a set
// of single-bit feature flags. Used as the key type of FNaniteRasterPipelineMap.
struct FNaniteRasterPipeline
{
// Non-owning pointer to the raster material; also compared by FNaniteRasterEntryKeyFuncs::Matches.
const FMaterialRenderProxy* RasterMaterial = nullptr;
FDisplacementScaling DisplacementScaling;
FDisplacementFadeRange DisplacementFadeRange;
// Feature flags, one bit each, all off by default (bit-field default member initializers).
bool bIsTwoSided : 1 = false;
bool bWPOEnabled : 1 = false;
bool bDisplacementEnabled : 1 = false;
bool bPerPixelEval : 1 = false;
bool bSplineMesh : 1 = false;
bool bSkinnedMesh : 1 = false;
bool bVoxel : 1 = false;
bool bHasWPODistance : 1 = false;
bool bHasPixelDistance : 1 = false;
bool bHasDisplacementFadeOut : 1 = false;
bool bFixedDisplacementFallback : 1 = false;
bool bCastShadow : 1 = false;
bool bVertexUVs : 1 = false;
// Returns the fixed function pipeline for the given bin mask (defined in the implementation file).
static FNaniteRasterPipeline GetFixedFunctionPipeline(uint8 BinMask);
// Hash of this pipeline; which members contribute is decided in the implementation file.
uint32 GetPipelineHash() const;
// Writes a fallback pipeline to OutFallback; NOTE(review): the bool result presumably reports
// whether a fallback exists - confirm in the implementation.
bool GetFallbackPipeline(FNaniteRasterPipeline& OutFallback) const;
// Hash hook used by the engine containers; simply forwards to GetPipelineHash().
FORCENOINLINE friend uint32 GetTypeHash(const FNaniteRasterPipeline& Other)
{
return Other.GetPipelineHash();
}
};
// Handle identifying a registered raster bin. A default-constructed handle
// (BinId == INDEX_NONE, BinIndex == 0xFFFF) is the "invalid" value.
struct FNaniteRasterBin
{
	int32 BinId = INDEX_NONE;
	uint16 BinIndex = 0xFFFFu;

	/** Two handles are equal when both the id and the index agree. */
	inline bool operator==(const FNaniteRasterBin& Other) const
	{
		if (BinId != Other.BinId)
		{
			return false;
		}
		return BinIndex == Other.BinIndex;
	}

	inline bool operator!=(const FNaniteRasterBin& Other) const
	{
		const bool bSame = (*this == Other);
		return !bSame;
	}

	/** Valid means "differs from a default-constructed handle". */
	inline bool IsValid() const
	{
		const FNaniteRasterBin InvalidBin{};
		return !(*this == InvalidBin);
	}
};
// Key for the per-pipeline raster material cache. All state is packed into one 32-bit word:
// 3 bits of feature level + 16 single-bit flags + 13 unused bits = 32 bits.
// Ordering, equality and hashing all operate on the packed word.
struct FNaniteRasterMaterialCacheKey
{
union
{
struct
{
uint32 FeatureLevel : 3;
uint32 bWPOEnabled : 1;
uint32 bPerPixelEval : 1;
uint32 bUseMeshShader : 1;
uint32 bUsePrimitiveShader : 1;
uint32 bDisplacementEnabled : 1;
uint32 bVisualizeActive : 1;
uint32 bHasVirtualShadowMap : 1;
uint32 bIsDepthOnly : 1;
uint32 bIsTwoSided : 1;
uint32 bCastShadow : 1;
uint32 bVoxel : 1;
uint32 bSplineMesh : 1;
uint32 bSkinnedMesh : 1;
uint32 bFixedDisplacementFallback : 1;
uint32 bUseWorkGraphSW : 1;
uint32 bUseWorkGraphHW : 1;
uint32 Unused : 13;
};
// Whole-word view of the bit-fields above; zero-initialized, which clears every flag.
uint32 Packed = 0;
};
bool operator < (FNaniteRasterMaterialCacheKey Other) const
{
return Packed < Other.Packed;
}
bool operator == (FNaniteRasterMaterialCacheKey Other) const
{
return Packed == Other.Packed;
}
bool operator != (FNaniteRasterMaterialCacheKey Other) const
{
return Packed != Other.Packed;
}
};
// FeatureLevel has 3 bits, so the feature level enum must not exceed 8 values.
static_assert((int32)ERHIFeatureLevel::Num <= 8);
// The key must be exactly the size of the packed word.
static_assert(sizeof(FNaniteRasterMaterialCacheKey) == sizeof(uint32));
// Hash hook: the packed word is used directly as the hash.
inline uint32 GetTypeHash(const FNaniteRasterMaterialCacheKey& Key)
{
return Key.Packed;
}
// Cached per-key raster state: the materials, material proxies and shader references for the
// vertex, pixel and compute stages, plus optional overrides. Stored as the value type of
// FNaniteRasterEntry::CacheMap.
struct FNaniteRasterMaterialCache
{
// Non-owning material pointers, null until assigned.
const FMaterial* VertexMaterial = nullptr;
const FMaterial* PixelMaterial = nullptr;
const FMaterial* ComputeMaterial = nullptr;
// Non-owning material proxy pointers, null until assigned.
const FMaterialRenderProxy* VertexMaterialProxy = nullptr;
const FMaterialRenderProxy* PixelMaterialProxy = nullptr;
const FMaterialRenderProxy* ComputeMaterialProxy = nullptr;
// Shader references for the raster stages.
TShaderRef<FHWRasterizePS> RasterPixelShader;
TShaderRef<FHWRasterizeVS> RasterVertexShader;
TShaderRef<FHWRasterizeMS> RasterMeshShader;
TShaderRef<FMicropolyRasterizeCS> ClusterComputeShader;
TShaderRef<FMicropolyRasterizeCS> PatchComputeShader;
// Optional values; unset by default.
TOptional<uint32> MaterialBitFlags;
TOptional<FDisplacementScaling> DisplacementScaling;
TOptional<FDisplacementFadeRange> DisplacementFadeRange;
// NOTE(review): presumably set once the entry has been fully populated - confirm at the use site.
bool bFinalized = false;
};
// Value type of FNaniteRasterPipelineMap: a registered raster pipeline with its reference
// count, bin index and material cache.
struct FNaniteRasterEntry
{
// Declared mutable so the cache can be filled through a const entry.
mutable TMap<FNaniteRasterMaterialCacheKey, FNaniteRasterMaterialCache> CacheMap;
FNaniteRasterPipeline RasterPipeline{};
uint32 ReferenceCount = 0;
// 0xFFFF is the same "unset" default used by FNaniteRasterBin::BinIndex.
uint16 BinIndex = 0xFFFFu;
};
// Key functions for FNaniteRasterPipelineMap. Two keys match when their pipeline hashes are
// equal and they reference the same raster material.
struct FNaniteRasterEntryKeyFuncs : TDefaultMapHashableKeyFuncs<FNaniteRasterPipeline, FNaniteRasterEntry, false>
{
	static inline bool Matches(KeyInitType A, KeyInitType B)
	{
		// Different hashes can never be the same pipeline.
		if (A.GetPipelineHash() != B.GetPipelineHash())
		{
			return false;
		}
		// Equal hashes additionally require the same material pointer.
		return A.RasterMaterial == B.RasterMaterial;
	}

	static inline uint32 GetKeyHash(KeyInitType Key)
	{
		const uint32 PipelineHash = Key.GetPipelineHash();
		return PipelineHash;
	}
};

// Robin Hood hash map from raster pipeline to its registered entry.
using FNaniteRasterPipelineMap = Experimental::TRobinHoodHashMap<FNaniteRasterPipeline, FNaniteRasterEntry, FNaniteRasterEntryKeyFuncs>;
// Maps a raster bin index onto a contiguous range.
// Indices below RegularBinCount are returned unchanged. Any other index is mirrored from the top
// of the uint16 range (MAX_uint16 - BinIndex) and placed after the regular bins.
//
// The default constructor is public, so RegularBinCount carries a default member initializer:
// previously a default-constructed translator left it indeterminate and Translate() read garbage.
// With a count of 0, every index takes the mirrored path.
class FNaniteRasterBinIndexTranslator
{
public:
	FNaniteRasterBinIndexTranslator() = default;

	/**
	 * Translates a bin index.
	 * @param BinIndex  Index to translate.
	 * @return BinIndex itself when it is below RegularBinCount, otherwise
	 *         (MAX_uint16 - BinIndex) + RegularBinCount, narrowed to uint16.
	 */
	uint16 Translate(uint16 BinIndex) const
	{
		return BinIndex < RegularBinCount ? BinIndex : RevertBinIndex(BinIndex) + RegularBinCount;
	}

private:
	// Only FNaniteRasterPipelines may construct a translator with a real bin count.
	friend class FNaniteRasterPipelines;

	// Number of regular bins; zero until supplied through the private constructor.
	uint32 RegularBinCount = 0;

	FNaniteRasterBinIndexTranslator(uint32 InRegularBinCount)
		: RegularBinCount(InRegularBinCount)
	{}

	// Mirrors an index from the top of the uint16 range.
	static uint16 RevertBinIndex(uint16 BinIndex)
	{
		return MAX_uint16 - BinIndex;
	}
};
// Registry of raster pipelines and the bins allocated for them.
class FNaniteRasterPipelines
{
public:
	using FRasterHash = Experimental::FHashType;
	using FRasterId = Experimental::FHashElementId;

	FNaniteRasterPipelines();
	~FNaniteRasterPipelines();

	// Fixed function bin management.
	void AllocateFixedFunctionBins();
	void ReleaseFixedFunctionBins();
	void ReloadFixedFunctionBins();

	// Bin allocation and queries.
	uint16 AllocateBin(bool bPerPixelEval);
	void ReleaseBin(uint16 BinIndex);
	bool IsBinAllocated(uint16 BinIndex) const;
	uint32 GetRegularBinCount() const;
	uint32 GetBinCount() const;

	// Pipeline registration; Register returns the bin handle that Unregister later takes.
	FNaniteRasterBin Register(const FNaniteRasterPipeline& InRasterPipeline);
	void Unregister(const FNaniteRasterBin& InRasterBin);

	/** Read-only access to the registered pipelines. */
	inline const FNaniteRasterPipelineMap& GetRasterPipelineMap() const
	{
		return PipelineMap;
	}

	/** Builds an index translator from the current regular bin count. */
	inline FNaniteRasterBinIndexTranslator GetBinIndexTranslator() const
	{
		const uint32 RegularBinCount = GetRegularBinCount();
		return FNaniteRasterBinIndexTranslator(RegularBinCount);
	}

	/**
	 * These "Custom Pass" methods allow for a rasterization pass that renders a subset of the objects in the mesh pass that
	 * registered these pipelines, and aims to exclude rasterizing unused bins for performance (e.g. Custom Depth pass).
	 **/
	void RegisterBinForCustomPass(uint16 BinIndex);
	void UnregisterBinForCustomPass(uint16 BinIndex);
	bool ShouldBinRenderInCustomPass(uint16 BinIndex) const;

private:
	// Allocation bitmaps, kept separately for regular and per-pixel-eval bins.
	TBitArray<> PipelineBins;
	TBitArray<> PerPixelEvalPipelineBins;

	// Custom pass reference counts, again split between regular and per-pixel-eval bins.
	TArray<uint32> CustomPassRefCounts;
	TArray<uint32> PerPixelEvalCustomPassRefCounts;

	FNaniteRasterPipelineMap PipelineMap;

	// A fixed function bin together with the mask it was created for.
	struct FFixedFunctionBin
	{
		FNaniteRasterBin RasterBin;
		uint8 BinMask;
	};

	TArray<FFixedFunctionBin, TInlineAllocator<6u>> FixedFunctionBins;
};
// Handle identifying a registered shading bin. A default-constructed handle
// (BinId == INDEX_NONE, BinIndex == 0xFFFF) is the "invalid" value.
struct FNaniteShadingBin
{
	int32 BinId = INDEX_NONE;
	uint16 BinIndex = 0xFFFFu;

	/** Two handles are equal when both the id and the index agree. */
	inline bool operator==(const FNaniteShadingBin& Other) const
	{
		if (BinId != Other.BinId)
		{
			return false;
		}
		return BinIndex == Other.BinIndex;
	}

	inline bool operator!=(const FNaniteShadingBin& Other) const
	{
		const bool bSame = (*this == Other);
		return !bSame;
	}

	/** Valid means "differs from a default-constructed handle". */
	inline bool IsValid() const
	{
		const FNaniteShadingBin InvalidBin{};
		return !(*this == InvalidBin);
	}
};
// Forward declarations for the types held through TPimplPtr in FNaniteShadingPipeline.
struct FNaniteBasePassData;
struct FNaniteLumenCardData;
struct FNaniteMaterialCacheData;
class FMeshDrawShaderBindings;
// Description of a Nanite shading pipeline: pass-specific data, material, shaders and flags.
// Used as the key type of FNaniteShadingPipelineMap.
struct FNaniteShadingPipeline
{
// Pass-specific payloads held through pimpl pointers in deep-copy mode (the types are only forward declared here).
TPimplPtr<FNaniteBasePassData, EPimplPtrMode::DeepCopy> BasePassData;
TPimplPtr<FNaniteLumenCardData, EPimplPtrMode::DeepCopy> LumenCardData;
TPimplPtr<FNaniteMaterialCacheData, EPimplPtrMode::DeepCopy> MaterialCacheData;
TPimplPtr<FMeshDrawShaderBindings, EPimplPtrMode::DeepCopy> ShaderBindings;
// Raw pointers, null by default.
const FMaterialRenderProxy* MaterialProxy = nullptr;
const FMaterial* Material = nullptr;
FRHIComputeShader* ComputeShader = nullptr;
FRHIWorkGraphShader* WorkGraphShader = nullptr;
#if WITH_DEBUG_VIEW_MODES
// Only present when debug view modes are compiled in.
uint32 InstructionCount = 0;
uint32 LWCComplexity = 0;
#endif
uint32 BoundTargetMask = 0u;
uint32 ShaderBindingsHash = 0u;
uint32 MaterialBitFlags = 0x0u;
// Shading flags
// The three flag bits plus 13 padding bits alias the 16-bit ShadingFlagsHash word, which is
// zero-initialized and is what GetPipelineHash() feeds into the hash.
union
{
struct
{
uint16 bIsTwoSided : 1;
uint16 bIsMasked : 1;
uint16 bNoDerivativeOps : 1;
uint16 bPadding : 13;
};
uint16 ShadingFlagsHash = 0;
};
// Hashes the material proxy address, the shading flags, the bound target mask and the shader
// bindings hash, in that order, then folds the 64-bit result into 32 bits.
inline uint32 GetPipelineHash() const
{
// Ignoring the lower 4 bits since they are likely zero anyway.
// Higher bits are more significant in 64 bit builds.
uint64 PipelineHash = uint64(reinterpret_cast<UPTRINT>(MaterialProxy) >> 4);
// Combine shader flags hash and material hash
PipelineHash = CityHash128to64({ PipelineHash, ShadingFlagsHash });
// Combine with bound target mask
PipelineHash = CityHash128to64({ PipelineHash, BoundTargetMask });
// Combine with shader bindings hash
PipelineHash = CityHash128to64({ PipelineHash, ShaderBindingsHash });
// Fold the low and high 32-bit halves together.
return HashCombineFast(uint32(PipelineHash & 0xFFFFFFFF), uint32((PipelineHash >> 32) & 0xFFFFFFFF));
}
// Hash hook used by the engine containers; simply forwards to GetPipelineHash().
FORCENOINLINE friend uint32 GetTypeHash(const FNaniteShadingPipeline& Other)
{
return Other.GetPipelineHash();
}
};
// Value type of FNaniteShadingPipelineMap: a shared shading pipeline with its reference
// count and bin index.
struct FNaniteShadingEntry
{
// Shared ownership; FNaniteShadingCommand::Pipeline uses the same pointer type.
TSharedPtr<FNaniteShadingPipeline> ShadingPipeline;
uint32 ReferenceCount = 0;
// 0xFFFF is the same "unset" default used by FNaniteShadingBin::BinIndex.
uint16 BinIndex = 0xFFFFu;
};
// Key functions for FNaniteShadingPipelineMap. Two keys match when their pipeline hashes are
// equal and they reference the same material proxy.
struct FNaniteShadingEntryKeyFuncs : TDefaultMapHashableKeyFuncs<FNaniteShadingPipeline, FNaniteShadingEntry, false>
{
	static inline bool Matches(KeyInitType A, KeyInitType B)
	{
		// Different hashes can never be the same pipeline.
		if (A.GetPipelineHash() != B.GetPipelineHash())
		{
			return false;
		}
		// Equal hashes additionally require the same material proxy pointer.
		return A.MaterialProxy == B.MaterialProxy;
	}

	static inline uint32 GetKeyHash(KeyInitType Key)
	{
		const uint32 PipelineHash = Key.GetPipelineHash();
		return PipelineHash;
	}
};

// Robin Hood hash map from shading pipeline to its registered entry.
using FNaniteShadingPipelineMap = Experimental::TRobinHoodHashMap<FNaniteShadingPipeline, FNaniteShadingEntry, FNaniteShadingEntryKeyFuncs>;
// Registry of shading pipelines and the bins allocated for them.
class FNaniteShadingPipelines
{
public:
	using FShadingHash = Experimental::FHashType;
	using FShadingId = Experimental::FHashElementId;

	FNaniteShadingPipelines();
	~FNaniteShadingPipelines();

	// Bin allocation and queries.
	uint16 AllocateBin();
	void ReleaseBin(uint16 BinIndex);
	bool IsBinAllocated(uint16 BinIndex) const;
	uint32 GetBinCount() const;

	// Pipeline registration; Register returns the bin handle that Unregister later takes.
	FNaniteShadingBin Register(const FNaniteShadingPipeline& InShadingPipeline);
	void Unregister(const FNaniteShadingBin& InShadingBin);

	/** Read-only access to the registered pipelines. */
	inline const FNaniteShadingPipelineMap& GetShadingPipelineMap() const
	{
		return PipelineMap;
	}

	// Public state: starts out true; read and written by code outside this header.
	bool bBuildCommands = true;
	FPrimitiveViewRelevance CombinedRelevance;

	// Id list maintenance and access.
	void BuildIdList();
	const TConstArrayView<const FShadingId> GetIdList() const;

	void ComputeRelevance(ERHIFeatureLevel::Type InFeatureLevel);

private:
	TBitArray<> PipelineBins;
	FNaniteShadingPipelineMap PipelineMap;
	TArray<FShadingId> ShadingIdList;
	bool bBuildIdList = true;
};
// One shading command: the pipeline to run, its pass data, the shading bin and visibility state.
struct FNaniteShadingCommand
{
// Shared with the owning FNaniteShadingEntry (same pointer type).
TSharedPtr<FNaniteShadingPipeline> Pipeline;
FUint32Vector4 PassData;
// 0xFFFF is the "unset" default, matching FNaniteShadingBin::BinIndex.
uint16 ShadingBin = 0xFFFFu;
bool bVisible = true;
// The PSO precache state - updated at dispatch time and can be used to skip command when still precaching
EPSOPrecacheResult PSOPrecacheState = EPSOPrecacheResult::Unknown;
};
// Collection of shading commands together with lookup data and the tasks that build them.
struct FNaniteShadingCommands
{
using FMetaBufferArray = TArray<FUintVector4, SceneRenderingAllocator>;
uint32 MaxShadingBin = 0u;
uint32 NumCommands = 0u;
uint32 BoundTargetMask = 0x0u;
FShaderBundleRHIRef ShaderBundle;
TArray<FNaniteShadingCommand> Commands;
// NOTE(review): presumably maps a shading bin to an index into Commands - confirm where it is filled.
TArray<int32> CommandLookup;
FMetaBufferArray MetaBufferData;
// Task handles; NOTE(review): consumers presumably need to wait on these before reading the arrays - confirm.
UE::Tasks::FTask SetupTask;
UE::Tasks::FTask BuildCommandsTask;
};
// Returns whether Nanite should be rendered for the given scene and view.
// bCheckForAtomicSupport defaults to true; the exact checks live in the implementation file.
extern bool ShouldRenderNanite(const FScene* Scene, const FViewInfo& View, bool bCheckForAtomicSupport = true);
/** Checks whether Nanite would be rendered in this view. Used to give a visual warning about the project settings that can disable Nanite. */
extern bool WouldRenderNanite(const FScene* Scene, const FViewInfo& View, bool bCheckForAtomicSupport = true, bool bCheckForProjectSetting = true);
// Returns whether depth export uses compute; the criteria are defined in the implementation file.
extern bool UseComputeDepthExport();