Files
UnrealEngine/Engine/Shaders/Private/Nanite/NaniteCullingCommon.ush
2025-05-18 13:04:45 +08:00

505 lines
20 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "../Common.ush"
#if VIRTUAL_TEXTURE_TARGET
#include "../VirtualShadowMaps/VirtualShadowMapPageOverlap.ush"
#endif
#ifndef CULLING_PASS
#define CULLING_PASS CULLING_PASS_NO_OCCLUSION
#endif
// Allow enabling this code path for only the non-nanite rendering
#ifndef NANITE_CULLING_ENABLE_MIN_RADIUS_CULL
#define NANITE_CULLING_ENABLE_MIN_RADIUS_CULL 0
#endif
#include "../SceneData.ush"
#include "NaniteDataDecode.ush"
#include "NaniteHZBCull.ush"
#include "NaniteCulling.ush"
#include "NaniteSceneCommon.ush"
// Bounds used when culling a hierarchy node or a cluster: a box, a sphere and two deformation scale factors.
// Filled in by the InitNodeCullingBounds() overloads.
struct FNodeCullingBounds
{
// Box bounds center (copied from BoxBoundsCenter of the node slice / cluster).
float3 BoxCenter;
// Box bounds extent (copied from BoxBoundsExtent of the node slice / cluster).
float3 BoxExtent;
// Copied from LODBounds of the node slice / cluster.
float4 Sphere; // XYZ = Center, W = Radius
// These values are the approximate amount of scaling due to deformation (such as spline meshes). This is used when
// scaling error for culling. One is the smallest average dimension for the whole mesh and is used to scale error,
// which must necessarily be uniform between a parent node and its children or it's possible the two may not agree on
// visibility, resulting in missing clusters. The second is the largest deformation dimension for the node, and is
// used to scale the edge length when determining if the cluster should be rendered in HW or SW, as we can use a more
// localized value to get more accurate results.
// Both are initialized to 1.0 (no deformation scaling) by the InitNodeCullingBounds() overloads.
float MeshMinDeformScale;
float NodeMaxDeformScale;
};
// Builds culling bounds for a hierarchy node slice. The box and sphere are copied straight from the slice and
// both deformation scales start out at 1 (no deformation).
FNodeCullingBounds InitNodeCullingBounds(FHierarchyNodeSlice HierarchyNodeSlice)
{
	FNodeCullingBounds Result;

	Result.BoxCenter = HierarchyNodeSlice.BoxBoundsCenter;
	Result.BoxExtent = HierarchyNodeSlice.BoxBoundsExtent;
	Result.Sphere = HierarchyNodeSlice.LODBounds;

	Result.MeshMinDeformScale = 1.0f;
	Result.NodeMaxDeformScale = 1.0f;

	return Result;
}
// Builds culling bounds for a cluster. The box and sphere are copied from the cluster and both deformation scales
// start out at 1. If the visible cluster is an assembly part, the bounds are additionally transformed by the
// part's assembly transform.
FNodeCullingBounds InitNodeCullingBounds(FInstanceSceneData InstanceData, FVisibleCluster VisibleCluster, FCluster Cluster)
{
	FNodeCullingBounds Result;

	Result.BoxCenter = Cluster.BoxBoundsCenter;
	Result.BoxExtent = Cluster.BoxBoundsExtent;
	Result.Sphere = Cluster.LODBounds;

	Result.MeshMinDeformScale = 1.0f;
	Result.NodeMaxDeformScale = 1.0f;

	// Cluster culling bounds of assembly parts must be transformed
	if (IsAssemblyPartCluster(VisibleCluster))
	{
		const float4x4 PartTransform = LoadNaniteAssemblyTransform(InstanceData, VisibleCluster);
		const float3x3 PartScaleRot = (float3x3)PartTransform;

		// The sphere radius is scaled by the longest row of the upper 3x3 so the sphere stays conservative
		const float RowLengthSq0 = length2(PartScaleRot[0]);
		const float RowLengthSq1 = length2(PartScaleRot[1]);
		const float RowLengthSq2 = length2(PartScaleRot[2]);
		const float LargestRowLength = sqrt(max3(RowLengthSq0, RowLengthSq1, RowLengthSq2));

		// Centers are transformed as points; the box extent goes through the absolute value of the upper 3x3
		Result.BoxCenter = mul(float4(Result.BoxCenter, 1.0f), PartTransform).xyz;
		Result.BoxExtent = mul(Result.BoxExtent, abs(PartScaleRot));
		Result.Sphere.xyz = mul(float4(Result.Sphere.xyz, 1.0f), PartTransform).xyz;
		Result.Sphere.w *= LargestRowLength;
	}

	return Result;
}
// Evaluates the view's translated global clip plane equation at the given translated world position,
// i.e. dot( Plane, (Position, 1) ).
float GetGlobalClipPlaneDistance(FNaniteView NaniteView, float3 TranslatedWorldPosition)
{
	const float4 HomogeneousPosition = float4(TranslatedWorldPosition, 1.0f);
	return dot(NaniteView.TranslatedGlobalClipPlane, HomogeneousPosition);
}
struct FBoxCull
{
FNaniteView NaniteView;
float3 LocalBoxCenter;
float3 LocalBoxExtent;
float4 NonUniformScale;
float4x4 LocalToTranslatedWorld;
float4x4 PrevLocalToTranslatedWorld;
bool bIsOrtho;
bool bNearClip;
bool bViewHZB;
bool bDistanceCull;
bool bIsViewUncached;
bool bSkipCullFrustum;
bool bSkipCullHZB;
bool bSkipCullGlobalClipPlane;
bool bSkipDrawDistance;
bool bSkipWPODisableDistance;
bool bSkipPixelProgrammableDistance;
bool bSkipDisplacementFadeOutDistance;
bool bIsVisible;
bool bWasOccluded;
bool bNeedsClipping;
bool bEnableWPO;
bool bFallbackRaster;
bool bMinScreenRadiusCull;
#if VIRTUAL_TEXTURE_TARGET
uint PageFlagMask;
bool bDetailGeometry;
uint4 RectPages;
// controls which HZB layer is used and whether the DynamicDepthCullRange is used.
// static HZB is always safe to use but cached static geometry should not use the dynamic HZB or we get incorrect cached pages
bool bIsStaticGeometry;
// Maps to NANITE_VIEW_FLAG_USE_RECEIVER_MASK, in the future may be driven differently (always on for dynamic paths).
bool bUseReceiverMask;
// Extra depth range used to cull dynamic geometry more tightly than static geometry. This allows a large depth range for "scrolling" cached pages while still culling dynamic geometry tightly.
float2 DynamicDepthCullRange;
#endif
// Resets the culling state for one box: stores the inputs, derives the per-view flags, sets up the debug
// "skip" switches and clears all results (the box starts out visible).
void Init(
	const FNaniteView InNaniteView,
	const float3 InLocalBoxCenter,
	const float3 InLocalBoxExtent,
	const float4 InNonUniformScale,
	const float4x4 InLocalToTranslatedWorld,
	const float4x4 InPrevLocalToTranslatedWorld )
{
	// Inputs
	NaniteView = InNaniteView;
	LocalBoxCenter = InLocalBoxCenter;
	LocalBoxExtent = InLocalBoxExtent;
	NonUniformScale = InNonUniformScale;
	LocalToTranslatedWorld = InLocalToTranslatedWorld;
	PrevLocalToTranslatedWorld = InPrevLocalToTranslatedWorld;

	// Results: visible until one of the tests says otherwise
	bIsVisible = true;
	bWasOccluded = false;
	bNeedsClipping = false;
	bEnableWPO = false;
	bFallbackRaster = false;

	// State derived from the view
	bIsOrtho = IsOrthoProjection( NaniteView.ViewToClip );
	bNearClip = (NaniteView.Flags & NANITE_VIEW_FLAG_NEAR_CLIP) != 0u;
	bViewHZB = (NaniteView.Flags & NANITE_VIEW_FLAG_HZBTEST) != 0u;
	bDistanceCull = (NaniteView.Flags & NANITE_VIEW_FLAG_DISTANCE_CULL) != 0u;
	bIsViewUncached = (NaniteView.Flags & NANITE_VIEW_FLAG_UNCACHED) != 0u;
	// The min screen radius cull needs both the compile time switch and the view flag
	bMinScreenRadiusCull = NANITE_CULLING_ENABLE_MIN_RADIUS_CULL != 0 && (NaniteView.Flags & NANITE_VIEW_MIN_SCREEN_RADIUS_CULL) != 0u;

	// No test is skipped by default...
	bSkipCullFrustum = false;
	bSkipCullHZB = false;
	bSkipCullGlobalClipPlane = false;
	bSkipDrawDistance = false;
	bSkipWPODisableDistance = false;
	bSkipPixelProgrammableDistance = false;
	bSkipDisplacementFadeOutDistance = false;

#if DEBUG_FLAGS
	// ...unless the debug flags ask for it
	bSkipCullFrustum = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_CULL_FRUSTUM) != 0u;
	bSkipCullHZB = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_CULL_HZB) != 0u;
	bSkipCullGlobalClipPlane = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_CULL_GLOBAL_CLIP_PLANE) != 0u;
	bSkipDrawDistance = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_CULL_DRAW_DISTANCE) != 0u;
	bSkipWPODisableDistance = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_WPO_DISABLE_DISTANCE) != 0u;
	bSkipPixelProgrammableDistance = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_PIXEL_PROGRAMMABLE_DISTANCE) != 0u;
	bSkipDisplacementFadeOutDistance = (DebugFlags & NANITE_DEBUG_FLAG_DISABLE_DISPLACEMENT_FADE_OUT_DISTANCE) != 0u;
#endif

#if MATERIAL_CACHE
	// The material cache always skips these tests, regardless of the debug flags
	bSkipCullFrustum = true;
	bSkipCullHZB = true;
	bSkipCullGlobalClipPlane = true;
	bSkipDrawDistance = true;
#endif // MATERIAL_CACHE

#if VIRTUAL_TEXTURE_TARGET
	PageFlagMask = ~0u;
	// Whether we consider this "detail geometry", i.e. not rendered into coarse pages
	bDetailGeometry = false;
	// Rect of overlapped virtual pages, is inclusive (as in zw is max, not max + 1)
	RectPages = uint4(1U, 1U, 0U, 0U);
	// Always safe to use the static occlusion HZB (when result may be cached) but better to use the dynamic (combined) as it has more info if not cached.
	bIsStaticGeometry = true;
	bUseReceiverMask = (NaniteView.Flags & NANITE_VIEW_FLAG_USE_RECEIVER_MASK) != 0u;
	DynamicDepthCullRange = NaniteView.DynamicDepthCullRange;
#endif
}
// Tests the box against the view's global clip plane (only compiled in with USE_GLOBAL_CLIP_PLANE).
// Clears bIsVisible when the box is entirely on the negative side of the plane and sets bNeedsClipping
// when the box straddles the plane.
void GlobalClipPlane()
{
#if USE_GLOBAL_CLIP_PLANE
	BRANCH
	if( bIsVisible )
	{
		// Prevent the result being "intersecting" when the global plane is invalid (effectively disabled). This prevents clusters that
		// should rasterize in SW from being sent down the HW path
		const bool bPlaneDisabled = all(NaniteView.TranslatedGlobalClipPlane.xyz == (float3)0.0f);
		if (!bSkipCullGlobalClipPlane && !bPlaneDisabled)
		{
			// Get the global clipping plane in local space (multiply by inverse transpose)
			const float4 LocalPlane = mul(LocalToTranslatedWorld, NaniteView.TranslatedGlobalClipPlane);

			// AABB/Plane intersection test: compare the plane equation at the box center against the box extent projected on the plane normal
			const float3 BoxExtentScaled = LocalBoxExtent * NonUniformScale.xyz;
			const float ProjectedExtent = dot(abs(BoxExtentScaled * LocalPlane.xyz), (float3)1.0f);
			const float CenterPlaneDist = dot(LocalPlane, float4(LocalBoxCenter, 1.0f));

			if (CenterPlaneDist < -ProjectedExtent)
			{
				// Entirely behind the plane
				bIsVisible = false;
			}
			else if (CenterPlaneDist < ProjectedExtent)
			{
				// Straddles the plane
				bNeedsClipping = true;
			}
		}
	}
#endif
}
/*
* Helper to collect shared data between the different flavors of distance testing.
* Not moving this data to the parent class as it changes life times, potentially to the detriment of register allocation.
*/
struct FDistanceCullSetup
{
// LocalBoxCenter transformed by LocalToTranslatedWorld (filled in by ViewDistanceInternal).
float3 CenterTranslatedWorld;
// Length of LocalBoxExtent scaled by NonUniformScale.xyz (filled in by ViewDistanceInternal).
float Radius;
};
// Computes the translated world center and the radius of the box and returns them for reuse by the callers.
// When bTestViewDistance is set, bIsVisible is overwritten with the result of the
// NaniteView.RangeBasedCullingDistance test (callers only invoke this while the box is still visible).
FDistanceCullSetup ViewDistanceInternal(bool bTestViewDistance)
{
	FDistanceCullSetup Setup;
	Setup.CenterTranslatedWorld = mul( float4( LocalBoxCenter, 1.0f ), LocalToTranslatedWorld ).xyz;
	Setup.Radius = length( LocalBoxExtent * NonUniformScale.xyz );

	if( bTestViewDistance )
	{
		// Compare squared values: visible while the center is no further away than the culling distance plus the radius
		const float CenterDistSq = length2( Setup.CenterTranslatedWorld );
		const float MaxVisibleDistSq = Square( NaniteView.RangeBasedCullingDistance + Setup.Radius );
		bIsVisible = CenterDistSq <= MaxVisibleDistSq;
	}

	return Setup;
}
/**
* Only NaniteView.RangeBasedCullingDistance test, useful where there is no primitive available.
*/
void Distance()
{
	// Nothing to do if the box is already culled or the view has distance culling turned off
	const bool bRunViewDistanceTest = bIsVisible && bDistanceCull;

	BRANCH
	if( bRunViewDistanceTest )
	{
		// Only the visibility side effect is of interest, the returned setup is not needed here
		ViewDistanceInternal(true);
	}
}
/**
* Both NaniteView.RangeBasedCullingDistance and draw distance test based on center point
* as well as ProgrammableRasterDistance
*/
void Distance( FPrimitiveSceneData PrimitiveData )
{
	BRANCH
	if( bIsVisible )
	{
		// View range test (if enabled for the view); also yields the center / radius shared by the tests below
		FDistanceCullSetup Setup = ViewDistanceInternal(bDistanceCull);

		const float3 OriginToCenter = Setup.CenterTranslatedWorld - NaniteView.CullingViewOriginTranslatedWorld;
		const float CenterDistSq = length2( OriginToCenter );

		// Check to cull the instance based on its distance from the main view's location (per-primitive setting)
		// Note: tests just the center-point.
		const bool bHasInstanceDrawDistance = (PrimitiveData.Flags & PRIMITIVE_SCENE_DATA_FLAG_INSTANCE_DRAW_DISTANCE_CULL) != 0u;
		if ( bIsVisible && bHasInstanceDrawDistance && !bSkipDrawDistance )
		{
			const float NearLimitSq = PrimitiveData.InstanceDrawDistanceMinMaxSquared.x;
			const float FarLimitSq = PrimitiveData.InstanceDrawDistanceMinMaxSquared.y;
			const bool bBeyondNearLimit = CenterDistSq >= NearLimitSq;
			const bool bWithinFarLimit = CenterDistSq <= FarLimitSq;
			bIsVisible = bBeyondNearLimit && bWithinFarLimit;
		}

		// Always evaluated, it resets bEnableWPO / bFallbackRaster even when the box has just been culled
		ProgrammableRasterDistanceInternal( PrimitiveData, Setup.CenterTranslatedWorld, Setup.Radius, CenterDistSq );
	}
}
/**
* Both NaniteView.RangeBasedCullingDistance and conservative draw distance test including the radius (as opposed to using the center point).
* Does not perform the ProgrammableRasterDistance.
*/
// bDrawDistance   - whether the min/max draw distance test should be performed at all.
// MinDistanceSq   - squared minimum draw distance; the box is culled only if its farthest point is closer than this.
// MaxDistanceSq   - squared maximum draw distance; the box is culled only if its nearest point is further away than this.
// Writes bIsVisible.
void DistanceConservative( bool bDrawDistance, float MinDistanceSq, float MaxDistanceSq)
{
	BRANCH
	if( bIsVisible )
	{
		FDistanceCullSetup DistanceCullSetup = ViewDistanceInternal(bDistanceCull);
		if ( bIsVisible && bDrawDistance && !bSkipDrawDistance )
		{
			const float InstanceDrawDist = length( DistanceCullSetup.CenterTranslatedWorld - NaniteView.CullingViewOriginTranslatedWorld);

			// Distance from the culling origin to the farthest point of the bounding sphere
			const float FarthestDist = InstanceDrawDist + DistanceCullSetup.Radius;

			// Distance to the nearest point of the bounding sphere. When the culling origin is inside the sphere this is
			// zero; without the clamp the negative difference would square to a large positive value and the box could
			// be rejected by the max distance test even though part of it is right at the origin.
			const float NearestDist = max( InstanceDrawDist - DistanceCullSetup.Radius, 0.0f );

			bIsVisible = Square(FarthestDist) >= MinDistanceSq && Square(NearestDist) <= MaxDistanceSq;
		}
	}
}
// Evaluates the WPO / fallback raster distance logic on its own, see ProgrammableRasterDistanceInternal.
void ProgrammableRasterDistance( FPrimitiveSceneData PrimitiveData )
{
	// Test programmable raster distance without instance draw distance.
	// Passing false skips the view range test, so only the shared center / radius are computed and bIsVisible is left alone.
	FDistanceCullSetup Setup = ViewDistanceInternal(false);

	const float3 OriginToCenter = Setup.CenterTranslatedWorld - NaniteView.CullingViewOriginTranslatedWorld;
	const float CenterDistSq = length2( OriginToCenter );

	ProgrammableRasterDistanceInternal( PrimitiveData, Setup.CenterTranslatedWorld, Setup.Radius, CenterDistSq );
}
// Decides, from the distance to the culling view origin, whether WPO stays enabled (bEnableWPO) and whether the
// fallback raster bins should be used (bFallbackRaster). Both outputs are always rewritten.
// CenterTranslatedWorld / Radius - bounding sphere of the box in translated world space.
// InstanceDrawDistSq             - squared distance between that center and the culling view origin.
void ProgrammableRasterDistanceInternal(
	FPrimitiveSceneData PrimitiveData,
	float3 CenterTranslatedWorld,
	float Radius,
	float InstanceDrawDistSq )
{
	const bool bPrimitiveEvaluatesWPO = (PrimitiveData.Flags & PRIMITIVE_SCENE_DATA_FLAG_EVALUATE_WORLD_POSITION_OFFSET) != 0u;
	const bool bHasWPODisableDistance = (PrimitiveData.Flags & PRIMITIVE_SCENE_DATA_FLAG_WPO_DISABLE_DISTANCE) != 0u;
	const float PixelProgrammableLimitSq = PrimitiveData.PixelProgrammableDistanceSquared;
	const float FadeOutSize = PrimitiveData.MaterialDisplacementFadeOutSize;

	// Disabling WPO only switches to the fallback bins if neither of the other two mechanisms is in use
	const bool bWPODrivesFallback = PixelProgrammableLimitSq <= 0.0f && FadeOutSize <= 0.0f;

	// Ensure WPO can be disabled at the primitive level
	bEnableWPO = bPrimitiveEvaluatesWPO && bIsVisible;
	bFallbackRaster = false;

	BRANCH
	if ( bIsVisible )
	{
		// Check to disable the instance's WPO based on distance from the main view's location
		if ( bEnableWPO && bHasWPODisableDistance && !bSkipWPODisableDistance )
		{
			const bool bInsideWPORange = InstanceDrawDistSq < PrimitiveData.InstanceWPODisableDistanceSquared;
			bEnableWPO = bInsideWPORange;
			bFallbackRaster = bWPODrivesFallback && !bInsideWPORange;
		}

		// Determine if we should use fallback raster bins due to disabling pixel programmable raster
		if ( PixelProgrammableLimitSq > 0.0f && !bSkipPixelProgrammableDistance )
		{
			bFallbackRaster = InstanceDrawDistSq > PixelProgrammableLimitSq;
		}

		// Also check to use fallback raster bins as a method of disabling tessellation when the object
		// is at a safe enough distance for all displacement to have been completely faded out
		if ( !bFallbackRaster && FadeOutSize > 0.0f && !bSkipDisplacementFadeOutDistance )
		{
			// Depth of the closest point of the bounding sphere along the view direction, clamped to zero
			const float NearestDepth = max(dot( NaniteView.ViewForward, CenterTranslatedWorld ) - Radius, 0.0f);
			const float ScaledMaxDisplacement = NonUniformScale.w * GetAbsMaxMaterialDisplacement(PrimitiveData);
			const float ProjectedDisplacement = GetProjectedEdgeLengthAtDepth( ScaledMaxDisplacement, NearestDepth, NaniteView );

			bFallbackRaster = ProjectedDisplacement <= FadeOutSize;
		}
	}
}
// Runs the frustum test for the current frame, folds the result into bIsVisible / bNeedsClipping and returns
// the raw frustum cull data for use by later tests.
FFrustumCullData Frustum()
{
	// Frustum test against current frame
	FFrustumCullData Result = BoxCullFrustum( LocalBoxCenter, LocalBoxExtent, LocalToTranslatedWorld, NaniteView.TranslatedWorldToClip, NaniteView.ViewToClip, bIsOrtho, bNearClip, bSkipCullFrustum );

	// Visibility can only be taken away here, never restored
	if( !Result.bIsVisible )
	{
		bIsVisible = false;
	}

	// Likewise, clipping can only be requested, never cancelled
	if( Result.bCrossesNearPlane || Result.bCrossesFarPlane )
	{
		bNeedsClipping = true;
	}

#if VIRTUAL_TEXTURE_TARGET
	// Non-static geometry is additionally culled against the dynamic depth range
	if (bIsVisible && !bIsStaticGeometry)
	{
		const bool bAboveRangeMin = Result.RectMax.z > DynamicDepthCullRange.x;
		const bool bBelowRangeMax = Result.RectMin.z < DynamicDepthCullRange.y;
		bIsVisible = bAboveRangeMin && bBelowRangeMax;
	}
#endif

	return Result;
}
// Culls the box by its approximate size on screen and returns the updated bIsVisible.
// The [MinScreenSize, MaxScreenSize) range test is only performed when the two values differ; a MaxScreenSize
// of 0 means there is no upper limit. With NANITE_CULLING_ENABLE_MIN_RADIUS_CULL the per-view minimum screen
// radius test is performed first (if enabled for the view).
bool ScreenSize(float MinScreenSize, float MaxScreenSize)
{
	const bool bHasScreenSizeRange = MinScreenSize != MaxScreenSize;

	BRANCH
	if (bIsVisible && (bHasScreenSizeRange || bMinScreenRadiusCull))
	{
		// Needs to match C++ logic in ComputeLODForMeshes() and ComputeBoundsScreenRadiusSquared() so that culling matches the submitted range of Lods.
		// Differences to that code which shouldn't affect the result are:
		// * ScreenMultiple doesn't include the factor of 0.5f, and so it doesn't need applying to the ScreenSize.
		// * ScreenMultiple has the inverse LODDistanceScale baked in, and so it doesn't need applying to the ScreenSize.
		const float3 BoxCenterTranslatedWorld = mul(float4(LocalBoxCenter, 1.0f), LocalToTranslatedWorld).xyz;
		const float DistToOriginSq = length2(BoxCenterTranslatedWorld - NaniteView.CullingViewOriginTranslatedWorld);
		const float BoundsRadiusSq = length2( LocalBoxExtent * NonUniformScale.xyz );

		// Without the min radius cull this point is only reached when the range test is enabled
		bool bRunRangeTest = true;

#if NANITE_CULLING_ENABLE_MIN_RADIUS_CULL
		// implements: ScreenRadius < MinRadius, where ScreenRadius = Radius / (LodDistanceFactor * Distance)
		if (bMinScreenRadiusCull && BoundsRadiusSq < NaniteView.CullingViewMinRadiusTestFactorSq * DistToOriginSq)
		{
			bIsVisible = false;
			return false;
		}

		// Only perform the range test if enabled
		bRunRangeTest = bHasScreenSizeRange;
#endif

		if (bRunRangeTest)
		{
			// The distance is clamped to at least 1 before dividing
			const float ProjectedSizeSq = NaniteView.CullingViewScreenMultipleSq * BoundsRadiusSq / max(DistToOriginSq, 1.0f);
			const bool bLargeEnough = ProjectedSizeSq >= MinScreenSize * MinScreenSize;
			const bool bSmallEnough = MaxScreenSize == 0 || ProjectedSizeSq < MaxScreenSize * MaxScreenSize;
			bIsVisible = bLargeEnough && bSmallEnough;
		}
	}

	return bIsVisible;
}
// Screen rect and HZB occlusion tests.
// * Builds the screen rect from the given frustum cull result and culls the box (bIsVisible) if the rect does not
//   overlap any pixel center.
// * With VIRTUAL_TEXTURE_TARGET: clamps the rect, culls the box if it overlaps no valid page and stores the page
//   rect in RectPages.
// * Depending on CULLING_PASS: sets bWasOccluded from an HZB test using either the previous frame transform (main
//   pass, or the no-occlusion pass when VIRTUAL_TEXTURE_TARGET is set) or the current frame rect (post pass).
//   bWasOccluded is only written, bIsVisible is not changed by the occlusion test itself.
// FrustumCull       - frustum cull result for the current frame (FrustumHZB() passes the result of Frustum()).
// bClampToPageLevel - forwarded to IsVisibleMaskedHZB; only used in the VIRTUAL_TEXTURE_TARGET paths.
void HZB( FFrustumCullData FrustumCull, bool bClampToPageLevel )
{
// Our screen rect needs to be in HZB space, which is 0,0-based for all views, unless we're doing multiple views in one nanite pass
#if NANITE_MULTI_VIEW
FScreenRect Rect = GetScreenRect( NaniteView.ViewRect, FrustumCull, 4 );
#else
// Same size as the view rect, but moved so that it starts at 0,0
int4 HZBRect = int4( 0, 0, NaniteView.ViewRect.z - NaniteView.ViewRect.x, NaniteView.ViewRect.w - NaniteView.ViewRect.y );
FScreenRect Rect = GetScreenRect( HZBRect, FrustumCull, 4 );
#endif
bIsVisible = bIsVisible && Rect.bOverlapsPixelCenter;
#if VIRTUAL_TEXTURE_TARGET
BRANCH
if( bIsVisible )
{
FVirtualShadowMapHandle VirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(NaniteView.TargetLayerIndex);
// Clamp the screen rect to the mapped page area.
Rect = VirtualShadowMapGetUncachedScreenRect( Rect, VirtualShadowMapHandle, NaniteView.TargetMipLevel );
bIsVisible = OverlapsAnyValidPage( VirtualShadowMapHandle, NaniteView.TargetMipLevel, Rect, PageFlagMask, bDetailGeometry, bUseReceiverMask );
// Store the page rect for any later uses (cluster culling uses it)
RectPages = VirtualShadowMapGetPageRect( Rect );
}
#endif
// VSM supports one-pass occlusion culling hijacking CULLING_PASS_NO_OCCLUSION (using only previous-frame with artifacts as a result), hence the || here
#if (CULLING_PASS == CULLING_PASS_NO_OCCLUSION && VIRTUAL_TEXTURE_TARGET) || CULLING_PASS == CULLING_PASS_OCCLUSION_MAIN
// Only true in the main occlusion pass; only consumed by the VIRTUAL_TEXTURE_TARGET path below
const bool bTreatUnmappedAsOccluded = (CULLING_PASS == CULLING_PASS_OCCLUSION_MAIN);
// HZB occlusion test against previous frame
BRANCH
if( bIsVisible && !bSkipCullHZB && bViewHZB )
{
// Still HZB test if previously outside frustum. Clamped rect HZB provides a better guess for occlusion than assuming true or false.
// Post pass will clean up bad guesses anyways. This improves overdraw at edges of screen for moving cameras.
const bool bSkipPrevCullFrustum = true;
const bool bPrevIsOrtho = IsOrthoProjection( NaniteView.PrevViewToClip );
FFrustumCullData PrevFrustumCull = BoxCullFrustum( LocalBoxCenter, LocalBoxExtent, PrevLocalToTranslatedWorld, NaniteView.PrevTranslatedWorldToClip, NaniteView.PrevViewToClip, bPrevIsOrtho, bNearClip, bSkipPrevCullFrustum );
// No occlusion test (bWasOccluded is left as is) if the box crossed the near plane in the previous frame
BRANCH
if (PrevFrustumCull.bIsVisible && !PrevFrustumCull.bCrossesNearPlane)
{
#if VIRTUAL_TEXTURE_TARGET
FScreenRect PrevRect = GetScreenRect(NaniteView.ViewRect, PrevFrustumCull, 4); // Assume HZBTestViewRect=ViewRect for VSM. Don't load it redundantly.
bWasOccluded = !IsVisibleMaskedHZB(NaniteView.TargetPrevLayerIndex, NaniteView.TargetMipLevel, PrevRect, bClampToPageLevel, bTreatUnmappedAsOccluded, 0U, 0xFFFFFFFF, bIsStaticGeometry);
#else
FScreenRect PrevRect = GetScreenRect(NaniteView.HZBTestViewRect, PrevFrustumCull, 4);
bWasOccluded = !IsVisibleHZB(PrevRect, true);
#endif
}
}
#elif CULLING_PASS == CULLING_PASS_OCCLUSION_POST
// Retest what was occluded last frame against current frame HZB
// Note: unlike the previous frame test above, this path does not check bViewHZB
BRANCH
if( bIsVisible && !bSkipCullHZB )
{
// No occlusion test (bWasOccluded is left as is) if the box crosses the near plane
BRANCH
if (!FrustumCull.bCrossesNearPlane)
{
#if VIRTUAL_TEXTURE_TARGET
// The logic here is that if the thing has moved it should test against all hzb pages, whereas if it is static we can skip the cached ones (since we'd not render into those anyway).
bWasOccluded = !IsVisibleMaskedHZB(NaniteView.TargetLayerIndex, NaniteView.TargetMipLevel, Rect, bClampToPageLevel, true, 0U, PageFlagMask, bIsStaticGeometry);
#else
bWasOccluded = !IsVisibleHZB(Rect, true);
#endif
}
}
#endif
}
// Convenience wrapper: runs the frustum test and feeds its result straight into the HZB test.
// Compiles to nothing when MATERIAL_CACHE is set.
void FrustumHZB( bool bClampToPageLevel )
{
#if !MATERIAL_CACHE
	BRANCH
	if( bIsVisible )
	{
		// Frustum() updates bIsVisible / bNeedsClipping before HZB() consumes the returned cull data
		HZB( Frustum(), bClampToPageLevel );
	}
#endif // MATERIAL_CACHE
}
};