Files
UnrealEngine/Engine/Shaders/Private/Nanite/NaniteHZBCull.ush
2025-05-18 13:04:45 +08:00

704 lines
26 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#define USE_HZB_HALF_TYPE 1
#include "../Common.ush"
#include "../HZB.ush"
// If nothing has defined PLATFORM_SPECIFIC_ISOLATE before this point, fall back to ISOLATE.
// BoxCullFrustumPerspective prefixes each of its corner-evaluation blocks with this macro.
// NOTE(review): ISOLATE itself is not defined in this file - presumably it comes from Common.ush; confirm.
#ifndef PLATFORM_SPECIFIC_ISOLATE
#define PLATFORM_SPECIFIC_ISOLATE ISOLATE
#endif
// Texture-array HZB resource. Read by the array overload of LoadHZBRow4 and by GetMinDepthFromHZBArray,
// which select the slice through their ArrayIndex argument.
Texture2DArray< half > HZBTextureArray;
// Result of a bounds-vs-frustum test, as filled in by the BoxCullFrustum* / SphereCullFrustum functions in this file.
struct FFrustumCullData
{
// Min corner of the projected bounds. xy is in NDC (GetScreenRect maps it from [-1,1]); z is depth.
float3 RectMin;
// Max corner of the projected bounds. GetScreenRect takes RectMax.z as the depth used for HZB tests.
float3 RectMax;
// Set when part of the bounds lies beyond the far plane (far is z = 0 in this file).
bool bCrossesFarPlane;
// Set when part of the bounds lies in front of the near plane (near is z = 1 in this file).
bool bCrossesNearPlane;
// Set when the bounds passed the near/far tests but were rejected by the frustum side planes
// (see BoxCullFrustumOrtho / BoxCullFrustumPerspective).
bool bFrustumSideCulled;
// False when the bounds were rejected by any of the tests that were performed.
bool bIsVisible;
};
// Screen-space footprint of a cull rect, produced by GetScreenRect and consumed by the HZB visibility tests.
struct FScreenRect
{
// Inclusive pixel rect (xy = min, zw = max), clamped to the view rect.
int4 Pixels;
// True when Pixels.zw >= Pixels.xy on both axes, i.e. the rect covers at least one pixel center.
bool bOverlapsPixelCenter;
// For HZB sampling
// Inclusive texel rect (xy = min, zw = max) expressed in texels of mip HZBLevel.
int4 HZBTexels;
// HZB mip selected by MipLevelForRect for this rect.
int HZBLevel;
// Depth that is compared against the HZB (the max z of the cull rect; tests treat the buffer as inverted-Z).
float Depth;
};
// Rect is inclusive [Min.xy, Max.xy]
// Returns the lowest HZB mip at which the rect spans at most DesiredFootprintPixels texels along each axis.
int MipLevelForRect( int4 RectPixels, int DesiredFootprintPixels )
{
    // Largest (max - min) texel distance that still fits inside the desired footprint.
    const int MaxTexelSpan = DesiredFootprintPixels - 1;
    // How many levels finer we may go because the footprint is wider than two texels (2 -> 0, 4 -> 1).
    const int FootprintMipBias = (int)log2((float)DesiredFootprintPixels) - 1;

    // Per-axis requirement for a two-texel footprint:
    //   2 texels of mip k cover 2^(k+1) pixels of mip 0, so covering n pixels needs k >= ceil( log2( n ) ) - 1.
    //   For integer n > 1: ceil( log2( n ) ) = floor( log2( n - 1 ) ) + 1, hence k = floor( log2( n - 1 ) ).
    //   For integer n > 1: floor( log2( n ) ) = firstbithigh( n ), hence k = firstbithigh( n - 1 ).
    // The rect is inclusive, so (max - min) already equals n - 1 and can be fed to firstbithigh directly.
    // NOTE: firstbithigh is a FULL rate instruction on GCN while log2 is QUARTER rate instruction.
    const int2 RectSpan = RectPixels.zw - RectPixels.xy;
    const int2 AxisMip = firstbithigh(RectSpan);

    // The level has to satisfy both axes; the bias then steps back down for wider (e.g. 4x4) footprints.
    // firstbithigh(0) = -1, so the clamp to 0 also covers rects that are a single pixel wide/tall.
    const int WidestAxisMip = max(AxisMip.x, AxisMip.y);
    int Level = max(WidestAxisMip - FootprintMipBias, 0);

    // Level can cover as many pixels as the rect contains, but HZB texels are aligned to powers of two.
    // Alignment can shift the start of the covered range down by up to 2^k-1 pixels, so depending on where
    // the rect sits the footprint may fall one texel short. One extra level is always sufficient, since it
    // guarantees 2^(k+2) - 2^k usable pixels, which exceeds the 2^(k+1) that were required.
    // Check by moving the rect into the coordinates of the chosen level and measuring its span there.
    const int2 SpanAtLevel = (RectPixels.zw >> Level) - (RectPixels.xy >> Level);
    if (any(SpanAtLevel > MaxTexelSpan))
    {
        Level += 1;
    }

    return Level;
}
// Converts an NDC cull rect into a pixel rect inside ViewRect plus the HZB mip/texel rect used to test it.
//   ViewRect               - view rectangle in pixels (xy = min, zw = exclusive max).
//   CullRectMin/Max        - NDC bounds; CullRectMax.z becomes the depth of the returned rect.
//   DesiredFootprintPixels - maximum texel footprint per axis when sampling the HZB.
FScreenRect GetScreenRect( int4 ViewRect, float3 CullRectMin, float3 CullRectMax, int DesiredFootprintPixels )
{
    FScreenRect Result;
    Result.Depth = CullRectMax.z;

    // Map from NDC [-1,1] to target 'texture UV' [0,1] space, X[-1,1] -> [0,1], Y[-1,1] -> [1, 0]
    // CF DX11.3 Functional Spec 3.3.1 Pixel Coordinate System
    // The Y flip swaps min and max on that axis, which the .xwzy swizzle undoes.
    const float4 NDCRect = float4( CullRectMin.xy, CullRectMax.xy );
    const float4 UVRect = saturate( NDCRect * float2(0.5, -0.5).xyxy + 0.5 ).xwzy;

    // Full-resolution pixel footprint. A pixel only counts when its center is inside the rect, because only
    // then can anything inside the rect rasterize to it. Compared with conservative floor/ceil bounds this
    // typically results in ~5% fewer clusters being drawn.
    // NOTE: This assumes anything inside RectMin/RectMax gets rasterized with one centered sample. This will
    // have to be adjusted for conservative rasterization, MSAA or similar features.
    const float2 ViewExtent = ViewRect.zw - ViewRect.xy;
    const float4 PixelCenterBias = float4(0.5f, 0.5f, -0.5f, -0.5f);
    int4 PixelRect = int4( UVRect * ViewExtent.xyxy + ViewRect.xyxy + PixelCenterBias );
    PixelRect.xy = max(PixelRect.xy, ViewRect.xy);
    PixelRect.zw = min(PixelRect.zw, ViewRect.zw - 1);
    Result.Pixels = PixelRect;

    // When this is false the rect has zero area or slips between pixel centers, so nothing is rasterized.
    Result.bOverlapsPixelCenter = all( PixelRect.zw >= PixelRect.xy );

    // Keep the HZB rect valid (max >= min) even if !bOverlapsPixelCenter.
    // Should this be inclusive rounding instead?
    // First level of HZB is hard-coded to start at half resolution:
    // (x,y) in HZB mip 0 covers (2x+0, 2y+0), (2x+1, 2y+0), (2x+0, 2y+1), (2x+1, 2y+1) in the full resolution target.
    const int4 HZBMip0Rect = int4( PixelRect.xy, max( PixelRect.xy, PixelRect.zw ) ) >> 1;

    Result.HZBLevel = MipLevelForRect( HZBMip0Rect, DesiredFootprintPixels );

    // Bring the mip 0 coordinates down to the selected mip.
    Result.HZBTexels = HZBMip0Rect >> Result.HZBLevel;

    return Result;
}
// Convenience overload: builds the screen rect from the RectMin/RectMax stored in a frustum cull result.
FScreenRect GetScreenRect( int4 ViewRect, FFrustumCullData Cull, int DesiredFootprintPixels )
{
    const float3 CullMin = Cull.RectMin;
    const float3 CullMax = Cull.RectMax;
    return GetScreenRect(ViewRect, CullMin, CullMax, DesiredFootprintPixels);
}
#if !COMPILER_SUPPORTS_GATHER_LOD_RED
// Fallback for targets without GatherLODRed: reads four HZB values along one row.
// XCoords holds the four U coordinates, YCoord the shared V coordinate.
float4 LoadHZBRow4(float4 XCoords, float YCoord, float MipLevel)
{
    float4 Row;
    Row.x = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.x, YCoord), MipLevel).r;
    Row.y = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.y, YCoord), MipLevel).r;
    Row.z = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.z, YCoord), MipLevel).r;
    Row.w = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.w, YCoord), MipLevel).r;
    return Row;
}

// Same as above, but reads slice ArrayIndex of HZBTextureArray.
float4 LoadHZBRow4(float4 XCoords, float YCoord, float MipLevel, float ArrayIndex)
{
    float4 Row;
    Row.x = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.x, YCoord, ArrayIndex), MipLevel).r;
    Row.y = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.y, YCoord, ArrayIndex), MipLevel).r;
    Row.z = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.z, YCoord, ArrayIndex), MipLevel).r;
    Row.w = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.w, YCoord, ArrayIndex), MipLevel).r;
    return Row;
}
#endif
// Returns the minimum HZB depth found over the footprint of Rect at mip Rect.HZBLevel.
// bSample4x4 selects a footprint of up to 4x4 texels; otherwise up to 2x2 texels are read.
// Reads HZBTexture through HZBSampler and relies on HZBSize being a power of two (see TexelSize).
float GetMinDepthFromHZB(FScreenRect Rect, bool bSample4x4)
{
// Calculate HZB Texel size.
// TexelSize = (1 / HZBSize) * exp2(MipLevel);
// Bit trick: for a power-of-two float, 0x7F000000 - asint(v) negates the exponent (an exact reciprocal),
// and adding (HZBLevel << 23) adds HZBLevel to the exponent, i.e. multiplies by 2^HZBLevel.
float2 TexelSize = asfloat(0x7F000000 - asint(HZBSize) + (Rect.HZBLevel << 23)); // Assumes HZB is po2
float MipLevel = (float)Rect.HZBLevel;
float MinDepth;
if( bSample4x4 )
{
#if COMPILER_SUPPORTS_GATHER_LOD_RED
// Gather path: four 2x2 gathers, one anchored at each corner of the rect.
// xy is the texel boundary just after the min texel; zw is the boundary just before the max texel,
// clamped to xy so that a rect one texel wide/tall re-reads the same quad.
float4 GatherCoords;
GatherCoords.xy = (float2)Rect.HZBTexels.xy * TexelSize.xy + TexelSize.xy; // (RectPixels.xy + 1) * PixelSize.xy
GatherCoords.zw = max((float2)Rect.HZBTexels.zw * TexelSize.xy, GatherCoords.xy);
float4 Depth00 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.xy, MipLevel);
float4 Depth01 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.zy, MipLevel);
float4 Depth10 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.xw, MipLevel);
float4 Depth11 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.zw, MipLevel);
float4 Depth = min(min3(Depth00, Depth01, Depth10), Depth11);
// Lanes outside the rect are overwritten with 1.0f so they cannot win the min below.
// NOTE(review): this presumes HZB depths never exceed 1.0 - confirm.
Depth.yz = (Rect.HZBTexels.x == Rect.HZBTexels.z) ? 1.0f : Depth.yz; // Mask off right pixels, if footprint is only one pixel wide.
Depth.xy = (Rect.HZBTexels.y == Rect.HZBTexels.w) ? 1.0f : Depth.xy; // Mask off bottom pixels, if footprint is only one pixel tall.
#else
// Point-sample path: sixteen reads at texel centers. The min() against the rect max repeats the last
// column/row for rects narrower than four texels, so no masking is required here.
float4 XCoords = (min(Rect.HZBTexels.x + int4(0, 1, 2, 3), Rect.HZBTexels.z) + 0.5f) * TexelSize.x;
float4 YCoords = (min(Rect.HZBTexels.y + int4(0, 1, 2, 3), Rect.HZBTexels.w) + 0.5f) * TexelSize.y;
float4 Depth0 = LoadHZBRow4( XCoords, YCoords.x, MipLevel);
float4 Depth1 = LoadHZBRow4( XCoords, YCoords.y, MipLevel);
float4 Depth2 = LoadHZBRow4( XCoords, YCoords.z, MipLevel);
float4 Depth3 = LoadHZBRow4( XCoords, YCoords.w, MipLevel);
float4 Depth = min(min3(Depth0, Depth1, Depth2), Depth3);
#endif
MinDepth = min(min3(Depth.x, Depth.y, Depth.z), Depth.w);
}
else
{
// 2x2 footprint. Both paths fill Depth in the same lane order: x = (-,+), y = (+,+), z = (+,-), w = (-,-).
float4 Depth;
#if COMPILER_SUPPORTS_GATHER_LOD_RED
float2 Coords = Rect.HZBTexels.xy * TexelSize + TexelSize; // (RectPixels + 1.0f) * TexelSize
Depth = HZBTexture.GatherLODRed(HZBSampler, Coords.xy, MipLevel);
#else
// Texel centers of the rect's min (xy) and max (zw) texels.
float4 Coords = (Rect.HZBTexels + 0.5f) * TexelSize.xyxy;
Depth.x = HZBTexture.SampleLevel(HZBSampler, Coords.xw, MipLevel).r; // (-,+)
Depth.y = HZBTexture.SampleLevel(HZBSampler, Coords.zw, MipLevel).r; // (+,+)
Depth.z = HZBTexture.SampleLevel(HZBSampler, Coords.zy, MipLevel).r; // (+,-)
Depth.w = HZBTexture.SampleLevel(HZBSampler, Coords.xy, MipLevel).r; // (-,-)
#endif
Depth.yz = (Rect.HZBTexels.x == Rect.HZBTexels.z) ? 1.0f : Depth.yz; // Mask off right pixels, if footprint is only one pixel wide.
Depth.xy = (Rect.HZBTexels.y == Rect.HZBTexels.w) ? 1.0f : Depth.xy; // Mask off bottom pixels, if footprint is only one pixel tall.
MinDepth = min(min3(Depth.x, Depth.y, Depth.z), Depth.w);
}
return MinDepth;
}
// HZB occlusion test: the rect is visible when its depth is at least the minimum depth stored under it.
bool IsVisibleHZB( FScreenRect Rect, bool bSample4x4 )
{
    // Inverted Z buffer, so a larger value is closer to the viewer.
    return Rect.Depth >= GetMinDepthFromHZB(Rect, bSample4x4);
}
// Texture-array variant of GetMinDepthFromHZB: returns the minimum HZB depth over the footprint of Rect,
// read from slice ArrayIndex of HZBTextureArray at mip Rect.HZBLevel.
// bSample4x4 selects a footprint of up to 4x4 texels; otherwise up to 2x2 texels are read.
float GetMinDepthFromHZBArray(FScreenRect Rect, bool bSample4x4, float ArrayIndex)
{
// Calculate HZB Texel size.
// TexelSize = (1 / HZBSize) * exp2(MipLevel);
// Bit trick: for a power-of-two float, 0x7F000000 - asint(v) negates the exponent (an exact reciprocal),
// and adding (HZBLevel << 23) adds HZBLevel to the exponent, i.e. multiplies by 2^HZBLevel.
float2 TexelSize = asfloat(0x7F000000 - asint(HZBSize) + (Rect.HZBLevel << 23)); // Assumes HZB is po2
float MipLevel = (float)Rect.HZBLevel;
float MinDepth;
if( bSample4x4 )
{
#if COMPILER_SUPPORTS_GATHER_LOD_RED
// Gather path: four 2x2 gathers, one anchored at each corner of the rect.
// zw is clamped to xy so that a rect one texel wide/tall re-reads the same quad.
float4 GatherCoords;
GatherCoords.xy = (float2)Rect.HZBTexels.xy * TexelSize.xy + TexelSize.xy; // (RectPixels.xy + 1) * PixelSize.xy
GatherCoords.zw = max((float2)Rect.HZBTexels.zw * TexelSize.xy, GatherCoords.xy);
float4 Depth00 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherCoords.xy, ArrayIndex), MipLevel);
float4 Depth01 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherCoords.zy, ArrayIndex), MipLevel);
float4 Depth10 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherCoords.xw, ArrayIndex), MipLevel);
float4 Depth11 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherCoords.zw, ArrayIndex), MipLevel);
float4 Depth = min(min3(Depth00, Depth01, Depth10), Depth11);
// Lanes outside the rect are overwritten with 1.0f so they cannot win the min below.
// NOTE(review): this presumes HZB depths never exceed 1.0 - confirm.
Depth.yz = (Rect.HZBTexels.x == Rect.HZBTexels.z) ? 1.0f : Depth.yz; // Mask off right pixels, if footprint is only one pixel wide.
Depth.xy = (Rect.HZBTexels.y == Rect.HZBTexels.w) ? 1.0f : Depth.xy; // Mask off bottom pixels, if footprint is only one pixel tall.
#else
// Point-sample path: the min() against the rect max repeats the last column/row for rects narrower
// than four texels, so no masking is required here.
float4 XCoords = (min(Rect.HZBTexels.x + int4(0, 1, 2, 3), Rect.HZBTexels.z) + 0.5f) * TexelSize.x;
float4 YCoords = (min(Rect.HZBTexels.y + int4(0, 1, 2, 3), Rect.HZBTexels.w) + 0.5f) * TexelSize.y;
float4 Depth0 = LoadHZBRow4( XCoords, YCoords.x, MipLevel, ArrayIndex);
float4 Depth1 = LoadHZBRow4( XCoords, YCoords.y, MipLevel, ArrayIndex);
float4 Depth2 = LoadHZBRow4( XCoords, YCoords.z, MipLevel, ArrayIndex);
float4 Depth3 = LoadHZBRow4( XCoords, YCoords.w, MipLevel, ArrayIndex);
float4 Depth = min(min3(Depth0, Depth1, Depth2), Depth3);
#endif
MinDepth = min(min3(Depth.x, Depth.y, Depth.z), Depth.w);
}
else
{
// 2x2 footprint. Both paths fill Depth in the same lane order: x = (-,+), y = (+,+), z = (+,-), w = (-,-).
float4 Depth;
#if COMPILER_SUPPORTS_GATHER_LOD_RED
float2 Coords = Rect.HZBTexels.xy * TexelSize + TexelSize; // (RectPixels + 1.0f) * TexelSize
Depth = HZBTextureArray.GatherLODRed(HZBSampler, float3(Coords.xy, ArrayIndex), MipLevel);
#else
// Texel centers of the rect's min (xy) and max (zw) texels.
float4 Coords = (Rect.HZBTexels + 0.5f) * TexelSize.xyxy;
Depth.x = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.xw, ArrayIndex), MipLevel).r; // (-,+)
Depth.y = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.zw, ArrayIndex), MipLevel).r; // (+,+)
Depth.z = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.zy, ArrayIndex), MipLevel).r; // (+,-)
Depth.w = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.xy, ArrayIndex), MipLevel).r; // (-,-)
#endif
Depth.yz = (Rect.HZBTexels.x == Rect.HZBTexels.z) ? 1.0f : Depth.yz; // Mask off right pixels, if footprint is only one pixel wide.
Depth.xy = (Rect.HZBTexels.y == Rect.HZBTexels.w) ? 1.0f : Depth.xy; // Mask off bottom pixels, if footprint is only one pixel tall.
MinDepth = min(min3(Depth.x, Depth.y, Depth.z), Depth.w);
}
return MinDepth;
}
// HZB occlusion test against slice ArrayIndex of the HZB texture array.
bool IsVisibleHZBArray( FScreenRect Rect, bool bSample4x4, int ArrayIndex)
{
    const float SliceIndex = float(ArrayIndex);
    // Inverted Z buffer, so a larger value is closer to the viewer.
    return Rect.Depth >= GetMinDepthFromHZBArray(Rect, bSample4x4, SliceIndex);
}
// HZB occlusion test that uses a depth plane instead of a single depth for the whole rect.
// PlaneHZB holds plane coefficients such that depth = dot( PlaneHZB, float3( UV, 1 ) ) in HZB UV space
// (GetFacePlaneHZB builds such a plane). Per texel, the plane depth is clamped to Rect.Depth before the compare.
// Returns true when any texel of the footprint passes.
bool IsVisibleHZB( FScreenRect Rect, float3 PlaneHZB )
{
// Calculate HZB Texel size.
// TexelSize = (1 / HZBSize) * exp2(MipLevel);
// Bit trick: for a power-of-two float, 0x7F000000 - asint(v) negates the exponent (an exact reciprocal),
// and adding (HZBLevel << 23) adds HZBLevel to the exponent, i.e. multiplies by 2^HZBLevel.
float2 TexelSize = asfloat(0x7F000000 - asint(HZBSize) + (Rect.HZBLevel << 23)); // Assumes HZB is po2
float MipLevel = (float)Rect.HZBLevel;
#if COMPILER_SUPPORTS_GATHER_LOD_RED
//float4 GatherCoords = int4(RectPixels.xy, max(RectPixels.zw - 1, RectPixels.xy)) * PixelSize.xyxy + PixelSize.xyxy;
// Four 2x2 gathers, one anchored at each corner of the rect; zw is clamped to xy for one-texel-wide/tall rects.
float4 GatherCoords;
GatherCoords.xy = (float2)Rect.HZBTexels.xy * TexelSize.xy + TexelSize.xy; // (RectPixels.xy + 1) * TexelSize.xy
GatherCoords.zw = max((float2)Rect.HZBTexels.zw * TexelSize.xy, GatherCoords.xy);
float4 HZBDepth00 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.xy, MipLevel);
float4 HZBDepth01 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.zy, MipLevel);
float4 HZBDepth10 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.xw, MipLevel);
float4 HZBDepth11 = HZBTexture.GatherLODRed(HZBSampler, GatherCoords.zw, MipLevel);
// Plane depth offsets from the gather coordinate (the quad's shared corner) to each of the four texel
// centers, half a texel away. Lane order: x = (-,+), y = (+,+), z = (+,-), w = (-,-).
float2 PlaneDepthDelta = 0.5 * TexelSize.xy * PlaneHZB.xy;
float4 PlaneDepthBase4;
PlaneDepthBase4.x = -PlaneDepthDelta.x + PlaneDepthDelta.y + PlaneHZB.z;
PlaneDepthBase4.y = PlaneDepthDelta.x + PlaneDepthDelta.y + PlaneHZB.z;
PlaneDepthBase4.z = PlaneDepthDelta.x - PlaneDepthDelta.y + PlaneHZB.z;
PlaneDepthBase4.w = -PlaneDepthDelta.x - PlaneDepthDelta.y + PlaneHZB.z;
// Plane depth at the four texel centers of each gathered quad.
float4 PlaneDepth00 = dot( PlaneHZB.xy, GatherCoords.xy ) + PlaneDepthBase4;
float4 PlaneDepth01 = dot( PlaneHZB.xy, GatherCoords.zy ) + PlaneDepthBase4;
float4 PlaneDepth10 = dot( PlaneHZB.xy, GatherCoords.xw ) + PlaneDepthBase4;
float4 PlaneDepth11 = dot( PlaneHZB.xy, GatherCoords.zw ) + PlaneDepthBase4;
bool4 bIsVisible00 = min( PlaneDepth00, Rect.Depth ) >= HZBDepth00;
bool4 bIsVisible01 = min( PlaneDepth01, Rect.Depth ) >= HZBDepth01;
bool4 bIsVisible10 = min( PlaneDepth10, Rect.Depth ) >= HZBDepth10;
bool4 bIsVisible11 = min( PlaneDepth11, Rect.Depth ) >= HZBDepth11;
bool4 bIsVisible = or(or(bIsVisible00, bIsVisible01), or(bIsVisible10, bIsVisible11));
// Lanes that fall outside a one-texel-wide/tall rect must not report visibility.
bIsVisible.yz = (Rect.HZBTexels.x == Rect.HZBTexels.z) ? false : bIsVisible.yz; // Mask off right pixels
bIsVisible.xy = (Rect.HZBTexels.y == Rect.HZBTexels.w) ? false : bIsVisible.xy; // Mask off bottom pixels
return bIsVisible.x || bIsVisible.y || bIsVisible.z || bIsVisible.w;
#else
// Fallback without gather: visit every texel of the inclusive rect and test at its center.
bool bIsVisible = false;
for( int y = Rect.HZBTexels.y; y <= Rect.HZBTexels.w; y++ )
{
for( int x = Rect.HZBTexels.x; x <= Rect.HZBTexels.z; x++ )
{
float2 UV = ( float2(x,y) + 0.5 ) * TexelSize;
float HZBDepth = HZBTexture.SampleLevel( HZBSampler, UV, MipLevel ).r;
float PlaneDepth = dot( PlaneHZB, float3( UV, 1 ) );
bIsVisible = bIsVisible || min( PlaneDepth, Rect.Depth ) >= HZBDepth;
}
}
return bIsVisible;
#endif
}
// Returns the cube face that Dir points at most strongly:
// 0/1 = +X/-X, 2/3 = +Y/-Y, 4/5 = +Z/-Z. X wins ties against Y and Z; Z wins ties against Y.
uint GetCubeFaceIndex( float3 Dir )
{
#if 1
    const float3 AbsDir = abs( Dir );
    const bool bXIsDominant = AbsDir.x >= AbsDir.y && AbsDir.x >= AbsDir.z;
    const bool bYBeatsZ = AbsDir.y > AbsDir.z;

    uint Face;
    if( bXIsDominant )
    {
        Face = Dir.x > 0 ? 0 : 1;
    }
    else if( bYBeatsZ )
    {
        Face = Dir.y > 0 ? 2 : 3;
    }
    else
    {
        Face = Dir.z > 0 ? 4 : 5;
    }
#else
    // TODO GCN
    uint Face = v_cubeid_f32( Dir );
#endif
    return Face;
}
// Normalized cube [-1,1]
// Returns (normal.xyz, -1) for the face of that cube which Dir points at most strongly.
float4 GetCubeFacePlane( float3 Dir )
{
    const uint FaceIndex = GetCubeFaceIndex( Dir );
    // Faces come in +/- pairs per axis: the upper bits pick the axis, the low bit picks the negative side.
    const uint AxisIndex = FaceIndex >> 1;
    const float AxisSign = (FaceIndex & 1) ? -1 : 1;

    float3 Normal = float3( 0, 0, 0 );
    if( AxisIndex == 0 )
    {
        Normal.x = AxisSign;
    }
    else if( AxisIndex == 1 )
    {
        Normal.y = AxisSign;
    }
    else if( AxisIndex == 2 )
    {
        Normal.z = AxisSign;
    }

    return float4( Normal, -1 );
}
// Returns the plane (normal.xyz, d) of the box face that most directly faces ViewOrigin.
// The face is picked in the box's normalized space; the plane passes through the box corner on the viewer's side.
float4 GetDominantFacePlane( float3 ViewOrigin, float3 BoundsCenter, float3 BoundsExtent )
{
    // Direction to the viewer with the box rescaled to a unit cube.
    const float3 DirInUnitCube = ( ViewOrigin - BoundsCenter ) / BoundsExtent;

    const uint FaceIndex = GetCubeFaceIndex( DirInUnitCube );
    const uint AxisIndex = FaceIndex >> 1;
    const float AxisSign = (FaceIndex & 1) ? -1 : 1;

    float3 Normal = float3( 0, 0, 0 );
    if( AxisIndex == 0 )
    {
        Normal.x = AxisSign;
    }
    else if( AxisIndex == 1 )
    {
        Normal.y = AxisSign;
    }
    else if( AxisIndex == 2 )
    {
        Normal.z = AxisSign;
    }

    // Per axis, step towards the side of the box the viewer is on.
    const float3 CornerOffset = select(DirInUnitCube > 0, BoundsExtent, -BoundsExtent);
    const float3 NearestCorner = BoundsCenter + CornerOffset;

    return float4( Normal, dot( Normal, -NearestCorner ) );
}
// Builds the depth plane of the box face that faces the viewer, expressed so that
// depth = dot( Result, float3( HZB UV, 1 ) ). The result is what IsVisibleHZB( Rect, PlaneHZB ) consumes.
float3 GetFacePlaneHZB( float3 ViewOriginLocal, float4x4 ClipToLocal, float3 BoundsCenter, float3 BoundsExtent )
{
    const float4 FacePlaneLocal = GetDominantFacePlane( ViewOriginLocal, BoundsCenter, BoundsExtent );

    // Move the plane into post projective space. Planes transform by the inverse transpose, which is why the
    // inverse matrix (ClipToLocal) is applied from the left.
    // https://stackoverflow.com/questions/7685495/transforming-a-3d-plane-using-a-4x4-matrix
    const float4 FacePlaneClip = mul( ClipToLocal, FacePlaneLocal );

    // Rewrite as a 2D plane over the screen by solving
    //   dot( FacePlaneClip, float4( PointClip, 1 ) ) = 0
    // for PointClip.z.
    const float3 DepthPlaneScreen = -FacePlaneClip.xyw / FacePlaneClip.z;

    // Fold the UV to ScreenPos transform into the plane so it does not have to be applied per sample:
    //   float2 ScreenPos = UV * float2( 2, -2 ) + float2( -1, 1 );
    const float2 SlopeUV = DepthPlaneScreen.xy * float2( 2, -2 );
    const float OffsetUV = dot( DepthPlaneScreen, float3( -1, 1, 1 ) );
    const float3 DepthPlaneUV = float3( SlopeUV, OffsetUV );

    // HZB UVs to viewport UVs
    float3 Result = float3( DepthPlaneUV.xy * 2 * HZBSize.xy / HZBViewSize.xy, DepthPlaneUV.z );

    // Bias to the max depth corner of a pixel (inverted z) so the depth is conservative over the pixel.
    Result.z += dot( abs( Result.xy ), 0.5 / HZBViewSize.xy );

    return Result;
}
// Frustum test for an oriented box under an orthographic projection.
// With no perspective divide the clip-space bounds are simply the projected center plus/minus the sum of the
// absolute projected half-axes.
//   bNearClip        - when false the near plane (z = 1) does not reject the box.
//   bSkipFrustumCull - skips the side-plane rejection (for debugging).
FFrustumCullData BoxCullFrustumOrtho(float3 Center, float3 Extent, float4x4 LocalToWorld, float4x4 WorldToClip, bool bNearClip, bool bSkipFrustumCull)
{
    const float3 BoxCenterClip = mul( mul( float4( Center, 1 ), LocalToWorld ), WorldToClip ).xyz;

    const float3 HalfAxisX = abs( Extent.x * mul(LocalToWorld[0], WorldToClip).xyz );
    const float3 HalfAxisY = abs( Extent.y * mul(LocalToWorld[1], WorldToClip).xyz );
    const float3 HalfAxisZ = abs( Extent.z * mul(LocalToWorld[2], WorldToClip).xyz );
    const float3 HalfSizeClip = HalfAxisX + HalfAxisY + HalfAxisZ;

    FFrustumCullData Result;
    Result.RectMin = BoxCenterClip - HalfSizeClip;
    Result.RectMax = BoxCenterClip + HalfSizeClip;

    // Inverted Z: far is z = 0, near is z = 1.
    Result.bCrossesFarPlane = Result.RectMin.z < 0.0f;
    Result.bCrossesNearPlane = Result.RectMax.z > 1.0f;

    bool bPassedDepthPlanes = Result.RectMax.z > 0.0f; // Far clip
    if (bNearClip)
    {
        bPassedDepthPlanes = bPassedDepthPlanes && Result.RectMin.z < 1.0f;
    }

    bool bOutsideSidePlanes = false;
    if (!bSkipFrustumCull) // for debugging, will compile out
    {
        bOutsideSidePlanes = any(or(Result.RectMax.xy < -1.0f, Result.RectMin.xy > 1.0f));
    }

    Result.bFrustumSideCulled = bPassedDepthPlanes && bOutsideSidePlanes;
    Result.bIsVisible = bPassedDepthPlanes && !bOutsideSidePlanes;
    return Result;
}
// Frustum test for an oriented box under a perspective projection.
// Transforms all eight corners to clip space and accumulates the NDC xy bounds, the w range and the
// per-side plane distances.
//   Center, Extent   - box center and half-size in local space.
//   LocalToWorld, WorldToClip - applied to the corners in that order (row-vector convention: mul( v, M )).
//   ViewToClip       - only [2][2] and [3][2] are read, to derive a clip-space z from the w range.
//   bSkipFrustumCull - skips the side-plane rejection.
FFrustumCullData BoxCullFrustumPerspective(float3 Center, float3 Extent, float4x4 LocalToWorld, float4x4 WorldToClip, float4x4 ViewToClip, bool bSkipFrustumCull)
{
FFrustumCullData Cull;
// Full box edges (2 * Extent) along local X and Y in clip space. DZ is computed inside the first isolated block.
float4 DX = (2.0f * Extent.x) * mul(LocalToWorld[0], WorldToClip);
float4 DY = (2.0f * Extent.y) * mul(LocalToWorld[1], WorldToClip);
float MinW = +INFINITE_FLOAT;
float MaxW = -INFINITE_FLOAT;
// Running minimum over all corners of ( x - w, y - w, -x - w, -y - w ). A component that is still > 0 at the
// end means every corner lies outside that side plane.
float4 PlanesMin = 1.0f;
// Start from an inverted rect: EVAL_POINTS can only lower RectMin.xy from +1 and raise RectMax.xy from -1.
Cull.RectMin = float3(+1, +1, +1);
Cull.RectMax = float3(-1, -1, -1);
// To discourage the compiler from overlapping the entire calculation, which uses an excessive number of VGPRs, the evaluation is split into 4 isolated passes with two corners per pass.
// There seems to be no additional benefit from evaluating just one corner per pass and it prevents the use of fast min3/max3 intrinsics.
// EVAL_POINTS folds two clip-space corners into MinW/MaxW, PlanesMin and the NDC xy rect.
#define EVAL_POINTS(PC0, PC1) \
MinW = min3(MinW, PC0.w, PC1.w); \
MaxW = max3(MaxW, PC0.w, PC1.w); \
PlanesMin = min3(PlanesMin, float4(PC0.xy, -PC0.xy) - PC0.w, float4(PC1.xy, -PC1.xy) - PC1.w); \
float2 PS0 = PC0.xy / PC0.w; \
float2 PS1 = PC1.xy / PC1.w; \
Cull.RectMin.xy = min3(Cull.RectMin.xy, PS0, PS1); \
Cull.RectMax.xy = max3(Cull.RectMax.xy, PS0, PS1);
// Corner names read PC<z><y><x>: a 1 digit means the matching edge vector (DZ, DY, DX) has been added to PC000.
// Each pass derives its corners from the previous pass by adding or subtracting a single edge vector.
float4 PC000, PC100;
PLATFORM_SPECIFIC_ISOLATE
{
float4 DZ = (2.0f * Extent.z) * mul(LocalToWorld[2], WorldToClip);
// PC000 is the minimum corner (Center - Extent) of the box.
PC000 = mul(mul(float4(Center - Extent, 1.0), LocalToWorld), WorldToClip);
PC100 = PC000 + DZ;
EVAL_POINTS(PC000, PC100);
}
float4 PC001, PC101;
PLATFORM_SPECIFIC_ISOLATE
{
PC001 = PC000 + DX;
PC101 = PC100 + DX;
EVAL_POINTS(PC001, PC101);
}
float4 PC011, PC111;
PLATFORM_SPECIFIC_ISOLATE
{
PC011 = PC001 + DY;
PC111 = PC101 + DY;
EVAL_POINTS(PC011, PC111);
}
float4 PC010, PC110;
PLATFORM_SPECIFIC_ISOLATE
{
PC010 = PC011 - DX;
PC110 = PC111 - DX;
EVAL_POINTS(PC010, PC110);
}
#undef EVAL_POINTS
// Clip-space z at the largest and smallest w.
// NOTE(review): this uses w as the view-space depth fed through ViewToClip - presumably valid for the
// projections used here; confirm.
float MinZ = MaxW * ViewToClip[2][2] + ViewToClip[3][2];
float MaxZ = MinW * ViewToClip[2][2] + ViewToClip[3][2];
// Near is z=1
// i.e. z / w >= 1 at the smallest w, written without the divide.
bool bInFrontNearPlane = MinW <= MaxZ;
bool bBehindNearPlane = MaxW > MinZ;
// Far is z=0
bool bInFrontFarPlane = 0 < MaxZ;
bool bBehindFarPlane = 0 >= MinZ;
Cull.bCrossesNearPlane = bInFrontNearPlane;
Cull.bCrossesFarPlane = bBehindFarPlane;
Cull.bIsVisible = bBehindNearPlane && bInFrontFarPlane;
if (MinW <= 0.0f && MaxW > 0.0f)
{
// The box straddles w = 0, so the per-corner perspective divides above are not meaningful.
// Fall back to the full NDC range.
Cull.RectMin = float3(-1, -1, -1);
Cull.RectMax = float3(+1, +1, +1);
}
else
{
// Depth range after the perspective divide (farthest z over largest w, nearest z over smallest w).
Cull.RectMin.z = MinZ / MaxW;
Cull.RectMax.z = MaxZ / MinW;
}
Cull.bFrustumSideCulled = false;
if (!bSkipFrustumCull)
{
// Rejected when all eight corners are outside the same side plane.
const bool bFrustumCull = any(PlanesMin > 0.0f);
Cull.bFrustumSideCulled = Cull.bIsVisible && bFrustumCull;
Cull.bIsVisible = Cull.bIsVisible && !bFrustumCull;
}
return Cull;
}
// Splitting the transform in two generates much better code on DXC when WorldToClip is scalar.
// Entry point for box frustum culling: picks the orthographic or the perspective test.
FFrustumCullData BoxCullFrustum( float3 Center, float3 Extent, float4x4 LocalToWorld, float4x4 WorldToClip, float4x4 ViewToClip, bool bIsOrtho, bool bNearClip, bool bSkipFrustumCull )
{
    // NOTE: We assume here that if near clipping is disabled the projection is orthographic, as disabling near clipping is
    // a feature for directional light shadows, and disabling near clipping for a perspective projection doesn't make much sense.
    // Checking both also serves to help out DCE when either is a compile-time constant.
    checkSlow(bIsOrtho || bNearClip);

    const bool bUsePerspectivePath = !bIsOrtho && bNearClip;
    if (bUsePerspectivePath)
    {
        return BoxCullFrustumPerspective( Center, Extent, LocalToWorld, WorldToClip, ViewToClip, bSkipFrustumCull );
    }

    return BoxCullFrustumOrtho( Center, Extent, LocalToWorld, WorldToClip, bNearClip, bSkipFrustumCull );
}
// Overload for callers that already hold a combined LocalToClip matrix: forwards it as WorldToClip together
// with an identity LocalToWorld.
FFrustumCullData BoxCullFrustum(float3 Center, float3 Extent, float4x4 LocalToClip, float4x4 ViewToClip, bool bIsOrtho, bool bNearClip, bool bSkipFrustumCull)
{
    const float4x4 IdentityLocalToWorld = float4x4(
        1,0,0,0,
        0,1,0,0,
        0,0,1,0,
        0,0,0,1);

    return BoxCullFrustum(Center, Extent, IdentityLocalToWorld, LocalToClip, ViewToClip, bIsOrtho, bNearClip, bSkipFrustumCull);
}
// Projects a circle (center (x, z), radius r) onto one screen axis and returns the (min, max) of the two
// tangent lines through the origin, multiplied by ResultScale.
// The sqrt argument is negative when the origin is inside the circle; callers must avoid that case.
float2 ProjectSphere( float x, float z, float r, float ResultScale )
{
    // Distance from the origin to the tangent points.
    const float TangentLen = sqrt( x*x + z*z - r*r );
#if 1
    const float DenomMin = ( TangentLen*z + r*x );
    const float DenomMax = ( TangentLen*z - r*x );

    // Divide by common denominator instead of dividing twice
    const float Scale = ResultScale / ( DenomMin * DenomMax );

    const float NumerMin = ( TangentLen*x - r*z ) * DenomMax;
    const float NumerMax = ( TangentLen*x + r*z ) * DenomMin;
    return float2( NumerMin, NumerMax ) * Scale;
#else
    const float SlopeMin = ( TangentLen*x - r*z ) / ( TangentLen*z + r*x );
    const float SlopeMax = ( TangentLen*x + r*z ) / ( TangentLen*z - r*x );
    return float2( SlopeMin, SlopeMax ) * ResultScale;
#endif
}
// [ Mara & Morgan 2013, "2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere" ]
// Returns the NDC rect (min.xy, max.xy) of a view-space sphere under a perspective projection.
// Does not handle spheres that reach the view origin; see SphereToScreenRectNearClip for clipped spheres.
float4 SphereToScreenRect( float3 Center, float Radius, float4x4 ViewToClip )
{
    const float2 SpanX = ProjectSphere( Center.x, Center.z, Radius, ViewToClip[0][0] ) + ViewToClip[2][0];
    const float2 SpanY = ProjectSphere( Center.y, Center.z, Radius, ViewToClip[1][1] ) + ViewToClip[2][1];

    const float2 RectMin = float2( SpanX.x, SpanY.x );
    const float2 RectMax = float2( SpanX.y, SpanY.y );
    return float4( RectMin, RectMax );
}
// Assumes we have already handled the z+r < ZNear case
// Near-plane aware version of ProjectSphere: returns the projected (min, max) on one screen axis, swapping
// in the circle's intersection with z = ZNear for any tangent point that lies in front of the near plane.
float2 ProjectSphereNearClip( float x, float z, float r, float ResultScale, float ZNear )
{
    const float CenterLenSq = x * x + z * z;
    const float TangentLenSq = CenterLenSq - r * r;
    const float TangentLen = sqrt( TangentLenSq );

    // cos/sin of the angle between the center direction and a tangent line, both scaled by the same factor.
    const float CosScaled = TangentLenSq;
    const float SinScaled = r * TangentLen;

    // Tangent points (scaled by CenterLenSq): rotate the center by +Theta for the max side ...
    float2 MaxPoint = float2( CosScaled * x + SinScaled * z, -SinScaled * x + CosScaled * z );
    // ... and by -Theta for the min side.
    float2 MinPoint = float2( CosScaled * x - SinScaled * z, SinScaled * x + CosScaled * z );

    // Half-width of the chord where the circle crosses the near plane.
    const float NearOffset = ZNear - z;
    const float ChordHalfWidth = sqrt( r * r - NearOffset * NearOffset );

    // A tangent point is unusable when the origin is inside the circle (no tangents exist) or when the point
    // falls in front of the near plane. The comparison is done in the scaled space of the tangent points.
    const bool bOriginInside = TangentLenSq < 0.0;
    const bool bClipMin = bOriginInside || MinPoint.y < ZNear * CenterLenSq;
    const bool bClipMax = bOriginInside || MaxPoint.y < ZNear * CenterLenSq;

    if ( bClipMin )
    {
        MinPoint.x = x - ChordHalfWidth;
        MinPoint.y = ZNear;
    }
    if ( bClipMax )
    {
        MaxPoint.x = x + ChordHalfWidth;
        MaxPoint.y = ZNear;
    }

    return float2( MinPoint.x / MinPoint.y, MaxPoint.x / MaxPoint.y ) * ResultScale;
}
// [ Mara & Morgan 2013, "2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere" ]
// Returns the NDC rect (min.xy, max.xy) of a view-space sphere that may cross the near plane at ZNear.
float4 SphereToScreenRectNearClip( float3 Center, float Radius, float4x4 ViewToClip, float ZNear )
{
    const float2 SpanX = ProjectSphereNearClip( Center.x, Center.z, Radius, ViewToClip[0][0], ZNear ) + ViewToClip[2][0];
    const float2 SpanY = ProjectSphereNearClip( Center.y, Center.z, Radius, ViewToClip[1][1], ZNear ) + ViewToClip[2][1];

    const float2 RectMin = float2( SpanX.x, SpanY.x );
    const float2 RectMax = float2( SpanX.y, SpanY.y );
    return float4( RectMin, RectMax );
}
// Returns the NDC rect (min.xy, max.xy) of a view-space sphere under an orthographic projection.
// Without a perspective divide the bounds are the center +/- radius, scaled and offset by the projection.
float4 SphereToScreenRectOrtho( float3 Center, float Radius, float4x4 ViewToClip )
{
    // With the row-vector convention used in this file, clip.x = x * M[0][0] + z * M[2][0] + M[3][0] (w = 1).
    // The constant offset of an orthographic projection therefore lives in row 3, in the same way the depth
    // path reads its offset from ViewToClip[3][2]. Row 2 is the z-coupling term: it only becomes a constant
    // NDC offset after a perspective divide, and it is zero for an axis-aligned orthographic matrix.
    // NOTE(review): an oblique orthographic matrix (non-zero [2][0] / [2][1]) is not handled here.
    const float2 ClipScale = float2( ViewToClip[0][0], ViewToClip[1][1] );
    const float2 ClipOffset = float2( ViewToClip[3][0], ViewToClip[3][1] );

    const float2 RectMin = ( Center.xy - Radius ) * ClipScale + ClipOffset;
    const float2 RectMax = ( Center.xy + Radius ) * ClipScale + ClipOffset;
    return float4( RectMin, RectMax );
}
// Frustum test for a view-space sphere. Produces the NDC bounds of the sphere, its nearest depth and
// the same set of flags as the box tests.
//   Center, Radius   - sphere in view space; z is the distance along the view direction.
//   ViewToClip       - projection matrix; ViewToClip[3][3] >= 1 is treated as orthographic.
//   ZNear            - view-space distance of the near plane.
//   bNearClip        - when false the near plane does not reject the sphere.
//   bSkipFrustumCull - skips the side-plane rejection (for debugging).
FFrustumCullData SphereCullFrustum(float3 Center, float Radius, float4x4 ViewToClip, float ZNear, bool bNearClip, bool bSkipFrustumCull)
{
    FFrustumCullData Cull;

    const float SphereMaxZ = Center.z + Radius;
    const float SphereMinZ = Center.z - Radius;

    // Every member is written before returning. bFrustumSideCulled in particular used to be left
    // uninitialised, so callers reading it after a sphere test saw an undefined value.
    Cull.bFrustumSideCulled = false;
    Cull.bCrossesNearPlane = ( SphereMinZ < ZNear );
    Cull.bCrossesFarPlane = ( SphereMaxZ * ViewToClip[2][2] + ViewToClip[3][2] < 0 );
    Cull.bIsVisible = true;

    // Near plane cull
    if (bNearClip)
    {
        Cull.bIsVisible = (SphereMaxZ > ZNear);
    }

    // Far plane cull (far is clip z = 0)
    if (Cull.bIsVisible)
    {
        Cull.bIsVisible = (SphereMinZ * ViewToClip[2][2] + ViewToClip[3][2] > 0);
    }

    // Depth of the nearest point of the sphere after the perspective divide.
    // NOTE(review): SphereMinZ can be <= 0 when the sphere crosses the near plane, which makes this value
    // meaningless; callers presumably skip depth tests when bCrossesNearPlane is set - confirm.
    float Depth = ViewToClip[2][2] + ViewToClip[3][2] / SphereMinZ;

    float4 Rect;
    if( ViewToClip[3][3] >= 1 )
    {
        // Orthographic: no divide by z, for the rect or for the depth.
        Rect = SphereToScreenRectOrtho( Center, Radius, ViewToClip );
        Depth = SphereMinZ * ViewToClip[2][2] + ViewToClip[3][2];
    }
    else if( Cull.bCrossesNearPlane )
    {
        Rect = SphereToScreenRectNearClip( Center, Radius, ViewToClip, ZNear );
    }
    else
    {
        Rect = SphereToScreenRect( Center, Radius, ViewToClip );
    }

    Cull.RectMin = float3( Rect.xy, 0 );
    Cull.RectMax = float3( Rect.zw, Depth );

    // Frustum cull
    if (Cull.bIsVisible && !bSkipFrustumCull /* for debugging */)
    {
        const bool bInsideSidePlanes = all(Cull.RectMin.xy < 1) && all(Cull.RectMax.xy > -1);

        // Same meaning as in the box tests: passed near/far, rejected by the sides.
        Cull.bFrustumSideCulled = !bInsideSidePlanes;
        Cull.bIsVisible = bInsideSidePlanes;
    }

    return Cull;
}