704 lines
26 KiB
HLSL
704 lines
26 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#pragma once
|
|
|
|
#define USE_HZB_HALF_TYPE 1
|
|
|
|
#include "../Common.ush"
|
|
#include "../HZB.ush"
|
|
|
|
// Fallback for platforms that do not provide their own isolation keyword: use ISOLATE
// (not defined in this file; presumably supplied by one of the includes above).
// The macro prefixes each corner-evaluation pass in BoxCullFrustumPerspective.
#ifndef PLATFORM_SPECIFIC_ISOLATE
|
|
#define PLATFORM_SPECIFIC_ISOLATE ISOLATE
|
|
#endif
|
|
|
|
// Texture-array flavor of the HZB; read by the array overload of LoadHZBRow4 and by GetMinDepthFromHZBArray.
Texture2DArray< half > HZBTextureArray;
|
|
|
|
// Outcome of testing a bounding volume against the view frustum.
struct FFrustumCullData
{
	// Bounds of the volume after projection: xy is the NDC rectangle, z carries depth.
	float3	RectMin;
	float3	RectMax;

	// Classification against the far (z=0) and near (z=1) depth planes.
	bool	bCrossesFarPlane;
	bool	bCrossesNearPlane;

	// True when the volume passed the depth-plane tests but was rejected by the frustum sides.
	bool	bFrustumSideCulled;

	// Final verdict after all enabled tests.
	bool	bIsVisible;
};
|
|
|
|
// Screen footprint of a culling rectangle, in full-resolution pixels and in HZB texels.
struct FScreenRect
{
	// Inclusive pixel bounds: xy = min, zw = max.
	int4	Pixels;

	// False when the rect covers no pixel center (zero area or it falls between centers).
	bool	bOverlapsPixelCenter;

	// For HZB sampling: inclusive texel bounds at HZBLevel, and the mip they refer to.
	int4	HZBTexels;
	int		HZBLevel;

	// Depth compared against the HZB (taken from the cull rect's max z).
	float	Depth;
};
|
|
|
|
// Selects the HZB mip at which the rect fits into a DesiredFootprintPixels-wide footprint on both axes.
// Rect is inclusive [Min.xy, Max.zw].
int MipLevelForRect( int4 RectPixels, int DesiredFootprintPixels )
{
	// Largest inclusive (Max - Min) distance, in texels of the chosen mip, that the footprint can span.
	const int MaxTexelSpan = DesiredFootprintPixels - 1;

	// The derivation below is for a 2-texel footprint. Larger footprints can use a finer mip:
	// one level finer per doubling (e.g. 4x4 sampling goes one extra level down).
	const int FinerMipBias = (int)log2((float)DesiredFootprintPixels) - 1;

	// Separate x and y requirements.
	// 2 texels of mip k cover 2^(k+1) texels of mip 0, so covering n texels of mip 0 needs
	//   k = ceil( log2( n ) ) - 1
	// For integer n > 1: ceil( log2( n ) ) = floor( log2( n - 1 ) ) + 1, hence
	//   k = floor( log2( n - 1 ) ) = firstbithigh( n - 1 )
	// Min and Max are both inclusive, so their difference already is n - 1.
	// NOTE: firstbithigh is a FULL rate instruction on GCN while log2 is QUARTER rate instruction.
	const int2 AxisMip = firstbithigh( RectPixels.zw - RectPixels.xy );

	// Satisfy the more demanding axis, then apply the footprint bias.
	// firstbithigh(0) = -1, so the clamp at 0 also covers the n=1 case of a 1 texel wide/tall rect.
	int Level = max( max( AxisMip.x, AxisMip.y ) - FinerMipBias, 0 );

	// Level can hold the required number of texels, but HZB footprints are aligned to powers of two.
	// An unlucky alignment shifts the start of the covered range and can leave too few usable texels,
	// so the rect may still straddle more texels than the footprint allows.
	// Going one level up is always sufficient, so a single check in the selected mip's coordinates is enough.
	const int2 MinAtLevel = RectPixels.xy >> Level;
	const int2 MaxAtLevel = RectPixels.zw >> Level;
	const bool bNeedsCoarserMip = any( ( MaxAtLevel - MinAtLevel ) > MaxTexelSpan );
	Level += bNeedsCoarserMip ? 1 : 0;

	return Level;
}
|
|
|
|
// Converts an NDC cull rect into pixel bounds inside ViewRect plus the HZB texel footprint / mip
// that covers it with at most DesiredFootprintPixels texels per axis.
FScreenRect GetScreenRect( int4 ViewRect, float3 CullRectMin, float3 CullRectMax, int DesiredFootprintPixels )
{
	FScreenRect Result;
	Result.Depth = CullRectMax.z;

	// Map from NDC [-1,1] to target 'texture UV' [0,1] space, X[-1,1] -> [0,1], Y[-1,1] -> [1, 0]
	// CF DX11.3 Functional Spec 3.3.1 Pixel Coordinate System
	// The Y flip swaps min and max along y, which the .xwzy swizzle undoes.
	const float4 RectNDC = float4( CullRectMin.xy, CullRectMax.xy );
	const float4 RectUV = saturate( RectNDC * float2(0.5, -0.5).xyxy + 0.5 ).xwzy;

	// Full resolution pixel footprint.
	// A pixel only counts when its center is covered by the rectangle, because only then can anything
	// inside the rectangle rasterize to it. Compared to conservative floor/ceil bounds this typically
	// results in ~5% fewer clusters being drawn.
	// NOTE: This assumes one centered sample per pixel. It has to be adjusted for conservative
	// rasterization, MSAA or similar features.
	const float2 ViewSize = ViewRect.zw - ViewRect.xy;
	const float4 PixelCenterBias = float4(0.5f, 0.5f, -0.5f, -0.5f);
	Result.Pixels = int4( RectUV * ViewSize.xyxy + ViewRect.xyxy + PixelCenterBias );
	Result.Pixels.xy = max(Result.Pixels.xy, ViewRect.xy);
	Result.Pixels.zw = min(Result.Pixels.zw, ViewRect.zw - 1);

	// When this fails the rectangle has zero area or falls between pixel centers: nothing gets rasterized.
	Result.bOverlapsPixelCenter = all( Result.Pixels.zw >= Result.Pixels.xy );

	// Keep the HZB rect valid (max >= min) even if !bOverlapsPixelCenter.
	// Should this be inclusive rounding instead?
	const int2 MinPixel = Result.Pixels.xy;
	const int2 MaxPixel = max( MinPixel, Result.Pixels.zw );

	// First level of HZB is hard-coded to start at half resolution:
	// (x,y) in HZB mip 0 covers (2x+0, 2y+0), (2x+1, 2y+0), (2x+0, 2y+1), (2x+1, 2y+1) in the full resolution target.
	const int4 HZBMip0Texels = int4( MinPixel, MaxPixel ) >> 1;

	Result.HZBLevel = MipLevelForRect( HZBMip0Texels, DesiredFootprintPixels );

	// HZB mip 0 coordinates -> coordinates of the selected mip.
	Result.HZBTexels = HZBMip0Texels >> Result.HZBLevel;

	return Result;
}
|
|
|
|
// Convenience overload: builds the screen rect from the bounds stored in a frustum cull result.
FScreenRect GetScreenRect( int4 ViewRect, FFrustumCullData Cull, int DesiredFootprintPixels )
{
	const float3 CullRectMin = Cull.RectMin;
	const float3 CullRectMax = Cull.RectMax;

	return GetScreenRect( ViewRect, CullRectMin, CullRectMax, DesiredFootprintPixels );
}
|
|
|
|
#if !COMPILER_SUPPORTS_GATHER_LOD_RED
|
|
|
|
// Reads four HZB values along one row: one sample per entry of XCoords, all at YCoord and MipLevel.
// Used when gathers with explicit LOD are not available.
float4 LoadHZBRow4(float4 XCoords, float YCoord, float MipLevel)
{
	float4 Row;
	Row.x = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.x, YCoord), MipLevel).r;
	Row.y = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.y, YCoord), MipLevel).r;
	Row.z = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.z, YCoord), MipLevel).r;
	Row.w = HZBTexture.SampleLevel(HZBSampler, float2(XCoords.w, YCoord), MipLevel).r;
	return Row;
}
|
|
|
|
// Array variant: reads four values along one row of slice ArrayIndex of HZBTextureArray.
float4 LoadHZBRow4(float4 XCoords, float YCoord, float MipLevel, float ArrayIndex)
{
	float4 Row;
	Row.x = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.x, YCoord, ArrayIndex), MipLevel).r;
	Row.y = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.y, YCoord, ArrayIndex), MipLevel).r;
	Row.z = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.z, YCoord, ArrayIndex), MipLevel).r;
	Row.w = HZBTextureArray.SampleLevel(HZBSampler, float3(XCoords.w, YCoord, ArrayIndex), MipLevel).r;
	return Row;
}
|
|
#endif
|
|
|
|
// Returns the minimum HZB value under Rect's texel footprint at Rect.HZBLevel.
// bSample4x4 reads a footprint of up to 4x4 texels, otherwise up to 2x2 texels.
float GetMinDepthFromHZB(FScreenRect Rect, bool bSample4x4)
{
	// TexelSize = (1 / HZBSize) * exp2(MipLevel), assembled directly from the float bit patterns.
	// Assumes HZB is po2.
	const float2 TexelSize = asfloat(0x7F000000 - asint(HZBSize) + (Rect.HZBLevel << 23));
	const float MipLevel = (float)Rect.HZBLevel;

	// Footprint collapses to a single texel along an axis.
	const bool bOneTexelWide = (Rect.HZBTexels.x == Rect.HZBTexels.z);
	const bool bOneTexelTall = (Rect.HZBTexels.y == Rect.HZBTexels.w);

	float4 Depth;
	if( bSample4x4 )
	{
#if COMPILER_SUPPORTS_GATHER_LOD_RED
		// Gather positions sit on texel corners: (Texel + 1) * TexelSize for the min corner,
		// the max corner is kept from dropping below it.
		const float2 GatherMin = (float2)Rect.HZBTexels.xy * TexelSize.xy + TexelSize.xy;
		const float2 GatherMax = max((float2)Rect.HZBTexels.zw * TexelSize.xy, GatherMin);

		const float4 Depth00 = HZBTexture.GatherLODRed(HZBSampler, GatherMin, MipLevel);
		const float4 Depth01 = HZBTexture.GatherLODRed(HZBSampler, float2(GatherMax.x, GatherMin.y), MipLevel);
		const float4 Depth10 = HZBTexture.GatherLODRed(HZBSampler, float2(GatherMin.x, GatherMax.y), MipLevel);
		const float4 Depth11 = HZBTexture.GatherLODRed(HZBSampler, GatherMax, MipLevel);

		Depth = min(min3(Depth00, Depth01, Depth10), Depth11);

		// Writing 1.0 removes a lane from the min below.
		Depth.yz = bOneTexelWide ? 1.0f : Depth.yz;	// Mask off right pixels, if footprint is only one pixel wide.
		Depth.xy = bOneTexelTall ? 1.0f : Depth.xy;	// Mask off bottom pixels, if footprint is only one pixel tall.
#else
		// Texel centers of up to 4 columns / rows, clamped to the footprint's max texel.
		const float4 XCoords = (min(Rect.HZBTexels.x + int4(0, 1, 2, 3), Rect.HZBTexels.z) + 0.5f) * TexelSize.x;
		const float4 YCoords = (min(Rect.HZBTexels.y + int4(0, 1, 2, 3), Rect.HZBTexels.w) + 0.5f) * TexelSize.y;

		const float4 Depth0 = LoadHZBRow4( XCoords, YCoords.x, MipLevel);
		const float4 Depth1 = LoadHZBRow4( XCoords, YCoords.y, MipLevel);
		const float4 Depth2 = LoadHZBRow4( XCoords, YCoords.z, MipLevel);
		const float4 Depth3 = LoadHZBRow4( XCoords, YCoords.w, MipLevel);

		Depth = min(min3(Depth0, Depth1, Depth2), Depth3);
#endif
	}
	else
	{
#if COMPILER_SUPPORTS_GATHER_LOD_RED
		const float2 Coords = Rect.HZBTexels.xy * TexelSize + TexelSize;	// (RectPixels + 1.0f) * TexelSize
		Depth = HZBTexture.GatherLODRed(HZBSampler, Coords.xy, MipLevel);
#else
		// Emulate the gather's component order with point samples at the texel centers.
		const float4 Coords = (Rect.HZBTexels + 0.5f) * TexelSize.xyxy;
		Depth.x = HZBTexture.SampleLevel(HZBSampler, Coords.xw, MipLevel).r;	// (-,+)
		Depth.y = HZBTexture.SampleLevel(HZBSampler, Coords.zw, MipLevel).r;	// (+,+)
		Depth.z = HZBTexture.SampleLevel(HZBSampler, Coords.zy, MipLevel).r;	// (+,-)
		Depth.w = HZBTexture.SampleLevel(HZBSampler, Coords.xy, MipLevel).r;	// (-,-)
#endif
		Depth.yz = bOneTexelWide ? 1.0f : Depth.yz;	// Mask off right pixels, if footprint is only one pixel wide.
		Depth.xy = bOneTexelTall ? 1.0f : Depth.xy;	// Mask off bottom pixels, if footprint is only one pixel tall.
	}

	return min(min3(Depth.x, Depth.y, Depth.z), Depth.w);
}
|
|
|
|
// Occlusion test of a screen rect against the HZB.
bool IsVisibleHZB( FScreenRect Rect, bool bSample4x4 )
{
	const float HZBMinDepth = GetMinDepthFromHZB(Rect, bSample4x4);

	// Inverted Z buffer: the rect passes when its depth is at or above the smallest HZB depth under it.
	const bool bPassesDepthTest = (Rect.Depth >= HZBMinDepth);
	return bPassesDepthTest;
}
|
|
|
|
// Texture-array variant of GetMinDepthFromHZB: returns the minimum HZB value under Rect's texel
// footprint in slice ArrayIndex of HZBTextureArray.
// bSample4x4 reads a footprint of up to 4x4 texels, otherwise up to 2x2 texels.
float GetMinDepthFromHZBArray(FScreenRect Rect, bool bSample4x4, float ArrayIndex)
{
	// TexelSize = (1 / HZBSize) * exp2(MipLevel), assembled directly from the float bit patterns.
	// Assumes HZB is po2.
	const float2 TexelSize = asfloat(0x7F000000 - asint(HZBSize) + (Rect.HZBLevel << 23));
	const float MipLevel = (float)Rect.HZBLevel;

	// Footprint collapses to a single texel along an axis.
	const bool bOneTexelWide = (Rect.HZBTexels.x == Rect.HZBTexels.z);
	const bool bOneTexelTall = (Rect.HZBTexels.y == Rect.HZBTexels.w);

	float4 Depth;
	if( bSample4x4 )
	{
#if COMPILER_SUPPORTS_GATHER_LOD_RED
		// Gather positions sit on texel corners: (Texel + 1) * TexelSize for the min corner,
		// the max corner is kept from dropping below it.
		const float2 GatherMin = (float2)Rect.HZBTexels.xy * TexelSize.xy + TexelSize.xy;
		const float2 GatherMax = max((float2)Rect.HZBTexels.zw * TexelSize.xy, GatherMin);

		const float4 Depth00 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherMin, ArrayIndex), MipLevel);
		const float4 Depth01 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherMax.x, GatherMin.y, ArrayIndex), MipLevel);
		const float4 Depth10 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherMin.x, GatherMax.y, ArrayIndex), MipLevel);
		const float4 Depth11 = HZBTextureArray.GatherLODRed(HZBSampler, float3(GatherMax, ArrayIndex), MipLevel);

		Depth = min(min3(Depth00, Depth01, Depth10), Depth11);

		// Writing 1.0 removes a lane from the min below.
		Depth.yz = bOneTexelWide ? 1.0f : Depth.yz;	// Mask off right pixels, if footprint is only one pixel wide.
		Depth.xy = bOneTexelTall ? 1.0f : Depth.xy;	// Mask off bottom pixels, if footprint is only one pixel tall.
#else
		// Texel centers of up to 4 columns / rows, clamped to the footprint's max texel.
		const float4 XCoords = (min(Rect.HZBTexels.x + int4(0, 1, 2, 3), Rect.HZBTexels.z) + 0.5f) * TexelSize.x;
		const float4 YCoords = (min(Rect.HZBTexels.y + int4(0, 1, 2, 3), Rect.HZBTexels.w) + 0.5f) * TexelSize.y;

		const float4 Depth0 = LoadHZBRow4( XCoords, YCoords.x, MipLevel, ArrayIndex);
		const float4 Depth1 = LoadHZBRow4( XCoords, YCoords.y, MipLevel, ArrayIndex);
		const float4 Depth2 = LoadHZBRow4( XCoords, YCoords.z, MipLevel, ArrayIndex);
		const float4 Depth3 = LoadHZBRow4( XCoords, YCoords.w, MipLevel, ArrayIndex);

		Depth = min(min3(Depth0, Depth1, Depth2), Depth3);
#endif
	}
	else
	{
#if COMPILER_SUPPORTS_GATHER_LOD_RED
		const float2 Coords = Rect.HZBTexels.xy * TexelSize + TexelSize;	// (RectPixels + 1.0f) * TexelSize
		Depth = HZBTextureArray.GatherLODRed(HZBSampler, float3(Coords.xy, ArrayIndex), MipLevel);
#else
		// Emulate the gather's component order with point samples at the texel centers.
		const float4 Coords = (Rect.HZBTexels + 0.5f) * TexelSize.xyxy;
		Depth.x = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.xw, ArrayIndex), MipLevel).r;	// (-,+)
		Depth.y = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.zw, ArrayIndex), MipLevel).r;	// (+,+)
		Depth.z = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.zy, ArrayIndex), MipLevel).r;	// (+,-)
		Depth.w = HZBTextureArray.SampleLevel(HZBSampler, float3(Coords.xy, ArrayIndex), MipLevel).r;	// (-,-)
#endif
		Depth.yz = bOneTexelWide ? 1.0f : Depth.yz;	// Mask off right pixels, if footprint is only one pixel wide.
		Depth.xy = bOneTexelTall ? 1.0f : Depth.xy;	// Mask off bottom pixels, if footprint is only one pixel tall.
	}

	return min(min3(Depth.x, Depth.y, Depth.z), Depth.w);
}
|
|
|
|
// Occlusion test of a screen rect against slice ArrayIndex of the HZB texture array.
bool IsVisibleHZBArray( FScreenRect Rect, bool bSample4x4, int ArrayIndex)
{
	const float Slice = float(ArrayIndex);
	const float HZBMinDepth = GetMinDepthFromHZBArray(Rect, bSample4x4, Slice);

	// Inverted Z buffer: the rect passes when its depth is at or above the smallest HZB depth under it.
	const bool bPassesDepthTest = (Rect.Depth >= HZBMinDepth);
	return bPassesDepthTest;
}
|
|
|
|
// Occlusion test that compares each HZB texel against min( plane depth at that texel, Rect.Depth ).
// PlaneHZB holds a depth plane over HZB UV space: depth = dot( PlaneHZB, float3( UV, 1 ) ).
// Returns true as soon as any texel of the footprint passes.
bool IsVisibleHZB( FScreenRect Rect, float3 PlaneHZB )
{
	// TexelSize = (1 / HZBSize) * exp2(MipLevel), assembled directly from the float bit patterns.
	// Assumes HZB is po2.
	const float2 TexelSize = asfloat(0x7F000000 - asint(HZBSize) + (Rect.HZBLevel << 23));
	const float MipLevel = (float)Rect.HZBLevel;

#if COMPILER_SUPPORTS_GATHER_LOD_RED
	// Gather positions sit on texel corners: (Texel + 1) * TexelSize for the min corner,
	// the max corner is kept from dropping below it.
	const float2 GatherMin = (float2)Rect.HZBTexels.xy * TexelSize.xy + TexelSize.xy;
	const float2 GatherMax = max((float2)Rect.HZBTexels.zw * TexelSize.xy, GatherMin);
	const float2 Gather01 = float2(GatherMax.x, GatherMin.y);
	const float2 Gather10 = float2(GatherMin.x, GatherMax.y);

	const float4 HZBDepth00 = HZBTexture.GatherLODRed(HZBSampler, GatherMin, MipLevel);
	const float4 HZBDepth01 = HZBTexture.GatherLODRed(HZBSampler, Gather01, MipLevel);
	const float4 HZBDepth10 = HZBTexture.GatherLODRed(HZBSampler, Gather10, MipLevel);
	const float4 HZBDepth11 = HZBTexture.GatherLODRed(HZBSampler, GatherMax, MipLevel);

	// Plane depth at the four texel centers relative to the gather position (half a texel away on
	// each axis), laid out in gather component order: x=(-,+), y=(+,+), z=(+,-), w=(-,-).
	const float2 PlaneDepthDelta = 0.5 * TexelSize.xy * PlaneHZB.xy;
	const float4 PlaneDepthBase4 =
		float4( -PlaneDepthDelta.x,  PlaneDepthDelta.x,  PlaneDepthDelta.x, -PlaneDepthDelta.x ) +
		float4(  PlaneDepthDelta.y,  PlaneDepthDelta.y, -PlaneDepthDelta.y, -PlaneDepthDelta.y ) +
		PlaneHZB.z;

	const float4 PlaneDepth00 = dot( PlaneHZB.xy, GatherMin ) + PlaneDepthBase4;
	const float4 PlaneDepth01 = dot( PlaneHZB.xy, Gather01 ) + PlaneDepthBase4;
	const float4 PlaneDepth10 = dot( PlaneHZB.xy, Gather10 ) + PlaneDepthBase4;
	const float4 PlaneDepth11 = dot( PlaneHZB.xy, GatherMax ) + PlaneDepthBase4;

	const bool4 bIsVisible00 = min( PlaneDepth00, Rect.Depth ) >= HZBDepth00;
	const bool4 bIsVisible01 = min( PlaneDepth01, Rect.Depth ) >= HZBDepth01;
	const bool4 bIsVisible10 = min( PlaneDepth10, Rect.Depth ) >= HZBDepth10;
	const bool4 bIsVisible11 = min( PlaneDepth11, Rect.Depth ) >= HZBDepth11;

	bool4 bIsVisible = or(or(bIsVisible00, bIsVisible01), or(bIsVisible10, bIsVisible11));

	const bool bOneTexelWide = (Rect.HZBTexels.x == Rect.HZBTexels.z);
	const bool bOneTexelTall = (Rect.HZBTexels.y == Rect.HZBTexels.w);
	bIsVisible.yz = bOneTexelWide ? false : bIsVisible.yz;	// Mask off right pixels
	bIsVisible.xy = bOneTexelTall ? false : bIsVisible.xy;	// Mask off bottom pixels

	return any( bIsVisible );
#else
	// No gather available: walk every texel of the inclusive footprint.
	bool bIsVisible = false;
	for( int TexelY = Rect.HZBTexels.y; TexelY <= Rect.HZBTexels.w; TexelY++ )
	{
		for( int TexelX = Rect.HZBTexels.x; TexelX <= Rect.HZBTexels.z; TexelX++ )
		{
			const float2 UV = ( float2( TexelX, TexelY ) + 0.5 ) * TexelSize;

			const float HZBDepth = HZBTexture.SampleLevel( HZBSampler, UV, MipLevel ).r;
			const float PlaneDepth = dot( PlaneHZB, float3( UV, 1 ) );
			const float TestDepth = min( PlaneDepth, Rect.Depth );

			bIsVisible = bIsVisible || TestDepth >= HZBDepth;
		}
	}

	return bIsVisible;
#endif
}
|
|
|
|
|
|
// Cube face a direction points at: 0/1 = +X/-X, 2/3 = +Y/-Y, 4/5 = +Z/-Z.
// Ties go to X first, then Z over Y. A zero component on the dominant axis selects the negative face.
uint GetCubeFaceIndex( float3 Dir )
{
#if 1
	const float3 AbsDir = abs( Dir );

	const bool bXDominant = AbsDir.x >= AbsDir.y && AbsDir.x >= AbsDir.z;
	const bool bYDominant = AbsDir.y > AbsDir.z;

	const uint FaceX = Dir.x > 0 ? 0 : 1;
	const uint FaceY = Dir.y > 0 ? 2 : 3;
	const uint FaceZ = Dir.z > 0 ? 4 : 5;

	const uint Face = bXDominant ? FaceX : ( bYDominant ? FaceY : FaceZ );
#else
	// TODO GCN
	uint Face = v_cubeid_f32( Dir );
#endif
	return Face;
}
|
|
|
|
// Plane (xyz = normal, w = -1) of the face of the normalized cube [-1,1] that Dir points at.
float4 GetCubeFacePlane( float3 Dir )
{
	// Face index packs the axis in the upper bits and the sign in bit 0.
	const uint Face = GetCubeFaceIndex( Dir );
	const uint Axis = Face >> 1;
	const float Sign = (Face & 1) ? -1 : 1;

	const float3 FaceNormal = float3(
		Axis == 0 ? Sign : 0,
		Axis == 1 ? Sign : 0,
		Axis == 2 ? Sign : 0 );

	return float4( FaceNormal, -1 );
}
|
|
|
|
// Plane of the box face that faces ViewOrigin most directly.
// Returns xyz = axis-aligned face normal, w = -dot( normal, corner of the box nearest to the view origin ).
float4 GetDominantFacePlane( float3 ViewOrigin, float3 BoundsCenter, float3 BoundsExtent )
{
	// Direction from the box center to the viewer, rescaled so the box becomes a unit cube.
	const float3 UnitCubeDir = ( ViewOrigin - BoundsCenter ) / BoundsExtent;

	// Face index packs the axis in the upper bits and the sign in bit 0.
	const uint Face = GetCubeFaceIndex( UnitCubeDir );
	const uint Axis = Face >> 1;
	const float Sign = (Face & 1) ? -1 : 1;

	const float3 FaceNormal = float3(
		Axis == 0 ? Sign : 0,
		Axis == 1 ? Sign : 0,
		Axis == 2 ? Sign : 0 );

	// Per axis, step towards the side the viewer is on.
	const float3 CornerOffset = select( UnitCubeDir > 0, BoundsExtent, -BoundsExtent );
	const float3 ClosestCorner = BoundsCenter + CornerOffset;

	return float4( FaceNormal, dot( FaceNormal, -ClosestCorner ) );
}
|
|
|
|
// Builds the depth plane of the box's dominant face, expressed over HZB UVs:
//   depth = dot( Result, float3( HZBUV, 1 ) )
// The result is the PlaneHZB argument of IsVisibleHZB( Rect, PlaneHZB ).
float3 GetFacePlaneHZB( float3 ViewOriginLocal, float4x4 ClipToLocal, float3 BoundsCenter, float3 BoundsExtent )
{
	// Alternative kept from the original for reference: GetCubeFacePlane( ViewOriginLocal )
	const float4 FacePlaneLocal = GetDominantFacePlane( ViewOriginLocal, BoundsCenter, BoundsExtent );

	// Transform to post projective space. Inverse transpose for plane projection.
	// https://stackoverflow.com/questions/7685495/transforming-a-3d-plane-using-a-4x4-matrix
	const float4 FacePlaneClip = mul( ClipToLocal, FacePlaneLocal );

	// 2D plane in screen space
	// Solve for PointClip.z where:
	// dot( PlaneClip, float4( PointClip, 1 ) ) = 0
	const float3 FacePlaneScreen = -FacePlaneClip.xyw / FacePlaneClip.z;

	// Bake UV to ScreenPos transform into planes to pull outside of loop
	// float2 ScreenPos = UV * float2( 2, -2 ) + float2( -1, 1 );
	const float2 SlopeUV = FacePlaneScreen.xy * float2( 2, -2 );
	const float OffsetUV = dot( FacePlaneScreen, float3( -1, 1, 1 ) );

	// HZB UVs to viewport UVs
	float3 PlaneHZB = float3( SlopeUV * 2 * HZBSize.xy / HZBViewSize.xy, OffsetUV );

	// Sample from max depth corner (inverted z) for conservative depth over pixel.
	PlaneHZB.z += dot( abs( PlaneHZB.xy ), 0.5 / HZBViewSize.xy );

	return PlaneHZB;
}
|
|
|
|
// Frustum cull of an oriented box under an orthographic projection.
// The clip-space bounds are the projected center plus/minus the summed absolute reach of the three box axes.
// bNearClip enables rejection beyond the near plane (z = 1); bSkipFrustumCull disables the side-plane test.
FFrustumCullData BoxCullFrustumOrtho(float3 Center, float3 Extent, float4x4 LocalToWorld, float4x4 WorldToClip, bool bNearClip, bool bSkipFrustumCull)
{
	FFrustumCullData Cull;

	const float3 CenterClip = mul( mul( float4( Center, 1 ), LocalToWorld ), WorldToClip ).xyz;

	// Clip-space reach of each local box axis.
	const float3 ReachX = abs( Extent.x * mul(LocalToWorld[0], WorldToClip).xyz );
	const float3 ReachY = abs( Extent.y * mul(LocalToWorld[1], WorldToClip).xyz );
	const float3 ReachZ = abs( Extent.z * mul(LocalToWorld[2], WorldToClip).xyz );
	const float3 ClipDelta = ReachX + ReachY + ReachZ;

	Cull.RectMin = CenterClip - ClipDelta;
	Cull.RectMax = CenterClip + ClipDelta;

	Cull.bCrossesFarPlane = Cull.RectMin.z < 0.0f;
	Cull.bCrossesNearPlane = Cull.RectMax.z > 1.0f;

	// Far clip always applies, near clip only on request.
	bool bPassesDepthPlanes = Cull.RectMax.z > 0.0f;
	if (bNearClip)
	{
		bPassesDepthPlanes = bPassesDepthPlanes && Cull.RectMin.z < 1.0f;
	}

	bool bFrustumCull = false;
	if (!bSkipFrustumCull) // for debugging, will compile out
	{
		bFrustumCull = any(or(Cull.RectMax.xy < -1.0f, Cull.RectMin.xy > 1.0f));
	}

	Cull.bFrustumSideCulled = bPassesDepthPlanes && bFrustumCull;
	Cull.bIsVisible = bPassesDepthPlanes && !bFrustumCull;

	return Cull;
}
|
|
|
|
// Frustum cull of an oriented box under a perspective projection.
// All 8 corners are transformed to clip space; their w range, their projected xy bounds and their
// signed distances to the four side planes are accumulated.
// bSkipFrustumCull disables the side-plane rejection.
FFrustumCullData BoxCullFrustumPerspective(float3 Center, float3 Extent, float4x4 LocalToWorld, float4x4 WorldToClip, float4x4 ViewToClip, bool bSkipFrustumCull)
{
	FFrustumCullData Cull;

	// Clip-space vectors spanning the full box along local X and Y (the Z one lives inside the first pass).
	float4 DX = (2.0f * Extent.x) * mul(LocalToWorld[0], WorldToClip);
	float4 DY = (2.0f * Extent.y) * mul(LocalToWorld[1], WorldToClip);

	// Running reductions over all corners.
	float SmallestW = +INFINITE_FLOAT;
	float LargestW = -INFINITE_FLOAT;
	float4 PlanesMin = 1.0f;

	Cull.RectMin = float3(+1, +1, +1);
	Cull.RectMax = float3(-1, -1, -1);

	// To discourage the compiler from overlapping the entire calculation, which uses an excessive number of VGPRs,
	// the evaluation is split into 4 isolated passes with two corners per pass.
	// There seems to be no additional benefit from evaluating just one corner per pass and it prevents the use of
	// fast min3/max3 intrinsics.

	// Folds two clip-space corners into the reductions:
	//   w range, signed side-plane distances (+x, +y, -x, -y minus w) and projected xy bounds.
	#define ACCUMULATE_CORNER_PAIR(CornerA, CornerB) \
		SmallestW		= min3(SmallestW, CornerA.w, CornerB.w); \
		LargestW		= max3(LargestW, CornerA.w, CornerB.w); \
		PlanesMin		= min3(PlanesMin, float4(CornerA.xy, -CornerA.xy) - CornerA.w, float4(CornerB.xy, -CornerB.xy) - CornerB.w); \
		float2 ScreenA	= CornerA.xy / CornerA.w; \
		float2 ScreenB	= CornerB.xy / CornerB.w; \
		Cull.RectMin.xy	= min3(Cull.RectMin.xy, ScreenA, ScreenB); \
		Cull.RectMax.xy	= max3(Cull.RectMax.xy, ScreenA, ScreenB);

	// Pass 1: the min corner and its neighbor along local Z.
	float4 PC000, PC100;
	PLATFORM_SPECIFIC_ISOLATE
	{
		float4 DZ = (2.0f * Extent.z) * mul(LocalToWorld[2], WorldToClip);
		PC000 = mul(mul(float4(Center - Extent, 1.0), LocalToWorld), WorldToClip);
		PC100 = PC000 + DZ;
		ACCUMULATE_CORNER_PAIR(PC000, PC100);
	}

	// Pass 2: step both along local X.
	float4 PC001, PC101;
	PLATFORM_SPECIFIC_ISOLATE
	{
		PC001 = PC000 + DX;
		PC101 = PC100 + DX;
		ACCUMULATE_CORNER_PAIR(PC001, PC101);
	}

	// Pass 3: step both along local Y.
	float4 PC011, PC111;
	PLATFORM_SPECIFIC_ISOLATE
	{
		PC011 = PC001 + DY;
		PC111 = PC101 + DY;
		ACCUMULATE_CORNER_PAIR(PC011, PC111);
	}

	// Pass 4: step back along local X to reach the last two corners.
	float4 PC010, PC110;
	PLATFORM_SPECIFIC_ISOLATE
	{
		PC010 = PC011 - DX;
		PC110 = PC111 - DX;
		ACCUMULATE_CORNER_PAIR(PC010, PC110);
	}

	#undef ACCUMULATE_CORNER_PAIR

	// Clip-space z at the two ends of the w range. The largest w yields MinZ and the smallest w yields MaxZ.
	const float MinZ = LargestW * ViewToClip[2][2] + ViewToClip[3][2];
	const float MaxZ = SmallestW * ViewToClip[2][2] + ViewToClip[3][2];

	// Near is z=1 (compared before the divide by w: z >= w).
	const bool bInFrontNearPlane = SmallestW <= MaxZ;
	const bool bBehindNearPlane = LargestW > MinZ;

	// Far is z=0
	const bool bInFrontFarPlane = MaxZ > 0;
	const bool bBehindFarPlane = MinZ <= 0;

	Cull.bCrossesNearPlane = bInFrontNearPlane;
	Cull.bCrossesFarPlane = bBehindFarPlane;
	Cull.bIsVisible = bBehindNearPlane && bInFrontFarPlane;

	const bool bStraddlesWZero = (SmallestW <= 0.0f && LargestW > 0.0f);
	if (bStraddlesWZero)
	{
		// Some corners have w <= 0, so the per-corner divides above are not meaningful: use the full NDC range.
		Cull.RectMin = float3(-1, -1, -1);
		Cull.RectMax = float3(+1, +1, +1);
	}
	else
	{
		Cull.RectMin.z = MinZ / LargestW;
		Cull.RectMax.z = MaxZ / SmallestW;
	}

	// A side plane rejects the box when every corner lies outside of it (minimum distance > 0).
	bool bFrustumCull = false;
	if (!bSkipFrustumCull)
	{
		bFrustumCull = any(PlanesMin > 0.0f);
	}
	Cull.bFrustumSideCulled = Cull.bIsVisible && bFrustumCull;
	Cull.bIsVisible = Cull.bIsVisible && !bFrustumCull;

	return Cull;
}
|
|
|
|
// Dispatches to the orthographic or perspective box cull.
// Splitting the transform in two generates much better code on DXC when WorldToClip is scalar.
FFrustumCullData BoxCullFrustum( float3 Center, float3 Extent, float4x4 LocalToWorld, float4x4 WorldToClip, float4x4 ViewToClip, bool bIsOrtho, bool bNearClip, bool bSkipFrustumCull )
{
	// NOTE: We assume here that if near clipping is disabled the projection is orthographic, as disabling near
	// clipping is a feature for directional light shadows, and disabling near clipping for a perspective
	// projection doesn't make much sense.
	// Checking both also serves to help out DCE when either is a compile-time constant.
	checkSlow(bIsOrtho || bNearClip);

	const bool bTakeOrthoPath = bIsOrtho || !bNearClip;
	if (!bTakeOrthoPath)
	{
		return BoxCullFrustumPerspective( Center, Extent, LocalToWorld, WorldToClip, ViewToClip, bSkipFrustumCull );
	}

	return BoxCullFrustumOrtho( Center, Extent, LocalToWorld, WorldToClip, bNearClip, bSkipFrustumCull );
}
|
|
|
|
// Overload for callers that already hold a combined LocalToClip matrix:
// forwards it as WorldToClip together with an identity LocalToWorld.
FFrustumCullData BoxCullFrustum(float3 Center, float3 Extent, float4x4 LocalToClip, float4x4 ViewToClip, bool bIsOrtho, bool bNearClip, bool bSkipFrustumCull)
{
	const float4x4 IdentityLocalToWorld = float4x4(
		1, 0, 0, 0,
		0, 1, 0, 0,
		0, 0, 1, 0,
		0, 0, 0, 1 );

	return BoxCullFrustum( Center, Extent, IdentityLocalToWorld, LocalToClip, ViewToClip, bIsOrtho, bNearClip, bSkipFrustumCull );
}
|
|
|
|
// 2D slice of the sphere projection: for a circle centered at (x, z) with radius r, returns the
// projected (x / z) of the two tangent lines through the origin, multiplied by ResultScale.
// Result is ( Min, Max ). No handling for an origin inside the circle (sqrt of a negative number).
float2 ProjectSphere( float x, float z, float r, float ResultScale )
{
#if 1
	// Distance from the origin to the tangent points.
	const float t = sqrt( x*x + z*z - r*r );

	// Denominators of the min / max tangent slopes.
	const float DenomMin = ( t*z + r*x );
	const float DenomMax = ( t*z - r*x );

	// Divide by common denominator instead of dividing twice
	const float SharedScale = ResultScale / ( DenomMin * DenomMax );

	// Each numerator is multiplied by the other slope's denominator to match the shared divide.
	const float Min = ( t*x - r*z ) * DenomMax;
	const float Max = ( t*x + r*z ) * DenomMin;

	return float2( Min, Max ) * SharedScale;
#else
	// Straightforward form with one divide per slope.
	const float t = sqrt( x*x + z*z - r*r );

	const float Min = ( t*x - r*z ) / ( t*z + r*x );
	const float Max = ( t*x + r*z ) / ( t*z - r*x );

	return float2( Min, Max ) * ResultScale;
#endif
}
|
|
|
|
// Screen rect ( MinX, MinY, MaxX, MaxY ) of a perspective-projected sphere given in view space.
// Does not handle spheres crossing the near plane, see SphereToScreenRectNearClip.
// [ Mara & Morgan 2013, "2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere" ]
float4 SphereToScreenRect( float3 Center, float Radius, float4x4 ViewToClip )
{
	// Each axis is solved in its own 2D slice, scaled by the projection's diagonal term and
	// shifted by the z-row term of that axis.
	const float2 ExtentX = ProjectSphere( Center.x, Center.z, Radius, ViewToClip[0][0] ) + ViewToClip[2][0];
	const float2 ExtentY = ProjectSphere( Center.y, Center.z, Radius, ViewToClip[1][1] ) + ViewToClip[2][1];

	// ( X.min, X.max, Y.min, Y.max ) -> ( X.min, Y.min, X.max, Y.max )
	return float4( ExtentX, ExtentY ).xzyw;
}
|
|
|
|
// 2D slice of the near-clipped sphere projection: like ProjectSphere, but a tangent point that lies
// in front of the near plane is replaced by the circle's intersection with z = ZNear.
// Assumes we have already handled the z+r < ZNear case
float2 ProjectSphereNearClip( float x, float z, float r, float ResultScale, float ZNear )
{
	const float CenterDistSq = x * x + z * z;
	const float TangentLenSq = CenterDistSq - r * r;
	const float TangentLen = sqrt( TangentLenSq );

	// cos / sin of the angle between center direction and tangent, both scaled by TangentLen * |center|.
	// The rotated points below therefore are the true tangent points multiplied by CenterDistSq.
	const float ScaledCosTheta = TangentLenSq;
	const float ScaledSinTheta = r * TangentLen;

	// obtain B by rotating by Theta
	float2 B = float2( ScaledCosTheta * x + ScaledSinTheta * z, -ScaledSinTheta * x + ScaledCosTheta * z );
	// obtain T by rotating by -Theta
	float2 T = float2( ScaledCosTheta * x - ScaledSinTheta * z, ScaledSinTheta * x + ScaledCosTheta * z );

	// Half width of the circle's chord along the near plane.
	const float h = ZNear - z;
	const float d = sqrt( r * r - h * h );

	// Origin inside the circle: no tangents exist, both bounds come from the near plane chord.
	const bool bOriginInsideCircle = TangentLenSq < 0.0;
	// ZNear brought to the same scale as the rotated points.
	const float ScaledZNear = ZNear * CenterDistSq;

	if ( bOriginInsideCircle || T.y < ScaledZNear )
	{
		T = float2( x - d, ZNear );
	}

	if ( bOriginInsideCircle || B.y < ScaledZNear )
	{
		B = float2( x + d, ZNear );
	}

	return float2( T.x / T.y, B.x / B.y ) * ResultScale;
}
|
|
|
|
|
|
// Screen rect ( MinX, MinY, MaxX, MaxY ) of a perspective-projected view-space sphere that may cross the near plane.
// [ Mara & Morgan 2013, "2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere" ]
float4 SphereToScreenRectNearClip( float3 Center, float Radius, float4x4 ViewToClip, float ZNear )
{
	// Each axis is solved in its own 2D slice, scaled by the projection's diagonal term and
	// shifted by the z-row term of that axis.
	const float2 ExtentX = ProjectSphereNearClip( Center.x, Center.z, Radius, ViewToClip[0][0], ZNear ) + ViewToClip[2][0];
	const float2 ExtentY = ProjectSphereNearClip( Center.y, Center.z, Radius, ViewToClip[1][1], ZNear ) + ViewToClip[2][1];

	// ( X.min, X.max, Y.min, Y.max ) -> ( X.min, Y.min, X.max, Y.max )
	return float4( ExtentX, ExtentY ).xzyw;
}
|
|
|
|
// Screen rect ( MinX, MinY, MaxX, MaxY ) of a view-space sphere under an orthographic projection.
//
// With the row-vector convention used throughout this file ( mul( float4( P, 1 ), M ) ) and w == 1:
//   clip.x = P.x * M[0][0] + M[3][0]
//   clip.y = P.y * M[1][1] + M[3][1]
// The translation therefore comes from row 3, matching the orthographic depth in SphereCullFrustum,
// which uses ViewToClip[3][2]. The row 2 terms ( M[2][0], M[2][1] ) are only the right offset for the
// perspective variants, where they survive the divide by z. Using them here dropped any off-center
// offset of the orthographic projection.
// Assumes an axis-aligned orthographic projection ( M[1][0], M[2][0], M[0][1], M[2][1] == 0 ).
float4 SphereToScreenRectOrtho( float3 Center, float Radius, float4x4 ViewToClip )
{
	const float2 ExtentX = float2( Center.x - Radius, Center.x + Radius ) * ViewToClip[0][0] + ViewToClip[3][0];
	const float2 ExtentY = float2( Center.y - Radius, Center.y + Radius ) * ViewToClip[1][1] + ViewToClip[3][1];

	return float4( ExtentX.x, ExtentY.x, ExtentX.y, ExtentY.y );
}
|
|
|
|
// Frustum cull of a sphere given in view space (Center.z is compared directly against ZNear).
//
// Center, Radius		Sphere in view space.
// ViewToClip			Projection matrix; ViewToClip[3][3] >= 1 is treated as orthographic.
// ZNear				View-space depth of the near plane.
// bNearClip			When false the near plane does not reject the sphere.
// bSkipFrustumCull		Disables the screen-rect (side) test, for debugging.
//
// Returns the screen rect in RectMin.xy / RectMax.xy, the depth of the sphere's smallest view z in
// RectMax.z (RectMin.z is always 0) and the plane classification flags.
// Every member of the result is written, including bFrustumSideCulled, which follows the same
// definition as in the box cull functions.
FFrustumCullData SphereCullFrustum(float3 Center, float Radius, float4x4 ViewToClip, float ZNear, bool bNearClip, bool bSkipFrustumCull)
{
	FFrustumCullData Cull;

	const float SphereMaxZ = Center.z + Radius;
	const float SphereMinZ = Center.z - Radius;

	Cull.bIsVisible = true;

	// Near plane cull: the whole sphere is in front of the near plane.
	if (bNearClip)
	{
		Cull.bIsVisible = (SphereMaxZ > ZNear);
	}

	// Far plane cull: even the closest point of the sphere projects to z <= 0 (inverted Z, far is 0).
	if (Cull.bIsVisible)
	{
		Cull.bIsVisible = (SphereMinZ * ViewToClip[2][2] + ViewToClip[3][2] > 0);
	}

	Cull.bCrossesNearPlane = ( SphereMinZ < ZNear );
	Cull.bCrossesFarPlane = ( SphereMaxZ * ViewToClip[2][2] + ViewToClip[3][2] < 0 );

	// Perspective depth (z / w with w == view z) of the sphere's smallest view z.
	// NOTE(review): not meaningful when SphereMinZ <= 0; presumably callers check bCrossesNearPlane first.
	float Depth = ViewToClip[2][2] + ViewToClip[3][2] / SphereMinZ;

	float4 Rect;
	if( ViewToClip[3][3] >= 1 )
	{
		// Orthographic: no perspective divide for rect or depth.
		Rect = SphereToScreenRectOrtho( Center, Radius, ViewToClip );
		Depth = SphereMinZ * ViewToClip[2][2] + ViewToClip[3][2];
	}
	else if( Cull.bCrossesNearPlane )
	{
		Rect = SphereToScreenRectNearClip( Center, Radius, ViewToClip, ZNear );
	}
	else
	{
		Rect = SphereToScreenRect( Center, Radius, ViewToClip );
	}

	Cull.RectMin = float3( Rect.xy, 0 );
	Cull.RectMax = float3( Rect.zw, Depth );

	// Frustum cull: rect entirely outside the [-1,1] NDC square.
	Cull.bFrustumSideCulled = false;
	if (!bSkipFrustumCull /* for debugging */)
	{
		const bool bRectOnScreen = all(Cull.RectMin.xy < 1) && all(Cull.RectMax.xy > -1);
		const bool bFrustumCull = !bRectOnScreen;

		Cull.bFrustumSideCulled = Cull.bIsVisible && bFrustumCull;
		Cull.bIsVisible = Cull.bIsVisible && !bFrustumCull;
	}

	return Cull;
}