Files
UnrealEngine/Engine/Shaders/Private/Nanite/NaniteTessellation.ush
2025-05-18 13:04:45 +08:00

359 lines
11 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "../Common.ush"
#include "NaniteSceneCommon.ush"
#define NANITE_TESSELLATION_PATCH_REFS 1
#define NANITE_TESSELLATION_NORMALIZE 0
#if NANITE_TESSELLATION
// Fixed-point scale for quantized barycentrics: EncodeBarycentrics multiplies by this value
// before rounding and DecodeBarycentrics divides by it.
static const uint BarycentricMax = (1 << 15); // Must be pow2
// Packs barycentric coordinates into a single uint as two 16-bit fixed-point values:
// x in the low half, y in the high half. z is not stored; DecodeBarycentrics rebuilds it.
uint EncodeBarycentrics( float3 Barycentrics )
{
	float3 Quantized = round( Barycentrics * BarycentricMax );

	// Rounding each component independently can make the sum drift away from BarycentricMax,
	// so the largest component is recomputed from the other two.
	const bool bXIsLargest = Quantized.x > max( Quantized.y, Quantized.z );

	FLATTEN
	if( bXIsLargest )
	{
		Quantized.x = BarycentricMax - Quantized.y - Quantized.z;
	}
	else if( Quantized.y > Quantized.z )
	{
		Quantized.y = BarycentricMax - Quantized.x - Quantized.z;
	}
	else
	{
		Quantized.z = BarycentricMax - Quantized.x - Quantized.y;
	}

	// Only x and y are written out.
	return uint( Quantized.x ) | ( uint( Quantized.y ) << 16 );
}
// Inverse of EncodeBarycentrics: unpacks x (low 16 bits) and y (high 16 bits) from fixed point
// and derives z so that the three components sum to one.
float3 DecodeBarycentrics( uint Encoded )
{
	const float U = float( Encoded & 0xffff ) / BarycentricMax;
	const float V = float( Encoded >> 16 ) / BarycentricMax;
	return float3( U, V, 1.0f - U - V );
}
// Two dwords per pattern, read by FTessellatedPatch::Init: the first becomes TableOffset,
// the second is OR'ed into Pattern_NumVerts_NumTris.
ByteAddressBuffer TessellationTable_Offsets;
// Per-pattern data addressed in dwords from TableOffset: encoded barycentric vertices first,
// then packed triangle indexes (see FTessellatedPatch::GetVert / GetIndexes).
ByteAddressBuffer TessellationTable_VertsAndIndexes;
// Returns the view depth at which the primitive's maximum material displacement equals its
// displacement fade out size, or 0 when the fade out size is zero.
float CalcDisplacementLowTessDistance(FPrimitiveSceneData PrimitiveData, FInstanceSceneData InstanceData, FNaniteView NaniteView)
{
	const float FadeOutSize = PrimitiveData.MaterialDisplacementFadeOutSize;

	float LowTessDistance = 0.0f;
	if (FadeOutSize != 0.0f)
	{
		// Solve for the depth where the scaled max displacement matches the fade out size.
		const float AbsMaxDisplacement = GetAbsMaxMaterialDisplacement(PrimitiveData);
		LowTessDistance = (AbsMaxDisplacement * InstanceData.NonUniformScale.w * NaniteView.LODScale) / FadeOutSize;
	}
	return LowTessDistance;
}
// Computes one tessellation factor per triangle edge from view-space corner positions.
// Edge i joins PointView[i] and PointView[(i + 1) % 3].
// LowTessDistance: when > 0 in a perspective view, edges whose scale depth exceeds it get a
// reduced factor.
float3 GetTessFactors( FNaniteView NaniteView, float3 PointView[3], float LowTessDistance )
{
#if 0
	// Disabled alternative: measure each edge after projecting the corners to pixels.
	float2 PointPixels[3];
	for( uint i = 0; i < 3; i++ )
	{
		float4 PointClip = mul( float4( PointView[i], 1 ), NaniteView.ViewToClip );
		PointPixels[i] = PointClip.xy / PointClip.w * float2(0.5, -0.5) * NaniteView.ViewSizeAndInvSize.xy;
	}
	float3 TessFactors =
	{
		length( PointPixels[0] - PointPixels[1] ),
		length( PointPixels[1] - PointPixels[2] ),
		length( PointPixels[2] - PointPixels[0] )
	};
	TessFactors *= InvDiceRate;
#else
	const bool bOrthographic = NaniteView.ViewToClip[3][3] >= 1;
	const bool bLowTessEnabled = !bOrthographic && LowTessDistance > 0.0f;

	bool3 bDistant = false;
	float3 TessFactors;
	for( uint EdgeIndex = 0; EdgeIndex < 3; EdgeIndex++ )
	{
		const float3 EdgeStart = PointView[ EdgeIndex ];
		const float3 EdgeEnd   = PointView[ ( EdgeIndex + 1 ) % 3 ];

		// Perspective: divide by the smaller endpoint depth, clamped to the near plane.
		// Orthographic: no depth scaling.
		const float MinDepth  = min( EdgeStart.z, EdgeEnd.z );
		const float EdgeScale = bOrthographic ? 1.0f : max( MinDepth, NaniteView.NearPlane );

		TessFactors[ EdgeIndex ] = length( EdgeStart - EdgeEnd ) / EdgeScale;
		bDistant[ EdgeIndex ]    = bLowTessEnabled && EdgeScale > LowTessDistance;
	}

	const float LowTessMultiple = 0.25f; // hard-coded for now
	TessFactors *= NaniteView.LODScale * InvDiceRate * select(bDistant, LowTessMultiple, 1.0f);
#endif
	return TessFactors;
}
// Converts edge tessellation factors into split factors by dividing by the tessellation table size.
// Only the first preprocessor branch is compiled. The remaining branches are retained alternatives
// that still reference TessellationTable_Size and have not been ported (see TODO).
float3 GetSplitFactors( float3 TessFactors )
{
#if 1
return TessFactors / NANITE_TESSELLATION_TABLE_SIZE;
// TODO: Fix other modes to work with NANITE_TESSELLATION_TABLE_SIZE
#elif 1
// Alternative: scale by a power of the table size chosen from the largest factor.
const float SizeLog2 = log2( TessellationTable_Size );
float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );
float ChildLevels = ceil( log2( MaxTessFactor ) / SizeLog2 ) - 1;
float Scale = saturate( exp2( -ChildLevels * SizeLog2 ) );
return TessFactors * Scale;
#else
// Alternative: divide by the table size, then rescale if the largest factor still exceeds it.
TessFactors /= TessellationTable_Size;
float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );
if( MaxTessFactor > TessellationTable_Size )
TessFactors *= TessellationTable_Size / MaxTessFactor;
return TessFactors;
#endif
}
// Returns the same UV derivative estimate in both components: the reciprocal of
// (projected length of UVDensity * UniformScale at depth ViewZ) * InvDiceRate.
float2 CalculateUVDerivativeForDomainPoint(
	FNaniteView NaniteView,
	float ViewZ,
	float UniformScale,
	float UVDensity )
{
	const float ProjectedLength = GetProjectedEdgeLengthAtDepth(UVDensity * UniformScale, ViewZ, NaniteView);
	const float dUV_dTri = rcp(ProjectedLength * InvDiceRate);
	return float2(dUV_dTri, dUV_dTri);
}
// Handle onto one pattern of the tessellation table: where the pattern's data starts and the
// packed pattern id together with the values loaded from TessellationTable_Offsets.
struct FTessellatedPatch
{
	// Offset in dwords of this pattern's data inside TessellationTable_VertsAndIndexes.
	uint TableOffset;

	// Bits [0,12)  : packed tessellation factors, one base-PO2_SIZE digit per edge, each stored as (factor - 1).
	// Bit  12      : set when sorting the factors flipped the winding.
	// Bits [13,22) : vertex count.
	// Bits [22,32) : triangle count.
	uint Pattern_NumVerts_NumTris;

	//uint Swizzle;

	// Selects the table pattern for a set of edge tessellation factors.
	// TessFactors      Per-edge factors; rounded up and clamped to [1, NANITE_TESSELLATION_TABLE_SIZE].
	// VertData         Per-corner data, permuted in place to match the reordering applied to the factors.
	// bImmediateTable  Selects the second set of entries in TessellationTable_Offsets.
	void Init( float3 TessFactors, inout uint3 VertData, bool bImmediateTable )
	{
		TessFactors = ceil( TessFactors );
		TessFactors = clamp( TessFactors, 1, NANITE_TESSELLATION_TABLE_SIZE );

		float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );

		//Swizzle = 0b011010;

		// TessFactors in descending order to reduce size of table.
		// Rotate patch so TessFactors.x == MaxTessFactor
		if( TessFactors.y == MaxTessFactor )
		{
			VertData = VertData.yzx;
			TessFactors = TessFactors.yzx;
			//Swizzle = 0b110100;
		}
		else if( TessFactors.z == MaxTessFactor )
		{
			VertData = VertData.zxy;
			TessFactors = TessFactors.zxy;
			//Swizzle = 0b101001;
		}

		// Sorting can flip winding which we need to undo later.
		bool bFlipWinding = false;
		if( TessFactors.y < TessFactors.z )
		{
			// Swapping the first two corners exchanges the second and third edges.
			VertData.xy = VertData.yx;
			TessFactors.yz = TessFactors.zy;
			bFlipWinding = true;
			//Swizzle ^= 0b000111;
		}

		// Pack (factor - 1) for each edge as one base-PO2_SIZE digit.
		uint Pattern = uint(
			TessFactors.x +
			TessFactors.y * NANITE_TESSELLATION_TABLE_PO2_SIZE +
			TessFactors.z * NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE -
			(1 + NANITE_TESSELLATION_TABLE_PO2_SIZE + NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE) );

		Pattern |= bFlipWinding ? 0x1000 : 0;

		Init( Pattern, bImmediateTable );
	}

	// Looks up an already packed pattern (low 12 bits, plus the optional winding flag in bit 12).
	void Init( uint InPattern, bool bImmediateTable)
	{
		// Immediate-table entries start PO2_SIZE^3 entries into the offsets buffer.
		const uint Offset = bImmediateTable ? (NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE) : 0u;

		// Each entry is two dwords.
		const uint2 Tmp = TessellationTable_Offsets.Load2( 4 * 2 * ( Offset + ( InPattern & 0xfff ) ) );

		TableOffset = Tmp.x;
		Pattern_NumVerts_NumTris = InPattern | Tmp.y;
	}

	// Packed factors plus the winding flag (low 13 bits).
	uint GetPattern()	{ return Pattern_NumVerts_NumTris & 0x1FFFu; }
	uint GetNumVerts()	{ return ( Pattern_NumVerts_NumTris >> 13 ) & 0x1FFu; }
	uint GetNumTris()	{ return Pattern_NumVerts_NumTris >> 22; }

	// Returns the barycentric position of a pattern vertex.
	float3 GetVert( uint VertIndex )
	{
		uint BarycentricsEncoded = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + VertIndex ) );
		return DecodeBarycentrics( BarycentricsEncoded );
	}

	// Returns the three vertex indexes of a pattern triangle, with the winding flip applied.
	uint3 GetIndexes( uint TriIndex )
	{
		// Triangles are stored after the pattern's vertices, three 10-bit indexes per dword.
		uint PackedIndexes = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + GetNumVerts() + TriIndex ) );

		uint3 VertIndexes;
		VertIndexes[0] = ( PackedIndexes >>  0 ) & 1023;
		VertIndexes[1] = ( PackedIndexes >> 10 ) & 1023;
		VertIndexes[2] = ( PackedIndexes >> 20 ) & 1023;

		// Sorting TessFactors might have flipped the patch winding.
		//bool bFlipWinding = Pattern > 0xfff;
		bool bFlipWinding = Pattern_NumVerts_NumTris & 0x1000;
		if( bFlipWinding )
			VertIndexes.yz = VertIndexes.zy;

		return VertIndexes;
	}

	// Returns, for each corner of a pattern triangle, its fixed-point barycentrics after
	// per-component masking and shifting, combined into one uint.
	uint3 GetTriangleEncoded( uint TriIndex )
	{
		uint3 VertIndexes = GetIndexes( TriIndex );

		uint Pattern = GetPattern();

		// NOTE(review): GetPattern() keeps only bits [0,13), so the extracts at bit 25+ and 28+
		// below appear to always read zero. Confirm whether a swizzle field was intended here.
		uint3 Shift, Mask;
		for( int i = 0; i < 3; i++ )
		{
			Shift[i] = BitFieldExtractU32( Pattern, 1, i + 25 ) << 4;
			Mask[i]  = BitFieldExtractU32( Pattern, 1, i + 28 ) << 4;
			Mask[i]  = BitFieldMaskU32( Mask[i], 0 );
		}

		uint3 Encoded;
		for( int i = 0; i < 3; i++ )
		{
			uint BarycentricsEncoded = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + VertIndexes[i] ) );

			// Same layout as EncodeBarycentrics, kept in fixed point.
			uint3 Barycentrics;
			Barycentrics.x = BarycentricsEncoded & 0xffff;
			Barycentrics.y = BarycentricsEncoded >> 16;
			Barycentrics.z = BarycentricMax - Barycentrics.x - Barycentrics.y;

			Barycentrics = ( Barycentrics & Mask ) << Shift;
			Encoded[i] = Barycentrics.x | Barycentrics.y | Barycentrics.z;
		}

		return Encoded;
	}

	// Recovers the integer tessellation factors packed by Init(), in the reordered
	// (rotated / sorted) edge order that Init() produced.
	uint3 GetTessFactors()
	{
		uint Packed = GetPattern() & 0xfff;

		uint3 TessFactors;
		for( int i = 0; i < 3; i++ )
		{
			// Init() stores each factor as (factor - 1) in its own base-PO2_SIZE digit, so undo
			// that bias per digit. Subtracting the bias again would underflow the unsigned value.
			TessFactors[i] = ( Packed % NANITE_TESSELLATION_TABLE_PO2_SIZE ) + 1;
			Packed /= NANITE_TESSELLATION_TABLE_PO2_SIZE;
		}
		return TessFactors;
	}
};
// The overload below forwards every member individually, so it must be revisited whenever
// the struct layout changes.
HLSL_STATIC_ASSERT(sizeof(FTessellatedPatch) == 8, "Unexpected size of FTessellatedPatch. Update WaveReadLaneAt to reflect changes.");

// WaveReadLaneAt overload for FTessellatedPatch: reads each member from lane SrcIndex.
FTessellatedPatch WaveReadLaneAt( FTessellatedPatch In, uint SrcIndex )
{
	const uint SrcTableOffset = WaveReadLaneAt( In.TableOffset, SrcIndex );
	const uint SrcPacked      = WaveReadLaneAt( In.Pattern_NumVerts_NumTris, SrcIndex );

	FTessellatedPatch Result;
	Result.TableOffset              = SrcTableOffset;
	Result.Pattern_NumVerts_NumTris = SrcPacked;
	return Result;
}
// A sub-triangle of a cluster triangle, described by the barycentrics of its three corners
// relative to that triangle.
struct FSplitPatch
{
	uint	VisibleClusterIndex;
	uint	TriIndex;
	float3	Barycentrics[3];

	// Unpacks a patch from four dwords: x holds the cluster index (upper 25 bits) and the
	// triangle index (lower 7 bits); y, z and w hold one encoded corner each.
	void Decode( uint4 Encoded )
	{
		const uint Header = Encoded.x;
		TriIndex            = Header & 0x7F;
		VisibleClusterIndex = Header >> 7;

		Barycentrics[0] = DecodeBarycentrics( Encoded.y );
		Barycentrics[1] = DecodeBarycentrics( Encoded.z );
		Barycentrics[2] = DecodeBarycentrics( Encoded.w );
	}

	// Maps barycentrics local to this patch into the space the corner barycentrics are expressed in.
	float3 TransformBarycentrics( float3 Local )
	{
		return INVARIANT(
			Barycentrics[0] * Local.x +
			Barycentrics[1] * Local.y +
			Barycentrics[2] * Local.z );
	}

	// Same mapping applied to the value and to both of its derivatives.
	FBarycentrics TransformBarycentrics( FBarycentrics Local )
	{
		FBarycentrics Transformed;
		Transformed.Value    = TransformBarycentrics( Local.Value );
		Transformed.Value_dx = TransformBarycentrics( Local.Value_dx );
		Transformed.Value_dy = TransformBarycentrics( Local.Value_dy );
		return Transformed;
	}
};
// The overload below forwards every member individually, so it must be revisited whenever
// the struct layout changes.
HLSL_STATIC_ASSERT(sizeof(FSplitPatch) == 44, "Unexpected size of FSplitPatch. Update WaveReadLaneAt to reflect changes.");

// WaveReadLaneAt overload for FSplitPatch: reads each member from lane SrcIndex.
FSplitPatch WaveReadLaneAt(FSplitPatch In, uint SrcIndex)
{
	FSplitPatch Out;
	Out.VisibleClusterIndex = WaveReadLaneAt(In.VisibleClusterIndex, SrcIndex);
	Out.TriIndex            = WaveReadLaneAt(In.TriIndex, SrcIndex);

	for( int Corner = 0; Corner < 3; Corner++ )
	{
		Out.Barycentrics[Corner] = WaveReadLaneAt(In.Barycentrics[Corner], SrcIndex);
	}
	return Out;
}
#define THREADGROUP_SIZE 32
#include "../WorkDistribution.ush"
// Split work queue resources; only declared when SPLIT_WORK_QUEUE is set.
#if SPLIT_WORK_QUEUE // TODO: Remove once shader rewriter has been fixed (UE-202409)
RWByteAddressBuffer SplitWorkQueue_DataBuffer;
RWStructuredBuffer< FWorkQueueState > SplitWorkQueue_StateBuffer;
// TODO: Use function to return a non-const variable before setting it in the queue to work around issue in bindless, remove when fixed in DXC
RWByteAddressBuffer GetSplitWorkQueue_DataBuffer() { return SplitWorkQueue_DataBuffer; }
RWStructuredBuffer< FWorkQueueState > GetSplitWorkQueue_StateBuffer() { return SplitWorkQueue_StateBuffer; }
// Queue built from the data buffer, the state buffer, 0 and NaniteRaster.MaxCandidatePatches.
// NOTE(review): the last two initializers are presumably a state index and the queue capacity - confirm against FOutputQueue.
static const FOutputQueue SplitWorkQueue = { GetSplitWorkQueue_DataBuffer(), GetSplitWorkQueue_StateBuffer(), 0, NaniteRaster.MaxCandidatePatches };
#endif
// Occluded patches queue resources. The two buffers are declared unconditionally; only the
// accessor functions and the queue object depend on OCCLUDED_PATCHES_QUEUE.
RWByteAddressBuffer OccludedPatches_DataBuffer;
RWStructuredBuffer< FWorkQueueState > OccludedPatches_StateBuffer;
#if OCCLUDED_PATCHES_QUEUE // TODO: Remove once shader rewriter has been fixed (UE-202409)
// TODO: Use function to return a non-const variable before setting it in the queue to work around issue in bindless, remove when fixed in DXC
RWByteAddressBuffer GetOccludedPatches_DataBuffer() { return OccludedPatches_DataBuffer; }
RWStructuredBuffer< FWorkQueueState > GetOccludedPatches_StateBuffer() { return OccludedPatches_StateBuffer; }
// Queue built from the data buffer, the state buffer, 0 and NaniteRaster.MaxCandidatePatches.
// NOTE(review): the last two initializers are presumably a state index and the queue capacity - confirm against FOutputQueue.
static const FOutputQueue OccludedPatches = { GetOccludedPatches_DataBuffer(), GetOccludedPatches_StateBuffer(), 0, NaniteRaster.MaxCandidatePatches };
#endif
// Visible patch list resources: read-only bindings first, then read-write bindings.
// NOTE(review): presumably the RW* resources are the writable views of the same data used by
// the passes that produce the list - confirm against the binding code.
ByteAddressBuffer VisiblePatches;
Buffer< uint > VisiblePatchesArgs;
uint VisiblePatchesSize;
RWByteAddressBuffer RWVisiblePatches;
RWBuffer< uint > RWVisiblePatchesArgs;
#endif