// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "NaniteSceneCommon.ush"

#define NANITE_TESSELLATION_PATCH_REFS	1
#define NANITE_TESSELLATION_NORMALIZE	0

#if NANITE_TESSELLATION

// Fixed-point scale used when quantizing barycentric coordinates.
static const uint BarycentricMax = (1 << 15);	// Must be pow2

/**
 * Quantizes barycentric coordinates to fixed point (BarycentricMax steps) and packs them into one uint.
 * x is stored in the low 16 bits and y in the high 16 bits; z is not stored and is rebuilt on decode.
 */
uint EncodeBarycentrics( float3 Barycentrics )
{
	Barycentrics = round( Barycentrics * BarycentricMax );

	// Normalize after rounding
	// The largest component is recomputed from the other two so the three sum to exactly BarycentricMax.
	FLATTEN
	if( Barycentrics.x > max( Barycentrics.y, Barycentrics.z ) )
		Barycentrics.x = BarycentricMax - Barycentrics.y - Barycentrics.z;
	else if( Barycentrics.y > Barycentrics.z )
		Barycentrics.y = BarycentricMax - Barycentrics.x - Barycentrics.z;
	else
		Barycentrics.z = BarycentricMax - Barycentrics.x - Barycentrics.y;

	uint Encoded;
	Encoded  = uint( Barycentrics.x );
	Encoded |= uint( Barycentrics.y ) << 16;
	return Encoded;
}

/**
 * Inverse of EncodeBarycentrics: unpacks x (low 16 bits) and y (high 16 bits),
 * rescales by BarycentricMax and derives z as 1 - x - y.
 */
float3 DecodeBarycentrics( uint Encoded )
{
	float3 Barycentrics;
	Barycentrics.x = float(Encoded & 0xffff) / BarycentricMax;
	Barycentrics.y = float(Encoded >> 16) / BarycentricMax;
	Barycentrics.z = 1.0f - Barycentrics.x - Barycentrics.y;
	return Barycentrics;
}

// Per-pattern uint2 entries read by FTessellatedPatch::Init.
ByteAddressBuffer TessellationTable_Offsets;
// Encoded barycentric vertices followed by packed triangle indexes, read by FTessellatedPatch.
ByteAddressBuffer TessellationTable_VertsAndIndexes;

/**
 * Returns the distance threshold passed to GetTessFactors as LowTessDistance.
 * Returns 0 when the primitive's MaterialDisplacementFadeOutSize is 0; GetTessFactors treats 0 as "disabled".
 */
float CalcDisplacementLowTessDistance(FPrimitiveSceneData PrimitiveData, FInstanceSceneData InstanceData, FNaniteView NaniteView)
{
	const float LowTessSize = PrimitiveData.MaterialDisplacementFadeOutSize;
	if (LowTessSize == 0.0f)
	{
		return 0.0f;
	}

	// Solve for the view depth at which max displacement equals the fade out size
	const float MaxDisplacement = GetAbsMaxMaterialDisplacement(PrimitiveData);
	return (MaxDisplacement * InstanceData.NonUniformScale.w * NaniteView.LODScale) / LowTessSize;
}

/**
 * Computes one tessellation factor per patch edge from view-space corner positions.
 * Edge i connects PointView[i] and PointView[(i + 1) % 3].
 *
 * @param NaniteView		View providing ViewToClip, NearPlane and LODScale.
 * @param PointView			The three patch corners; .z is used as the depth for perspective scaling.
 * @param LowTessDistance	Edges whose scale exceeds this (perspective only, and only when > 0) get a reduced factor.
 * @return					Unclamped, fractional factors for edges 0-1, 1-2 and 2-0.
 */
float3 GetTessFactors( FNaniteView NaniteView, float3 PointView[3], float LowTessDistance )
{
#if 0
	float2 PointPixels[3];
	for( uint i = 0; i < 3; i++ )
	{
		float4 PointClip = mul( float4( PointView[i], 1 ), NaniteView.ViewToClip );
		PointPixels[i] = PointClip.xy / PointClip.w * float2(0.5, -0.5) * NaniteView.ViewSizeAndInvSize.xy;
	}

	float3 TessFactors =
	{
		length( PointPixels[0] - PointPixels[1] ),
		length( PointPixels[1] - PointPixels[2] ),
		length( PointPixels[2] - PointPixels[0] )
	};
	TessFactors *= InvDiceRate;
#else
	const bool bOrtho = NaniteView.ViewToClip[3][3] >= 1;

	bool3 bDistant = false;
	float3 TessFactors;
	for( uint i = 0; i < 3; i++ )
	{
		uint i0 = i;
		uint i1 = (i + 1) % 3;

		// Use the nearer of the two end points so the edge is never under-tessellated.
		float EdgeScale = min( PointView[ i0 ].z, PointView[ i1 ].z );
		float EdgeLength = length( PointView[ i0 ] - PointView[ i1 ] );

		// No perspective divide in ortho; otherwise clamp to the near plane to avoid dividing by tiny or negative depth.
		EdgeScale = bOrtho ? 1.0f : max( EdgeScale, NaniteView.NearPlane );
		TessFactors[i] = EdgeLength / EdgeScale;
		bDistant[i] = !bOrtho && LowTessDistance > 0.0f && EdgeScale > LowTessDistance;
	}

	const float LowTessMultiple = 0.25f; // hard-coded for now
	TessFactors *= NaniteView.LODScale * InvDiceRate * select(bDistant, LowTessMultiple, 1.0f);
#endif

	return TessFactors;
}

/**
 * Scales tessellation factors down by NANITE_TESSELLATION_TABLE_SIZE.
 * Only the first preprocessor branch is active; the others are kept for reference.
 */
float3 GetSplitFactors( float3 TessFactors )
{
#if 1
	return TessFactors / NANITE_TESSELLATION_TABLE_SIZE;
// TODO: Fix other modes to work with NANITE_TESSELLATION_TABLE_SIZE
#elif 1
	const float SizeLog2 = log2( TessellationTable_Size );

	float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );
	float ChildLevels = ceil( log2( MaxTessFactor ) / SizeLog2 ) - 1;
	float Scale = saturate( exp2( -ChildLevels * SizeLog2 ) );
	return TessFactors * Scale;
#else
	TessFactors /= TessellationTable_Size;

	float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );
	if( MaxTessFactor > TessellationTable_Size )
		TessFactors *= TessellationTable_Size / MaxTessFactor;

	return TessFactors;
#endif
}

/**
 * Returns the reciprocal of (projected edge length at ViewZ for UVDensity * UniformScale, times InvDiceRate),
 * replicated into both components.
 */
float2 CalculateUVDerivativeForDomainPoint( FNaniteView NaniteView, float ViewZ, float UniformScale, float UVDensity )
{
	const float dUV_dTri = rcp(GetProjectedEdgeLengthAtDepth(UVDensity * UniformScale, ViewZ, NaniteView) * InvDiceRate);
	return dUV_dTri.xx;
}

/**
 * Handle to one entry of the precomputed tessellation table.
 *
 * Pattern_NumVerts_NumTris layout, as read by the accessors below:
 *   bits  0..11  pattern index: (Fx-1) + (Fy-1)*P + (Fz-1)*P*P, with P = NANITE_TESSELLATION_TABLE_PO2_SIZE
 *   bit   12     winding was flipped while sorting the factors
 *   bits 13..21  vertex count
 *   bits 22..31  triangle count
 */
struct FTessellatedPatch
{
	uint	TableOffset;
	uint	Pattern_NumVerts_NumTris;
	//uint	Swizzle;

	/**
	 * Selects the table pattern for the given per-edge tessellation factors.
	 *
	 * @param TessFactors		Per-edge factors; rounded up and clamped to [1, NANITE_TESSELLATION_TABLE_SIZE].
	 * @param VertData			Per-corner payload, permuted in place to match the reordering applied to the factors.
	 * @param bImmediateTable	Forwarded to Init( uint, bool ) to select the second half of the offsets table.
	 */
	void Init( float3 TessFactors, inout uint3 VertData, bool bImmediateTable )
	{
		TessFactors = ceil( TessFactors );
		TessFactors = clamp( TessFactors, 1, NANITE_TESSELLATION_TABLE_SIZE );

		float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );

		//Swizzle = 0b011010;

		// TessFactors in descending order to reduce size of table.
		// Rotate patch so TessFactors.x == MaxTessFactor
		if( TessFactors.y == MaxTessFactor )
		{
			VertData = VertData.yzx;
			TessFactors = TessFactors.yzx;
			//Swizzle = 0b110100;
		}
		else if( TessFactors.z == MaxTessFactor )
		{
			VertData = VertData.zxy;
			TessFactors = TessFactors.zxy;
			//Swizzle = 0b101001;
		}

		// Sorting can flip winding which we need to undo later.
		bool bFlipWinding = false;
		if( TessFactors.y < TessFactors.z )
		{
			// Swapping corners 0 and 1 keeps edge 0 and exchanges edges 1 and 2.
			VertData.xy = VertData.yx;
			TessFactors.yz = TessFactors.zy;
			bFlipWinding = true;
			//Swizzle ^= 0b000111;
		}

		// Base-P packing of (factor - 1) for each edge.
		uint Pattern = uint(
			TessFactors.x +
			TessFactors.y * NANITE_TESSELLATION_TABLE_PO2_SIZE +
			TessFactors.z * NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE -
			(1 + NANITE_TESSELLATION_TABLE_PO2_SIZE + NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE) );

		Pattern |= bFlipWinding ? 0x1000 : 0;

		Init( Pattern, bImmediateTable );
	}

	/**
	 * Looks up the table entry for an already encoded pattern.
	 * Reads a uint2 from TessellationTable_Offsets: x becomes TableOffset, y is OR'ed with InPattern.
	 * NOTE(review): presumably the loaded y has bits 0..12 clear so it cannot clobber the pattern bits - confirm against the table builder.
	 */
	void Init( uint InPattern, bool bImmediateTable)
	{
		// The immediate table entries are stored after the P^3 regular entries.
		const uint Offset = bImmediateTable ? (NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE) : 0u;
		const uint2 Tmp = TessellationTable_Offsets.Load2( 4 * 2 * ( Offset + ( InPattern & 0xfff ) ) );
		TableOffset = Tmp.x;
		Pattern_NumVerts_NumTris = InPattern | Tmp.y;
	}

	// Pattern index plus the flip-winding bit (low 13 bits).
	uint GetPattern()	{ return Pattern_NumVerts_NumTris & 0x1FFFu; }
	uint GetNumVerts()	{ return ( Pattern_NumVerts_NumTris >> 13 ) & 0x1FFu; }
	uint GetNumTris()	{ return Pattern_NumVerts_NumTris >> 22; }

	// Returns the decoded barycentrics of vertex VertIndex of this pattern.
	float3 GetVert( uint VertIndex )
	{
		uint BarycentricsEncoded = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + VertIndex ) );
		return DecodeBarycentrics( BarycentricsEncoded );
	}

	// Returns the three vertex indexes of triangle TriIndex; triangles are stored after the pattern's vertices, 10 bits per index.
	uint3 GetIndexes( uint TriIndex )
	{
		uint PackedIndexes = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + GetNumVerts() + TriIndex ) );

		uint3 VertIndexes;
		VertIndexes[0] = ( PackedIndexes >>  0 ) & 1023;
		VertIndexes[1] = ( PackedIndexes >> 10 ) & 1023;
		VertIndexes[2] = ( PackedIndexes >> 20 ) & 1023;

		// Sorting TessFactors might have flipped the patch winding.
		//bool bFlipWinding = Pattern > 0xfff;
		bool bFlipWinding = Pattern_NumVerts_NumTris & 0x1000;
		if( bFlipWinding )
			VertIndexes.yz = VertIndexes.zy;

		return VertIndexes;
	}

	/**
	 * Returns one packed value per triangle corner, built from the fixed-point barycentrics of that corner
	 * after applying a per-component mask and shift derived from the pattern word.
	 * NOTE(review): GetPattern() only keeps the low 13 bits, so if the last argument of BitFieldExtractU32 is a
	 * bit offset, bits 25..30 read below are always zero. This looks like a leftover of the commented-out
	 * Swizzle field - confirm whether this function is still used before relying on it.
	 */
	uint3 GetTriangleEncoded( uint TriIndex )
	{
		uint3 VertIndexes = GetIndexes( TriIndex );

		uint Pattern = GetPattern();
		uint3 Shift, Mask;
		for( int i = 0; i < 3; i++ )
		{
			Shift[i] = BitFieldExtractU32( Pattern, 1, i + 25 ) << 4;
			Mask[i]  = BitFieldExtractU32( Pattern, 1, i + 28 ) << 4;
			Mask[i]  = BitFieldMaskU32( Mask[i], 0 );
		}

		uint3 Encoded;
		for( int i = 0; i < 3; i++ )
		{
			uint BarycentricsEncoded = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + VertIndexes[i] ) );

			uint3 Barycentrics;
			Barycentrics.x = BarycentricsEncoded & 0xffff;
			Barycentrics.y = BarycentricsEncoded >> 16;
			Barycentrics.z = BarycentricMax - Barycentrics.x - Barycentrics.y;

			Barycentrics = ( Barycentrics & Mask ) << Shift;

			Encoded[i] = Barycentrics.x | Barycentrics.y | Barycentrics.z;
		}

		return Encoded;
	}

	/**
	 * Recovers the integer tessellation factors stored in the pattern index, in the sorted order produced by Init
	 * (x is the largest, y >= z).
	 *
	 * Init packs (factor - 1) per edge as a base-P digit, so each digit is extracted and 1 is added back.
	 * Subtracting the (1 + P + P*P) bias here, as was done before, applied the bias a second time and
	 * underflowed the unsigned value, returning garbage factors.
	 */
	uint3 GetTessFactors()
	{
		uint Packed = GetPattern() & 0xfff;

		uint3 TessFactors;
		for( int i = 0; i < 3; i++ )
		{
			TessFactors[i] = ( Packed % NANITE_TESSELLATION_TABLE_PO2_SIZE ) + 1;
			Packed /= NANITE_TESSELLATION_TABLE_PO2_SIZE;
		}
		return TessFactors;
	}
};

HLSL_STATIC_ASSERT(sizeof(FTessellatedPatch) == 8, "Unexpected size of FTessellatedPatch. Update WaveReadLaneAt to reflect changes.");

// Reads an FTessellatedPatch from lane SrcIndex, member by member.
FTessellatedPatch WaveReadLaneAt( FTessellatedPatch In, uint SrcIndex )
{
	FTessellatedPatch Out;
	Out.TableOffset					= WaveReadLaneAt( In.TableOffset, SrcIndex );
	Out.Pattern_NumVerts_NumTris	= WaveReadLaneAt( In.Pattern_NumVerts_NumTris, SrcIndex );
	return Out;
}

/**
 * A sub-patch of a source triangle: identifies the triangle and stores the barycentrics
 * (relative to the source triangle) of the sub-patch's three corners.
 */
struct FSplitPatch
{
	uint	VisibleClusterIndex;
	uint	TriIndex;
	float3	Barycentrics[3];

	// Unpacks from a uint4: x holds the cluster index (upper 25 bits) and triangle index (low 7 bits), yzw hold one encoded barycentric per corner.
	void Decode( uint4 Encoded )
	{
		VisibleClusterIndex	= Encoded.x >> 7;
		TriIndex			= Encoded.x & 0x7F;

		for( int i = 0; i < 3; i++ )
			Barycentrics[i] = DecodeBarycentrics( Encoded[ i + 1 ] );
	}

	// Maps barycentrics local to this sub-patch into barycentrics of the source triangle.
	float3 TransformBarycentrics( float3 Local )
	{
		return INVARIANT(
			Barycentrics[0] * Local.x +
			Barycentrics[1] * Local.y +
			Barycentrics[2] * Local.z );
	}

	// Same mapping applied to the value and both screen-space derivatives.
	FBarycentrics TransformBarycentrics( FBarycentrics Local )
	{
		FBarycentrics Global;
		Global.Value	= TransformBarycentrics( Local.Value );
		Global.Value_dx	= TransformBarycentrics( Local.Value_dx );
		Global.Value_dy	= TransformBarycentrics( Local.Value_dy );
		return Global;
	}
};

HLSL_STATIC_ASSERT(sizeof(FSplitPatch) == 44, "Unexpected size of FSplitPatch. Update WaveReadLaneAt to reflect changes.");

// Reads an FSplitPatch from lane SrcIndex, member by member.
FSplitPatch WaveReadLaneAt(FSplitPatch In, uint SrcIndex)
{
	FSplitPatch Result;
	Result.VisibleClusterIndex	= WaveReadLaneAt(In.VisibleClusterIndex, SrcIndex);
	Result.TriIndex				= WaveReadLaneAt(In.TriIndex, SrcIndex);
	Result.Barycentrics[0]		= WaveReadLaneAt(In.Barycentrics[0], SrcIndex);
	Result.Barycentrics[1]		= WaveReadLaneAt(In.Barycentrics[1], SrcIndex);
	Result.Barycentrics[2]		= WaveReadLaneAt(In.Barycentrics[2], SrcIndex);
	return Result;
}

#define THREADGROUP_SIZE	32
#include "../WorkDistribution.ush"

#if SPLIT_WORK_QUEUE // TODO: Remove once shader rewriter has been fixed (UE-202409)
RWByteAddressBuffer						SplitWorkQueue_DataBuffer;
RWStructuredBuffer< FWorkQueueState >	SplitWorkQueue_StateBuffer;

// TODO: Use function to return a non-const variable before setting it in the queue to work around issue in bindless, remove when fixed in DXC
RWByteAddressBuffer GetSplitWorkQueue_DataBuffer() { return SplitWorkQueue_DataBuffer; }
RWStructuredBuffer< FWorkQueueState > GetSplitWorkQueue_StateBuffer() { return SplitWorkQueue_StateBuffer; }

static const FOutputQueue SplitWorkQueue = { GetSplitWorkQueue_DataBuffer(), GetSplitWorkQueue_StateBuffer(), 0, NaniteRaster.MaxCandidatePatches };
#endif

RWByteAddressBuffer						OccludedPatches_DataBuffer;
RWStructuredBuffer< FWorkQueueState >	OccludedPatches_StateBuffer;

#if OCCLUDED_PATCHES_QUEUE // TODO: Remove once shader rewriter has been fixed (UE-202409)
// TODO: Use function to return a non-const variable before setting it in the queue to work around issue in bindless, remove when fixed in DXC
RWByteAddressBuffer GetOccludedPatches_DataBuffer() { return OccludedPatches_DataBuffer; }
RWStructuredBuffer< FWorkQueueState > GetOccludedPatches_StateBuffer() { return OccludedPatches_StateBuffer; }

static const FOutputQueue OccludedPatches = { GetOccludedPatches_DataBuffer(), GetOccludedPatches_StateBuffer(), 0, NaniteRaster.MaxCandidatePatches };
#endif

ByteAddressBuffer	VisiblePatches;
Buffer< uint >		VisiblePatchesArgs;
uint				VisiblePatchesSize;

RWByteAddressBuffer	RWVisiblePatches;
RWBuffer< uint >	RWVisiblePatchesArgs;

#endif