// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "../BitPacking.ush"
#include "../SceneData.ush"
#include "../WaveOpUtil.ush"
#include "../BoneTransform.ush"
#include "/Engine/Shared/NaniteDefinitions.h"
#include "/Engine/Shared/SkinningDefinitions.h"
#include "/Engine/Shared/HLSLStaticAssert.h"
#include "NanitePackedNaniteView.ush"

#ifndef DEBUG_FLAGS
#define DEBUG_FLAGS 0
#endif

uint GetHWClusterCounterIndex(uint InRenderFlags)
{
    // Ensure the rasterizer uses compile-time constants.
#ifdef NANITE_HW_COUNTER_INDEX
    return NANITE_HW_COUNTER_INDEX;
#else
    // Other passes use a uniform branch to minimize permutations.
    return CondMask(InRenderFlags & (NANITE_RENDER_FLAG_MESH_SHADER | NANITE_RENDER_FLAG_PRIMITIVE_SHADER), 4u, 5u);
#endif
}

struct FVisibleCluster
{
    uint Flags;
    uint ViewId;
    uint InstanceId;
    uint PageIndex;
    uint ClusterIndex;
    uint AssemblyTransformIndex;
    uint DepthBucket;
    uint2 vPage;
    uint2 vPageEnd; // Last page to render (inclusive). Only used during SW rasterization currently
};

struct FPageHeader
{
    uint NumClusters;
    uint MaxClusterBoneInfluences;
    uint MaxVoxelBoneInfluences;
};

struct FCluster
{
    uint PageBaseAddress;

    uint NumVerts;
    uint PositionOffset;

    uint NumTris;
    uint IndexOffset;

    int3 PosStart;
    uint BitsPerIndex;
    int PosPrecision;
    uint3 PosBits;
    uint NormalPrecision;
    uint TangentPrecision;
    float PosScale;
    float PosRcpScale;

    float4 LODBounds;

    float3 BoxBoundsCenter;
    float LODError;
    float EdgeLength;

    float3 BoxBoundsExtent;
    uint Flags;

    uint AttributeOffset;
    uint BitsPerAttribute;
    uint DecodeInfoOffset;
    bool bHasTangents;
    bool bSkinning;
    bool bVoxel;
    uint NumUVs;
    uint ColorMode;
    uint UVBitOffsets;

    uint ColorMin;
    uint ColorBits;
    uint GroupIndex; // Debug only

    uint NumClusterBoneInfluences;
    uint ClusterBoneInfluenceAddress;
    uint ClusterBoneInfluenceStride;

    // Material Slow path
    uint MaterialTableOffset;
    uint MaterialTableLength;

    uint VertReuseBatchCountTableOffset; // dword offset from page base
    uint VertReuseBatchCountTableSize; // number of entries, each 4-bit

    // Material Fast path
    uint Material0Length;
    uint Material0Index;
    uint Material1Length;
    uint Material1Index;
    uint Material2Index;
    uint MaterialTotalLength;

    uint4 VertReuseBatchInfo;

    uint ExtendedDataOffset;
    uint ExtendedDataNum;

    uint BrickDataOffset;
    uint BrickDataNum;
};

struct FBrick
{
    uint2 ReverseBrickBits;
    int3 StartPos;
    uint3 BrickMax;
    uint VertOffset;
};

struct FClusterBoneInfluence
{
    uint BoneIndex;
#if NANITE_USE_PRECISE_SKINNING_BOUNDS
    float MinWeight;
    float MaxWeight;
    float3 BoundMin;
    float3 BoundMax;
#endif
};

struct FVoxelBoneInfluence
{
    uint BoneIndex;
    float Weight;
};

struct FHierarchyNodeSlice
{
    float4 LODBounds;
    float3 BoxBoundsCenter;
    float3 BoxBoundsExtent;
    float MinLODError;
    float MaxParentLODError;
    uint ChildStartReference; // Can be node (index) or cluster (page:cluster)
    uint NumChildren;
    uint StartPageIndex;
    uint NumPages;
    uint AssemblyTransformIndex;
    bool bEnabled;
    bool bLoaded;
    bool bLeaf;
};

struct FInstanceDynamicData
{
    float4x4 LocalToTranslatedWorld;
    float4x4 PrevLocalToTranslatedWorld;
    bool bHasMoved;
};

struct FNaniteView
{
    float4x4 SVPositionToTranslatedWorld;
    float4x4 ViewToTranslatedWorld;

    float4x4 TranslatedWorldToView;
    float4x4 TranslatedWorldToClip;
    float4x4 ViewToClip;
    FDFMatrix ClipToWorld;

    float4x4 PrevTranslatedWorldToView;
    float4x4 PrevTranslatedWorldToClip;
    float4x4 PrevViewToClip;
    FDFMatrix PrevClipToWorld;

    float3x3 FirstPersonTransform;

    float4 TranslatedGlobalClipPlane;

    int4 ViewRect;
    float4 ViewSizeAndInvSize;
    float4 ClipSpaceScaleOffset;
    float4 MaterialCacheUnwrapMinAndInvSize;
    float4 MaterialCachePageAdvanceAndInvCount;
    FDFVector3 PreViewTranslation;
    FDFVector3 PrevPreViewTranslation;
    FDFVector3 WorldCameraOrigin;
    float3 CullingViewOriginTranslatedWorld;
    float3 ViewForward;
    float3 ViewOriginHigh;
    float NearPlane;
    float LODScale;
    float LODScaleHW;
    float CullingViewMinRadiusTestFactorSq;
    uint StreamingPriorityCategory;
    uint Flags;
    int TargetLayerIndex;
    int TargetMipLevel;
    int TargetNumMipLevels;
    int TargetPrevLayerIndex;
    float RangeBasedCullingDistance;
    int4 HZBTestViewRect;
    float CullingViewScreenMultipleSq;
    uint InstanceOcclusionQueryMask;
    bool bUseLightingChannelMask;
    uint LightingChannelMask;
    int SceneRendererPrimaryViewId;
    float2 DynamicDepthCullRange;
};

struct FInstanceDraw
{
    uint InstanceId;
    uint ViewId;
};

struct FNaniteFullscreenVSToPS
{
#if INSTANCED_STEREO
    nointerpolation uint EyeIndex : PACKED_EYE_INDEX;
#endif
    nointerpolation uint ViewIndex : PACKED_VIEW_INDEX;
    nointerpolation uint TileIndex : MACRO_TILE_INDEX;
};

#if NANITE_USE_RAYTRACING_UNIFORM_BUFFER
#define PageConstants NaniteRayTracing.PageConstants
#define MaxNodes NaniteRayTracing.MaxNodes
#define ClusterPageData NaniteRayTracing.ClusterPageData
#define HierarchyBuffer NaniteRayTracing.HierarchyBuffer
#define RayTracingDataBuffer NaniteRayTracing.RayTracingDataBuffer

// These parameters shouldn't be used in RT shaders
#define RenderFlags 0
//uint MaxVisibleClusters;
//uint DebugFlags;
//ByteAddressBuffer VisibleClustersSWHW;
#else

#if NANITE_USE_RASTER_UNIFORM_BUFFER
#define PageConstants NaniteRaster.PageConstants
#define MaxNodes NaniteRaster.MaxNodes
#define MaxVisibleClusters NaniteRaster.MaxVisibleClusters
#define MaxPatchesPerGroup NaniteRaster.MaxPatchesPerGroup
#define MeshPass NaniteRaster.MeshPass
#define InvDiceRate NaniteRaster.InvDiceRate
#define RenderFlags NaniteRaster.RenderFlags
#define DebugFlags NaniteRaster.DebugFlags
#else
uint4 PageConstants;
uint MaxNodes;
uint MaxVisibleClusters;
uint MaxPatchesPerGroup;
uint MeshPass;
float InvDiceRate;
uint RenderFlags;
uint DebugFlags;
#endif

#if NANITE_USE_SHADING_UNIFORM_BUFFER
#define ClusterPageData NaniteShading.ClusterPageData
#define VisibleClustersSWHW NaniteShading.VisibleClustersSWHW
#define HierarchyBuffer NaniteShading.HierarchyBuffer
#else // !NANITE_USE_SHADING_UNIFORM_BUFFER
ByteAddressBuffer ClusterPageData;
ByteAddressBuffer VisibleClustersSWHW;
ByteAddressBuffer HierarchyBuffer;
StructuredBuffer<uint> RayTracingDataBuffer;
#endif // !NANITE_USE_SHADING_UNIFORM_BUFFER

#endif

#if FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS // TODO: This header can be included from SM5 via BuildInstanceDrawCommands.usf. Refactor it.
HLSL_STATIC_ASSERT(sizeof(FInstanceDynamicData) == 132, "Unexpected size of FInstanceDynamicData. Update WaveReadLaneAt to reflect changes.");
FInstanceDynamicData WaveReadLaneAt(FInstanceDynamicData In, uint SrcIndex)
{
    FInstanceDynamicData Result;

    Result.LocalToTranslatedWorld = WaveReadLaneAtMatrix(In.LocalToTranslatedWorld, SrcIndex);
    Result.PrevLocalToTranslatedWorld = WaveReadLaneAtMatrix(In.PrevLocalToTranslatedWorld, SrcIndex);
    Result.bHasMoved = WaveReadLaneAt(In.bHasMoved, SrcIndex);

    return Result;
}
#endif

float ClipZFromLinearZ(FNaniteView NaniteView, float LinearZ)
{
    return LinearZ * NaniteView.ViewToClip[2][2] + NaniteView.ViewToClip[3][2]; // TODO: Pack coefficients into single load?
}
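
// Note: for a view-space point (X, Y, LinearZ, 1) and a typical non-sheared projection matrix, only
// ViewToClip[2][2] and ViewToClip[3][2] contribute to clip-space Z, so the full transform reduces to
// the single multiply-add above.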

// Packs a (PageIndex, ClusterIndex) pair into a flat index based on max clusters per page.
uint PackPoolClusterRef(uint PageIndex, uint ClusterIndex)
{
    const uint MaxStreamingPages = PageConstants.y;
    return (min(PageIndex, MaxStreamingPages) << NANITE_STREAMING_PAGE_MAX_CLUSTERS_BITS) +
           ((uint)max((int)PageIndex - (int)MaxStreamingPages, 0) << NANITE_ROOT_PAGE_MAX_CLUSTERS_BITS) +
           ClusterIndex;
}

void UnpackPoolClusterRef(uint PackedClusterRef, inout uint PageIndex, inout uint ClusterIndex)
{
    const uint MaxStreamingPages = PageConstants.y;
    const uint MaxStreamingClusters = MaxStreamingPages << NANITE_STREAMING_PAGE_MAX_CLUSTERS_BITS;
    if (PackedClusterRef < MaxStreamingClusters)
    {
        PageIndex = PackedClusterRef >> NANITE_STREAMING_PAGE_MAX_CLUSTERS_BITS;
        ClusterIndex = PackedClusterRef & ((1u << NANITE_STREAMING_PAGE_MAX_CLUSTERS_BITS) - 1u);
    }
    else
    {
        PackedClusterRef -= MaxStreamingClusters;
        PageIndex = MaxStreamingPages + (PackedClusterRef >> NANITE_ROOT_PAGE_MAX_CLUSTERS_BITS);
        ClusterIndex = PackedClusterRef & ((1u << NANITE_ROOT_PAGE_MAX_CLUSTERS_BITS) - 1u);
    }
}
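
// Worked example (illustrative constants, not the real NaniteDefinitions.h values): with
// NANITE_STREAMING_PAGE_MAX_CLUSTERS_BITS == 8, NANITE_ROOT_PAGE_MAX_CLUSTERS_BITS == 9 and
// MaxStreamingPages == 4:
//   Pack(PageIndex = 2, ClusterIndex = 5) -> (2 << 8) + (0 << 9) + 5 = 517  (streaming range)
//   Pack(PageIndex = 6, ClusterIndex = 5) -> (4 << 8) + (2 << 9) + 5 = 2053 (root range)
// Unpack compares against MaxStreamingClusters = 4 << 8 = 1024:
//   517  < 1024 -> PageIndex = 517 >> 8 = 2, ClusterIndex = 517 & 255 = 5
//   2053 >= 1024 -> PageIndex = 4 + ((2053 - 1024) >> 9) = 6, ClusterIndex = (2053 - 1024) & 511 = 5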

uint4 PackVisibleCluster(FVisibleCluster VisibleCluster, bool bHasPageData)
{
    uint4 RawData = 0;
    uint BitPos = 0;

    const uint PackedClusterRef = PackPoolClusterRef(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);

    WriteBits(RawData, BitPos, VisibleCluster.Flags, NANITE_NUM_CULLING_FLAG_BITS);
    WriteBits(RawData, BitPos, VisibleCluster.ViewId, NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS);
    WriteBits(RawData, BitPos, VisibleCluster.InstanceId, NANITE_MAX_INSTANCES_BITS);
    WriteBits(RawData, BitPos, PackedClusterRef, NANITE_POOL_CLUSTER_REF_BITS);
#if NANITE_EXTENDED_VISIBLE_CLUSTERS
    WriteBits(RawData, BitPos, VisibleCluster.AssemblyTransformIndex, NANITE_ASSEMBLY_TRANSFORM_INDEX_BITS);
    WriteBits(RawData, BitPos, VisibleCluster.DepthBucket, NANITE_NUM_DEPTH_BUCKETS_PER_BLOCK_BITS); // This is not needed for candidate clusters.
    // We could make a separate CandidateCluster struct if/when this makes a difference.
#endif

    if (bHasPageData)
    {
        WriteBits(RawData, BitPos, VisibleCluster.vPage.x, 13);
        WriteBits(RawData, BitPos, VisibleCluster.vPage.y, 13);
        uint2 Delta = (VisibleCluster.vPageEnd - VisibleCluster.vPage) & 0x7;
        WriteBits(RawData, BitPos, Delta.x, 3);
        WriteBits(RawData, BitPos, Delta.y, 3);
    }
    return RawData;
}

FVisibleCluster UnpackVisibleCluster(uint4 RawData, bool bHasPageData = false)
{
    uint BitPos = 0;
    FVisibleCluster VisibleCluster;
    VisibleCluster.Flags = ReadBits( RawData, BitPos, NANITE_NUM_CULLING_FLAG_BITS );
    VisibleCluster.ViewId = ReadBits( RawData, BitPos, NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS );
    VisibleCluster.InstanceId = ReadBits( RawData, BitPos, NANITE_MAX_INSTANCES_BITS );
    const uint PackedClusterRef = ReadBits( RawData, BitPos, NANITE_POOL_CLUSTER_REF_BITS );
#if NANITE_EXTENDED_VISIBLE_CLUSTERS
    VisibleCluster.AssemblyTransformIndex = ReadBits( RawData, BitPos, NANITE_ASSEMBLY_TRANSFORM_INDEX_BITS );
    VisibleCluster.DepthBucket = ReadBits( RawData, BitPos, NANITE_NUM_DEPTH_BUCKETS_PER_BLOCK_BITS );
#else
    VisibleCluster.AssemblyTransformIndex = 0xFFFFFFFFu;
    VisibleCluster.DepthBucket = 0;
#endif

    UnpackPoolClusterRef( PackedClusterRef, VisibleCluster.PageIndex, VisibleCluster.ClusterIndex );

    if( bHasPageData )
    {
        VisibleCluster.vPage.x = ReadBits( RawData, BitPos, 13 );
        VisibleCluster.vPage.y = ReadBits( RawData, BitPos, 13 );
        VisibleCluster.vPageEnd.x = ReadBits( RawData, BitPos, 3 );
        VisibleCluster.vPageEnd.y = ReadBits( RawData, BitPos, 3 );
        VisibleCluster.vPageEnd += VisibleCluster.vPage;
    }
    else
    {
        VisibleCluster.vPage = 0;
    }

    return VisibleCluster;
}

FVisibleCluster GetVisibleCluster( ByteAddressBuffer VisibleClusters, uint ClusterIdx, bool bHasPageData = false )
{
    uint4 RawData;
#if NANITE_EXTENDED_VISIBLE_CLUSTERS
    if( bHasPageData )
        RawData = VisibleClusters.Load4( ClusterIdx * 16 );
    else
        RawData = uint4( VisibleClusters.Load3( ClusterIdx * 12 ), 0 );
#else
    if( bHasPageData )
        RawData = uint4( VisibleClusters.Load3( ClusterIdx * 12 ), 0 );
    else
        RawData = uint4( VisibleClusters.Load2( ClusterIdx * 8 ), 0, 0 );
#endif

    return UnpackVisibleCluster(RawData, bHasPageData);
}
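
// The strides above follow from the packed layout: the base fields (flags, view id, instance id,
// pool cluster ref) fit in two dwords; NANITE_EXTENDED_VISIBLE_CLUSTERS adds the assembly transform
// index and depth bucket, and page data adds two 13-bit vPage coordinates plus two 3-bit end deltas.
// Hence 8/12 bytes per visible cluster in the basic layout and 12/16 bytes in the extended one.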

FVisibleCluster GetVisibleCluster( uint ClusterIdx, bool bHasPageData )
{
#if NANITE_USE_RAYTRACING_UNIFORM_BUFFER
    return (FVisibleCluster)0;
#else
    return GetVisibleCluster( VisibleClustersSWHW, ClusterIdx, bHasPageData );
#endif
}

bool IsVisibleClusterIndexImposter(uint ClusterIndex)
{
#if NANITE_IMPOSTERS_SUPPORTED
    return ClusterIndex >= (1 << 24);
#else
    return false;
#endif
}

FVisibleCluster GetVisibleCluster( uint ClusterIndex )
{
    FVisibleCluster VisibleCluster;

#if NANITE_IMPOSTERS_SUPPORTED
    if( IsVisibleClusterIndexImposter(ClusterIndex) )
    {
        // An index this large couldn't have been stored, so it signals that this is an imposter
        VisibleCluster.Flags = 1 << NANITE_NUM_CULLING_FLAG_BITS;
        VisibleCluster.ViewId = 0; // TODO
        VisibleCluster.InstanceId = BitFieldExtractU32( ClusterIndex, NANITE_MAX_INSTANCES_BITS - 1, 1 );
        VisibleCluster.AssemblyTransformIndex = 0xFFFFFFFFu;
        VisibleCluster.PageIndex = 0;
        VisibleCluster.ClusterIndex = ClusterIndex & 1;
    }
    else
#endif
    {
        VisibleCluster = GetVisibleCluster( ClusterIndex, false );
    }

    return VisibleCluster;
}

bool IsValidAssemblyTransformIndex(uint AssemblyTransformIndex)
{
    // NOTE: This should effectively cause all assembly transform handling to be dead-code-eliminated without having to #ifdef call sites
#if NANITE_ASSEMBLY_DATA
    return AssemblyTransformIndex < NANITE_MAX_ASSEMBLY_TRANSFORMS;
#else
    return false;
#endif
}

bool IsAssemblyPartCluster(FVisibleCluster VisibleCluster)
{
    return IsValidAssemblyTransformIndex(VisibleCluster.AssemblyTransformIndex);
}

FCluster UnpackCluster(uint4 ClusterData[NANITE_NUM_PACKED_CLUSTER_FLOAT4S], FPageHeader PageHeader, uint PageBaseAddress, uint LocalClusterIndex)
{
    FCluster Cluster;
    Cluster.PageBaseAddress = PageBaseAddress;

    Cluster.NumVerts = BitFieldExtractU32(ClusterData[0].x, 14, 0);
    Cluster.PositionOffset = BitFieldExtractU32(ClusterData[0].x, 18, 14);
    Cluster.NumTris = BitFieldExtractU32(ClusterData[0].y, 8, 0);
    Cluster.IndexOffset = BitFieldExtractU32(ClusterData[0].y, 24, 8);

    Cluster.ColorMin = ClusterData[0].z;
    Cluster.ColorBits = BitFieldExtractU32(ClusterData[0].w, 16, 0);
    Cluster.GroupIndex = BitFieldExtractU32(ClusterData[0].w, 16, 16); // Debug only

    Cluster.PosStart = ClusterData[1].xyz;
    Cluster.BitsPerIndex = BitFieldExtractU32(ClusterData[1].w, 3, 0) + 1;
    Cluster.PosPrecision = (int)BitFieldExtractU32(ClusterData[1].w, 6, 3) + NANITE_MIN_POSITION_PRECISION;
    Cluster.PosBits.x = BitFieldExtractU32(ClusterData[1].w, 5, 9);
    Cluster.PosBits.y = BitFieldExtractU32(ClusterData[1].w, 5, 14);
    Cluster.PosBits.z = BitFieldExtractU32(ClusterData[1].w, 5, 19);
    Cluster.NormalPrecision = BitFieldExtractU32(ClusterData[1].w, 4, 24);
    Cluster.TangentPrecision = BitFieldExtractU32(ClusterData[1].w, 4, 28);
    Cluster.PosScale = asfloat(asint(1.0f) - (Cluster.PosPrecision << 23));
    Cluster.PosRcpScale = asfloat(asint(1.0f) + (Cluster.PosPrecision << 23));

    Cluster.LODBounds = asfloat(ClusterData[2]);

    Cluster.BoxBoundsCenter = asfloat(ClusterData[3].xyz);
    Cluster.LODError = f16tof32(ClusterData[3].w);
    Cluster.EdgeLength = f16tof32(ClusterData[3].w >> 16);

    Cluster.BoxBoundsExtent = asfloat(ClusterData[4].xyz);
    Cluster.Flags = BitFieldExtractU32(ClusterData[4].w, 4, 0);
    Cluster.NumClusterBoneInfluences = BitFieldExtractU32(ClusterData[4].w, 5, 4);

    Cluster.AttributeOffset = BitFieldExtractU32(ClusterData[5].x, 22, 0);
    Cluster.BitsPerAttribute = BitFieldExtractU32(ClusterData[5].x, 10, 22);
    Cluster.DecodeInfoOffset = BitFieldExtractU32(ClusterData[5].y, 22, 0);
    Cluster.bHasTangents = BitFieldExtractU32(ClusterData[5].y, 1, 22);
    Cluster.bSkinning = BitFieldExtractU32(ClusterData[5].y, 1, 23);
    Cluster.bVoxel = (Cluster.NumTris == 0) && NANITE_VOXEL_DATA;
    Cluster.NumUVs = BitFieldExtractU32(ClusterData[5].y, 3, 24);
    Cluster.ColorMode = BitFieldExtractU32(ClusterData[5].y, 1, 27);
    Cluster.UVBitOffsets = ClusterData[5].z;
    const uint MaterialEncoding = ClusterData[5].w;

    Cluster.ExtendedDataOffset = BitFieldExtractU32(ClusterData[6].x, 22, 0);
    Cluster.ExtendedDataNum = BitFieldExtractU32(ClusterData[6].x, 10, 22);

    Cluster.BrickDataOffset = BitFieldExtractU32(ClusterData[6].y, 22, 0);
    Cluster.BrickDataNum = BitFieldExtractU32(ClusterData[6].y, 10, 22);

    // Material Table Range Encoding (32 bits)
    // uint TriStart      :  8; // max 128 triangles
    // uint TriLength     :  8; // max 128 triangles
    // uint MaterialIndex :  6; // max 64 materials
    // uint Padding       : 10;

    // Material Packed Range - Fast Path (32 bits)
    // uint Material0Index  : 6; // max 64 materials (0:Material0Length)
    // uint Material1Index  : 6; // max 64 materials (Material0Length:Material1Length)
    // uint Material2Index  : 6; // max 64 materials (remainder)
    // uint Material0Length : 7; // max 128 triangles (num minus one)
    // uint Material1Length : 7; // max 64 triangles (materials are sorted, so at most 128/2)

    // Material Packed Range - Slow Path (32 bits)
    // uint BufferIndex  : 19; // 2^19 max value (tons, it's per prim)
    // uint BufferLength :  6; // max 64 ranges (num minus one)
    // uint Padding      :  7; // always 127 for slow path. corresponds to Material1Length=127 in fast path

    BRANCH
    if (MaterialEncoding < 0xFE000000u)
    {
        // Fast inline path
        Cluster.MaterialTableOffset = 0;
        Cluster.MaterialTableLength = 0;
        Cluster.Material0Index = BitFieldExtractU32(MaterialEncoding, 6, 0);
        Cluster.Material1Index = BitFieldExtractU32(MaterialEncoding, 6, 6);
        Cluster.Material2Index = BitFieldExtractU32(MaterialEncoding, 6, 12);
        Cluster.Material0Length = BitFieldExtractU32(MaterialEncoding, 7, 18) + 1;
        Cluster.Material1Length = BitFieldExtractU32(MaterialEncoding, 7, 25);

        Cluster.VertReuseBatchCountTableOffset = 0;
        Cluster.VertReuseBatchCountTableSize = 0;
        Cluster.VertReuseBatchInfo = ClusterData[7];
    }
    else
    {
        // Slow global search path
        Cluster.MaterialTableOffset = BitFieldExtractU32(MaterialEncoding, 19, 0);
        Cluster.MaterialTableLength = BitFieldExtractU32(MaterialEncoding, 6, 19) + 1;
        Cluster.Material0Index = 0;
        Cluster.Material1Index = 0;
        Cluster.Material2Index = 0;
        Cluster.Material0Length = 0;
        Cluster.Material1Length = 0;

        Cluster.VertReuseBatchCountTableOffset = ClusterData[7].x;
        Cluster.VertReuseBatchCountTableSize = ClusterData[7].y;
        Cluster.VertReuseBatchInfo = 0;
    }

    Cluster.MaterialTotalLength = Cluster.bVoxel ? Cluster.BrickDataNum : Cluster.NumTris;

    const uint ClusterBoneInfluenceAddress = PageBaseAddress + NANITE_GPU_PAGE_HEADER_SIZE + NANITE_NUM_PACKED_CLUSTER_FLOAT4S * 16 * PageHeader.NumClusters;
    uint VoxelBoneInfluenceAddress = ClusterBoneInfluenceAddress + PageHeader.NumClusters * PageHeader.MaxClusterBoneInfluences * (uint)sizeof(FClusterBoneInfluence);
    VoxelBoneInfluenceAddress = (VoxelBoneInfluenceAddress + 15) & ~15u; // Align to match builder behavior. //VOXELTODO: We don't seem to actually need more than 4 byte alignment. Fix the builder and save the ~8 bytes per page instead?

    // TODO: Unify these two paths and formats into one if/when we finally kill NANITE_USE_PRECISE_SKINNING_BOUNDS
    if (Cluster.bVoxel)
    {
        Cluster.ClusterBoneInfluenceAddress = VoxelBoneInfluenceAddress + LocalClusterIndex * 4;
        Cluster.ClusterBoneInfluenceStride = PageHeader.NumClusters * 4;
    }
    else
    {
        Cluster.ClusterBoneInfluenceAddress = ClusterBoneInfluenceAddress + LocalClusterIndex * (uint)sizeof(FClusterBoneInfluence);
        Cluster.ClusterBoneInfluenceStride = PageHeader.NumClusters * (uint)sizeof(FClusterBoneInfluence);
    }

    return Cluster;
}
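
// Worked example of the fast-path material encoding: a cluster whose first 128 triangles all use
// material 0 stores Material0Index = 0 and (Material0Length - 1) = 127 at bit 18, i.e.
// MaterialEncoding = 127 << 18 = 0x01FC0000. That is below 0xFE000000 (Material1Length < 127), so
// the fast path decodes Material0Length = 127 + 1 = 128 with no material table lookup. Any encoding
// whose top seven bits are all set (>= 0xFE000000) instead carries a table offset/length pair.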

uint GPUPageIndexToGPUOffset(uint PageIndex)
{
    const uint MaxStreamingPages = PageConstants.y;
    return (min(PageIndex, MaxStreamingPages) << NANITE_STREAMING_PAGE_GPU_SIZE_BITS) + ((uint)max((int)PageIndex - (int)MaxStreamingPages, 0) << NANITE_ROOT_PAGE_GPU_SIZE_BITS);
}
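
// Streaming pages occupy fixed-size slots (1 << NANITE_STREAMING_PAGE_GPU_SIZE_BITS bytes each) at the
// start of the page pool, followed by fixed-size root page slots; the min/max pair picks the range that
// PageIndex falls in, mirroring the cluster ref packing in PackPoolClusterRef.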

FPageHeader UnpackPageHeader(uint4 Data)
{
    FPageHeader Header;
    Header.NumClusters = BitFieldExtractU32(Data.x, 16, 0);
    Header.MaxClusterBoneInfluences = BitFieldExtractU32(Data.x, 8, 16);
    Header.MaxVoxelBoneInfluences = BitFieldExtractU32(Data.x, 8, 24);
    return Header;
}

FPageHeader GetPageHeader(ByteAddressBuffer InputBuffer, uint PageAddress)
{
    return UnpackPageHeader(InputBuffer.Load4(PageAddress));
}

FPageHeader GetPageHeader(RWByteAddressBuffer InputBuffer, uint PageAddress)
{
    return UnpackPageHeader(InputBuffer.Load4(PageAddress));
}

FCluster GetCluster(ByteAddressBuffer InputBuffer, FPageHeader PageHeader, uint SrcBaseOffset, uint ClusterIndex)
{
    const uint ClusterSOAStride = (PageHeader.NumClusters << 4);
    const uint ClusterBaseAddress = SrcBaseOffset + (ClusterIndex << 4);

    uint4 ClusterData[NANITE_NUM_PACKED_CLUSTER_FLOAT4S];
    UNROLL
    for (int i = 0; i < NANITE_NUM_PACKED_CLUSTER_FLOAT4S; i++)
    {
        ClusterData[i] = InputBuffer.Load4( ClusterBaseAddress + i * ClusterSOAStride + NANITE_GPU_PAGE_HEADER_SIZE ); // Adding NANITE_GPU_PAGE_HEADER_SIZE inside the loop prevents compiler confusion about offset modifier and generates better code
    }

    return UnpackCluster(ClusterData, PageHeader, SrcBaseOffset, ClusterIndex);
}

FCluster GetCluster(RWByteAddressBuffer InputBuffer, FPageHeader PageHeader, uint PageBaseAddress, uint ClusterIndex)
{
    const uint ClusterSOAStride = (PageHeader.NumClusters << 4);
    const uint ClusterBaseAddress = PageBaseAddress + (ClusterIndex << 4);

    uint4 ClusterData[NANITE_NUM_PACKED_CLUSTER_FLOAT4S];
    UNROLL
    for (int i = 0; i < NANITE_NUM_PACKED_CLUSTER_FLOAT4S; i++)
    {
        ClusterData[i] = InputBuffer.Load4( ClusterBaseAddress + i * ClusterSOAStride + NANITE_GPU_PAGE_HEADER_SIZE ); // Adding NANITE_GPU_PAGE_HEADER_SIZE inside the loop prevents compiler confusion about offset modifier and generates better code
    }

    return UnpackCluster(ClusterData, PageHeader, PageBaseAddress, ClusterIndex);
}

FCluster GetCluster(uint PageIndex, uint ClusterIndex)
{
    uint PageBaseAddress = GPUPageIndexToGPUOffset(PageIndex);
    FPageHeader PageHeader = GetPageHeader(ClusterPageData, PageBaseAddress);
    return GetCluster(ClusterPageData, PageHeader, PageBaseAddress, ClusterIndex);
}

FHierarchyNodeSlice UnpackHierarchyNodeSlice(uint4 RawData0, uint4 RawData1, uint4 RawData2, uint2 RawData3)
{
    const uint4 Misc0 = RawData1;
    const uint4 Misc1 = RawData2;
    const uint2 Misc2 = RawData3;

    FHierarchyNodeSlice Node;
    Node.LODBounds = asfloat(RawData0);

    Node.BoxBoundsCenter = asfloat(Misc0.xyz);
    Node.BoxBoundsExtent = asfloat(Misc1.xyz);

    Node.MinLODError = f16tof32(Misc0.w);
    Node.MaxParentLODError = f16tof32(Misc0.w >> 16);
    Node.ChildStartReference = Misc1.w; // When changing this, remember to also update StoreHierarchyNodeChildStartReference
    Node.bLoaded = (Misc1.w != 0xFFFFFFFFu);

    Node.NumChildren = BitFieldExtractU32(Misc2.x, NANITE_MAX_CLUSTERS_PER_GROUP_BITS, 0);
    Node.NumPages = BitFieldExtractU32(Misc2.x, NANITE_MAX_GROUP_PARTS_BITS, NANITE_MAX_CLUSTERS_PER_GROUP_BITS);
    Node.StartPageIndex = BitFieldExtractU32(Misc2.x, NANITE_MAX_RESOURCE_PAGES_BITS, NANITE_MAX_CLUSTERS_PER_GROUP_BITS + NANITE_MAX_GROUP_PARTS_BITS);
    Node.bEnabled = Misc2.x != 0u;
    Node.bLeaf = Misc2.x != 0xFFFFFFFFu;
    Node.AssemblyTransformIndex = Misc2.y;

    return Node;
}

uint GetHierarchyNodeOffset(uint RootOffset, uint NodeIndex)
{
    return RootOffset + NodeIndex * NANITE_HIERARCHY_NODE_SLICE_SIZE_DWORDS;
}

FHierarchyNodeSlice GetHierarchyNodeSlice(ByteAddressBuffer InputBuffer, uint NodeOffset, uint ChildIndex)
{
    // NOTE: Offset is expected in dwords
    const uint BaseAddress = NodeOffset * 4;

    const uint4 RawData0 = InputBuffer.Load4(BaseAddress + 16 * ChildIndex);
    const uint4 RawData1 = InputBuffer.Load4(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 16) + 16 * ChildIndex);
    const uint4 RawData2 = InputBuffer.Load4(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 32) + 16 * ChildIndex);
#if NANITE_ASSEMBLY_DATA
    const uint2 RawData3 = InputBuffer.Load2(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 48) + 8 * ChildIndex);
#else
    const uint2 RawData3 = uint2(InputBuffer.Load(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 48) + 4 * ChildIndex), 0xFFFFFFFFu);
#endif

    return UnpackHierarchyNodeSlice(RawData0, RawData1, RawData2, RawData3);
}

FHierarchyNodeSlice GetHierarchyNodeSlice(RWByteAddressBuffer InputBuffer, uint NodeOffset, uint ChildIndex)
{
    // NOTE: Offset is expected in dwords
    const uint BaseAddress = NodeOffset * 4;

    const uint4 RawData0 = InputBuffer.Load4(BaseAddress + 16 * ChildIndex);
    const uint4 RawData1 = InputBuffer.Load4(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 16) + 16 * ChildIndex);
    const uint4 RawData2 = InputBuffer.Load4(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 32) + 16 * ChildIndex);
#if NANITE_ASSEMBLY_DATA
    const uint2 RawData3 = InputBuffer.Load2(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 48) + 8 * ChildIndex);
#else
    const uint2 RawData3 = uint2(InputBuffer.Load(BaseAddress + (NANITE_MAX_BVH_NODE_FANOUT * 48) + 4 * ChildIndex), 0xFFFFFFFFu);
#endif

    return UnpackHierarchyNodeSlice(RawData0, RawData1, RawData2, RawData3);
}

FHierarchyNodeSlice GetHierarchyNodeSlice(uint NodeOffset, uint ChildIndex)
{
    return GetHierarchyNodeSlice(HierarchyBuffer, NodeOffset, ChildIndex);
}

void StoreHierarchyNodeChildStartReference(RWByteAddressBuffer OutputBuffer, uint NodeOffset, uint ChildIndex, uint ChildStartReference)
{
    const uint Address = NodeOffset * 4 + NANITE_MAX_BVH_NODE_FANOUT * 32 + 16 * ChildIndex + 12;
    OutputBuffer.Store(Address, ChildStartReference);
}

// Decode a triangle that is represented by one base index and two 5-bit offsets.
uint3 DecodeTriangleIndices(FCluster Cluster, uint TriIndex)
{
    if( Cluster.bVoxel )
        return uint3( TriIndex, TriIndex, TriIndex );

    const uint BitsPerTriangle = Cluster.BitsPerIndex + 2 * 5;

    FBitStreamReaderState BitStreamReader = BitStreamReader_Create_Aligned(Cluster.PageBaseAddress + Cluster.IndexOffset, TriIndex * BitsPerTriangle, 8 + 2 * 5);

    uint BaseIndex = BitStreamReader_Read_RO(ClusterPageData, BitStreamReader, Cluster.BitsPerIndex, 8);
    uint Delta0 = BitStreamReader_Read_RO(ClusterPageData, BitStreamReader, 5, 5);
    uint Delta1 = BitStreamReader_Read_RO(ClusterPageData, BitStreamReader, 5, 5);

    return BaseIndex + uint3(0, Delta0, Delta1);
}
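
// Example: with BitsPerIndex == 8, each triangle costs 8 + 2*5 = 18 bits, so triangle 10 starts at bit
// 180 of the index stream. The two 5-bit deltas keep the second and third indices within 31 vertices of
// the base index, a constraint the encoder must satisfy when ordering each cluster's vertices.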

FBrick DecodeBrick(FCluster Cluster, uint BrickIndex)
{
    FBrick Brick;
    const uint4 BrickData0 = ClusterPageData.Load4( Cluster.PageBaseAddress + Cluster.BrickDataOffset + 20 * BrickIndex );
    const uint  BrickData1 = ClusterPageData.Load ( Cluster.PageBaseAddress + Cluster.BrickDataOffset + 20 * BrickIndex + 16 );

    Brick.ReverseBrickBits = BrickData0.xy;

    Brick.BrickMax = uint3( BitFieldExtractU32( BrickData0.z, 2, 0 ),
                            BitFieldExtractU32( BrickData0.z, 2, 2 ),
                            BitFieldExtractU32( BrickData0.z, 2, 4 ) ) + 1u;

    Brick.StartPos = int3( BitFieldExtractI32( BrickData0.z, 19, 6 ),
                           BitFieldExtractI32( BitAlignU32( BrickData0.w, BrickData0.z, 25 ), 19, 0 ),
                           BitFieldExtractI32( BrickData0.w, 19, 12 ) );
    Brick.VertOffset = BrickData1;

    return Brick;
}

struct FShadingMask
{
    bool bIsNanitePixel;
    bool bIsDecalReceiver;
    bool bHasDistanceField;
    uint LightingChannels;
    uint ShadingBin;
    uint ShadingRate;
};

uint PackShadingMask(
    uint ShadingBin,
    uint ShadingRate,
    bool bIsDecalReceiver,
    bool bHasDistanceField,
    uint LightingChannels
)
{
    uint Packed = 0x1; // Is Nanite
    Packed |= (BitFieldMaskU32(3, 1) & (LightingChannels << 1u)); // 3 bits for channels 0,1,2
    Packed |= (BitFieldMaskU32(14, 4) & (ShadingBin << 4u));
    Packed |= select(bIsDecalReceiver, 1u << 18u, 0u);
    Packed |= select(bHasDistanceField, 1u << 19u, 0u);
    Packed |= (BitFieldMaskU32(4, 20) & (ShadingRate << 20u)); // 4 bits for 2x2 tier2 VRS
    return Packed;
}

uint PackShadingMask(FShadingMask Mask)
{
    return PackShadingMask(
        Mask.ShadingBin,
        Mask.ShadingRate,
        Mask.bIsDecalReceiver,
        Mask.bHasDistanceField,
        Mask.LightingChannels
    );
}

FShadingMask UnpackShadingMask(uint Packed)
{
    FShadingMask UnpackedMask;
    UnpackedMask.bIsNanitePixel = BitFieldExtractU32(Packed, 1, 0) != 0;
    UnpackedMask.LightingChannels = BitFieldExtractU32(Packed, 3, 1);
    UnpackedMask.ShadingBin = BitFieldExtractU32(Packed, 14, 4);
    UnpackedMask.bIsDecalReceiver = BitFieldExtractU32(Packed, 1, 18) != 0;
    UnpackedMask.bHasDistanceField = BitFieldExtractU32(Packed, 1, 19) != 0;
    UnpackedMask.ShadingRate = BitFieldExtractU32(Packed, 4, 20);
    return UnpackedMask;
}
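
// Packed shading mask layout (32 bits), as written by PackShadingMask:
//   bit  0     : is Nanite pixel
//   bits 1-3   : lighting channels 0..2
//   bits 4-17  : shading bin (14 bits)
//   bit  18    : decal receiver
//   bit  19    : has distance field
//   bits 20-23 : shading rate (4 bits for 2x2 tier2 VRS)
//   bits 24-31 : unused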

void UnpackVisPixel(
    UlongType Pixel,
    out uint DepthInt,
    out uint VisibleClusterIndex,
    out uint TriIndex
)
{
    const uint2 Unpacked = UnpackUlongType(Pixel);
    VisibleClusterIndex = Unpacked.x >> 7;
    TriIndex = Unpacked.x & 0x7F;
    DepthInt = Unpacked.y;

    VisibleClusterIndex--;
}
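
// The 64-bit visibility pixel packs depth into the high dword and ((VisibleClusterIndex + 1) << 7) | TriIndex
// into the low dword; the +1 bias keeps a cleared value of 0 from aliasing a valid cluster, hence the
// decrement after unpacking.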

void UnpackVisPixel(
    UlongType Pixel,
    out uint DepthInt,
    out uint VisibleClusterIndex,
    out uint TriIndex,
    out bool bIsImposter
)
{
    const uint2 Unpacked = UnpackUlongType(Pixel);
    VisibleClusterIndex = Unpacked.x >> 7;
    TriIndex = Unpacked.x & 0x7F;
    DepthInt = Unpacked.y;
#if NANITE_IMPOSTERS_SUPPORTED
    bIsImposter = (Unpacked.x >> 31);
#else
    bIsImposter = false;
#endif

    VisibleClusterIndex--;
}

void UnpackDbgPixel(
    UlongType Pixel,
    out uint DepthInt,
    out uint DebugValue
)
{
    const uint2 Unpacked = UnpackUlongType(Pixel);
    DebugValue = Unpacked.x;
    DepthInt = Unpacked.y;
}

uint3 GetClusterPosition(uint VertIndex, FCluster Cluster)
{
    const uint BitsPerVertex = Cluster.PosBits.x + Cluster.PosBits.y + Cluster.PosBits.z;
    const uint BitOffset = MulU24( VertIndex, BitsPerVertex );
    uint3 Data = ClusterPageData.Load3(Cluster.PageBaseAddress + Cluster.PositionOffset + ((BitOffset >> 5) << 2));
    uint2 Packed = uint2(BitAlignU32(Data.y, Data.x, BitOffset), BitAlignU32(Data.z, Data.y, BitOffset));

    uint3 Pos;
    Pos.x = BitFieldExtractU32(Packed.x, Cluster.PosBits.x, 0);
    Packed.x = BitAlignU32(Packed.y, Packed.x, Cluster.PosBits.x);
    Packed.y >>= Cluster.PosBits.x;
    Pos.y = BitFieldExtractU32(Packed.x, Cluster.PosBits.y, 0);
    Packed.x = BitAlignU32(Packed.y, Packed.x, Cluster.PosBits.y);
    Pos.z = BitFieldExtractU32(Packed.x, Cluster.PosBits.z, 0);
    return Pos;
}

float3 DecodePosition(uint VertIndex, FCluster Cluster)
{
#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
    return asfloat(ClusterPageData.Load3(Cluster.PageBaseAddress + Cluster.PositionOffset + VertIndex * 12));
#else
    const uint3 ClusterPos = GetClusterPosition(VertIndex, Cluster);
    return ((int3)ClusterPos + Cluster.PosStart) * Cluster.PosScale;
#endif
}
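
// Dequantization note: PosScale is computed by subtracting (PosPrecision << 23) from the bit pattern
// of 1.0f, which decrements the float exponent PosPrecision times, i.e. PosScale == exp2(-PosPrecision)
// and PosRcpScale == exp2(PosPrecision). For example, PosPrecision == 8 gives PosScale == 1/256, so
// quantized cluster-local coordinates advance in steps of 1/256th of a unit.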

FNaniteView UnpackNaniteView(FPackedNaniteView PackedView)
{
    const float3 ViewOriginHigh =
    {
        PackedView.ViewOriginHighX,
        PackedView.ViewOriginHighY,
        PackedView.ViewOriginHighZ
    };

    FNaniteView NaniteView;

    NaniteView.SVPositionToTranslatedWorld = PackedView.SVPositionToTranslatedWorld;
    NaniteView.ViewToTranslatedWorld = PackedView.ViewToTranslatedWorld;
    NaniteView.ViewOriginHigh = ViewOriginHigh;

    NaniteView.TranslatedWorldToView = PackedView.TranslatedWorldToView;
    NaniteView.TranslatedWorldToClip = PackedView.TranslatedWorldToClip;
    NaniteView.ViewToClip = PackedView.ViewToClip;
    NaniteView.ClipToWorld = MakeDFMatrix(ViewOriginHigh, PackedView.ClipToRelativeWorld);

    NaniteView.PrevTranslatedWorldToView = PackedView.PrevTranslatedWorldToView;
    NaniteView.PrevTranslatedWorldToClip = PackedView.PrevTranslatedWorldToClip;
    NaniteView.PrevViewToClip = PackedView.PrevViewToClip;
    NaniteView.PrevClipToWorld = MakeDFMatrix(ViewOriginHigh, PackedView.PrevClipToRelativeWorld);

    NaniteView.TranslatedGlobalClipPlane = PackedView.TranslatedGlobalClipPlane;

    NaniteView.ViewRect = PackedView.ViewRect;
    NaniteView.ViewSizeAndInvSize = PackedView.ViewSizeAndInvSize;
    NaniteView.ClipSpaceScaleOffset = PackedView.ClipSpaceScaleOffset;
    NaniteView.MaterialCacheUnwrapMinAndInvSize = PackedView.MaterialCacheUnwrapMinAndInvSize;
    NaniteView.MaterialCachePageAdvanceAndInvCount = PackedView.MaterialCachePageAdvanceAndInvCount;
    NaniteView.PreViewTranslation = MakeDFVector3(PackedView.PreViewTranslationHigh, PackedView.PreViewTranslationLow);
    NaniteView.PrevPreViewTranslation = MakeDFVector3(PackedView.PrevPreViewTranslationHigh, PackedView.PrevPreViewTranslationLow);
    NaniteView.WorldCameraOrigin = MakeDFVector3(ViewOriginHigh, PackedView.ViewOriginLow);
    NaniteView.CullingViewOriginTranslatedWorld = PackedView.CullingViewOriginTranslatedWorld;
    NaniteView.ViewForward = PackedView.ViewForward;
    NaniteView.NearPlane = PackedView.NearPlane;
    NaniteView.LODScale = PackedView.LODScales.x;
    NaniteView.LODScaleHW = PackedView.LODScales.y;
    NaniteView.CullingViewMinRadiusTestFactorSq = PackedView.CullingViewMinRadiusTestFactorSq;
    NaniteView.CullingViewScreenMultipleSq = PackedView.CullingViewScreenMultipleSq;
    NaniteView.StreamingPriorityCategory = PackedView.StreamingPriorityCategory_AndFlags & NANITE_STREAMING_PRIORITY_CATEGORY_MASK;
    NaniteView.Flags = PackedView.StreamingPriorityCategory_AndFlags >> NANITE_NUM_STREAMING_PRIORITY_CATEGORY_BITS;

    NaniteView.TargetLayerIndex = PackedView.TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ.x;
    NaniteView.TargetMipLevel = PackedView.TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ.y;
    NaniteView.TargetNumMipLevels = PackedView.TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ.z;
    NaniteView.TargetPrevLayerIndex = PackedView.TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ.w;
    NaniteView.RangeBasedCullingDistance = PackedView.RangeBasedCullingDistance;

    NaniteView.HZBTestViewRect = PackedView.HZBTestViewRect;
    NaniteView.InstanceOcclusionQueryMask = PackedView.InstanceOcclusionQueryMask;
    NaniteView.bUseLightingChannelMask = (PackedView.LightingChannelMask & 0x8u) > 0; // 0b1000 with COMPILER_SUPPORTS_HLSL2021
    NaniteView.LightingChannelMask = (PackedView.LightingChannelMask & 0x7u); // 0b0111
    NaniteView.FirstPersonTransform = float3x3(
        f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.x), f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.x >> 16u), f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.y),
        f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.y >> 16u), f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.z), f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.z >> 16u),
        f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.w), f16tof32(PackedView.FirstPersonTransformRowsExceptRow2Z.w >> 16u), f16tof32(PackedView.FirstPersonTransformRow2Z));

    NaniteView.SceneRendererPrimaryViewId = PackedView.SceneRendererPrimaryViewId;
    NaniteView.DynamicDepthCullRange = PackedView.DynamicDepthCullRange;
    return NaniteView;
}

StructuredBuffer< FPackedNaniteView > InViews;
FNaniteView GetNaniteView( uint ViewIndex )
{
#if NANITE_USE_VIEW_UNIFORM_BUFFER
#if INSTANCED_STEREO
    ViewState LocalView = GetInstancedView(ViewIndex);
#else
    ViewState LocalView = GetPrimaryView();
#endif

    FNaniteView NaniteView;

    NaniteView.SVPositionToTranslatedWorld = LocalView.SVPositionToTranslatedWorld;
    NaniteView.ViewToTranslatedWorld = LocalView.ViewToTranslatedWorld;
    NaniteView.ViewOriginHigh = LocalView.ViewOriginHigh;

    NaniteView.TranslatedWorldToView = LocalView.TranslatedWorldToView;
    NaniteView.TranslatedWorldToClip = LocalView.TranslatedWorldToClip;
    NaniteView.ViewToClip = LocalView.ViewToClip;
    NaniteView.ClipToWorld = LocalView.ClipToWorld;

    NaniteView.PrevTranslatedWorldToView = LocalView.PrevTranslatedWorldToView;
    NaniteView.PrevTranslatedWorldToClip = LocalView.PrevTranslatedWorldToClip;
    NaniteView.PrevViewToClip = LocalView.PrevViewToClip;
    NaniteView.PrevClipToWorld = LocalView.PrevClipToWorld;

    NaniteView.TranslatedGlobalClipPlane = LocalView.GlobalClippingPlane;
    NaniteView.ViewSizeAndInvSize = LocalView.ViewSizeAndInvSize;
    NaniteView.ViewRect = int4(int2(LocalView.ViewRectMin.xy + 0.5f), int2(LocalView.ViewRectMin.xy + LocalView.ViewSizeAndInvSize.xy + 0.5f));
    NaniteView.PreViewTranslation = LocalView.PreViewTranslation;
    NaniteView.PrevPreViewTranslation = LocalView.PrevPreViewTranslation;
    NaniteView.WorldCameraOrigin = LocalView.WorldCameraOrigin;
    NaniteView.ViewForward = LocalView.ViewForward;
    NaniteView.NearPlane = LocalView.NearPlane;
    NaniteView.LODScale = 1.0f;
    NaniteView.LODScaleHW = 1.0f;
    NaniteView.CullingViewMinRadiusTestFactorSq = 0.0f;
    NaniteView.StreamingPriorityCategory = 3;
    NaniteView.Flags = NANITE_VIEW_FLAG_HZBTEST | NANITE_VIEW_FLAG_NEAR_CLIP;

    NaniteView.TargetLayerIndex = -1; // INDEX_NONE
    NaniteView.TargetMipLevel = 0;
    NaniteView.TargetNumMipLevels = 0;
    NaniteView.TargetPrevLayerIndex = -1; // INDEX_NONE
    NaniteView.RangeBasedCullingDistance = 0.0f;

    NaniteView.HZBTestViewRect = NaniteView.ViewRect;
    NaniteView.InstanceOcclusionQueryMask = 0;

    // This path isn't used for the shadow passes, but initialize to the same value as GetDefaultLightingChannelMask() for consistency
    NaniteView.LightingChannelMask = 0x1;

    NaniteView.FirstPersonTransform = (float3x3)LocalView.FirstPersonTransform;
    NaniteView.SceneRendererPrimaryViewId = LocalView.GPUSceneViewId;

#else // !NANITE_USE_VIEW_UNIFORM_BUFFER

#if NANITE_MULTI_VIEW
    FPackedNaniteView PackedView = InViews[ViewIndex];
#else
    FPackedNaniteView PackedView = InViews[0];
#endif
    FNaniteView NaniteView = UnpackNaniteView(PackedView);

#endif // NANITE_USE_VIEW_UNIFORM_BUFFER

    return NaniteView;
}

// Fill ViewState using data from a NaniteView
void PatchViewState(FNaniteView NaniteView, inout ViewState InOutView)
{
    InOutView.SVPositionToTranslatedWorld = NaniteView.SVPositionToTranslatedWorld;
    InOutView.ViewToTranslatedWorld = NaniteView.ViewToTranslatedWorld;
    InOutView.ViewOriginHigh = NaniteView.ViewOriginHigh;

    InOutView.TranslatedWorldToView = NaniteView.TranslatedWorldToView;
    InOutView.TranslatedWorldToClip = NaniteView.TranslatedWorldToClip;
    InOutView.ViewToClip = NaniteView.ViewToClip;
    InOutView.ClipToWorld = NaniteView.ClipToWorld;

    InOutView.PrevTranslatedWorldToView = NaniteView.PrevTranslatedWorldToView;
    InOutView.PrevTranslatedWorldToClip = NaniteView.PrevTranslatedWorldToClip;
    InOutView.PrevViewToClip = NaniteView.PrevViewToClip;
    InOutView.PrevClipToWorld = NaniteView.PrevClipToWorld;

    InOutView.ViewSizeAndInvSize = NaniteView.ViewSizeAndInvSize;
    InOutView.ViewRectMin.xy = NaniteView.ViewRect.xy - 0.5f; // Convert back from the int2 texel coord to a float2 with a half texel offset
    InOutView.PreViewTranslation = NaniteView.PreViewTranslation;
    InOutView.PrevPreViewTranslation = NaniteView.PrevPreViewTranslation;
    InOutView.WorldCameraOrigin = NaniteView.WorldCameraOrigin;
    InOutView.ViewForward = NaniteView.ViewForward;
    InOutView.NearPlane = NaniteView.NearPlane;

#if VIEW_HAS_TILEOFFSET_DATA
    InOutView.TileOffset.PreViewTranslation = DFToTileOffset(InOutView.PreViewTranslation); // DF_TODO: should we upload TO data?
    InOutView.TileOffset.PrevPreViewTranslation = DFToTileOffset(InOutView.PrevPreViewTranslation);
    //InOutView.TileOffset.WorldViewOrigin = DFToTileOffset(InOutView.WorldViewOrigin);
    //InOutView.TileOffset.PrevWorldViewOrigin = DFToTileOffset(InOutView.PrevWorldViewOrigin);
    InOutView.TileOffset.WorldCameraOrigin = DFToTileOffset(InOutView.WorldCameraOrigin);
    //InOutView.TileOffset.PrevWorldCameraOrigin = DFToTileOffset(InOutView.PrevWorldCameraOrigin);
#endif
}

float GetProjectedEdgeLengthAtDepth(float InLength, float ViewZ, FNaniteView NaniteView)
{
    const bool bOrtho = NaniteView.ViewToClip[3][3] >= 1;
    return (InLength * NaniteView.LODScale) / (bOrtho ? 1.0f : ViewZ);
}
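
// Under perspective projection, projected size is proportional to world-space size divided by view
// depth, hence the ViewZ divisor; an orthographic view (detected via ViewToClip[3][3] >= 1) has no
// depth-dependent scaling, so the divisor collapses to 1.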

void WriteDispatchArgsSWHW(RWBuffer<uint> RasterizerArgsSWHW, uint ArgsOffset, uint NumClustersSW, uint NumClustersHW)
{
    RasterizerArgsSWHW[ArgsOffset + 0] = (NumClustersSW + 63u) / 64u; // SW: ThreadGroupCountX
    RasterizerArgsSWHW[ArgsOffset + 1] = 1; // SW: ThreadGroupCountY
    RasterizerArgsSWHW[ArgsOffset + 2] = 1; // SW: ThreadGroupCountZ
    RasterizerArgsSWHW[ArgsOffset + 3] = 0; // padding

    RasterizerArgsSWHW[ArgsOffset + 4] = (NumClustersHW + 63u) / 64u; // HW: ThreadGroupCountX
    RasterizerArgsSWHW[ArgsOffset + 5] = 1; // HW: ThreadGroupCountY
    RasterizerArgsSWHW[ArgsOffset + 6] = 1; // HW: ThreadGroupCountZ
    RasterizerArgsSWHW[ArgsOffset + 7] = 0; // padding
}
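
// Both argument blocks assume 64-thread groups: (N + 63) / 64 rounds the cluster count up to a whole
// number of groups. The SW args occupy dwords 0-3 and the HW args dwords 4-7 of the combined buffer;
// the cluster counter dword that culling accumulates into is the one selected by
// GetHWClusterCounterIndex above.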

void WriteRasterizerArgsSWHW(RWBuffer<uint> RasterizerArgsSWHW, uint ArgsOffset, uint NumClustersSW, uint NumClustersHW)
{
    RasterizerArgsSWHW[ArgsOffset + 0] = NumClustersSW; // SW: ThreadGroupCountX
    RasterizerArgsSWHW[ArgsOffset + 1] = 1; // SW: ThreadGroupCountY
    RasterizerArgsSWHW[ArgsOffset + 2] = 1; // SW: ThreadGroupCountZ
    RasterizerArgsSWHW[ArgsOffset + 3] = 0; // padding

    uint3 HWArgs; // Assign to local before writing to RasterizerArgsSWHW to work around an FXC issue where the write to RasterizerArgsSWHW[ArgsOffset + 4] would be omitted
    if (RenderFlags & NANITE_RENDER_FLAG_MESH_SHADER)
    {
        HWArgs.x = NumClustersHW; // HW: ThreadGroupCountX
        HWArgs.y = 1; // HW: ThreadGroupCountY
        HWArgs.z = 1; // HW: ThreadGroupCountZ
    }
    else if (RenderFlags & NANITE_RENDER_FLAG_PRIMITIVE_SHADER)
    {
        HWArgs.x = NumClustersHW; // HW: VertexCountPerInstance
        HWArgs.y = 1; // HW: InstanceCount
        HWArgs.z = 0; // HW: StartVertexLocation
    }
    else
    {
        HWArgs.x = NANITE_MAX_CLUSTER_TRIANGLES * 3; // HW: VertexCountPerInstance
        HWArgs.y = NumClustersHW; // HW: InstanceCount
        HWArgs.z = 0; // HW: StartVertexLocation
    }

    RasterizerArgsSWHW[ArgsOffset + 4] = HWArgs.x;
    RasterizerArgsSWHW[ArgsOffset + 5] = HWArgs.y;
    RasterizerArgsSWHW[ArgsOffset + 6] = HWArgs.z;
    RasterizerArgsSWHW[ArgsOffset + 7] = 0; // HW: StartInstanceLocation
}

#if COMPILER_SUPPORTS_HLSL2021

FNaniteSkinningHeader LoadNaniteSkinningHeader(uint InPrimitiveIndex)
{
    const uint Offset = InPrimitiveIndex * (uint)sizeof(FNaniteSkinningHeader);
    return SceneUB(NaniteSkinning).SkinningHeaders.Load<FNaniteSkinningHeader>(Offset);
}

#if USE_COMPRESSED_BONE_TRANSFORM
// TODO: Move these outside Nanite and use them for non-Nanite animation sampling?
float4x3 UnpackCompressedBoneTransform(uint4 Data0, uint4 Data1)
{
    float4x3 Result;
    Result[0] = float3(f16tof32(Data0.w), f16tof32(Data0.w >> 16), f16tof32(Data1.x));
    Result[1] = float3(f16tof32(Data1.x >> 16), f16tof32(Data1.y), f16tof32(Data1.y >> 16));
    Result[2] = float3(f16tof32(Data1.z), f16tof32(Data1.z >> 16), f16tof32(Data1.w));
    Result[3] = asfloat(Data0.xyz);
    return Result;
}
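
// Compressed layout (32 bytes per bone): Data0.xyz holds the float3 translation and the remaining
// 20 bytes (Data0.w plus Data1) hold the nine rotation/scale matrix elements as f16 halves packed
// two per dword, unpacked row by row above.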

float4x3 LoadCompressedBoneTransform(ByteAddressBuffer SrcBuffer, uint BaseOffset, uint BoneIndex)
{
    const uint Offset = BaseOffset + BoneIndex * 32u;

    const uint4 Data0 = SrcBuffer.Load4(Offset);
    const uint4 Data1 = SrcBuffer.Load4(Offset + 16);

    return UnpackCompressedBoneTransform(Data0, Data1);
}

float4x3 LoadCompressedBoneTransform(RWByteAddressBuffer SrcBuffer, uint BaseOffset, uint BoneIndex)
{
    const uint Offset = BaseOffset + BoneIndex * 32u;

    const uint4 Data0 = SrcBuffer.Load4(Offset);
    const uint4 Data1 = SrcBuffer.Load4(Offset + 16);

    return UnpackCompressedBoneTransform(Data0, Data1);
}

void StoreCompressedBoneTransform(RWByteAddressBuffer DstBuffer, uint BaseOffset, uint BoneIndex, float4x3 BoneTransform)
{
    const uint Offset = BaseOffset + BoneIndex * 32u;

    const uint3 XAxis = f32tof16(BoneTransform[0]);
    const uint3 YAxis = f32tof16(BoneTransform[1]);
    const uint3 ZAxis = f32tof16(BoneTransform[2]);

    const uint4 Data0 = uint4(asuint(BoneTransform[3]), XAxis.x | (XAxis.y << 16));
    const uint4 Data1 = uint4(XAxis.z | (YAxis.x << 16), YAxis.y | (YAxis.z << 16), ZAxis.x | (ZAxis.y << 16), ZAxis.z);

    DstBuffer.Store4(Offset, Data0);
    DstBuffer.Store4(Offset + 16, Data1);
}

#else

float4x3 LoadCompressedBoneTransform(ByteAddressBuffer SrcBuffer, uint BaseOffset, uint BoneIndex)
{
    return transpose(SrcBuffer.Load<float3x4>(BaseOffset + BoneIndex * (uint)sizeof(float3x4)));
}

float4x3 LoadCompressedBoneTransform(RWByteAddressBuffer SrcBuffer, uint BaseOffset, uint BoneIndex)
{
    return transpose(SrcBuffer.Load<float3x4>(BaseOffset + BoneIndex * (uint)sizeof(float3x4)));
}

void StoreCompressedBoneTransform(RWByteAddressBuffer DstBuffer, uint BaseOffset, uint BoneIndex, float4x3 BoneTransform)
{
    const uint Address = BaseOffset + BoneIndex * (uint)sizeof(float3x4);

    const float3x4 Tmp = transpose(BoneTransform);
#if COMPILER_SUPPORTS_TYPEDSTORE
    DstBuffer.TypedStore<float3x4>(Address, Tmp);
#else
    DstBuffer.Store<float3x4>(Address, Tmp);
#endif
}

#endif

float4x3 LoadNaniteBoneTransform(uint TransformIndex)
{
    return LoadCompressedBoneTransform(SceneUB(NaniteSkinning).BoneTransforms, 0, TransformIndex);
}

FBoneTransformWithScale LoadNaniteBoneObjectSpaceWithScale(uint BufferOffset, uint BoneIndex)
{
    const uint BufferOffsetBytes = BufferOffset * (uint)sizeof(float);
    return SceneUB(NaniteSkinning).BoneObjectSpace.Load<FBoneTransformWithScale>(BufferOffsetBytes + BoneIndex * (uint)sizeof(FBoneTransformWithScale));
}

FBoneTransform LoadNaniteBoneObjectSpace(uint BufferOffset, uint BoneIndex)
{
    const uint BufferOffsetBytes = BufferOffset * (uint)sizeof(float);
    return SceneUB(NaniteSkinning).BoneObjectSpace.Load<FBoneTransform>(BufferOffsetBytes + BoneIndex * (uint)sizeof(FBoneTransform));
}

float4x4 LoadNaniteAssemblyTransform(uint HierarchyBufferOffset, uint TransformIndex)
{
#if NANITE_ASSEMBLY_DATA
    const uint BufferAddress = HierarchyBufferOffset * 4u + TransformIndex * (uint)sizeof(float3x4);
    const float3x4 TransposedTransform = HierarchyBuffer.Load<float3x4>(BufferAddress);
    return transpose(float4x4(
        TransposedTransform[0],
        TransposedTransform[1],
        TransposedTransform[2],
        float4(0, 0, 0, 1)
    ));
#else
    return float4x4(
        float4(1, 0, 0, 0),
        float4(0, 1, 0, 0),
        float4(0, 0, 1, 0),
        float4(0, 0, 0, 1)
    );
#endif
}

#endif