// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "NaniteDataDecode.ush"
#include "../Barycentrics.ush"

// Interpolated attributes for a single shading point, as produced by GetAttributeData().
struct FNaniteAttributeData
{
	/** Interpolated vertex color, in linear color space. */
	TDual< half4 > VertexColor;
	TDual< float2 > TexCoords[NANITE_MAX_UVS];

	/** Orthonormal rotation-only transform from tangent space to world space. */
	half3x3 TangentToWorld;

	half UnMirrored;
};

// Decoded (not yet interpolated) attributes of a single vertex, as produced by GetRawAttributeDataN().
struct FNaniteRawAttributeData
{
	float4 TangentXAndSign;
	float3 TangentZ;
	float4 Color;
	float2 TexCoords[NANITE_MAX_UVS];
};

// Tangent frame of a single vertex: tangent (+ bitangent sign) and normal.
struct FNaniteTangentBasis
{
	float4 TangentXAndSign;	// Tangent and Bitangent Sign Bit
	float3 TangentZ;		// Normal

	// Bitangent
	float3 DeriveTangentY()
	{
		return cross(TangentZ, TangentXAndSign.xyz) * TangentXAndSign.w;
	}

	void RecalculateTangentX()
	{
		// Recalculate TangentX from the other two vectors - This can correct some quantization errors.
		// The error shows up most in specular off of a mesh with a smoothed UV seam (normal is smooth, but tangents vary across the seam)
		const float3 TangentY = DeriveTangentY();
		TangentXAndSign.xyz = cross(TangentY, TangentZ) * TangentXAndSign.w;
	}

	// Normalizes the tangent and the normal independently; the sign in TangentXAndSign.w is left untouched.
	void Normalize()
	{
		TangentXAndSign.xyz = normalize(TangentXAndSign.xyz);
		TangentZ = normalize(TangentZ);
	}
};

// Copies the tangent and normal of a decoded vertex into a FNaniteTangentBasis.
FNaniteTangentBasis MakeTangentBasis(FNaniteRawAttributeData RawAttributeData)
{
	FNaniteTangentBasis TangentBasis;
	TangentBasis.TangentXAndSign = RawAttributeData.TangentXAndSign;
	TangentBasis.TangentZ = RawAttributeData.TangentZ;
	return TangentBasis;
}

HLSL_STATIC_ASSERT(sizeof(FNaniteTangentBasis) == 28, "Unexpected size of FNaniteTangentBasis. Update WaveReadLaneAt to reflect changes.");
// Member-wise WaveReadLaneAt for FNaniteTangentBasis.
FNaniteTangentBasis WaveReadLaneAt(FNaniteTangentBasis In, uint SrcIndex)
{
	FNaniteTangentBasis Out;
	Out.TangentXAndSign = WaveReadLaneAt(In.TangentXAndSign, SrcIndex);
	Out.TangentZ = WaveReadLaneAt(In.TangentZ, SrcIndex);
	return Out;
}

HLSL_STATIC_ASSERT(sizeof(FNaniteRawAttributeData) == 44 + 8 * NANITE_MAX_UVS, "Unexpected size of FNaniteRawAttributeData. Update WaveReadLaneAt to reflect changes.");
// Member-wise WaveReadLaneAt for FNaniteRawAttributeData.
FNaniteRawAttributeData WaveReadLaneAt(FNaniteRawAttributeData In, uint SrcIndex)
{
	FNaniteRawAttributeData Out;
	Out.TangentXAndSign = WaveReadLaneAt(In.TangentXAndSign, SrcIndex);
	Out.TangentZ = WaveReadLaneAt(In.TangentZ, SrcIndex);
	Out.Color = WaveReadLaneAt(In.Color, SrcIndex);

	UNROLL
	for (uint i = 0; i < NANITE_MAX_UVS; ++i)
	{
		Out.TexCoords[i] = WaveReadLaneAt(In.TexCoords[i], SrcIndex);
	}
	return Out;
}

// Size in bytes of one packed UV header (two dwords, see UnpackUVHeader).
#define SIZEOF_PACKED_UV_HEADER 8

struct FUVHeader
{
	uint2 Min;
	uint2 NumBits;
	uint NumMantissaBits;
};

// Size in bytes of the packed bone influence header (two dwords, see GetBoneInfluenceHeader).
#define SIZEOF_PACKED_BONE_INFLUENCE_HEADER 8

struct FBoneInfluenceHeader
{
	uint DataAddress;
	uint NumVertexBoneInfluences;
	uint NumVertexBoneIndexBits;
	uint NumVertexBoneWeightBits;
};

// Unpacks a UV header: per component, the low 5 bits hold the bit count and the remaining 27 bits hold the minimum.
FUVHeader UnpackUVHeader(uint2 Data)
{
	FUVHeader Range;
	Range.NumBits.x = BitFieldExtractU32(Data.x, 5, 0);
	Range.Min.x = Data.x >> 5;
	Range.NumBits.y = BitFieldExtractU32(Data.y, 5, 0);
	Range.Min.y = Data.y >> 5;
	Range.NumMantissaBits = NANITE_UV_FLOAT_NUM_MANTISSA_BITS; // Hardcode for now, but eventually make this a mesh setting.
	return Range;
}

// Loads and unpacks the Index-th UV header stored at StartOffset (read-only buffer overload).
FUVHeader GetUVHeader(ByteAddressBuffer InputBuffer, uint StartOffset, uint Index)
{
	uint2 Data = InputBuffer.Load2(StartOffset + Index * SIZEOF_PACKED_UV_HEADER);
	return UnpackUVHeader(Data);
}

// Loads and unpacks the Index-th UV header stored at StartOffset (read-write buffer overload).
FUVHeader GetUVHeader(RWByteAddressBuffer InputBuffer, uint StartOffset, uint Index)
{
	uint2 Data = InputBuffer.Load2(StartOffset + Index * SIZEOF_PACKED_UV_HEADER);
	return UnpackUVHeader(Data);
}

// Loads the bone influence header of a cluster. It is read from the decode info, directly after the cluster's UV headers.
FBoneInfluenceHeader GetBoneInfluenceHeader(FCluster Cluster)
{
	const uint2 Data = ClusterPageData.Load2(Cluster.PageBaseAddress + Cluster.DecodeInfoOffset + Cluster.NumUVs * SIZEOF_PACKED_UV_HEADER);

	FBoneInfluenceHeader Header;
	Header.DataAddress = Cluster.PageBaseAddress + BitFieldExtractU32(Data.x, 22, 0);
	Header.NumVertexBoneInfluences = BitFieldExtractU32(Data.x, 10, 22);
	Header.NumVertexBoneIndexBits = BitFieldExtractU32(Data.y, 6, 0);
	Header.NumVertexBoneWeightBits = BitFieldExtractU32(Data.y, 5, 6);
	return Header;
}

// Decodes a UV component from its packed representation.
// Values at or below the exponent+mantissa mask decode as negative (from the bitwise complement), larger values as positive.
float DecodeUVFloat(uint EncodedValue, uint NumMantissaBits)
{
	const uint ExponentAndMantissaMask = BitFieldMaskU32(NANITE_UV_FLOAT_NUM_EXPONENT_BITS + NumMantissaBits, 0);
	const bool bNeg = (EncodedValue <= ExponentAndMantissaMask);
	const uint ExponentAndMantissa = (bNeg ? ~EncodedValue : EncodedValue) & ExponentAndMantissaMask;

	float Result = asfloat(0x3F000000u + (ExponentAndMantissa << (23 - NumMantissaBits)));
	Result = min(Result * 2.0f - 1.0f, Result); // Stretch denormals from [0.5,1.0] to [0.0,1.0]

	return bNeg ? -Result : Result;
}

// Adds the header minimum to the packed per-vertex UV and decodes both components.
float2 UnpackTexCoord(uint2 Packed, FUVHeader UVHeader)
{
	const uint2 GlobalUV = UVHeader.Min + Packed;

	return float2(
		DecodeUVFloat(GlobalUV.x, UVHeader.NumMantissaBits),
		DecodeUVFloat(GlobalUV.y, UVHeader.NumMantissaBits));
}

// Unpacks a unit normal from two Bits-wide components stored in the low 2 * Bits bits of Packed.
float3 UnpackNormal(uint Packed, uint Bits)
{
	uint Mask = BitFieldMaskU32(Bits, 0);
	// Remap both components from [0, Mask] to [-1, 1].
	float2 F = uint2(BitFieldExtractU32(Packed, Bits, 0), BitFieldExtractU32(Packed, Bits, Bits)) * (2.0f / Mask) - 1.0f;
	float3 N = float3(F.xy, 1.0 - abs(F.x) - abs(F.y));
	float T = saturate(-N.z);
	N.xy += select(N.xy >= 0.0, -T, T);
	return normalize(N);
}

// Upper bound, in bits, of the attribute data of one vertex with NumTexCoordInterpolators UV sets.
uint CalculateMaxAttributeBits(uint NumTexCoordInterpolators)
{
	uint Size = 0u;
	Size += 2u * NANITE_MAX_NORMAL_QUANTIZATION_BITS;
	Size += 1u + NANITE_MAX_TANGENT_QUANTIZATION_BITS;
	Size += 4u * NANITE_MAX_COLOR_QUANTIZATION_BITS;
	Size += NumTexCoordInterpolators * (2u * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	return Size;
}

// Unpacks one material table entry into its triangle range and material index.
void DecodeMaterialRange(uint EncodedRange, out uint TriStart, out uint TriLength, out uint MaterialIndex)
{
	// uint32 TriStart      :  8; // max 128 triangles
	// uint32 TriLength     :  8; // max 128 triangles
	// uint32 MaterialIndex :  6; // max  64 materials
	// uint32 Padding       : 10;

	TriStart = BitFieldExtractU32(EncodedRange, 8, 0);
	TriLength = BitFieldExtractU32(EncodedRange, 8, 8);
	MaterialIndex = BitFieldExtractU32(EncodedRange, 6, 16);
}

// True when the cluster stores its (up to three) material ranges inline rather than in a material table.
bool IsMaterialFastPath(FCluster InCluster)
{
	return (InCluster.Material0Length > 0);
}

// Number of material ranges in the cluster.
uint GetMaterialCount(FCluster InCluster)
{
	if (IsMaterialFastPath(InCluster))
	{
		const uint Material2Length = InCluster.MaterialTotalLength - InCluster.Material0Length - InCluster.Material1Length;
		return 1 + (InCluster.Material1Length > 0) + (Material2Length > 0);
	}
	else
	{
		return InCluster.MaterialTableLength;
	}
}

// Returns the primitive-relative material index of a triangle, or 0xFFFFFFFF if no material table entry covers it.
uint GetRelativeMaterialIndex(FCluster InCluster, uint InTriIndex)
{
	uint MaterialIndex = 0xFFFFFFFF;

	BRANCH
	if (IsMaterialFastPath(InCluster))
	{
		if (InTriIndex < InCluster.Material0Length)
		{
			MaterialIndex = InCluster.Material0Index;
		}
		else if (InTriIndex < (InCluster.Material0Length + InCluster.Material1Length))
		{
			MaterialIndex = InCluster.Material1Index;
		}
		else
		{
			MaterialIndex = InCluster.Material2Index;
		}
	}
	else
	{
		// Slow path: linear search through the material table for the range containing the triangle.
		uint TableOffset = InCluster.PageBaseAddress + InCluster.MaterialTableOffset * 4;
		LOOP
		for (uint TableEntry = 0; TableEntry < InCluster.MaterialTableLength; ++TableEntry)
		{
			uint EncodedRange = ClusterPageData.Load(TableOffset);
			TableOffset += 4;

			uint TriStart;
			uint TriLength;
			uint TriMaterialIndex;
			DecodeMaterialRange(EncodedRange, TriStart, TriLength, TriMaterialIndex);

			if (InTriIndex >= TriStart && InTriIndex < (TriStart + TriLength))
			{
				MaterialIndex = TriMaterialIndex;
				break;
			}
		}
	}

	return MaterialIndex;
}

// Per-primitive material header, see UnpackNaniteMaterialPrimitiveData().
struct FNaniteMaterialPrimitiveData
{
	uint MaterialBufferOffset;
	uint MaterialMaxIndex;
	uint MeshPassMask;
	bool bHasUVDensities;
	uint HitProxyBufferOffset;
	float4 OverlayColor;
};

// Unpacks the four dwords of per-primitive material data.
FNaniteMaterialPrimitiveData UnpackNaniteMaterialPrimitiveData(uint4 Data)
{
	FNaniteMaterialPrimitiveData Output;
	Output.MaterialBufferOffset = Data.x;
	Output.MaterialMaxIndex = BitFieldExtractU32(Data.y, 8u, 0u);
	Output.MeshPassMask = BitFieldExtractU32(Data.y, 8u, 8u);
	Output.bHasUVDensities = BitFieldExtractU32(Data.y, 1u, 16u);
	Output.HitProxyBufferOffset = Data.z;
	Output.OverlayColor = float4(UnpackToUint4(Data.w, 8)) * (1.0f / 255.0f);
	return Output;
}

// Loads the per-primitive material data of InPrimitiveIndex.
// Dwords that are not loaded stay 0xFFFFFFFF.
FNaniteMaterialPrimitiveData LoadNaniteMaterialPrimitiveData(uint InPrimitiveIndex)
{
	const uint ElementStride = Scene.NaniteMaterials.PrimitiveMaterialElementStride;
	const uint Offset = InPrimitiveIndex * ElementStride;

	uint4 Data = (uint4)0xFFFFFFFFu;
#if USE_EDITOR_SHADERS
	// Check to load the hit proxy buffer offset
	if (ElementStride / 4u >= 4u)
	{
		Data = Scene.NaniteMaterials.PrimitiveMaterialData.Load4(Offset);
	}
	else
#endif
	{
		checkSlow(ElementStride / 4u >= 2u);
		// Address with the buffer's actual element stride (same as the Load4 path) rather than a hard-coded 8 bytes,
		// so the two-dword load stays correct when elements are wider than two dwords.
		Data.xy = Scene.NaniteMaterials.PrimitiveMaterialData.Load2(Offset);
	}

	return UnpackNaniteMaterialPrimitiveData(Data);
}

// Bins of one material slot, see UnpackMaterialSlot().
struct FNaniteMaterialSlot
{
	uint ShadingBin;
	uint RasterBin;
	uint FallbackRasterBin;
	uint Unused;
};

// Unpacks a material slot: four 16-bit values stored in two dwords.
FNaniteMaterialSlot UnpackMaterialSlot(uint2 Data)
{
	FNaniteMaterialSlot Output;
	Output.ShadingBin = Data.x >> 16u;
	Output.RasterBin = Data.x & 0xFFFFu;
	Output.Unused = Data.y >> 16u;
	Output.FallbackRasterBin = Data.y & 0xFFFFu;
	return Output;
}

// Loads the material slot stored at the given byte offset.
FNaniteMaterialSlot LoadMaterialSlot(uint Offset)
{
	uint2 Data = Scene.NaniteMaterials.MaterialData.Load2(Offset);
	return UnpackMaterialSlot(Data);
}

// Loads the UV densities stored at the given byte offset. An offset of 0xFFFFFFFF yields all ones.
float4 LoadMaterialUVDensities(uint Offset)
{
	if (Offset == 0xFFFFFFFFu)
	{
		return (float4)1.0f;
	}
	return asfloat(Scene.NaniteMaterials.MaterialData.Load4(Offset));
}

// Byte offset of the material slot for the given material (clamped to the primitive's max index) and mesh pass.
uint GetMaterialSlotOffset(uint InRelativeMaterialIndex, uint InPrimitiveIndex, uint InMeshPassIndex)
{
	FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);

	const uint DwordsPerMaterialSlot = 2u;
	const uint MeshPassBit = (1u << InMeshPassIndex);
	const uint MaterialCount = PrimitiveData.MaterialMaxIndex + 1;
	// Slots are stored per enabled mesh pass: skip the slots of all enabled passes below this one.
	const uint MeshPassSlotsOffset = MaterialCount * countbits(PrimitiveData.MeshPassMask & (MeshPassBit - 1u));

	checkSlow(PrimitiveData.MaterialBufferOffset != 0xFFFFFFFFu);
	checkSlow((MeshPassBit & PrimitiveData.MeshPassMask) != 0);

	InRelativeMaterialIndex = min(InRelativeMaterialIndex, PrimitiveData.MaterialMaxIndex);
	return 4 * (
		PrimitiveData.MaterialBufferOffset +
		DwordsPerMaterialSlot * (MeshPassSlotsOffset + InRelativeMaterialIndex)
	);
}

// Byte offset of the UV densities for the given material, or 0xFFFFFFFF if the primitive has none.
uint GetMaterialUVDensitiesOffset(uint InRelativeMaterialIndex, uint InPrimitiveIndex)
{
	FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);

	checkSlow(PrimitiveData.MaterialBufferOffset != 0xFFFFFFFFu);

	if (!PrimitiveData.bHasUVDensities)
	{
		return 0xFFFFFFFFu;
	}

	const uint DwordsPerMaterialSlot = 2u;
	const uint DwordsPerUVDensities = 4u;
	const uint MaterialCount = PrimitiveData.MaterialMaxIndex + 1;
	// UV densities follow the material slots of all enabled mesh passes.
	const uint FirstUVDensitiesOffset = MaterialCount * countbits(PrimitiveData.MeshPassMask) * DwordsPerMaterialSlot;

	InRelativeMaterialIndex = min(InRelativeMaterialIndex, PrimitiveData.MaterialMaxIndex);
	return 4 * (
		PrimitiveData.MaterialBufferOffset +
		FirstUVDensitiesOffset +
		(InRelativeMaterialIndex * DwordsPerUVDensities)
	);
}

// Loads the material slot of a primitive's material for the given mesh pass.
FNaniteMaterialSlot LoadMaterialSlot(uint InRelativeMaterialIndex, uint InPrimitiveIndex, uint InMeshPassIndex)
{
	return LoadMaterialSlot(GetMaterialSlotOffset(InRelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex));
}

// Loads the UV densities of a primitive's material.
float4 LoadMaterialUVDensities(uint InRelativeMaterialIndex, uint InPrimitiveIndex)
{
	return LoadMaterialUVDensities(GetMaterialUVDensitiesOffset(InRelativeMaterialIndex, InPrimitiveIndex));
}

// Shading bin of a primitive's material for the given mesh pass.
uint GetMaterialShadingBinFromIndex(
	uint InRelativeMaterialIndex,
	uint InPrimitiveIndex,
	uint InMeshPassIndex)
{
	FNaniteMaterialSlot MaterialSlot = LoadMaterialSlot(InRelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex);
	return MaterialSlot.ShadingBin;
}

// Remaps a raster bin to the matching fixed function bin when it is voxel, already fixed function,
// or programmable rasterization is disabled. Other bins are returned unchanged.
uint RemapRasterBin(uint InBinIndex, uint InRenderFlags, FNaniteMaterialFlags MaterialFlags, bool bVoxel)
{
	// Any bins within the fixed function bin mask are special cased
	const bool bFixedFunctionBin = InBinIndex <= NANITE_FIXED_FUNCTION_BIN_MASK;
	const bool bDisableProgrammable = (InRenderFlags & NANITE_RENDER_FLAG_DISABLE_PROGRAMMABLE) != 0;
	const bool bShadowPass = (InRenderFlags & NANITE_RENDER_FLAG_IS_SHADOW_PASS) != 0;

	if (bVoxel || bFixedFunctionBin || bDisableProgrammable)
	{
		const bool bTwoSided = !bVoxel && MaterialFlags.bTwoSided;
		const bool bSplineMesh = !bVoxel && MaterialFlags.bSplineMesh;
		// For non-shadow views, remap shadow casting fixed function to non-shadow casting (explicitly skipped)
		const bool bCastShadow = select(bShadowPass, MaterialFlags.bCastShadow, false);

		InBinIndex = NANITE_FIXED_FUNCTION_BIN;
		InBinIndex |= select(bTwoSided, NANITE_FIXED_FUNCTION_BIN_TWOSIDED, 0x0u);
		InBinIndex |= select(bSplineMesh, NANITE_FIXED_FUNCTION_BIN_SPLINE, 0x0u);
		InBinIndex |= select(MaterialFlags.bSkinnedMesh, NANITE_FIXED_FUNCTION_BIN_SKINNED, 0x0u);
		InBinIndex |= select(bCastShadow, NANITE_FIXED_FUNCTION_BIN_CAST_SHADOW, 0x0u);
		InBinIndex |= select(bVoxel, NANITE_FIXED_FUNCTION_BIN_VOXEL, 0x0u);
	}

	return InBinIndex;
}

// Raster bin of a primitive's material for the given mesh pass.
// The fallback bin is used when requested and present (not 0xFFFF).
uint GetMaterialRasterBinFromIndex(
	uint InRelativeMaterialIndex,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	uint InRegularRasterBinCount,
	bool bFallbackRasterBin)
{
	FNaniteMaterialSlot MaterialSlot = LoadMaterialSlot(InRelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex);

	uint RasterBin = MaterialSlot.RasterBin;
	if (bFallbackRasterBin && MaterialSlot.FallbackRasterBin != 0xFFFFu)
	{
		RasterBin = MaterialSlot.FallbackRasterBin;
	}

	// Bins at or beyond the regular bin count are stored counting down from 0xFFFF; remap them to follow the regular bins.
	if (RasterBin >= InRegularRasterBinCount)
	{
		RasterBin = 0xFFFFu - RasterBin + InRegularRasterBinCount;
	}

	return RasterBin;
}

// UV densities of the material used by a triangle of the cluster.
float4 GetMaterialUVDensities(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InTriIndex)
{
	const uint RelativeMaterialIndex = GetRelativeMaterialIndex(InCluster, InTriIndex);
	return LoadMaterialUVDensities(RelativeMaterialIndex, InPrimitiveIndex);
}

// Shading bin of the material used by a triangle of the cluster.
uint GetMaterialShadingBin(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	uint InTriIndex)
{
	const uint RelativeMaterialIndex = GetRelativeMaterialIndex(InCluster, InTriIndex);
	return GetMaterialShadingBinFromIndex(RelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex);
}

// Raster bin of the material used by a triangle of the cluster.
uint GetMaterialRasterBin(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	uint InTriIndex,
	uint InRegularSlotCount,
	bool bFallbackRasterBin)
{
	return GetMaterialRasterBinFromIndex(
		GetRelativeMaterialIndex(InCluster, InTriIndex),
		InPrimitiveIndex,
		InMeshPassIndex,
		InRegularSlotCount,
		bFallbackRasterBin
	);
}

// Hit proxy ID of a primitive's material (clamped to the primitive's max index).
// Returns uint(-2) when the primitive has no hit proxy buffer offset.
uint LoadMaterialHitProxyId(uint InPrimitiveIndex, uint InMaterialIndex, ByteAddressBuffer InMaterialHitProxyTable)
{
	FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);

	const uint InvisibleHitProxyID = uint(-2);
	uint HitProxyID = InvisibleHitProxyID;

	if (PrimitiveData.HitProxyBufferOffset != 0xFFFFFFFFu)
	{
		const uint OffsetDwords = PrimitiveData.HitProxyBufferOffset + min(InMaterialIndex, PrimitiveData.MaterialMaxIndex);
		HitProxyID = InMaterialHitProxyTable.Load(OffsetDwords * 4);
	}

	return HitProxyID;
}

// Hit proxy ID of the material used by a triangle of the cluster.
uint GetMaterialHitProxyId(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InTriIndex,
	ByteAddressBuffer InMaterialHitProxyTable)
{
	const uint RelativeMaterialIndex = GetRelativeMaterialIndex(InCluster, InTriIndex);
	const uint MaterialHitProxyId = LoadMaterialHitProxyId(InPrimitiveIndex, RelativeMaterialIndex, InMaterialHitProxyTable);
	return MaterialHitProxyId;
}

// Overlay color stored in the primitive's material data.
float4 LoadNaniteMaterialOverlayColor(uint InPrimitiveIndex)
{
	FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);
	return PrimitiveData.OverlayColor;
}

// Reconstructs the tangent from the normal and a quantized rotation angle around it.
// TangentAngleBits is mapped to an angle of TangentAngleBits * 2*PI / 2^NumTangentBits.
float3 UnpackTangentX(float3 TangentZ, uint TangentAngleBits, uint NumTangentBits)
{
	// Build the reference frame from the larger of |x| and |z|; the swap is undone on the result.
	const bool bSwapXZ = (abs(TangentZ.z) > abs(TangentZ.x));
	if (bSwapXZ) TangentZ.xz = TangentZ.zx;

	const float3 TangentRefX = float3(-TangentZ.y, TangentZ.x, 0.0f);
	const float3 TangentRefY = cross(TangentZ, TangentRefX);

	const float Scale = rsqrt(dot(TangentRefX.xy, TangentRefX.xy));

	const float TangentAngle = float(TangentAngleBits) * ((2.0f * PI) / (1u << NumTangentBits));
	float3 TangentX = TangentRefX * (cos(TangentAngle) * Scale) + TangentRefY * (sin(TangentAngle) * Scale);
	if (bSwapXZ) TangentX.xz = TangentX.zx;
	return TangentX;
}

// Decodes one bone influence (index and weight) of a vertex.
// Influences at or beyond NumVertexBoneInfluences decode to bone 0 with weight 0.
void DecodeVertexBoneInfluence(FBoneInfluenceHeader BoneInfluenceHeader, uint VertIndex, uint InfluenceIndex, inout uint OutBoneIndex, inout float OutBoneWeight)
{
	if (InfluenceIndex >= BoneInfluenceHeader.NumVertexBoneInfluences)
	{
		OutBoneIndex = 0;
		OutBoneWeight = 0.0f;
		return;
	}

	const uint BitsPerInfluence = (BoneInfluenceHeader.NumVertexBoneIndexBits + BoneInfluenceHeader.NumVertexBoneWeightBits);
	const uint BitOffset = (VertIndex * BoneInfluenceHeader.NumVertexBoneInfluences + InfluenceIndex) * BitsPerInfluence;

	FBitStreamReaderState BoneDataStream = BitStreamReader_Create_Aligned(BoneInfluenceHeader.DataAddress, BitOffset, 32);

	const float WeightScale = 1.0f / ((1u << BoneInfluenceHeader.NumVertexBoneWeightBits) - 1u);

	OutBoneIndex = BitStreamReader_Read_RO(ClusterPageData, BoneDataStream, BoneInfluenceHeader.NumVertexBoneIndexBits, NANITE_MAX_BONE_INDEX_BITS);
	OutBoneWeight = (float)BitStreamReader_Read_RO(ClusterPageData, BoneDataStream, BoneInfluenceHeader.NumVertexBoneWeightBits, NANITE_MAX_BLEND_WEIGHT_BITS) * WeightScale;
	// With zero weight bits no weight is stored: the influence gets full weight (this also discards the invalid scaled value).
	OutBoneWeight = BoneInfluenceHeader.NumVertexBoneWeightBits ? OutBoneWeight : 1.0f;
}

#if COMPILER_SUPPORTS_HLSL2021
// Loads the InfluenceIndex-th cluster bone influence directly as a struct (templated load, requires HLSL 2021).
FClusterBoneInfluence DecodeClusterBoneInfluence(FCluster Cluster, uint InfluenceIndex)
{
	return ClusterPageData.Load<FClusterBoneInfluence>(Cluster.ClusterBoneInfluenceAddress + InfluenceIndex * Cluster.ClusterBoneInfluenceStride);
}
#endif

// Loads the InfluenceIndex-th voxel bone influence: bone index in the upper 24 bits, 8-bit normalized weight in the low byte.
FVoxelBoneInfluence DecodeVoxelBoneInfluence(FCluster Cluster, uint InfluenceIndex)
{
	const uint PackedBoneInfluence = ClusterPageData.Load(Cluster.ClusterBoneInfluenceAddress + InfluenceIndex * Cluster.ClusterBoneInfluenceStride);

	FVoxelBoneInfluence Influence;
	Influence.BoneIndex = PackedBoneInfluence >> 8;
	Influence.Weight = (PackedBoneInfluence & 0xFFu) * (1.0f / 255.0f);
	return Influence;
}

// Decodes vertex attributes for N vertices. N must be compile-time constant and <= 3.
// Decoding multiple vertices from the same cluster simultaneously tends to generate better code than decoding them individually.
void GetRawAttributeDataN(inout FNaniteRawAttributeData RawAttributeData[3],
	FCluster Cluster,
	uint3 TriIndices,
	uint CompileTimeN,
	uint CompileTimeMaxTexCoords
)
{
	// Always process first UV set. Even if it isn't used, we might still need TangentToWorld.
	CompileTimeMaxTexCoords = max(1, min(NANITE_MAX_UVS, CompileTimeMaxTexCoords));

	const uint DecodeInfoOffset = Cluster.PageBaseAddress + Cluster.DecodeInfoOffset;
	const uint AttributeDataOffset = Cluster.PageBaseAddress + Cluster.AttributeOffset;

	// Per-vertex scratch for the UV set currently being decoded. It is indexed by vertex (i < CompileTimeN <= 3),
	// not by UV set, so it is sized like RawAttributeData rather than by NANITE_MAX_UVS.
	float2 TexCoords[3];
	uint i;
	UNROLL
	for (i = 0; i < CompileTimeN; i++)
	{
		RawAttributeData[i] = (FNaniteRawAttributeData)0;
		TexCoords[i] = 0.0f;
	}

#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	// Uncompressed layout per vertex: normal (12 bytes), optional tangent (16 bytes), color (4 bytes), then 8 bytes per UV set.
	uint3 ReadOffset = AttributeDataOffset + TriIndices * Cluster.BitsPerAttribute / 8;
	UNROLL
	for (i = 0; i < CompileTimeN; i++)
	{
		RawAttributeData[i].TangentZ = asfloat(ClusterPageData.Load3(ReadOffset[i]));
		ReadOffset[i] += 12;
		if (Cluster.bHasTangents)
		{
			RawAttributeData[i].TangentXAndSign = asfloat(ClusterPageData.Load4(ReadOffset[i]));
			ReadOffset[i] += 16;
		}
		RawAttributeData[i].Color = float4(UnpackToUint4(ClusterPageData.Load(ReadOffset[i]), 8)) * (1.0f / 255.0f);
		ReadOffset[i] += 4;
	}

	UNROLL
	for (uint TexCoordIndex = 0; TexCoordIndex < CompileTimeMaxTexCoords; TexCoordIndex++)
	{
		if (TexCoordIndex < Cluster.NumUVs)
		{
			UNROLL
			for (uint i = 0; i < CompileTimeN; i++)
			{
				TexCoords[i] = asfloat(ClusterPageData.Load2(ReadOffset[i]));
			}
			ReadOffset += 8;
		}

		// UV sets beyond Cluster.NumUVs repeat the last decoded set (or zero if there is none).
		UNROLL
		for (uint i = 0; i < CompileTimeN; i++)
		{
			RawAttributeData[i].TexCoords[TexCoordIndex] = TexCoords[i];
		}
	}
#else
	const uint CompileTimeMaxAttributeBits = CalculateMaxAttributeBits(CompileTimeMaxTexCoords);

	// Watch out! Make sure control flow around BitStreamReader is always compile-time constant or codegen degrades significantly

	uint4 ColorMin = uint4(UnpackByte0(Cluster.ColorMin), UnpackByte1(Cluster.ColorMin), UnpackByte2(Cluster.ColorMin), UnpackByte3(Cluster.ColorMin));
	const uint4 NumComponentBits = UnpackToUint4(Cluster.ColorBits, 4);

	FBitStreamReaderState AttributeStream[3];
	UNROLL
	for (i = 0; i < CompileTimeN; i++)
	{
		AttributeStream[i] = BitStreamReader_Create_Aligned(AttributeDataOffset, TriIndices[i] * Cluster.BitsPerAttribute, CompileTimeMaxAttributeBits);

		const uint NormalBits = BitStreamReader_Read_RO(ClusterPageData, AttributeStream[i], 2 * Cluster.NormalPrecision, 2 * NANITE_MAX_NORMAL_QUANTIZATION_BITS);
		const float3 TangentZ = UnpackNormal(NormalBits, Cluster.NormalPrecision);
		RawAttributeData[i].TangentZ = TangentZ;

		// The read is issued unconditionally (with zero bits when there are no tangents) to keep the reader's control flow constant.
		const uint NumTangentBits = Cluster.bHasTangents ? (Cluster.TangentPrecision + 1) : 0u;
		const uint TangentAngleAndSignBits = BitStreamReader_Read_RO(ClusterPageData, AttributeStream[i], NumTangentBits, NANITE_MAX_TANGENT_QUANTIZATION_BITS + 1);

		BRANCH
		if (Cluster.bHasTangents)
		{
			// The sign bit sits directly above the TangentPrecision angle bits.
			const bool bTangentYSign = (TangentAngleAndSignBits & (1u << Cluster.TangentPrecision)) != 0;
			const uint TangentAngleBits = BitFieldExtractU32(TangentAngleAndSignBits, Cluster.TangentPrecision, 0);
			RawAttributeData[i].TangentXAndSign = float4(UnpackTangentX(TangentZ, TangentAngleBits, Cluster.TangentPrecision), bTangentYSign ? -1.0f : 1.0f);
		}
		else
		{
			RawAttributeData[i].TangentXAndSign = 0.0f;
		}

		const uint4 ColorDelta = BitStreamReader_Read4_RO(ClusterPageData, AttributeStream[i], NumComponentBits, NANITE_MAX_COLOR_QUANTIZATION_BITS);
		RawAttributeData[i].Color = float4(ColorMin + ColorDelta) * (1.0f / 255.0f);
	}

	UNROLL
	for (uint TexCoordIndex = 0; TexCoordIndex < CompileTimeMaxTexCoords; ++TexCoordIndex)
	{
		// A zero header (zero bits per component) is used for UV sets the cluster does not have.
		uint2 UVHeaderData = 0u;
		if (TexCoordIndex < Cluster.NumUVs)
		{
			UVHeaderData = ClusterPageData.Load2(DecodeInfoOffset + TexCoordIndex * SIZEOF_PACKED_UV_HEADER);
		}
		const FUVHeader UVHeader = UnpackUVHeader(UVHeaderData);

		uint2 UVBits[3];
		UNROLL
		for (uint i = 0; i < CompileTimeN; i++)
		{
			UVBits[i] = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream[i], UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);
		}

		BRANCH
		if (TexCoordIndex < Cluster.NumUVs)
		{
			UNROLL
			for (uint i = 0; i < CompileTimeN; i++)
			{
				TexCoords[i] = UnpackTexCoord(UVBits[i], UVHeader);
			}
		}

		// UV sets beyond Cluster.NumUVs repeat the last decoded set (or zero if there is none).
		UNROLL
		for (uint j = 0; j < CompileTimeN; j++)
		{
			RawAttributeData[j].TexCoords[TexCoordIndex] = TexCoords[j];
		}
	}
#endif
}

// Decodes the attributes of three vertices of a cluster.
void GetRawAttributeData3(inout FNaniteRawAttributeData RawAttributeData[3],
	FCluster Cluster,
	uint3 VertexIndices,
	uint CompileTimeMaxTexCoords
)
{
	GetRawAttributeDataN(RawAttributeData, Cluster, VertexIndices, 3, CompileTimeMaxTexCoords);
}

// Decodes the attributes of a single vertex of a cluster.
FNaniteRawAttributeData GetRawAttributeData(
	FCluster Cluster,
	uint VertexIndex,
	uint CompileTimeMaxTexCoords
)
{
	FNaniteRawAttributeData RawAttributeData[3];
	GetRawAttributeDataN(RawAttributeData, Cluster, VertexIndex, 1, CompileTimeMaxTexCoords);
	return RawAttributeData[0];
}

// Builds a tangent-to-local matrix from a tangent (+ bitangent sign) and a normal. The inputs are used as given, without normalization.
half3x3 NaniteTangentToLocal(float4 TangentXAndSign, float3 UnnormalizedTangentZ)
{
	const float3 TangentY = cross(UnnormalizedTangentZ.xyz, TangentXAndSign.xyz) * TangentXAndSign.w;
	return float3x3(TangentXAndSign.xyz, TangentY, UnnormalizedTangentZ);
}

// Interpolates the decoded attributes of a triangle's three vertices at the given barycentrics
// and builds the tangent-to-world transform (from explicit tangents if present, otherwise derived from positions and UV0).
FNaniteAttributeData GetAttributeData(
	FCluster Cluster,
	float3 PointLocal0,
	float3 PointLocal1,
	float3 PointLocal2,
	FNaniteRawAttributeData RawAttributeData0,
	FNaniteRawAttributeData RawAttributeData1,
	FNaniteRawAttributeData RawAttributeData2,
	FNaniteTangentBasis TangentBasis0,
	FNaniteTangentBasis TangentBasis1,
	FNaniteTangentBasis TangentBasis2,
	FBarycentrics Barycentrics,
	FInstanceSceneData InstanceData,
	uint CompileTimeMaxTexCoords
)
{
	FNaniteAttributeData AttributeData = (FNaniteAttributeData)0;

	// Always process first UV set. Even if it isn't used, we might still need TangentToWorld.
	CompileTimeMaxTexCoords = max(1, min(NANITE_MAX_UVS, CompileTimeMaxTexCoords));

	const float3 UnnormalizedTangentZ = Lerp(TangentBasis0.TangentZ, TangentBasis1.TangentZ, TangentBasis2.TangentZ, Barycentrics).Value;
	const float3 TangentZ = normalize(UnnormalizedTangentZ);

	// Decode vertex color
	// This needs to happen even if INTERPOLATE_VERTEX_COLOR is not defined as the data might be there regardless of what the shader needs.
	// When INTERPOLATE_VERTEX_COLOR is not defined, the results are not used and the code mostly disappears.
	AttributeData.UnMirrored = 1.0f;

#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	AttributeData.VertexColor.Value = Lerp(
		RawAttributeData0.Color,
		RawAttributeData1.Color,
		RawAttributeData2.Color,
		Barycentrics
	).Value;
#else
	// Only variable-color clusters need interpolation; otherwise the first vertex's color is used as is.
	AttributeData.VertexColor.Value = RawAttributeData0.Color;

	if (Cluster.ColorMode == NANITE_VERTEX_COLOR_MODE_VARIABLE)
	{
		AttributeData.VertexColor = Lerp(
			RawAttributeData0.Color,
			RawAttributeData1.Color,
			RawAttributeData2.Color,
			Barycentrics
		);
	}
#endif

	TDual< float2 > TexCoord = (TDual< float2 >)0;

	UNROLL
	for (uint TexCoordIndex = 0; TexCoordIndex < CompileTimeMaxTexCoords; ++TexCoordIndex)
	{
		if (TexCoordIndex < Cluster.NumUVs)
		{
			TexCoord = Lerp(
				RawAttributeData0.TexCoords[TexCoordIndex],
				RawAttributeData1.TexCoords[TexCoordIndex],
				RawAttributeData2.TexCoords[TexCoordIndex],
				Barycentrics
			);

			// Generate tangent frame for UV0
			if (TexCoordIndex == 0)
			{
				float3x3 TangentToLocal;

				BRANCH
				if (Cluster.bHasTangents)
				{
					float4 TangentXAndSign = Lerp(TangentBasis0.TangentXAndSign, TangentBasis1.TangentXAndSign, TangentBasis2.TangentXAndSign, Barycentrics).Value;
					TangentToLocal = NaniteTangentToLocal(TangentXAndSign, UnnormalizedTangentZ);
					AttributeData.UnMirrored = TangentXAndSign.w;
				}
				else
				{
					// Implicit tangent space
					// Based on Christian Schüler's derivation: http://www.thetenthplanet.de/archives/1180
					// The technique derives a tangent space from the interpolated normal and (position,uv) deltas in two not necessarily orthogonal directions.
					// The described technique uses screen space derivatives as a way to obtain these direction deltas in a pixel shader,
					// but as we have the triangle vertices explicitly available using the local space corner deltas directly is faster and more convenient.

					float3 PointLocal10 = PointLocal1 - PointLocal0;
					float3 PointLocal20 = PointLocal2 - PointLocal0;
					float2 TexCoord10 = RawAttributeData1.TexCoords[0] - RawAttributeData0.TexCoords[0];
					float2 TexCoord20 = RawAttributeData2.TexCoords[0] - RawAttributeData0.TexCoords[0];

					bool TangentXValid = abs(TexCoord10.x) + abs(TexCoord20.x) > 1e-6;

					float3 TangentX;
					float3 TangentY;
					BRANCH
					if (TangentXValid)
					{
						float3 Perp2 = cross(TangentZ, PointLocal20);
						float3 Perp1 = cross(PointLocal10, TangentZ);
						float3 TangentU = Perp2 * TexCoord10.x + Perp1 * TexCoord20.x;
						float3 TangentV = Perp2 * TexCoord10.y + Perp1 * TexCoord20.y;

						TangentX = normalize(TangentU);
						TangentY = cross(TangentZ, TangentX);

						AttributeData.UnMirrored = dot(TangentV, TangentY) < 0.0f ? -1.0f : 1.0f;
						TangentY *= AttributeData.UnMirrored;
					}
					else
					{
						// No usable U gradient: build a basis around the normal alone.
						const float Sign = TangentZ.z >= 0 ? 1 : -1;
						const float a = -rcp( Sign + TangentZ.z );
						const float b = TangentZ.x * TangentZ.y * a;

						TangentX = float3(1 + Sign * a * Pow2(TangentZ.x), Sign * b, -Sign * TangentZ.x);
						TangentY = float3(b, Sign + a * Pow2(TangentZ.y), -TangentZ.y);

						AttributeData.UnMirrored = 1;
					}

					TangentToLocal = float3x3(TangentX, TangentY, TangentZ);
				}

				// Should be Pow2(InvScale) but that requires renormalization
				float3x3 LocalToWorldNoScale = DFToFloat3x3(InstanceData.LocalToWorld);
				float3 InvScale = InstanceData.InvNonUniformScale;
				LocalToWorldNoScale[0] *= InvScale.x;
				LocalToWorldNoScale[1] *= InvScale.y;
				LocalToWorldNoScale[2] *= InvScale.z;
				AttributeData.TangentToWorld = mul(TangentToLocal, LocalToWorldNoScale);
			}
		}
		else
		{
			if (TexCoordIndex == 0)
			{
				// The cluster has no UVs at all: only the normal row of TangentToWorld is filled in.
				AttributeData.TangentToWorld = float3x3(float3(0, 0, 0), float3(0, 0, 0), DFMultiplyVector(TangentZ * InstanceData.InvNonUniformScale.z, InstanceData.LocalToWorld));
			}
		}

		// UV sets beyond Cluster.NumUVs repeat the last interpolated set (or zero if there is none).
		AttributeData.TexCoords[TexCoordIndex] = TexCoord;
	}

	return AttributeData;
}

// Decodes and interpolates a single UV set of a triangle without decoding the other attributes.
// Returns zero when the cluster has no UVs; TexCoordIndex is clamped to the last available UV set.
TDual< float2 > GetTexCoord(
	FCluster Cluster,
	uint3 TriIndices,
	FBarycentrics Barycentrics,
	uint TexCoordIndex
)
{
	if (Cluster.NumUVs == 0)
		return (TDual< float2 >)0;

	TexCoordIndex = min(TexCoordIndex, Cluster.NumUVs - 1);

	// Unpack and interpolate attributes
	const uint DecodeInfoOffset = Cluster.PageBaseAddress + Cluster.DecodeInfoOffset;
	const uint AttributeDataOffset = Cluster.PageBaseAddress + Cluster.AttributeOffset;

	// NOTE(review): GetRawAttributeDataN reads tangent data between the normal and the color when Cluster.bHasTangents is set.
	// The offsets below skip only normal + color (+ preceding UV sets) - confirm whether tangents are accounted for elsewhere
	// (e.g. in Cluster.UVBitOffsets) or whether this path is only valid for clusters without explicit tangents.
#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	uint3 ReadOffset = AttributeDataOffset + TriIndices * Cluster.BitsPerAttribute / 8;
	ReadOffset += 12 + 4 + TexCoordIndex * 8; // Normal + Color + TexCoord
#else
	const uint4 NumColorComponentBits = UnpackToUint4(Cluster.ColorBits, 4);
	// One byte of bit offset per UV set.
	const uint UVBitOffset = ((Cluster.UVBitOffsets >> (TexCoordIndex * 8u)) & 0xFFu);
	const uint BitOffset = 2 * Cluster.NormalPrecision + dot(NumColorComponentBits, 1u) + UVBitOffset;

	FBitStreamReaderState AttributeStream0 = BitStreamReader_Create_Aligned(AttributeDataOffset, BitOffset + TriIndices.x * Cluster.BitsPerAttribute, 2 * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	FBitStreamReaderState AttributeStream1 = BitStreamReader_Create_Aligned(AttributeDataOffset, BitOffset + TriIndices.y * Cluster.BitsPerAttribute, 2 * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	FBitStreamReaderState AttributeStream2 = BitStreamReader_Create_Aligned(AttributeDataOffset, BitOffset + TriIndices.z * Cluster.BitsPerAttribute, 2 * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
#endif

#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	float2 TexCoord0 = asfloat(ClusterPageData.Load2(ReadOffset.x));
	float2 TexCoord1 = asfloat(ClusterPageData.Load2(ReadOffset.y));
	float2 TexCoord2 = asfloat(ClusterPageData.Load2(ReadOffset.z));
#else
	const FUVHeader UVHeader = GetUVHeader(ClusterPageData, DecodeInfoOffset, TexCoordIndex);
	uint2 UVBits0 = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream0, UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	uint2 UVBits1 = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream1, UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	uint2 UVBits2 = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream2, UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);

	float2 TexCoord0 = UnpackTexCoord(UVBits0, UVHeader);
	float2 TexCoord1 = UnpackTexCoord(UVBits1, UVHeader);
	float2 TexCoord2 = UnpackTexCoord(UVBits2, UVHeader);
#endif

	return Lerp(
		TexCoord0,
		TexCoord1,
		TexCoord2,
		Barycentrics
	);
}

#ifndef DEFINE_ITERATE_CLUSTER_SEGMENTS
#	define DEFINE_ITERATE_CLUSTER_SEGMENTS (0)
#endif

// Need to manually strip unused template functions here due to a compiler issue: https://github.com/microsoft/DirectXShaderCompiler/issues/4649
#if DEFINE_ITERATE_CLUSTER_SEGMENTS
// Calls Processor.Process(TriStart, TriLength, MaterialIndex) once for every material range of the cluster.
// ClusterSegmentProcessor can be any type that provides a matching Process() method.
template<typename ClusterSegmentProcessor>
void IterateClusterSegments(FCluster Cluster, ByteAddressBuffer InClusterPageData, inout ClusterSegmentProcessor Processor)
{
	BRANCH
	if (IsMaterialFastPath(Cluster))
	{
		// Fast path: up to three consecutive ranges stored inline in the cluster. The first one is always present.
		{
			Processor.Process(0, Cluster.Material0Length, Cluster.Material0Index);
		}

		if (Cluster.Material1Length > 0)
		{
			Processor.Process(Cluster.Material0Length, Cluster.Material1Length, Cluster.Material1Index);
		}

		const uint Material2Length = Cluster.MaterialTotalLength - Cluster.Material0Length - Cluster.Material1Length;
		if (Material2Length > 0)
		{
			Processor.Process(Cluster.Material0Length + Cluster.Material1Length, Material2Length, Cluster.Material2Index);
		}
	}
	else
	{
		// Slow path: one range per material table entry.
		uint TableOffset = Cluster.PageBaseAddress + Cluster.MaterialTableOffset * 4;
		LOOP
		for (uint TableEntry = 0; TableEntry < Cluster.MaterialTableLength; ++TableEntry)
		{
			uint EncodedRange = InClusterPageData.Load(TableOffset);
			TableOffset += 4;

			uint TriStart;
			uint TriLength;
			uint MaterialIndex;
			DecodeMaterialRange(EncodedRange, TriStart, TriLength, MaterialIndex);

			Processor.Process(TriStart, TriLength, MaterialIndex);
		}
	}
}
#endif