Files
UnrealEngine/Engine/Shaders/Private/Nanite/NaniteRasterizationCommon.ush
2025-05-18 13:04:45 +08:00

704 lines
22 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "../Common.ush"
#include "../SceneData.ush"
#include "../WaveOpUtil.ush"
#include "NaniteRasterizer.ush"
#include "NaniteAttributeDecode.ush"
#include "../VirtualShadowMaps/VirtualShadowMapPageAccessCommon.ush"
#include "../VirtualShadowMaps/VirtualShadowMapPageOverlap.ush"
#include "NaniteWritePixel.ush"
#include "NaniteCullingCommon.ush"
// Material includes
#include "/Engine/Generated/Material.ush"
#include "NaniteVertexFactory.ush"
#if NANITE_TESSELLATION
#include "NaniteTessellation.ush"
#endif
// Default NANITE_TWO_SIDED to 0 when the including permutation does not define it.
#ifndef NANITE_TWO_SIDED
#define NANITE_TWO_SIDED 0
#endif
// The early depth test is compiled in exactly when the permutation is pixel programmable.
#define ENABLE_EARLY_Z_TEST (NANITE_PIXEL_PROGRAMMABLE)
// For shadows in displacement fallback raster bins, we need to apply minimum displacement on cluster triangles
// Only active in shadow depth shaders, either for fixed displacement fallback bins or for vertex programmable
// materials that use displacement without tessellation.
#define ENABLE_FALLBACK_DISPLACEMENT (SHADOW_DEPTH_SHADER && ( \
FIXED_DISPLACEMENT_FALLBACK || \
(NANITE_VERTEX_PROGRAMMABLE && USES_DISPLACEMENT && !NANITE_TESSELLATION) \
))
// Gates FMaterialShader::EvaluateDomain: tessellation together with a programmable vertex or pixel stage.
#define MATERIAL_SHADER_HAS_DOMAIN (NANITE_TESSELLATION && (NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE))
// Gates the displacement members and helpers of FMaterialShader (fallback displacement or displaced domain evaluation).
#define MATERIAL_SHADER_HAS_DISPLACEMENT (ENABLE_FALLBACK_DISPLACEMENT || (MATERIAL_SHADER_HAS_DOMAIN && USES_DISPLACEMENT))
// State encompassing mapping a pixel position to memory address
struct FRaster
{
// Scale and bias applied to the perspective-divided clip-space XY in CalculateSubpixelCoordinates.
float2 ViewportScale;
float2 ViewportBias;
// NOTE(review): presumably the rasterization clip rectangle; it is not read in this part of the file - confirm layout with callers.
int4 ScissorRect;
#if VIRTUAL_TEXTURE_TARGET
// NOTE(review): presumably the physical and virtual page coordinates of the target page; not read in this part of the file - confirm.
uint2 pPage;
uint2 vPage;
// Subtracted from SvPosition.xy in TNaniteWritePixel to translate the pixel to the virtual page.
float2 vTranslation;
// NOTE(review): presumably true when the cluster touches a single page; not read in this part of the file - confirm.
bool bSinglePage;
// Copied into Pixel.PhysicalPosition.z by TNaniteWritePixel.
uint ArrayIndex;
#endif
};
// Projects a clip-space point and maps it into the raster's subpixel grid.
// Returns (floor(x), floor(y), z / w, 1 / w), where x and y are the perspective-divided
// coordinates after the viewport scale and bias have been applied.
float4 CalculateSubpixelCoordinates( FRaster Raster, float4 PointClip )
{
	float4 Projected = float4( PointClip.xyz, 1 ) / PointClip.w;
	Projected.xy = floor( Projected.xy * Raster.ViewportScale + Raster.ViewportBias );
	return Projected;
}
// Converts screen-space barycentric weights C into perspective-correct barycentrics.
// InvW holds 1/w for the three triangle corners; the weighted values are renormalized so they sum to one.
float3 GetPerspectiveCorrectBarycentrics( float3 C, float3 InvW )
{
	// Weight each corner by its 1/w
	const float3 Weighted = C * InvW;
	const float InvSum = rcp( Weighted.x + Weighted.y + Weighted.z );
	return Weighted * InvSum;
}
// Builds the barycentrics (value plus X/Y derivatives) for a pixel of a rasterized triangle.
// C                               - screen-space barycentric weights of the pixel
// bPerspectiveCorrectDerivatives  - when true, derivatives are finite differences of the perspective-correct
//                                   barycentrics after offsetting C by the triangle edge components;
//                                   otherwise the derivatives stored on the triangle are used as-is.
FBarycentrics CalculateBarycentrics( FRasterTri Tri, float3 C, bool bPerspectiveCorrectDerivatives )
{
	FBarycentrics Result = (FBarycentrics)0;

	const float3 Center = GetPerspectiveCorrectBarycentrics( C, Tri.InvW );
	Result.Value = Center;

	BRANCH
	if( !bPerspectiveCorrectDerivatives )
	{
		Result.Value_dx = Tri.Barycentrics_dx;
		Result.Value_dy = Tri.Barycentrics_dy;
	}
	else
	{
		// Change of C when stepping by the offsets derived from the edge vectors
		const float3 StepX = float3( -Tri.Edge12.y, -Tri.Edge20.y, -Tri.Edge01.y );
		const float3 StepY = float3( Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x );

		const float3 AtStepX = GetPerspectiveCorrectBarycentrics( C + StepX, Tri.InvW );
		const float3 AtStepY = GetPerspectiveCorrectBarycentrics( C + StepY, Tri.InvW );

		Result.Value_dx = AtStepX - Center;
		Result.Value_dy = AtStepY - Center;
	}

	return Result;
}
// Returns the bit position of the Index-th (0-based, counting from the LSB) set bit of Mask.
// Binary search on the population count of the upper part of the mask: the wanted bit is the one
// that has exactly NumAbove set bits above it.
int FindNthSetBit( uint Mask, int Index )
{
	const int NumAbove = countbits( Mask ) - Index - 1;

	uint Pos = 16;

	UNROLL
	for( int Step = 8; Step > 0; Step >>= 1 )
	{
		// If no more than NumAbove bits live at or above Pos, the wanted bit is below Pos.
		const bool bSearchLower = countbits( Mask >> Pos ) <= NumAbove;
		Pos += bSearchLower ? -Step : Step;
	}

	// Pos is now either the wanted bit or one past it.
	if( countbits( Mask >> Pos ) == NumAbove )
	{
		Pos = Pos - 1;
	}
	return Pos;
}
// 64-bit variant: Mask.x holds bits 0..31 and Mask.y bits 32..63.
// Picks the word containing the Index-th set bit and defers to the 32-bit search.
int FindNthSetBit( uint2 Mask, int Index )
{
	const int NumLowBits = countbits( Mask.x );
	const bool bInLowWord = Index < NumLowBits;

	const uint Word = bInLowWord ? Mask.x : Mask.y;
	const int WordIndex = bInLowWord ? Index : Index - NumLowBits;
	const int BitBase = bInLowWord ? 0 : 32;

	return FindNthSetBit( Word, WordIndex ) + BitBase;
}
/*int FindNthSetBit_Scalar( uint Mask, int Index )
{
return firstbitlow( WaveBallot( WavePrefixCountBits(x) == Index ) ) - 1;
return firstbitlow( WaveBallot( MaskedBitCount( Mask ) == Index ) ) - 1;
}*/
// Counts the set bits of the 64-bit mask Bits (x = low word, y = high word) that lie strictly below bit Index.
uint MaskedBitCount2( uint2 Bits, uint Index )
{
	const bool bLowWord = Index < 32;
	const uint BelowMask = ( 1u << ( Index & 31 ) ) - 1;

	uint Count = countbits( ( bLowWord ? Bits.x : Bits.y ) & BelowMask );
	if( !bLowWord )
	{
		// Every bit of the low word is below Index
		Count += countbits( Bits.x );
	}
	return Count;
}
// Group-shared scratch used by DeduplicateVertIndexes when the wave is narrower than 32 lanes.
groupshared uint GroupBaseVertIndex;
groupshared uint2 GroupUsedVertMask;
//groupshared uint GroupUniqueVertIndex[32];

// Finds the set of unique vertices referenced by the triangles processed by this group and assigns one to each lane.
//
// VertIndexes          - vertex indexes of this lane's triangle (x is expected to be the smallest)
// GroupThreadIndex     - index of this thread within the group
// bTriValid            - false for lanes that carry no triangle; they contribute nothing to the masks
// OutNumUniqueVerts    - number of distinct vertices referenced by all valid triangles
// OutLaneVertIndex     - vertex index this lane is responsible for (the GroupThreadIndex-th used vertex)
// OutCornerLaneIndexes - for each triangle corner, the rank of its vertex within the used set.
//                        Only meaningful when bTriValid, since the indexes are only rebased for valid triangles.
//
// Used vertices are tracked in a 64-bit mask relative to the smallest referenced index.
// NOTE(review): this assumes the referenced indexes span fewer than 64 values - confirm against the cluster builder.
// Waves narrower than 32 lanes reduce through group-shared memory, wider waves use wave intrinsics only.
void DeduplicateVertIndexes( uint3 VertIndexes, uint GroupThreadIndex, bool bTriValid, out uint OutNumUniqueVerts, out uint OutLaneVertIndex, out uint3 OutCornerLaneIndexes )
{
	const uint LaneCount = WaveGetLaneCount();

	// Calculate smallest active vertex
	uint BaseVertIndex;
	BRANCH
	if( LaneCount < 32 )
	{
		if (GroupThreadIndex == 0)
		{
			GroupBaseVertIndex = 0xFFFFFFFFu;
			GroupUsedVertMask = 0;
		}
		GroupMemoryBarrierWithGroupSync();
		if (bTriValid) WaveInterlockedMin(GroupBaseVertIndex, VertIndexes.x);	// VertIndexes.x is always smallest
		GroupMemoryBarrierWithGroupSync();
		BaseVertIndex = GroupBaseVertIndex;
	}
	else
	{
		BaseVertIndex = WaveActiveMin(bTriValid ? VertIndexes.x : 0xFFFFFFFFu);	// VertIndexes.x is always smallest
	}

	// Mask of the vertices referenced by this lane's triangle, relative to BaseVertIndex
	uint2 TriangleVertMask = 0;
	if( bTriValid )
	{
		VertIndexes -= BaseVertIndex;

		UNROLL
		for (uint i = 0; i < 3; i++)
		{
			bool bDstLow = VertIndexes[i] < 32;
			uint DstMask = 1u << ( VertIndexes[i] & 31 );

			if( bDstLow )
				TriangleVertMask.x |= DstMask;
			else
				TriangleVertMask.y |= DstMask;
		}
	}

	// Union of all triangle masks
	uint2 UsedVertMask = uint2(0,0);
	BRANCH
	if( LaneCount < 32 )
	{
		// Accumulate this lane's triangle mask into the group mask. ORing the still-empty UsedVertMask here
		// would leave the group mask at zero and report no used vertices on narrow-wave hardware.
		WaveInterlockedOr(GroupUsedVertMask.x, TriangleVertMask.x);
		WaveInterlockedOr(GroupUsedVertMask.y, TriangleVertMask.y);
		GroupMemoryBarrierWithGroupSync();
		UsedVertMask = GroupUsedVertMask;
	}
	else
	{
		UsedVertMask.x = WaveActiveBitOr(TriangleVertMask.x);
		UsedVertMask.y = WaveActiveBitOr(TriangleVertMask.y);
	}

	// Rank of each corner's vertex inside the used set
	OutCornerLaneIndexes.x = MaskedBitCount(UsedVertMask, VertIndexes.x);
	OutCornerLaneIndexes.y = MaskedBitCount(UsedVertMask, VertIndexes.y);
	OutCornerLaneIndexes.z = MaskedBitCount(UsedVertMask, VertIndexes.z);

#if 0
	bool2 bIsUsed = ( UsedVertMask & (1u << GroupThreadIndex) ) != 0u;
	uint2 UsedPrefixSum =
	{
		MaskedBitCount( uint2( UsedVertMask.x, 0 ) ),
		MaskedBitCount( uint2( UsedVertMask.y, 0 ) ) + countbits( UsedVertMask.x )
	};

	if( bIsUsed.x )
		GroupUniqueVertIndex[ UsedPrefixSum.x ] = GroupThreadIndex;
	if( bIsUsed.y && UsedPrefixSum.y < 32 )
		GroupUniqueVertIndex[ UsedPrefixSum.y ] = GroupThreadIndex + 32;

	OutLaneVertIndex = GroupUniqueVertIndex[ GroupThreadIndex ] + BaseVertIndex;
#else
	// The N-th lane takes the N-th used vertex
	OutLaneVertIndex = FindNthSetBit(UsedVertMask, GroupThreadIndex) + BaseVertIndex;
#endif
	OutNumUniqueVerts = CountBits(UsedVertMask);
}
// Only works for WaveSize >= 32 and only efficient for == 32.
// A sliding window approach like this relies on the index buffer being constrained such that a triangle
// can't reference a vertex >= window size back from the largest seen index so far in the index buffer.
// In addition the WaveSize must be at least as large as the window so that vertices referenced by triangles
// in this wave must either be present in the cache from the previous iteration (Lane0 - Window) or in this iteration.
// Since we constrain to 32 when building the clusters the WaveSize that makes sense is the matching 32.
// Larger WaveSize could also work but the code would need to be changed.
// Per-wave vertex cache in which each lane holds one vertex (CachedVert) and triangles fetch their
// corners from other lanes with WaveReadLaneAt, addressing lanes by (vertex index & 31).
// The use visible here is: Init(), then PullExisting(), after which the caller presumably stores the vertex for the
// returned index into CachedVert, then PullRemaining() - TODO confirm the exact protocol with the callers.
template< typename FVertex >
struct TSlidingWindowVertexCache
{
// Vertex held by this lane; read by other lanes through WaveReadLaneAt.
FVertex CachedVert;
// Number of vertices covered by the cache so far (one past the highest cached vertex index).
uint NumCached;
// True until the first PullRemaining() has completed; there is nothing to pull from the cache before that.
bool bFirstPass;
// Corner vertices gathered for this lane's triangle, and whether each one was already taken in PullExisting().
FVertex TriangleVerts[3];
bool TriangleVertsRead[3];
// Resets the cache to empty.
void Init()
{
NumCached = 0;
bFirstPass = true;
}
// Grab what's there for this triangle before updating cache. Otherwise cache would need to be double size.
// Advances NumCached by at most 32, up to one past the largest index referenced by the active lanes,
// and returns the vertex index assigned to this lane in the advanced window.
// NOTE(review): the returned index is not clamped to the new NumCached here - presumably the caller range checks it.
uint PullExisting( uint3 VertIndexes )
{
BRANCH
if( !bFirstPass )
{
UNROLL
for( uint k = 0; k < 3; k++ )
{
TriangleVerts[k] = WaveReadLaneAt( CachedVert, VertIndexes[k] & 31 );
// Only corners below the current cache size are valid at this point
TriangleVertsRead[k] = VertIndexes[k] < NumCached;
}
}
uint MaxVertIndex = max( VertIndexes.y, VertIndexes.z );
uint WaveMaxVertIndex = WaveActiveMax( MaxVertIndex );
uint NewNumCached = min( WaveMaxVertIndex + 1, NumCached + 32 );
// Lanes are assigned consecutive vertex indexes starting at NumCached, beginning with lane (NumCached & 31)
uint FirstUncachedLane = NumCached & 31;
uint LaneVertIndex = ( WaveGetLaneIndex() - FirstUncachedLane ) & 31;
LaneVertIndex += NumCached;
NumCached = NewNumCached;
return LaneVertIndex;
}
// Gathers the corners that PullExisting() could not provide, reading them from the updated cache.
// Returns the number of leading triangles in the wave whose vertices are all cached:
// the index of the first lane whose largest corner index is >= NumCached, or 32 if there is none.
uint PullRemaining( uint3 VertIndexes )
{
UNROLL
for( uint k = 0; k < 3; k++ )
{
BRANCH
if( bFirstPass )
{
TriangleVerts[k] = WaveReadLaneAt( CachedVert, VertIndexes[k] & 31 );
}
else
{
// The cross-lane read is issued unconditionally; only the assignment depends on TriangleVertsRead
FVertex TempVert = WaveReadLaneAt( CachedVert, VertIndexes[k] & 31 );
if( !TriangleVertsRead[k] )
TriangleVerts[k] = TempVert;
}
}
bFirstPass = false;
uint MaxVertIndex = max( VertIndexes.y, VertIndexes.z );
// Only the first 32 lanes of the ballot are considered
uint InvalidMask = WaveBallot( MaxVertIndex >= NumCached ).x;
uint NumTrisCached = InvalidMask ? firstbitlow( InvalidMask ) : 32;
return NumTrisCached;
}
};
// Page translation functor that does nothing: it leaves the pixel untouched and always accepts it.
// It is the default FPageTranslation argument of TNaniteWritePixel.
struct FNullTranslation
{
// Returns true, meaning the pixel should be written.
bool operator()( inout FVisBufferPixel Pixel )
{
return true;
}
};
// Functor invoked per covered pixel of a rasterized triangle; it builds the visibility buffer pixel and writes it.
// FSoftwareShader  - provides EvaluatePixel() for pixel programmable permutations
// FPageTranslation - functor translating the pixel to its physical position; returning false rejects the pixel
template< typename FSoftwareShader, typename FPageTranslation = FNullTranslation >
struct TNaniteWritePixel
{
	FRaster				Raster;
	FSoftwareShader		Shader;
	uint				PixelValue;
	uint2				VisualizeValues;
	FPageTranslation	PageTranslation;

	// PixelPos - pixel being written
	// C        - barycentric weights of the pixel within Tri
	void operator()( uint2 PixelPos, float3 C, FRasterTri Tri )
	{
		// Evaluate the triangle's depth plane at this pixel
		const float PlaneDepth = Tri.DepthPlane.x + Tri.DepthPlane.y * C.y + Tri.DepthPlane.z * C.z;

		FVisBufferPixel Fragment = CreateVisBufferPixel( PixelPos, PixelValue, PlaneDepth );

	#if VISUALIZE
		Fragment.VisualizeValues = VisualizeValues;
	#endif

	#if VIRTUAL_TEXTURE_TARGET
		Fragment.PhysicalPosition.xy = Fragment.Position;
		Fragment.PhysicalPosition.z = Raster.ArrayIndex;

		// Rejected by the page translation, nothing to write
		if( !PageTranslation( Fragment ) )
			return;
	#endif

		Fragment.WriteOverdraw();

	#if NANITE_PIXEL_PROGRAMMABLE && !NANITE_TESSELLATION
		#if ENABLE_EARLY_Z_TEST
		BRANCH
		if( !Fragment.EarlyDepthTest() )
			return;
		#endif

		// Perspective correct derivatives are not worth the effort for masked/PDO.
		FBarycentrics PixelBarycentrics = CalculateBarycentrics( Tri, C, false );

		float4 PixelCenter = float4( Fragment.Position.xy + 0.5, Fragment.Depth, 1.0 );
		#if VIRTUAL_TEXTURE_TARGET
		// Translate it to virtual page
		PixelCenter.xy -= Raster.vTranslation;
		#endif

		// The material may discard the pixel
		BRANCH
		if( !Shader.EvaluatePixel( PixelBarycentrics, PixelCenter, Tri.bBackFace, Fragment ) )
			return;
	#endif

		Fragment.Write();
	}
};
// Material evaluation state shared by the programmable Nanite raster paths: vertex parameter setup and
// world position offset, displacement (tessellation and shadow fallback) and per-pixel mask / depth offset evaluation.
struct FMaterialShader
{
	FNaniteView					NaniteView;
	FPrimitiveSceneData			PrimitiveData;
	FInstanceSceneData			InstanceData;
	FInstanceDynamicData		InstanceDynamicData;
	FCluster					Cluster;
	FVisibleCluster				VisibleCluster;
	FNaniteVertTransforms		VertTransforms;
	FNaniteTransformedTri		TransformedTri;
	FMaterialVertexParameters	VertexParameters;
#if MATERIAL_SHADER_HAS_DISPLACEMENT
	FNaniteMaterialDisplacementParams DisplacementParams;
	// Displacement fade = saturate(ViewDepth * Slope + Offset), see InitDisplacement / EvaluateDomain
	float DisplacementFadeSlope;
	float DisplacementFadeOffset;
#endif

	// Fills VertexParameters for InputVert. Does nothing unless the permutation is vertex or pixel programmable.
	void InitVertexParameters(FNanitePostDeformVertex InputVert)
	{
	#if NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE
		// Should be Pow2(InvScale) but that requires renormalization
		float3x3 LocalToWorld = DFToFloat3x3(InstanceData.LocalToWorld);
		float3 InvScale = InstanceData.InvNonUniformScale;
		LocalToWorld[0] *= InvScale.x;
		LocalToWorld[1] *= InvScale.y;
		LocalToWorld[2] *= InvScale.z;

		VertexParameters = MakeInitializedMaterialVertexParameters();
		SetVertexParameterInstanceData(VertexParameters, InstanceData, PrimitiveData, true /* WPO */);
		SetVertexParameterAttributeData(VertexParameters, InputVert, InstanceDynamicData.LocalToTranslatedWorld, LocalToWorld);
	#endif
	}

#if NANITE_PIXEL_PROGRAMMABLE && NUM_TEX_COORD_INTERPOLATORS > 0
	// Evaluates the material's customized UVs and custom interpolators from the current VertexParameters.
	void GetCustomizedUVs(inout float2 OutCustomizedUVs[NUM_TEX_COORD_INTERPOLATORS])
	{
		GetMaterialCustomizedUVs(VertexParameters, OutCustomizedUVs);
		GetCustomInterpolators(VertexParameters, OutCustomizedUVs);
	}
#endif

	// Returns the material's world position offset for the current VertexParameters.
	// Zero unless the permutation is vertex programmable and the primitive has WPO evaluation enabled.
	float3 EvaluateWorldPositionOffset()
	{
		float3 WorldPositionOffset = 0.0f;
	#if NANITE_VERTEX_PROGRAMMABLE
		BRANCH
		if ((PrimitiveData.Flags & PRIMITIVE_SCENE_DATA_FLAG_EVALUATE_WORLD_POSITION_OFFSET) != 0u)
		{
		#if ENABLE_NEW_HLSL_GENERATOR
			EvaluateVertexMaterialAttributes(VertexParameters);
		#endif
			WorldPositionOffset = GetMaterialWorldPositionOffset(VertexParameters);
		}
	#endif
		return WorldPositionOffset;
	}

#if MATERIAL_SHADER_HAS_DISPLACEMENT
	// Stores the displacement parameters and precomputes the distance fade coefficients.
	// Without a fade (FadeSizeStop <= 0, or displacement unused) the fade evaluates to a constant 1.
	void InitDisplacement(FNaniteMaterialDisplacementParams InParams)
	{
		DisplacementParams = InParams;

	#if USES_DISPLACEMENT
		if (DisplacementParams.FadeSizeStop > 0.0f)
		{
			// Solve fade slope/offset relative to view depth
			// NOTE: This isn't the same as fading linearly with projected depth, but makes EvaluateDomain cheaper
			const float InstanceScale = InstanceData.NonUniformScale.w;
			const float MaxDisplacement = GetAbsMaxMaterialDisplacement( PrimitiveData );
			const float StartDepth = (MaxDisplacement * NaniteView.LODScale * InstanceScale) / DisplacementParams.FadeSizeStart;
			const float StopDepth = (MaxDisplacement * NaniteView.LODScale * InstanceScale) / DisplacementParams.FadeSizeStop;

			// Linear ramp that is 0 at StopDepth and 1 at StartDepth
			DisplacementFadeSlope = 1.0f / (StartDepth - StopDepth);
			DisplacementFadeOffset = -StopDepth * DisplacementFadeSlope;

			const bool bIsOrtho = NaniteView.ViewToClip[3][3] >= 1.0f;
			if (bIsOrtho)
			{
				// Displacement fade is uniform at all depths, so just solve it now rather than branch/condmask below
				DisplacementFadeOffset = saturate(DisplacementFadeSlope + DisplacementFadeOffset);
				DisplacementFadeSlope = 0.0f;
			}
		}
		else
	#endif
		{
			DisplacementFadeSlope = 0.0f;
			DisplacementFadeOffset = 1.0f;
		}
	}

	// Pushes InOutVert along its normal to the minimum displacement when rasterizing a displacement fallback.
	// Returns true if the vertex was modified.
	bool ApplyFallbackDisplacement(inout FNanitePostDeformVertex InOutVert)
	{
	#if ENABLE_FALLBACK_DISPLACEMENT
		#if FIXED_DISPLACEMENT_FALLBACK
			// We know that all clusters rasterized by this bin are displacement fallbacks, so save the extra check
			const bool bApplyDisplacement = true;
		#else
			const bool bApplyDisplacement = (VisibleCluster.Flags & NANITE_CULLING_FLAG_FALLBACK_RASTER) != 0 &&
				(PrimitiveData.PixelProgrammableDistanceSquared > 0.0f ||
				 PrimitiveData.MaterialDisplacementFadeOutSize > 0.0f);
		#endif // FIXED_DISPLACEMENT_FALLBACK

		// For shadows in fixed function or fallback rasterization with WPO enabled, if we're rasterizing a cluster with
		// displacement disabled, apply minimum displacement to avoid shadowing objects that have displacement in the main
		// view (prevents false self-shadow artifacts)
		if (bApplyDisplacement)
		{
			// Same as a normalized displacement of 0 in EvaluateDomain
			const float Displacement = -DisplacementParams.Center * DisplacementParams.Magnitude;
			InOutVert.Position += InOutVert.TangentBasis.TangentZ * Displacement;
			return true;
		}
	#endif // ENABLE_FALLBACK_DISPLACEMENT

		return false;
	}
#endif // MATERIAL_SHADER_HAS_DISPLACEMENT

#if MATERIAL_SHADER_HAS_DOMAIN
	// Evaluates the material at a point of TransformedTri given by Barycentrics and returns its clip-space position,
	// displaced along the interpolated normal when the material uses displacement.
	// UVDensities - per texture coordinate UV density used to derive crack-free UV derivatives
	// For masked pixel programmable materials a point whose mask is negative is returned with z = -2 and w = -1.
	// NOTE(review): presumably consumers treat that encoding as a culled point - confirm with the tessellation code.
	float4 EvaluateDomain( float4 UVDensities, FBarycentrics Barycentrics )
	{
		float3 PointPostDeform = Lerp(
			TransformedTri.Verts[0].PointPostDeform,
			TransformedTri.Verts[1].PointPostDeform,
			TransformedTri.Verts[2].PointPostDeform,
			Barycentrics ).Value;

		float4 BaseClip = Lerp(
			TransformedTri.Verts[0].PointClip,
			TransformedTri.Verts[1].PointClip,
			TransformedTri.Verts[2].PointClip,
			Barycentrics ).Value;

		// Hopefully never used and will be dead code eliminated. TODO VT feedback needs this right now :(
		float4 SvPosition;
		SvPosition.xyz = BaseClip.xyz / BaseClip.w;
		SvPosition.xy = ( float2(0.5, -0.5) * SvPosition.xy + 0.5 ) * NaniteView.ViewSizeAndInvSize.xy + NaniteView.ViewRect.xy;
		SvPosition.w = 1;

		FVertexFactoryInterpolantsVSToPS Interpolants = (FVertexFactoryInterpolantsVSToPS)0;
		FMaterialPixelParameters MaterialParameters = FetchNaniteMaterialPixelParameters( PrimitiveData, InstanceData, InstanceDynamicData, NaniteView, TransformedTri, Cluster, Barycentrics, Interpolants, SvPosition );

		// Now we want to override UV derivatives to be based on uniform UV density so that there are no cracks
	#if NUM_TEX_COORD_INTERPOLATORS > 0
		// NOTE(review): UVDensities is a float4, so this indexing assumes NUM_TEX_COORD_INTERPOLATORS <= 4 - confirm.
		for (uint TexCoordIndex = 0; TexCoordIndex < NUM_TEX_COORD_INTERPOLATORS; ++TexCoordIndex)
		{
			float2 Deriv = CalculateUVDerivativeForDomainPoint(
				NaniteView,
				BaseClip.w,
				InstanceData.NonUniformScale.w,
				UVDensities[TexCoordIndex] );
			MaterialParameters.TexCoords_DDX[TexCoordIndex] = Deriv;
			MaterialParameters.TexCoords_DDY[TexCoordIndex] = Deriv;
		}
	#endif

		FPixelMaterialInputs PixelMaterialInputs;
		CalcMaterialParametersEx(
			MaterialParameters,
			PixelMaterialInputs,
			SvPosition,
			MaterialParameters.ScreenPosition,
			true, // bIsFrontFace
			MaterialParameters.WorldPosition_CamRelative,
			MaterialParameters.WorldPosition_NoOffsets_CamRelative );

	#if USES_DISPLACEMENT
		// Determine Displacement with a distance fade based on depth
		float NormalizedDisplacement = GetMaterialDisplacement(PixelMaterialInputs);
		float DisplacementFade = saturate(BaseClip.w * DisplacementFadeSlope + DisplacementFadeOffset);

		#if DEPTH_ONLY // AKA Shadows
		if (DisplacementFade < 1.0f)
		{
			// Always push to the minimum displacement (instead of 0 displacement) for shadows to prevent self-shadowing
			// artifacts in the fade out region (more details above in ApplyFallbackDisplacement)
			NormalizedDisplacement = 0.0f;
			DisplacementFade = 1.0f;
		}
		#endif // DEPTH_ONLY

		const float Displacement = (NormalizedDisplacement - DisplacementParams.Center) * DisplacementParams.Magnitude * DisplacementFade;

		#if NANITE_TESSELLATION_NORMALIZE
		// Displace in post-deform space along the renormalized normal; projected to clip space below
		float3 Normal = Lerp(
			TransformedTri.Verts[0].NormalPostDeform,
			TransformedTri.Verts[1].NormalPostDeform,
			TransformedTri.Verts[2].NormalPostDeform,
			Barycentrics ).Value;
		Normal = normalize( Normal );
		PointPostDeform += Normal * Displacement;
		#else // !NANITE_TESSELLATION_NORMALIZE
		// Displace directly in clip space along the interpolated clip-space normal
		float4 NormalClip = Lerp(
			TransformedTri.Verts[0].NormalClip,
			TransformedTri.Verts[1].NormalClip,
			TransformedTri.Verts[2].NormalClip,
			Barycentrics).Value;
		float4 PointClip = BaseClip + NormalClip * Displacement;
		#endif // NANITE_TESSELLATION_NORMALIZE
	#endif // USES_DISPLACEMENT

	#if !(USES_DISPLACEMENT && !NANITE_TESSELLATION_NORMALIZE)
		float3 PointWorld = mul( float4( PointPostDeform, 1 ), InstanceDynamicData.LocalToTranslatedWorld ).xyz;
		float4 PointClip = mul( float4( PointWorld, 1 ), NaniteView.TranslatedWorldToClip );
	#endif

	#if NANITE_PIXEL_PROGRAMMABLE && MATERIALBLENDING_MASKED
		if( GetMaterialMask(PixelMaterialInputs) < 0.0 )
		{
			PointClip.z = -2.0;
			PointClip.w = -1.0;
		}
	#endif

		return PointClip;
	}
#endif

	// Evaluates the material for a pixel. Returns false if the pixel is rejected by the material mask.
	// bBackFace - true when the triangle is back facing
	// Pixel     - Pixel.Depth may be modified by pixel depth offset when that is enabled for the permutation
	// Always returns true when the permutation is not pixel programmable.
	bool EvaluatePixel( FBarycentrics Barycentrics, float4 SvPosition, bool bBackFace, inout FVisBufferPixel Pixel )
	{
		bool bValid = true;

	#if NANITE_PIXEL_PROGRAMMABLE
		FVertexFactoryInterpolantsVSToPS Interpolants = (FVertexFactoryInterpolantsVSToPS)0;

		FMaterialPixelParameters MaterialParameters = FetchNaniteMaterialPixelParameters( PrimitiveData, InstanceData, InstanceDynamicData, NaniteView, TransformedTri, Cluster, Barycentrics, Interpolants, SvPosition );
		// TwoSidedSign follows the engine convention of +1 for front faces and -1 for back faces.
		// Facing is supplied here because CalcMaterialParameters below is always told the face is front facing.
		MaterialParameters.TwoSidedSign = bBackFace ? -1.0f : 1.0f;

		FPixelMaterialInputs PixelMaterialInputs;
		CalcMaterialParameters(MaterialParameters, PixelMaterialInputs, SvPosition, true /*bIsFrontFace*/);

		// NOTE: Disable PDO in shadow passes (it does undesirable things and has always been disabled in these passes in Unreal)
		// PDO with tessellation would screw up barycentrics.
		#if WANT_PIXEL_DEPTH_OFFSET && DEPTH_ONLY == 0 && !NANITE_TESSELLATION
		ApplyPixelDepthOffsetToMaterialParameters(MaterialParameters, PixelMaterialInputs, Pixel.Depth);
		#endif

		#if MATERIALBLENDING_MASKED
		bValid = GetMaterialMask(PixelMaterialInputs) >= 0.0;
		#endif
	#endif

		return bValid;
	}
};
#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
// Group-shared cache of physical page addresses, one packed entry per virtual page of a
// NANITE_VSM_PAGE_TABLE_CACHE_DIM x NANITE_VSM_PAGE_TABLE_CACHE_DIM window.
groupshared uint GroupVsmPageTableCache[NANITE_VSM_PAGE_TABLE_CACHE_DIM * NANITE_VSM_PAGE_TABLE_CACHE_DIM];

// Stores the physical page pPage at cache cell Coords, packed as x in the low 16 bits and y in the high 16 bits.
void VsmPageTableStore(uint2 pPage, uint2 Coords)
{
	const uint Slot = Coords.x + Coords.y * NANITE_VSM_PAGE_TABLE_CACHE_DIM;
	GroupVsmPageTableCache[Slot] = pPage.x | (pPage.y << 16);
}
// Reads the cache cell at Coords and unpacks it into a physical page (x from the low 16 bits, y from the high 16 bits).
uint2 VsmPageTableLoad(uint2 Coords)
{
	const uint Packed = GroupVsmPageTableCache[Coords.x + Coords.y * NANITE_VSM_PAGE_TABLE_CACHE_DIM];

	uint2 pPage;
	pPage.x = Packed & 0xffff;
	pPage.y = Packed >> 16;
	return pPage;
}
// Looks up the physical page for one cell of the page table cache and stores it in group-shared memory.
// vPageStart / vPageEnd - inclusive range of virtual pages covered by the cache window
// CacheIndex            - linear index of the cache cell handled by this call (row-major, NANITE_VSM_PAGE_TABLE_CACHE_DIM wide)
// Cells outside the page range are left untouched. Pages that are not valid for rendering are stored as 0xffff.
void FetchAndCachePageTableEntry(FNaniteView NaniteView, uint2 vPageStart, uint2 vPageEnd, uint CacheIndex)
{
	// Derive the cell from the cache dimension so it always agrees with the indexing in VsmPageTableStore/Load
	// (identical to the previous "& 0x7" / ">> 3" when the dimension is 8).
	const uint CacheDim = NANITE_VSM_PAGE_TABLE_CACHE_DIM;
	const uint2 CacheCoords = uint2(CacheIndex % CacheDim, CacheIndex / CacheDim);
	const uint2 vPage = vPageStart + CacheCoords;

	if (all(vPage <= vPageEnd))
	{
		FShadowPhysicalPage PhysicalPage = ShadowGetPhysicalPage( CalcPageOffset( NaniteView.TargetLayerIndex, NaniteView.TargetMipLevel, vPage ) );
		uint2 pPageAddress = PhysicalPage.bThisLODValidForRendering ? PhysicalPage.PhysicalAddress : 0xffff;
		VsmPageTableStore(pPageAddress, CacheCoords);
	}
}
// Page translation functor that resolves the physical position from the group-shared page table cache.
struct FCachedPageTable
{
	// Returns false when the cached entry for the pixel's page is marked invalid (0xffff);
	// otherwise writes Pixel.PhysicalPosition.xy and returns true.
	bool operator()( inout FVisBufferPixel Pixel )
	{
	#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
		const uint2 CachedPage = VsmPageTableLoad(Pixel.Position / VSM_PAGE_SIZE);
		const bool bPageValid = CachedPage.x != 0xffff;
		if (bPageValid)
		{
			// Physical page origin plus the texel offset inside the page
			Pixel.PhysicalPosition.xy = CachedPage * VSM_PAGE_SIZE + (Pixel.Position & VSM_PAGE_SIZE_MASK);
		}
		return bPageValid;
	#else
		return true;
	#endif
	}
};
#endif // VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
// Page translation functor that resolves the physical position through VirtualToPhysicalTexelForRendering per pixel.
struct FFetchPageTable
{
	uint					MipLevel;
	FVirtualSMLevelOffset	LevelOffset;

	// Returns false when the page is not committed or should not be rendered into.
	// Always accepts the pixel when not rendering to a virtual texture target.
	bool operator()( inout FVisBufferPixel Pixel )
	{
		bool bAccepted = true;
	#if VIRTUAL_TEXTURE_TARGET
		if( !VirtualToPhysicalTexelForRendering( LevelOffset, MipLevel, Pixel.Position, Pixel.PhysicalPosition.xy ) )
		{
			bAccepted = false;
		}
	#endif
		return bAccepted;
	}
};
// Packs the values written to the visualization targets. Returns zero when VISUALIZE is disabled.
// x - value stored with the surviving fragment (InterlockedMax64 using depth):
//     bits 0..7 raster mode (1 = hardware raster, 2 = software raster, 3 = software tessellation),
//     bits 8..15 SubPatch, bits 16..23 MicroTri
// y - AddValue, accumulated with every evaluated fragment (InterlockedAdd32)
uint2 GetVisualizeValues(uint AddValue, uint SubPatch, uint MicroTri)
{
#if VISUALIZE
	#if NANITE_TESSELLATION
	const uint RasterMode = 3; // Software Tessellation
	#elif SOFTWARE_RASTER
	const uint RasterMode = 2; // Software Raster
	#else
	const uint RasterMode = 1; // Hardware Raster
	#endif

	const uint MaxValue = RasterMode | ((SubPatch & 0xffu) << 8u) | ((MicroTri & 0xffu) << 16u);
	return uint2(MaxValue, AddValue);
#else
	return uint2(0, 0);
#endif
}
// Convenience overload: one fragment to accumulate, no sub patch and no micro triangle.
uint2 GetVisualizeValues()
{
	const uint AddValue = 1u;
	const uint SubPatch = 0u;
	const uint MicroTri = 0u;
	return GetVisualizeValues(AddValue, SubPatch, MicroTri);
}