// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "../Common.ush"
#include "../SceneData.ush"
#include "../WaveOpUtil.ush"
#include "NaniteRasterizer.ush"
#include "NaniteAttributeDecode.ush"
#include "../VirtualShadowMaps/VirtualShadowMapPageAccessCommon.ush"
#include "../VirtualShadowMaps/VirtualShadowMapPageOverlap.ush"
#include "NaniteWritePixel.ush"
#include "NaniteCullingCommon.ush"

// Material includes
#include "/Engine/Generated/Material.ush"
#include "NaniteVertexFactory.ush"

#if NANITE_TESSELLATION
#include "NaniteTessellation.ush"
#endif

#ifndef NANITE_TWO_SIDED
#define NANITE_TWO_SIDED 0
#endif

#define ENABLE_EARLY_Z_TEST (NANITE_PIXEL_PROGRAMMABLE)

// For shadows in displacement fallback raster bins, we need to apply minimum displacement on cluster triangles
#define ENABLE_FALLBACK_DISPLACEMENT (SHADOW_DEPTH_SHADER && ( \
	FIXED_DISPLACEMENT_FALLBACK || \
	(NANITE_VERTEX_PROGRAMMABLE && USES_DISPLACEMENT && !NANITE_TESSELLATION) \
))

#define MATERIAL_SHADER_HAS_DOMAIN (NANITE_TESSELLATION && (NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE))
#define MATERIAL_SHADER_HAS_DISPLACEMENT (ENABLE_FALLBACK_DISPLACEMENT || (MATERIAL_SHADER_HAS_DOMAIN && USES_DISPLACEMENT))

// State for mapping a pixel position to a memory address
struct FRaster
{
	float2 ViewportScale;
	float2 ViewportBias;
	int4 ScissorRect;

#if VIRTUAL_TEXTURE_TARGET
	uint2 pPage;
	uint2 vPage;
	float2 vTranslation;
	bool bSinglePage;
	uint ArrayIndex;
#endif
};

float4 CalculateSubpixelCoordinates( FRaster Raster, float4 PointClip )
{
	float4 Subpixel = float4( PointClip.xyz, 1 ) / PointClip.w;
	Subpixel.xy = Subpixel.xy * Raster.ViewportScale + Raster.ViewportBias;
	Subpixel.xy = floor(Subpixel.xy);
	return Subpixel;
}

float3 GetPerspectiveCorrectBarycentrics( float3 C, float3 InvW )
{
	float3 CInvW = C * InvW;								// Perspective weighting by (1/w0, 1/w1, 1/w2)
	float3 UVW = CInvW * rcp(CInvW.x + CInvW.y + CInvW.z);	// renormalize

	return UVW;
}
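
// Worked example (illustration only): for edge values C = (0.25, 0.25, 0.5) and
// InvW = (1, 1, 0.5) -- i.e. the third vertex is twice as far away (w = 2) --
// CInvW = (0.25, 0.25, 0.25), the sum is 0.75, and UVW = (1/3, 1/3, 1/3).
// The renormalization shifts weight away from the farther vertex, which is the
// correction that raw screen-space barycentrics would miss.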

FBarycentrics CalculateBarycentrics( FRasterTri Tri, float3 C, bool bPerspectiveCorrectDerivatives )
{
	FBarycentrics Barycentrics = (FBarycentrics)0;

	const float3 UVW = GetPerspectiveCorrectBarycentrics( C, Tri.InvW );
	Barycentrics.Value = UVW;

	BRANCH
	if( bPerspectiveCorrectDerivatives )
	{
		const float3 OffsetX = { -Tri.Edge12.y, -Tri.Edge20.y, -Tri.Edge01.y };
		const float3 OffsetY = { Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x };
		const float3 UVW_X = GetPerspectiveCorrectBarycentrics( C + OffsetX, Tri.InvW );
		const float3 UVW_Y = GetPerspectiveCorrectBarycentrics( C + OffsetY, Tri.InvW );
		Barycentrics.Value_dx = UVW_X - UVW;
		Barycentrics.Value_dy = UVW_Y - UVW;
	}
	else
	{
		Barycentrics.Value_dx = Tri.Barycentrics_dx;
		Barycentrics.Value_dy = Tri.Barycentrics_dy;
	}

	return Barycentrics;
}

int FindNthSetBit( uint Mask, int Index )
{
	int Last = countbits( Mask ) - Index - 1;

	uint p = 16;
	p += countbits( Mask >> p ) <= Last ? -8 : 8;
	p += countbits( Mask >> p ) <= Last ? -4 : 4;
	p += countbits( Mask >> p ) <= Last ? -2 : 2;
	p += countbits( Mask >> p ) <= Last ? -1 : 1;
	p  = countbits( Mask >> p ) == Last ? (p - 1) : p;
	return p;
}

int FindNthSetBit( uint2 Mask, int Index )
{
	int LowPop = countbits( Mask.x );
	return FindNthSetBit( Index < LowPop ? Mask.x : Mask.y, Index < LowPop ? Index : Index - LowPop ) + ( Index < LowPop ? 0 : 32 );
}
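
// Examples (illustration only), counting set bits from the least significant end:
//   FindNthSetBit( 0x16 /* ..10110 */, 0 ) == 1
//   FindNthSetBit( 0x16 /* ..10110 */, 1 ) == 2
//   FindNthSetBit( 0x16 /* ..10110 */, 2 ) == 4
//   FindNthSetBit( uint2( 0x16, 0x1 ), 3 ) == 32	// the 4th set bit lives in the high word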

/*int FindNthSetBit_Scalar( uint Mask, int Index )
{
	return firstbitlow( WaveBallot( WavePrefixCountBits(x) == Index ) ) - 1;
	return firstbitlow( WaveBallot( MaskedBitCount( Mask ) == Index ) ) - 1;
}*/

uint MaskedBitCount2( uint2 Bits, uint Index )
{
	uint Mask = 1u << ( Index & 31 );
	Mask -= 1;

	uint A = Index < 32 ? Bits.x : Bits.y;
	uint B = Index < 32 ? 0 : countbits( Bits.x );

	return countbits( A & Mask ) + B;
}
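
// Example (illustration only): for Bits = uint2( 0x0000000B, 0x00000001 )
// (bits 0, 1, 3 and 32 set) and Index = 33, the low word contributes
// countbits( 0x0000000B ) = 3 and the high word contributes the one set bit
// below position 33, so the result is 4: the number of set bits strictly
// below Index across the 64-bit mask.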

groupshared uint GroupBaseVertIndex;
groupshared uint2 GroupUsedVertMask;
//groupshared uint GroupUniqueVertIndex[32];

void DeduplicateVertIndexes( uint3 VertIndexes, uint GroupThreadIndex, bool bTriValid, out uint OutNumUniqueVerts, out uint OutLaneVertIndex, out uint3 OutCornerLaneIndexes )
{
	const uint LaneCount = WaveGetLaneCount();

	// Calculate smallest active vertex
	uint BaseVertIndex;
	BRANCH
	if( LaneCount < 32 )
	{
		if (GroupThreadIndex == 0)
		{
			GroupBaseVertIndex = 0xFFFFFFFFu;
			GroupUsedVertMask = 0;
		}
		GroupMemoryBarrierWithGroupSync();
		if (bTriValid) WaveInterlockedMin(GroupBaseVertIndex, VertIndexes.x); // VertIndexes.x is always smallest
		GroupMemoryBarrierWithGroupSync();
		BaseVertIndex = GroupBaseVertIndex;
	}
	else
	{
		BaseVertIndex = WaveActiveMin(bTriValid ? VertIndexes.x : 0xFFFFFFFFu); // VertIndexes.x is always smallest
	}

	uint2 TriangleVertMask = 0;
	if( bTriValid )
	{
		VertIndexes -= BaseVertIndex;

		UNROLL
		for (uint i = 0; i < 3; i++)
		{
			bool bDstLow = VertIndexes[i] < 32;
			uint DstMask = 1u << ( VertIndexes[i] & 31 );

			if( bDstLow )
				TriangleVertMask.x |= DstMask;
			else
				TriangleVertMask.y |= DstMask;
		}
	}

	uint2 UsedVertMask = uint2(0,0);
	BRANCH
	if( LaneCount < 32 )
	{
		WaveInterlockedOr(GroupUsedVertMask.x, TriangleVertMask.x);
		WaveInterlockedOr(GroupUsedVertMask.y, TriangleVertMask.y);
		GroupMemoryBarrierWithGroupSync();
		UsedVertMask = GroupUsedVertMask;
	}
	else
	{
		UsedVertMask.x = WaveActiveBitOr(TriangleVertMask.x);
		UsedVertMask.y = WaveActiveBitOr(TriangleVertMask.y);
	}

	OutCornerLaneIndexes.x = MaskedBitCount(UsedVertMask, VertIndexes.x);
	OutCornerLaneIndexes.y = MaskedBitCount(UsedVertMask, VertIndexes.y);
	OutCornerLaneIndexes.z = MaskedBitCount(UsedVertMask, VertIndexes.z);

#if 0
	bool2 bIsUsed = ( UsedVertMask & (1u << GroupThreadIndex) ) != 0u;
	uint2 UsedPrefixSum =
	{
		MaskedBitCount( uint2( UsedVertMask.x, 0 ) ),
		MaskedBitCount( uint2( UsedVertMask.y, 0 ) ) + countbits( UsedVertMask.x )
	};

	if( bIsUsed.x )
		GroupUniqueVertIndex[ UsedPrefixSum.x ] = GroupThreadIndex;
	if( bIsUsed.y && UsedPrefixSum.y < 32 )
		GroupUniqueVertIndex[ UsedPrefixSum.y ] = GroupThreadIndex + 32;

	OutLaneVertIndex = GroupUniqueVertIndex[ GroupThreadIndex ] + BaseVertIndex;
#else
	OutLaneVertIndex = FindNthSetBit(UsedVertMask, GroupThreadIndex) + BaseVertIndex;
#endif
	OutNumUniqueVerts = CountBits(UsedVertMask);
}
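
// Illustrative usage sketch only (assumes one threadgroup per cluster; the Load*/Output*
// helpers and FCluster member names below are placeholders for the calling raster shader):
#if 0
void ExampleDeduplicate( FCluster Cluster, uint TriIndex, uint GroupThreadIndex )
{
	const bool bTriValid = TriIndex < Cluster.NumTris;
	uint3 VertIndexes = bTriValid ? LoadTriangleIndexes( Cluster, TriIndex ) : uint3(0, 0, 0);

	uint NumUniqueVerts;
	uint LaneVertIndex;
	uint3 CornerLaneIndexes;
	DeduplicateVertIndexes( VertIndexes, GroupThreadIndex, bTriValid, NumUniqueVerts, LaneVertIndex, CornerLaneIndexes );

	// Each of the first NumUniqueVerts lanes transforms exactly one unique vertex ...
	if( GroupThreadIndex < NumUniqueVerts )
	{
		OutputTransformedVertex( GroupThreadIndex, LoadAndTransformVertex( Cluster, LaneVertIndex ) );
	}

	// ... and each valid triangle references its corners by the lanes that own them.
	if( bTriValid )
	{
		OutputTriangle( TriIndex, CornerLaneIndexes );
	}
}
#endif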


// Only works for WaveSize >= 32 and is only efficient for == 32.
// A sliding window approach like this relies on the index buffer being constrained such that a triangle
// can't reference a vertex >= window size back from the largest index seen so far in the index buffer.
// In addition, the WaveSize must be at least as large as the window so that vertices referenced by triangles
// in this wave are either present in the cache from the previous iteration (Lane0 - Window) or in this iteration.
// Since we constrain to 32 when building the clusters, the WaveSize that makes sense is the matching 32.
// A larger WaveSize could also work but the code would need to be changed.
// A usage sketch follows the struct below.
template< typename FVertex >
struct TSlidingWindowVertexCache
{
	FVertex CachedVert;
	uint NumCached;
	bool bFirstPass;

	FVertex TriangleVerts[3];
	bool TriangleVertsRead[3];

	void Init()
	{
		NumCached = 0;
		bFirstPass = true;
	}

	// Grab what's already there for this triangle before updating the cache. Otherwise the cache would need to be double the size.
	uint PullExisting( uint3 VertIndexes )
	{
		BRANCH
		if( !bFirstPass )
		{
			UNROLL
			for( uint k = 0; k < 3; k++ )
			{
				TriangleVerts[k] = WaveReadLaneAt( CachedVert, VertIndexes[k] & 31 );
				TriangleVertsRead[k] = VertIndexes[k] < NumCached;
			}
		}

		uint MaxVertIndex = max( VertIndexes.y, VertIndexes.z );
		uint WaveMaxVertIndex = WaveActiveMax( MaxVertIndex );
		uint NewNumCached = min( WaveMaxVertIndex + 1, NumCached + 32 );

		uint FirstUncachedLane = NumCached & 31;
		uint LaneVertIndex = ( WaveGetLaneIndex() - FirstUncachedLane ) & 31;
		LaneVertIndex += NumCached;
		NumCached = NewNumCached;

		return LaneVertIndex;
	}

	uint PullRemaining( uint3 VertIndexes )
	{
		UNROLL
		for( uint k = 0; k < 3; k++ )
		{
			BRANCH
			if( bFirstPass )
			{
				TriangleVerts[k] = WaveReadLaneAt( CachedVert, VertIndexes[k] & 31 );
			}
			else
			{
				FVertex TempVert = WaveReadLaneAt( CachedVert, VertIndexes[k] & 31 );
				if( !TriangleVertsRead[k] )
					TriangleVerts[k] = TempVert;
			}
		}
		bFirstPass = false;

		uint MaxVertIndex = max( VertIndexes.y, VertIndexes.z );
		uint InvalidMask = WaveBallot( MaxVertIndex >= NumCached ).x;
		uint NumTrisCached = InvalidMask ? firstbitlow( InvalidMask ) : 32;
		return NumTrisCached;
	}
};
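
// Illustrative usage sketch only (assumes a 32-wide wave; FExampleVert, LoadVertex
// and RasterizeTriangle are placeholders for the caller's vertex type and helpers):
#if 0
void ExampleSlidingWindowLoop( uint3 VertIndexes, uint NumTriangles )
{
	TSlidingWindowVertexCache< FExampleVert > Cache;
	Cache.Init();

	uint NumProcessed = 0;
	while( NumProcessed < NumTriangles )
	{
		// Grab corners already resident from the previous window and find out which
		// vertex this lane should fetch to slide the window forward.
		uint LaneVertIndex = Cache.PullExisting( VertIndexes );
		Cache.CachedVert = LoadVertex( LaneVertIndex );	// caller guards out-of-range indexes

		// Fetch the remaining corners from the refreshed window. NumTrisCached is how
		// many of this wave's triangles now have all three corners resident.
		uint NumTrisCached = Cache.PullRemaining( VertIndexes );
		if( WaveGetLaneIndex() < NumTrisCached )
		{
			RasterizeTriangle( Cache.TriangleVerts[0], Cache.TriangleVerts[1], Cache.TriangleVerts[2] );
		}

		// The caller advances to the next batch of triangles (and their VertIndexes) here.
		NumProcessed += NumTrisCached;
	}
}
#endif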


struct FNullTranslation
{
	bool operator()( inout FVisBufferPixel Pixel )
	{
		return true;
	}
};

template< typename FSoftwareShader, typename FPageTranslation = FNullTranslation >
struct TNaniteWritePixel
{
	FRaster Raster;
	FSoftwareShader Shader;
	uint PixelValue;
	uint2 VisualizeValues;
	FPageTranslation PageTranslation;

	void operator()( uint2 PixelPos, float3 C, FRasterTri Tri )
	{
		float DeviceZ = Tri.DepthPlane.x + Tri.DepthPlane.y * C.y + Tri.DepthPlane.z * C.z;

		FVisBufferPixel Pixel = CreateVisBufferPixel( PixelPos, PixelValue, DeviceZ );

#if VISUALIZE
		Pixel.VisualizeValues = VisualizeValues;
#endif

#if VIRTUAL_TEXTURE_TARGET
		Pixel.PhysicalPosition.xy = Pixel.Position;
		Pixel.PhysicalPosition.z = Raster.ArrayIndex;

		if( !PageTranslation( Pixel ) )
		{
			return;
		}
#endif

		Pixel.WriteOverdraw();

#if NANITE_PIXEL_PROGRAMMABLE && !NANITE_TESSELLATION

#if ENABLE_EARLY_Z_TEST
		BRANCH
		if( !Pixel.EarlyDepthTest() )
		{
			return;
		}
#endif

		FBarycentrics Barycentrics = CalculateBarycentrics( Tri, C, false ); // Perspective correct derivatives are not worth the effort for masked/PDO.

		float4 SvPosition = float4( Pixel.Position.xy + 0.5, Pixel.Depth, 1.0 );
#if VIRTUAL_TEXTURE_TARGET
		// Translate it to virtual page
		SvPosition.xy = SvPosition.xy - Raster.vTranslation;
#endif

		BRANCH
		if( !Shader.EvaluatePixel( Barycentrics, SvPosition, Tri.bBackFace, Pixel ) )
		{
			return;
		}
#endif

		Pixel.Write();
	}
};
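
// Illustrative instantiation sketch only: FMaterialShader is defined below in this
// file, and ExampleRasterizeTri stands in for whichever software-raster loop invokes
// the functor once per covered pixel.
#if 0
void ExampleWritePixelSetup( FRaster Raster, FMaterialShader Shader, uint PixelValue, FRasterTri Tri )
{
	TNaniteWritePixel< FMaterialShader > WritePixel;
	WritePixel.Raster = Raster;
	WritePixel.Shader = Shader;
	WritePixel.PixelValue = PixelValue;
	WritePixel.VisualizeValues = GetVisualizeValues();

	// The rasterizer calls WritePixel( PixelPos, EdgeValues, Tri ) for each covered
	// pixel; the functor then depth tests, optionally runs the material, and writes
	// the visibility buffer.
	ExampleRasterizeTri( Tri, WritePixel );
}
#endif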


struct FMaterialShader
{
	FNaniteView NaniteView;
	FPrimitiveSceneData PrimitiveData;
	FInstanceSceneData InstanceData;
	FInstanceDynamicData InstanceDynamicData;
	FCluster Cluster;
	FVisibleCluster VisibleCluster;
	FNaniteVertTransforms VertTransforms;
	FNaniteTransformedTri TransformedTri;
	FMaterialVertexParameters VertexParameters;

#if MATERIAL_SHADER_HAS_DISPLACEMENT
	FNaniteMaterialDisplacementParams DisplacementParams;
	float DisplacementFadeSlope;
	float DisplacementFadeOffset;
#endif

	void InitVertexParameters(FNanitePostDeformVertex InputVert)
	{
#if NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE
		// Should be Pow2(InvScale) but that requires renormalization
		float3x3 LocalToWorld = DFToFloat3x3(InstanceData.LocalToWorld);
		float3 InvScale = InstanceData.InvNonUniformScale;
		LocalToWorld[0] *= InvScale.x;
		LocalToWorld[1] *= InvScale.y;
		LocalToWorld[2] *= InvScale.z;

		VertexParameters = MakeInitializedMaterialVertexParameters();
		SetVertexParameterInstanceData(VertexParameters, InstanceData, PrimitiveData, true /* WPO */);
		SetVertexParameterAttributeData(VertexParameters, InputVert, InstanceDynamicData.LocalToTranslatedWorld, LocalToWorld);
#endif
	}

#if NANITE_PIXEL_PROGRAMMABLE && NUM_TEX_COORD_INTERPOLATORS > 0
	void GetCustomizedUVs(inout float2 OutCustomizedUVs[NUM_TEX_COORD_INTERPOLATORS])
	{
		GetMaterialCustomizedUVs(VertexParameters, OutCustomizedUVs);
		GetCustomInterpolators(VertexParameters, OutCustomizedUVs);
	}
#endif

	float3 EvaluateWorldPositionOffset()
	{
		float3 WorldPositionOffset = 0.0f;
#if NANITE_VERTEX_PROGRAMMABLE
		BRANCH
		if ((PrimitiveData.Flags & PRIMITIVE_SCENE_DATA_FLAG_EVALUATE_WORLD_POSITION_OFFSET) != 0u)
		{
#if ENABLE_NEW_HLSL_GENERATOR
			EvaluateVertexMaterialAttributes(VertexParameters);
#endif
			WorldPositionOffset = GetMaterialWorldPositionOffset(VertexParameters);
		}
#endif
		return WorldPositionOffset;
	}

#if MATERIAL_SHADER_HAS_DISPLACEMENT
	void InitDisplacement(FNaniteMaterialDisplacementParams InParams)
	{
		DisplacementParams = InParams;

#if USES_DISPLACEMENT
		if (DisplacementParams.FadeSizeStop > 0.0f)
		{
			// Solve fade slope/offset relative to view depth
			// NOTE: This isn't the same as fading linearly with projected depth, but makes EvaluateDomain cheaper
			const float InstanceScale = InstanceData.NonUniformScale.w;
			const float MaxDisplacement = GetAbsMaxMaterialDisplacement( PrimitiveData );
			const float StartDepth = (MaxDisplacement * NaniteView.LODScale * InstanceScale) / DisplacementParams.FadeSizeStart;
			const float StopDepth = (MaxDisplacement * NaniteView.LODScale * InstanceScale) / DisplacementParams.FadeSizeStop;
			DisplacementFadeSlope = 1.0f / (StartDepth - StopDepth);
			DisplacementFadeOffset = -StopDepth * DisplacementFadeSlope;
			// With these values, saturate(Depth * Slope + Offset) = saturate((Depth - StopDepth) / (StartDepth - StopDepth)),
			// i.e. the fade is 1 at StartDepth, 0 at StopDepth, and linear in view depth in between (see EvaluateDomain).

			const bool bIsOrtho = NaniteView.ViewToClip[3][3] >= 1.0f;
			if (bIsOrtho)
			{
				// Displacement fade is uniform at all depths, so just solve it now rather than branch/condmask below
				DisplacementFadeOffset = saturate(DisplacementFadeSlope + DisplacementFadeOffset);
				DisplacementFadeSlope = 0.0f;
			}
		}
		else
#endif
		{
			DisplacementFadeSlope = 0.0f;
			DisplacementFadeOffset = 1.0f;
		}
	}

	bool ApplyFallbackDisplacement(inout FNanitePostDeformVertex InOutVert)
	{
#if ENABLE_FALLBACK_DISPLACEMENT
#if FIXED_DISPLACEMENT_FALLBACK
		// We know that all clusters rasterized by this bin are displacement fallbacks, so save the extra check
		const bool bApplyDisplacement = true;
#else
		const bool bApplyDisplacement = (VisibleCluster.Flags & NANITE_CULLING_FLAG_FALLBACK_RASTER) != 0 &&
			(PrimitiveData.PixelProgrammableDistanceSquared > 0.0f ||
			 PrimitiveData.MaterialDisplacementFadeOutSize > 0.0f);
#endif // FIXED_DISPLACEMENT_FALLBACK

		// For shadows in fixed function or fallback rasterization with WPO enabled, if we're rasterizing a cluster with
		// displacement disabled, apply minimum displacement to avoid shadowing objects that have displacement in the main
		// view (prevents false self-shadow artifacts)
		if (bApplyDisplacement)
		{
			const float Displacement = -DisplacementParams.Center * DisplacementParams.Magnitude;
			InOutVert.Position += InOutVert.TangentBasis.TangentZ * Displacement;
			return true;
		}
#endif // ENABLE_FALLBACK_DISPLACEMENT

		return false;
	}
#endif // MATERIAL_SHADER_HAS_DISPLACEMENT

#if MATERIAL_SHADER_HAS_DOMAIN
	float4 EvaluateDomain( float4 UVDensities, FBarycentrics Barycentrics )
	{
		float3 PointPostDeform = Lerp(
			TransformedTri.Verts[0].PointPostDeform,
			TransformedTri.Verts[1].PointPostDeform,
			TransformedTri.Verts[2].PointPostDeform,
			Barycentrics ).Value;

		float4 BaseClip = Lerp(
			TransformedTri.Verts[0].PointClip,
			TransformedTri.Verts[1].PointClip,
			TransformedTri.Verts[2].PointClip,
			Barycentrics ).Value;

		// Hopefully never used and will be dead code eliminated. TODO VT feedback needs this right now :(
		float4 SvPosition;
		SvPosition.xyz = BaseClip.xyz / BaseClip.w;
		SvPosition.xy = ( float2(0.5, -0.5) * SvPosition.xy + 0.5 ) * NaniteView.ViewSizeAndInvSize.xy + NaniteView.ViewRect.xy;
		SvPosition.w = 1;

		FVertexFactoryInterpolantsVSToPS Interpolants = (FVertexFactoryInterpolantsVSToPS)0;
		FMaterialPixelParameters MaterialParameters = FetchNaniteMaterialPixelParameters( PrimitiveData, InstanceData, InstanceDynamicData, NaniteView, TransformedTri, Cluster, Barycentrics, Interpolants, SvPosition );

		// Now we want to override UV derivatives to be based on uniform UV density so that there are no cracks
#if NUM_TEX_COORD_INTERPOLATORS > 0
		for (uint TexCoordIndex = 0; TexCoordIndex < NUM_TEX_COORD_INTERPOLATORS; ++TexCoordIndex)
		{
			float2 Deriv = CalculateUVDerivativeForDomainPoint(
				NaniteView,
				BaseClip.w,
				InstanceData.NonUniformScale.w,
				UVDensities[TexCoordIndex] );
			MaterialParameters.TexCoords_DDX[TexCoordIndex] = Deriv;
			MaterialParameters.TexCoords_DDY[TexCoordIndex] = Deriv;
		}
#endif

		FPixelMaterialInputs PixelMaterialInputs;
		CalcMaterialParametersEx(
			MaterialParameters,
			PixelMaterialInputs,
			SvPosition,
			MaterialParameters.ScreenPosition,
			true, // bIsFrontFace
			MaterialParameters.WorldPosition_CamRelative,
			MaterialParameters.WorldPosition_NoOffsets_CamRelative );

#if USES_DISPLACEMENT
		// Determine Displacement with a distance fade based on depth
		float NormalizedDisplacement = GetMaterialDisplacement(PixelMaterialInputs);
		float DisplacementFade = saturate(BaseClip.w * DisplacementFadeSlope + DisplacementFadeOffset);

#if DEPTH_ONLY // AKA Shadows
		if (DisplacementFade < 1.0f)
		{
			// Always push to the minimum displacement (instead of 0 displacement) for shadows to prevent self-shadowing
			// artifacts in the fade out region (more details above in ApplyFallbackDisplacement)
			NormalizedDisplacement = 0.0f;
			DisplacementFade = 1.0f;
		}
#endif // DEPTH_ONLY

		const float Displacement = (NormalizedDisplacement - DisplacementParams.Center) * DisplacementParams.Magnitude * DisplacementFade;

#if NANITE_TESSELLATION_NORMALIZE
		float3 Normal = Lerp(
			TransformedTri.Verts[0].NormalPostDeform,
			TransformedTri.Verts[1].NormalPostDeform,
			TransformedTri.Verts[2].NormalPostDeform,
			Barycentrics ).Value;
		Normal = normalize( Normal );

		PointPostDeform += Normal * Displacement;
#else // !NANITE_TESSELLATION_NORMALIZE
		float4 NormalClip = Lerp(
			TransformedTri.Verts[0].NormalClip,
			TransformedTri.Verts[1].NormalClip,
			TransformedTri.Verts[2].NormalClip,
			Barycentrics ).Value;

		float4 PointClip = BaseClip + NormalClip * Displacement;
#endif // NANITE_TESSELLATION_NORMALIZE
#endif // USES_DISPLACEMENT

#if !(USES_DISPLACEMENT && !NANITE_TESSELLATION_NORMALIZE)
		float3 PointWorld = mul( float4( PointPostDeform, 1 ), InstanceDynamicData.LocalToTranslatedWorld ).xyz;
		float4 PointClip = mul( float4( PointWorld, 1 ), NaniteView.TranslatedWorldToClip );
#endif

#if NANITE_PIXEL_PROGRAMMABLE && MATERIALBLENDING_MASKED
		if( GetMaterialMask(PixelMaterialInputs) < 0.0 )
		{
			// Push the point outside the clip volume so fully masked geometry gets rejected.
			PointClip.z = -2.0;
			PointClip.w = -1.0;
		}
#endif

		return PointClip;
	}
#endif

	bool EvaluatePixel( FBarycentrics Barycentrics, float4 SvPosition, bool bBackFace, inout FVisBufferPixel Pixel )
	{
		bool bValid = true;

#if NANITE_PIXEL_PROGRAMMABLE
		FVertexFactoryInterpolantsVSToPS Interpolants = (FVertexFactoryInterpolantsVSToPS)0;

		FMaterialPixelParameters MaterialParameters = FetchNaniteMaterialPixelParameters( PrimitiveData, InstanceData, InstanceDynamicData, NaniteView, TransformedTri, Cluster, Barycentrics, Interpolants, SvPosition );
		MaterialParameters.TwoSidedSign = bBackFace ? -1.0f : 1.0f;

		FPixelMaterialInputs PixelMaterialInputs;
		CalcMaterialParameters(MaterialParameters, PixelMaterialInputs, SvPosition, true /*bIsFrontFace*/);

		// NOTE: Disable PDO in shadow passes (it does undesirable things and has always been disabled in these passes in Unreal)
		// PDO with tessellation would screw up barycentrics.
#if WANT_PIXEL_DEPTH_OFFSET && DEPTH_ONLY == 0 && !NANITE_TESSELLATION
		ApplyPixelDepthOffsetToMaterialParameters(MaterialParameters, PixelMaterialInputs, Pixel.Depth);
#endif

#if MATERIALBLENDING_MASKED
		bValid = GetMaterialMask(PixelMaterialInputs) >= 0.0;
#endif
#endif

		return bValid;
	}
};
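
// Typical flow (summary, inferred from the methods above): the raster shader fills in
// the scene/cluster members, calls InitVertexParameters() and EvaluateWorldPositionOffset()
// while deforming vertices, EvaluateDomain() per tessellated domain point when
// MATERIAL_SHADER_HAS_DOMAIN is set, and EvaluatePixel() per covered pixel via
// TNaniteWritePixel above.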


#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION

groupshared uint GroupVsmPageTableCache[NANITE_VSM_PAGE_TABLE_CACHE_DIM * NANITE_VSM_PAGE_TABLE_CACHE_DIM];

void VsmPageTableStore(uint2 pPage, uint2 Coords)
{
	uint pPagePacked = (pPage.y << 16) | pPage.x;
	uint Index = Coords.y * NANITE_VSM_PAGE_TABLE_CACHE_DIM + Coords.x;
	GroupVsmPageTableCache[Index] = pPagePacked;
}

uint2 VsmPageTableLoad(uint2 Coords)
{
	uint Index = Coords.y * NANITE_VSM_PAGE_TABLE_CACHE_DIM + Coords.x;
	uint pPagePacked = GroupVsmPageTableCache[Index];
	return uint2(pPagePacked & 0xffff, pPagePacked >> 16);
}
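
// Example (illustration only): VsmPageTableStore( uint2(3, 5), Coords ) packs the page
// as 0x00050003, and VsmPageTableLoad( Coords ) unpacks it back to uint2(3, 5). A stored
// x component of 0xffff marks a page that is not valid for rendering (see below).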

void FetchAndCachePageTableEntry(FNaniteView NaniteView, uint2 vPageStart, uint2 vPageEnd, uint CacheIndex)
{
	uint2 CacheCoords = uint2(CacheIndex & 0x7, CacheIndex >> 3);
	if (all(vPageStart + CacheCoords <= vPageEnd))
	{
		FShadowPhysicalPage PhysicalPage = ShadowGetPhysicalPage( CalcPageOffset( NaniteView.TargetLayerIndex, NaniteView.TargetMipLevel, vPageStart + CacheCoords ) );
		uint2 pPageAddress = PhysicalPage.bThisLODValidForRendering ? PhysicalPage.PhysicalAddress : 0xffff;
		VsmPageTableStore(pPageAddress, CacheCoords);
	}
}

struct FCachedPageTable
{
	bool operator()( inout FVisBufferPixel Pixel )
	{
#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
		uint2 pPage = VsmPageTableLoad(Pixel.Position / VSM_PAGE_SIZE);
		if (pPage.x == 0xffff)
		{
			return false;
		}
		Pixel.PhysicalPosition.xy = pPage * VSM_PAGE_SIZE + (Pixel.Position & VSM_PAGE_SIZE_MASK);
#endif
		return true;
	}
};

#endif // VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION

struct FFetchPageTable
{
	uint MipLevel;
	FVirtualSMLevelOffset LevelOffset;

	bool operator()( inout FVisBufferPixel Pixel )
	{
#if VIRTUAL_TEXTURE_TARGET
		if( !VirtualToPhysicalTexelForRendering( LevelOffset, MipLevel, Pixel.Position, Pixel.PhysicalPosition.xy ) )
		{
			// Not committed or should not be rendered into
			return false;
		}
#endif
		return true;
	}
};

uint2 GetVisualizeValues(uint AddValue, uint SubPatch, uint MicroTri)
{
#if VISUALIZE
	uint VisualizeValueMax = 0; // InterlockedMax64 using depth (value associated with surviving fragment)
	uint VisualizeValueAdd = AddValue; // InterlockedAdd32 (value accumulated with every evaluated fragment)

#if NANITE_TESSELLATION
	VisualizeValueMax = 3; // Software Tessellation
#elif SOFTWARE_RASTER
	VisualizeValueMax = 2; // Software Raster
#else
	VisualizeValueMax = 1; // Hardware Raster
#endif

	VisualizeValueMax |= (SubPatch & 0xffu) << 8u;
	VisualizeValueMax |= (MicroTri & 0xffu) << 16u;

	return uint2(VisualizeValueMax, VisualizeValueAdd);
#else
	return 0;
#endif
}

uint2 GetVisualizeValues()
{
	return GetVisualizeValues(1u /* AddValue */, 0u /* SubPatch */, 0u /* MicroTri */);
}