2856 lines
99 KiB
HLSL
2856 lines
99 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
// This must be defined before including Common.ush (see GetShadowReplaceState)
|
|
#define SHADOW_DEPTH_SHADER DEPTH_ONLY
|
|
|
|
#define SPLIT_WORK_QUEUE NANITE_TESSELLATION // TODO: Remove once shader rewriter has been fixed (UE-202409)
|
|
|
|
#include "NaniteRasterizationCommon.ush"
|
|
#include "../VirtualShadowMaps/VirtualShadowMapPageAccessCommon.ush"
|
|
#include "../VirtualShadowMaps/VirtualShadowMapPageOverlap.ush"
|
|
#include "../MaterialCache/MaterialCacheCommon.ush"
|
|
#include "../ComputeShaderUtils.ush"
|
|
#include "../Random.ush"
|
|
#include "../Matrices.ush"
|
|
|
|
#if NANITE_TESSELLATION
|
|
#include "NaniteTessellation.ush"
|
|
#include "NaniteDice.ush"
|
|
#endif
|
|
|
|
#ifndef WORKGRAPH_NODE
|
|
#define WORKGRAPH_NODE 0
|
|
#endif
|
|
|
|
#if WORKGRAPH_NODE
|
|
#include "../ShaderBundleWorkGraphCommon.ush"
|
|
#endif
|
|
|
|
#define CONSTANT_DIR ( 0 && !VIRTUAL_TEXTURE_TARGET )
|
|
#define CONSTANT_DIR_RECT ( 1 && !VIRTUAL_TEXTURE_TARGET )
|
|
#define BRICK_TRACE_WORK_REDISTRIBUTION 1
|
|
#define BRICK_TRACE_TRANSPOSE 0
|
|
#define BRICK_TRACE_APPROXIMATE_DIVIDE 0 // Only good up to ~30x30px bricks
|
|
|
|
// Update this GUID to bump and recompile all Nanite rasterization material shaders
|
|
// Merge conflicts on this line should be resolved by generating a new GUID
|
|
#pragma message("UESHADERMETADATA_VERSION A6174FDD-04E8-4C49-A97C-18750449C462")
|
|
|
|
#if PIXELSHADER
|
|
ALLOW_NO_PS_EXPORT
|
|
#endif
|
|
|
|
#ifndef NANITE_MESH_SHADER
|
|
#define NANITE_MESH_SHADER 0
|
|
#endif
|
|
|
|
#ifndef NANITE_PRIM_SHADER
|
|
#define NANITE_PRIM_SHADER 0
|
|
#endif
|
|
|
|
#ifndef NANITE_VERT_REUSE_BATCH
|
|
#define NANITE_VERT_REUSE_BATCH 0
|
|
#endif
|
|
|
|
#ifndef NANITE_TWO_SIDED
|
|
#define NANITE_TWO_SIDED 0
|
|
#endif
|
|
|
|
#define NANITE_HW_RASTER_INTERPOLATE_DEPTH (DEPTH_ONLY)
|
|
|
|
#if NANITE_VERT_REUSE_BATCH || NANITE_VOXELS
|
|
#define THREADGROUP_SIZE 32
|
|
#else
|
|
#define THREADGROUP_SIZE 64
|
|
#endif
|
|
|
|
#if COMPUTESHADER && (NANITE_PIXEL_PROGRAMMABLE && !NANITE_TESSELLATION) || NANITE_VOXELS
|
|
MAX_OCCUPANCY
|
|
DISABLE_TARGET_OCCUPANCY_WARNING
|
|
#endif
|
|
|
|
#if COMPUTESHADER && (NANITE_PIXEL_PROGRAMMABLE || NANITE_TESSELLATION)
|
|
DISABLE_POTENTIALLY_UNINITIALIZED_WARNING
|
|
#endif
|
|
|
|
HOIST_DESCRIPTORS
|
|
|
|
#include "/Engine/Public/RootConstants.ush"
|
|
|
|
uint GetRasterBin() { return GetRootConstant0(); }
|
|
|
|
RWStructuredBuffer<FNaniteStats> OutStatsBuffer;
|
|
StructuredBuffer<FNaniteRasterBinMeta> RasterBinMeta;
|
|
StructuredBuffer<uint2> RasterBinData;
|
|
|
|
// Fetches the software-raster bin entry for the given bin-local cluster index.
// .x = VisibleIndex
// .y = RangeStart
// .z = RangeEnd
// .w = MaterialFlags
uint4 FetchSWRasterBin(const uint ClusterIndex)
{
	// Load the bin metadata element once instead of re-fetching the same
	// structured buffer entry for each individual field.
	const FNaniteRasterBinMeta BinMeta = RasterBinMeta[GetRasterBin()];

	const uint2 PackedData = RasterBinData[BinMeta.ClusterOffset + ClusterIndex].xy;

	const uint VisibleIndex	= PackedData.x;
	// Triangle sub-range is packed as 16:16 (start in the high half, end in the low half).
	const uint RangeStart	= PackedData.y >> 16u;
	const uint RangeEnd		= PackedData.y & 0xFFFFu;

	// Material flags occupy the low 16 bits of MaterialFlags_DepthBlock.
	return uint4(VisibleIndex, RangeStart, RangeEnd, BinMeta.MaterialFlags_DepthBlock & 0xFFFFu);
}
|
|
|
|
// Fetches the hardware-raster bin entry for the given bin-local cluster index.
// .x = VisibleIndex
// .y = RangeStart
// .z = RangeEnd
// .w = MaterialFlags
uint4 FetchHWRasterBin(const uint ClusterIndex)
{
	// Load the bin metadata element once instead of re-fetching the same
	// structured buffer entry three times for individual fields.
	const FNaniteRasterBinMeta BinMeta = RasterBinMeta[GetRasterBin()];

	const uint RasterBinCapacity = BinMeta.BinSWCount + BinMeta.BinHWCount;
	const uint2 PackedData = RasterBinData[BinMeta.ClusterOffset + ((RasterBinCapacity - 1) - ClusterIndex)].xy; // HW clusters are written from the top

	const uint VisibleIndex	= PackedData.x;
	// Triangle sub-range is packed as 16:16 (start in the high half, end in the low half).
	const uint RangeStart	= PackedData.y >> 16u;
	const uint RangeEnd		= PackedData.y & 0xFFFFu;

	// Material flags occupy the low 16 bits of MaterialFlags_DepthBlock.
	return uint4(VisibleIndex, RangeStart, RangeEnd, BinMeta.MaterialFlags_DepthBlock & 0xFFFFu);
}
|
|
|
|
// Builds the ViewState used by material evaluation: starts from the globally
// bound view and overrides every transform/origin with this Nanite view's
// values, since one dispatch may rasterize clusters for many views.
ViewState ResolveView(FNaniteView NaniteView)
{
	ViewState Ret = ResolveView();

	// Current-frame transforms
	Ret.SVPositionToTranslatedWorld = NaniteView.SVPositionToTranslatedWorld;
	Ret.ViewToTranslatedWorld = NaniteView.ViewToTranslatedWorld;
	Ret.TranslatedWorldToView = NaniteView.TranslatedWorldToView;
	Ret.TranslatedWorldToClip = NaniteView.TranslatedWorldToClip;
	Ret.ViewToClip = NaniteView.ViewToClip;
	Ret.ClipToWorld = NaniteView.ClipToWorld;

	// Previous-frame transforms
	Ret.PrevTranslatedWorldToView = NaniteView.PrevTranslatedWorldToView;
	Ret.PrevTranslatedWorldToClip = NaniteView.PrevTranslatedWorldToClip;
	Ret.PrevViewToClip = NaniteView.PrevViewToClip;
	Ret.PrevClipToWorld = NaniteView.PrevClipToWorld;

	// Viewport rect/size and translations/origins
	Ret.ViewRectMin = (float4)NaniteView.ViewRect;
	Ret.ViewSizeAndInvSize = NaniteView.ViewSizeAndInvSize;
	Ret.PreViewTranslation = NaniteView.PreViewTranslation;
	Ret.PrevPreViewTranslation = NaniteView.PrevPreViewTranslation;
	Ret.ViewForward = NaniteView.ViewForward;
	Ret.ViewOriginHigh = NaniteView.ViewOriginHigh;
	Ret.NearPlane = NaniteView.NearPlane;

	// HACK: This fixes some material nodes for shadows, as shadow views borrow some view uniforms from the closest
	// camera view, rather than exposing their own parameters.
	Ret.WorldCameraOrigin = DFFastSubtract(NaniteView.CullingViewOriginTranslatedWorld, NaniteView.PreViewTranslation);

#if VIEW_HAS_TILEOFFSET_DATA
	// Keep the tile-offset mirrors consistent with the double-float values set above.
	Ret.TileOffset.PreViewTranslation = DFToTileOffset(Ret.PreViewTranslation);
	Ret.TileOffset.PrevPreViewTranslation = DFToTileOffset(Ret.PrevPreViewTranslation);
	//Ret.TileOffset.WorldViewOrigin = DFToTileOffset(Ret.WorldViewOrigin);
	//Ret.TileOffset.PrevWorldViewOrigin = DFToTileOffset(Ret.PrevWorldViewOrigin);
	Ret.TileOffset.WorldCameraOrigin = DFToTileOffset(Ret.WorldCameraOrigin);
	//Ret.TileOffset.PrevWorldCameraOrigin = DFToTileOffset(Ret.PrevWorldCameraOrigin);
#endif

	return Ret;
}
|
|
|
|
// Default cull mode is CW. If this returns true, CCW culling is required
bool ReverseWindingOrder(FNaniteView NaniteView, FPrimitiveSceneData PrimitiveData, FInstanceSceneData InstanceData)
{
	// An odd number of negative scale components flips the handedness of the
	// instance transform, which flips the triangle winding order.
	bool bFlip = InstanceData.DeterminantSign < 0.0f;

#if SUPPORT_REVERSE_CULLING_IN_NANITE
	// The primitive itself may elect to reverse culling; each contributing
	// source toggles the result (logical XOR chain).
	const bool bPrimitiveReverse = (PrimitiveData.Flags & PRIMITIVE_SCENE_DATA_FLAG_REVERSE_CULLING) != 0;
	bFlip = (bFlip != bPrimitiveReverse);
#endif

	// The view can also request reversed culling.
	const bool bViewReverse = (NaniteView.Flags & NANITE_VIEW_FLAG_REVERSE_CULLING) != 0;

	return (bFlip != bViewReverse);
}
|
|
|
|
StructuredBuffer< uint2 > InTotalPrevDrawClusters;
|
|
Buffer<uint> InClusterOffsetSWHW;
|
|
|
|
// A contiguous sub-range of triangles within a cluster.
struct FTriRange
{
	// Index of the first triangle in the range.
	uint Start;
	// Number of triangles in the range. A value of 0 is treated by callers as
	// "the entire cluster" (see ClusterRasterize).
	uint Num;
};
|
|
|
|
// Remaps a bin-local SW cluster index to its visible-cluster index (in place)
// and returns the triangle sub-range assigned to this raster bin.
FTriRange GetIndexAndTriRangeSW( inout uint VisibleIndex )
{
	const uint4 BinData = FetchSWRasterBin( VisibleIndex );
	VisibleIndex = BinData.x;

	FTriRange Range;
	Range.Start = BinData.y;
	Range.Num   = BinData.z - BinData.y;
	return Range;
}
|
|
|
|
// Remaps a bin-local HW cluster index to its visible-cluster index (in place)
// and returns the triangle sub-range assigned to this raster bin.
FTriRange GetIndexAndTriRangeHW( inout uint VisibleIndex )
{
	const uint4 BinData = FetchHWRasterBin( VisibleIndex );
	VisibleIndex = BinData.x;

	FTriRange Range;
	Range.Start = BinData.y;
	Range.Num   = BinData.z - BinData.y;
	return Range;
}
|
|
|
|
// Builds the FRaster state for one visible cluster: the clip-space -> screen
// (subpixel) viewport transform, the scissor rect, and — for virtual shadow
// map targets — the virtual/physical page mapping.
FRaster CreateRaster( FNaniteView NaniteView, FVisibleCluster VisibleCluster )
{
	FRaster Raster;
	Raster.ScissorRect = NaniteView.ViewRect;

	// DX11 spec
	// x = (x + 1) * ViewSize.x * 0.5 + ViewRect.x;
	// y = (1 - y) * ViewSize.y * 0.5 + ViewRect.y;
	Raster.ViewportScale = float2(0.5, -0.5) * NaniteView.ViewSizeAndInvSize.xy;
	Raster.ViewportBias = 0.5 * NaniteView.ViewSizeAndInvSize.xy + NaniteView.ViewRect.xy;

#if VIRTUAL_TEXTURE_TARGET
	// Scalar
	Raster.vPage = VisibleCluster.vPage;
	Raster.pPage = 0;
	Raster.bSinglePage = all( VisibleCluster.vPage == VisibleCluster.vPageEnd );
	if (Raster.bSinglePage)
	{
		// Resolve the single virtual page to its physical page up front.
		// 0xffff marks a page that is not valid for rendering at this LOD.
		FShadowPhysicalPage PhysicalPage = ShadowGetPhysicalPage( CalcPageOffset( NaniteView.TargetLayerIndex, NaniteView.TargetMipLevel, Raster.vPage ) );
		Raster.pPage = PhysicalPage.bThisLODValidForRendering ? PhysicalPage.PhysicalAddress : 0xffff;
	}

	// Virtual shadow maps can scatter instances into different physical pages for caching purposes
	const bool bCacheAsStatic = (VisibleCluster.Flags & NANITE_CULLING_FLAG_CACHE_AS_STATIC) != 0u;
	Raster.ArrayIndex = bCacheAsStatic ? GetVirtualShadowMapStaticArrayIndex() : 0;

	if (!Raster.bSinglePage)
	{
#if NANITE_LATE_VSM_PAGE_TRANSLATION
		// Page translation happens late (per pixel), so scissor in a space
		// local to the cluster's page range.
		Raster.ScissorRect.xy = 0;
		Raster.ScissorRect.zw = (VisibleCluster.vPageEnd - VisibleCluster.vPage) * VSM_PAGE_SIZE + VSM_PAGE_SIZE;
#else
		// Scissor to the cluster's virtual page rect (inclusive of the end page).
		Raster.vPage = 0;
		Raster.ScissorRect.xy = VisibleCluster.vPage * VSM_PAGE_SIZE;
		Raster.ScissorRect.zw = VisibleCluster.vPageEnd * VSM_PAGE_SIZE + VSM_PAGE_SIZE;
#endif
	}
	else
	{
		// Single page: scissor directly to the resolved physical page.
		Raster.ScissorRect.xy = Raster.pPage * VSM_PAGE_SIZE;
		Raster.ScissorRect.zw = Raster.ScissorRect.xy + VSM_PAGE_SIZE;
	}

	// Fold the virtual -> physical page translation into the viewport bias.
	Raster.vTranslation = ( (float2)Raster.pPage - (float2)Raster.vPage ) * VSM_PAGE_SIZE;
	Raster.ViewportBias += Raster.vTranslation;
#endif

#if !NANITE_VOXELS
	// Scale the viewport transform into fixed-point subpixel units.
	Raster.ViewportScale *= NANITE_SUBPIXEL_SAMPLES;
	Raster.ViewportBias *= NANITE_SUBPIXEL_SAMPLES;
	Raster.ViewportBias += 0.5f;
#endif

	return Raster;
}
|
|
|
|
#if PATCHES
#define VERTEX_CACHE_SIZE 120 // (MaxTessFactor+1)*(MaxTessFactor+2)/2
#else
#define VERTEX_CACHE_SIZE 256
#endif
// Subpixel-space positions for the fixed-function path, one slot per cluster vertex.
groupshared float3 GroupVerts[VERTEX_CACHE_SIZE];

// A fully transformed vertex together with its subpixel-space clip position,
// as cached between vertex transform and triangle setup.
struct FCachedVertex
{
	FNaniteTransformedVert TransformedVert;
	float4 PointSubpixelClip;
};

// 64 rolling window vertex cache for pixel programmable shaders.
// The expectation is that most materials will only require PointSubpixelClip and maybe 1/2 UV sets and the rest will be DCE'd
// Stored as one groupshared array per FCachedVertex field (structure-of-arrays)
// so individual unused fields can be stripped independently.
groupshared float3 VertexCache_PointLocal[64];
groupshared float3 VertexCache_PointPostDeform[64];
groupshared float3 VertexCache_PrevPointPostDeform[64];
groupshared float3 VertexCache_PointWorld[64];
groupshared float3 VertexCache_PointWorld_NoOffset[64];
groupshared float4 VertexCache_PointClip[64];
groupshared half3 VertexCache_NormalPostDeform[64];
groupshared float4 VertexCache_NormalClip[64];
groupshared half4 VertexCache_TangentXAndSignPostDeform[64];
groupshared half4 VertexCache_TangentXAndSign[64];
groupshared float3 VertexCache_TangentZ[64];
groupshared float4 VertexCache_Color[64];
groupshared float2 VertexCache_TexCoords0[64];
groupshared float2 VertexCache_TexCoords1[64];
groupshared float2 VertexCache_TexCoords2[64];
groupshared float2 VertexCache_TexCoords3[64];
groupshared float2 VertexCache_CustomizedUVs0[64];
groupshared float2 VertexCache_CustomizedUVs1[64];
groupshared float2 VertexCache_CustomizedUVs2[64];
groupshared float2 VertexCache_CustomizedUVs3[64];
groupshared float4 VertexCache_PointSubpixelClip[64];
|
|
|
|
// Size guard: if FCachedVertex gains/loses fields, the per-field LDS mirror below must be updated in lockstep.
HLSL_STATIC_ASSERT( sizeof( FCachedVertex ) == 220 + 8 * NUM_TEX_COORD_INTERPOLATORS, "Unexpected size of FCachedVertex. Update StoreVertexToLDS to reflect changes." );
// Writes one cached vertex into the 64-entry rolling LDS vertex cache.
// The cache slot is VertexIndex modulo 64, so newer vertices overwrite older ones.
void StoreVertexToLDS( uint VertexIndex, FCachedVertex Vertex )
{
	const uint CacheIndex = VertexIndex & 63u;

	VertexCache_PointLocal[CacheIndex] = Vertex.TransformedVert.PointLocal;
	VertexCache_PointPostDeform[CacheIndex] = Vertex.TransformedVert.PointPostDeform;
	VertexCache_PrevPointPostDeform[CacheIndex] = Vertex.TransformedVert.PrevPointPostDeform;
	VertexCache_PointWorld[CacheIndex] = Vertex.TransformedVert.PointWorld;
	VertexCache_PointWorld_NoOffset[CacheIndex] = Vertex.TransformedVert.PointWorld_NoOffset;
	VertexCache_PointClip[CacheIndex] = Vertex.TransformedVert.PointClip;
	VertexCache_NormalClip[CacheIndex] = Vertex.TransformedVert.NormalClip;
	VertexCache_NormalPostDeform[CacheIndex] = Vertex.TransformedVert.TangentBasis.TangentZ;
	VertexCache_TangentXAndSignPostDeform[CacheIndex] = Vertex.TransformedVert.TangentBasis.TangentXAndSign;
	VertexCache_TangentXAndSign[CacheIndex] = Vertex.TransformedVert.RawAttributeData.TangentXAndSign;
	VertexCache_TangentZ[CacheIndex] = Vertex.TransformedVert.RawAttributeData.TangentZ;
	VertexCache_Color[CacheIndex] = Vertex.TransformedVert.RawAttributeData.Color;
	VertexCache_TexCoords0[CacheIndex] = Vertex.TransformedVert.RawAttributeData.TexCoords[0];
	VertexCache_TexCoords1[CacheIndex] = Vertex.TransformedVert.RawAttributeData.TexCoords[1];
	VertexCache_TexCoords2[CacheIndex] = Vertex.TransformedVert.RawAttributeData.TexCoords[2];
	VertexCache_TexCoords3[CacheIndex] = Vertex.TransformedVert.RawAttributeData.TexCoords[3];

	// Customized UV arrays only exist for the material's interpolator count.
#if NUM_TEX_COORD_INTERPOLATORS > 0
	VertexCache_CustomizedUVs0[CacheIndex] = Vertex.TransformedVert.CustomizedUVs[0];
#endif
#if NUM_TEX_COORD_INTERPOLATORS > 1
	VertexCache_CustomizedUVs1[CacheIndex] = Vertex.TransformedVert.CustomizedUVs[1];
#endif
#if NUM_TEX_COORD_INTERPOLATORS > 2
	VertexCache_CustomizedUVs2[CacheIndex] = Vertex.TransformedVert.CustomizedUVs[2];
#endif
#if NUM_TEX_COORD_INTERPOLATORS > 3
	VertexCache_CustomizedUVs3[CacheIndex] = Vertex.TransformedVert.CustomizedUVs[3];
#endif

	VertexCache_PointSubpixelClip[CacheIndex] = Vertex.PointSubpixelClip;
}
|
|
|
|
// Size guard: if FCachedVertex gains/loses fields, the per-field LDS reads below must be updated in lockstep.
HLSL_STATIC_ASSERT( sizeof( FCachedVertex ) == 220 + 8 * NUM_TEX_COORD_INTERPOLATORS, "Unexpected size of FCachedVertex. Update LoadVertexFromLDS to reflect changes." );
// Reads one cached vertex back from the 64-entry rolling LDS vertex cache
// (slot = VertexIndex modulo 64). The caller is responsible for ensuring the
// slot still holds VertexIndex and has not been overwritten by a newer vertex.
FCachedVertex LoadVertexFromLDS( uint VertexIndex )
{
	const uint CacheIndex = VertexIndex & 63u;

	FCachedVertex Result;
	// VertIndex is not stored in LDS; reconstruct it from the argument.
	Result.TransformedVert.VertIndex = VertexIndex;
	Result.TransformedVert.PointLocal = VertexCache_PointLocal[CacheIndex];
	Result.TransformedVert.PointPostDeform = VertexCache_PointPostDeform[CacheIndex];
	Result.TransformedVert.PrevPointPostDeform = VertexCache_PrevPointPostDeform[CacheIndex];
	Result.TransformedVert.PointWorld = VertexCache_PointWorld[CacheIndex];
	Result.TransformedVert.PointWorld_NoOffset = VertexCache_PointWorld_NoOffset[CacheIndex];
	Result.TransformedVert.PointClip = VertexCache_PointClip[CacheIndex];
	Result.TransformedVert.NormalClip = VertexCache_NormalClip[CacheIndex];
	Result.TransformedVert.TangentBasis.TangentZ = VertexCache_NormalPostDeform[CacheIndex];
	Result.TransformedVert.TangentBasis.TangentXAndSign = VertexCache_TangentXAndSignPostDeform[CacheIndex];
	Result.TransformedVert.RawAttributeData.TangentXAndSign = VertexCache_TangentXAndSign[CacheIndex];
	Result.TransformedVert.RawAttributeData.TangentZ = VertexCache_TangentZ[CacheIndex];
	Result.TransformedVert.RawAttributeData.Color = VertexCache_Color[CacheIndex];
	Result.TransformedVert.RawAttributeData.TexCoords[0] = VertexCache_TexCoords0[CacheIndex];
	Result.TransformedVert.RawAttributeData.TexCoords[1] = VertexCache_TexCoords1[CacheIndex];
	Result.TransformedVert.RawAttributeData.TexCoords[2] = VertexCache_TexCoords2[CacheIndex];
	Result.TransformedVert.RawAttributeData.TexCoords[3] = VertexCache_TexCoords3[CacheIndex];

	// Customized UV arrays only exist for the material's interpolator count.
#if NUM_TEX_COORD_INTERPOLATORS > 0
	Result.TransformedVert.CustomizedUVs[0] = VertexCache_CustomizedUVs0[CacheIndex];
#endif
#if NUM_TEX_COORD_INTERPOLATORS > 1
	Result.TransformedVert.CustomizedUVs[1] = VertexCache_CustomizedUVs1[CacheIndex];
#endif
#if NUM_TEX_COORD_INTERPOLATORS > 2
	Result.TransformedVert.CustomizedUVs[2] = VertexCache_CustomizedUVs2[CacheIndex];
#endif
#if NUM_TEX_COORD_INTERPOLATORS > 3
	Result.TransformedVert.CustomizedUVs[3] = VertexCache_CustomizedUVs3[CacheIndex];
#endif

	Result.PointSubpixelClip = VertexCache_PointSubpixelClip[CacheIndex];

	return Result;
}
|
|
|
|
// Compute-shader rasterization of one visible cluster for the current raster
// bin. On entry, VisibleIndex is the bin-local cluster index; it is remapped to
// the visible-cluster index by GetIndexAndTriRangeSW. Three mutually exclusive
// paths are compiled: tessellation, pixel-programmable, and fixed-function.
void ClusterRasterize( uint VisibleIndex, uint GroupThreadIndex )
{
	FTriRange TriRange = GetIndexAndTriRangeSW( VisibleIndex );

	// Should be all scalar.
	FVisibleCluster VisibleCluster = GetVisibleCluster( VisibleIndex, VIRTUAL_TEXTURE_TARGET );

	FPrimitiveSceneData PrimitiveData;
	FInstanceSceneData InstanceData;
	GetNaniteMaterialSceneData(VisibleCluster, PrimitiveData, InstanceData);

	FNaniteView NaniteView = GetNaniteView( VisibleCluster.ViewId );
#if ALWAYS_EVALUATE_WORLD_POSITION_OFFSET
	const bool bEvaluateWPO = true;
#else
	// Honor the per-cluster WPO enable flag decided during culling.
	const bool bEvaluateWPO = (VisibleCluster.Flags & NANITE_CULLING_FLAG_ENABLE_WPO) != 0;
#endif
	const bool bReverseWindingOrder = ReverseWindingOrder(NaniteView, PrimitiveData, InstanceData);

#if NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE
	// Programmable materials read the global ResolvedView; point it at this cluster's view.
	ResolvedView = ResolveView(NaniteView);
#endif

	FInstanceDynamicData InstanceDynamicData = CalculateInstanceDynamicData(NaniteView, InstanceData);

	FCluster Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);
	// A zero-length range from the bin means "rasterize every triangle in the cluster".
	if( TriRange.Num == 0 )
		TriRange.Num = Cluster.NumTris;

	FMaterialShader MaterialShader;
	MaterialShader.PrimitiveData = PrimitiveData;
	MaterialShader.InstanceData = InstanceData;
	MaterialShader.InstanceDynamicData = InstanceDynamicData;
	MaterialShader.NaniteView = NaniteView;
	MaterialShader.Cluster = Cluster;
	MaterialShader.VisibleCluster = VisibleCluster;
	MaterialShader.VertTransforms = CalculateNaniteVertexTransforms( InstanceData, InstanceDynamicData, NaniteView );

#if MATERIAL_SHADER_HAS_DISPLACEMENT
	MaterialShader.InitDisplacement(RasterBinMeta[GetRasterBin()].MaterialDisplacementParams);
#endif

	FRaster Raster = CreateRaster( NaniteView, VisibleCluster );

#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
	// Multi-page clusters: cooperatively prefetch the VSM page table entries
	// into LDS so the late per-pixel page translation can read them cheaply.
	if (!Raster.bSinglePage)
	{
		UNROLL
		for (uint Offset = 0; Offset < NANITE_VSM_PAGE_TABLE_CACHE_DIM * NANITE_VSM_PAGE_TABLE_CACHE_DIM; Offset += THREADGROUP_SIZE)
		{
			FetchAndCachePageTableEntry(NaniteView, VisibleCluster.vPage, VisibleCluster.vPageEnd, Offset + GroupThreadIndex);
		}
		GroupMemoryBarrierWithGroupSync();
	}
#endif

#if NANITE_TESSELLATION
	float LowTessDistance = 0.0f;
#if USES_DISPLACEMENT
	LowTessDistance = CalcDisplacementLowTessDistance(PrimitiveData, InstanceData, NaniteView);
#endif

	// One triangle (patch) per lane.
	uint TriIndex = TriRange.Start + GroupThreadIndex;
	bool bTriValid = GroupThreadIndex < TriRange.Num;

	uint3 VertIndexes = 0;
	if( bTriValid )
	{
		VertIndexes = DecodeTriangleIndices(Cluster, TriIndex);
	}

	// Deduplicate corner vertices across the wave so each unique vertex is
	// transformed once; VertLaneIndexes maps each corner back to its owning lane.
	uint NumUniqueVerts;
	uint LaneVertIndex;
	uint3 VertLaneIndexes;
	DeduplicateVertIndexes( VertIndexes, GroupThreadIndex, bTriValid, NumUniqueVerts, LaneVertIndex, VertLaneIndexes );

	FNaniteTransformedVert Vert;
	float3 PointView;

	if (GroupThreadIndex < NumUniqueVerts)
	{
		Vert = FetchTransformedNaniteVertex( PrimitiveData, InstanceData, GetInstanceViewData(InstanceData.InstanceId, NaniteView.SceneRendererPrimaryViewId), MaterialShader.VertTransforms, Cluster, VisibleCluster, LaneVertIndex, bEvaluateWPO );
		PointView = mul( float4( Vert.PointWorld, 1 ), NaniteView.TranslatedWorldToView ).xyz;
	}

	// Gather this lane's three view-space triangle corners from the owning lanes.
	float3 TriPointView[3];
	TriPointView[0] = WaveReadLaneAt( PointView, VertLaneIndexes[0] );
	TriPointView[1] = WaveReadLaneAt( PointView, VertLaneIndexes[1] );
	TriPointView[2] = WaveReadLaneAt( PointView, VertLaneIndexes[2] );

	float3 TessFactors = GetTessFactors( NaniteView, TriPointView, LowTessDistance );

	const uint ImmediateSplitLimit = 8;

	// Patches with small enough tess factors are diced immediately; larger
	// ones must be split (recursively or via the split work queue).
	bool bCanDice = max3( TessFactors.x, TessFactors.y, TessFactors.z ) <= NANITE_TESSELLATION_TABLE_IMMEDIATE_SIZE;

	if( WaveActiveAnyTrue( bCanDice ) )
	{
		FDiceTask DiceTask;
		DiceTask.Raster = Raster;
		DiceTask.Shader = MaterialShader;
		DiceTask.PixelValue = ( VisibleIndex + 1 ) << 7;
		DiceTask.VisualizeValues = GetVisualizeValues();
		DiceTask.UVDensities = GetMaterialUVDensities( Cluster, InstanceData.PrimitiveId, TriRange.Start );
		DiceTask.bReverseWinding = bReverseWindingOrder;
		DiceTask.Vert = Vert;

		DiceTask.CacheToLDS();

		uint NumVerts = 0;
		uint NumTris = 0;
		if( bTriValid && bCanDice )
		{
			DiceTask.Init( TessFactors, VertLaneIndexes, TriIndex );
			NumVerts = DiceTask.TessellatedPatch.GetNumVerts();
			NumTris = DiceTask.TessellatedPatch.GetNumTris();
		}

		BRANCH
		if ((RenderFlags & NANITE_RENDER_FLAG_WRITE_STATS) != 0u)
		{
			WaveInterlockedAdd(OutStatsBuffer[0].NumDicedTrianglesClusters, NumTris);
			WaveInterlockedAddScalar(OutStatsBuffer[0].NumImmediatePatches, 1);
		}

		// Redistribute the diced triangles evenly across the wave before rasterizing.
		DistributeWork( DiceTask, GroupThreadIndex, NumTris );
	}

	if( VIRTUAL_TEXTURE_TARGET == 0 )
	{
		// Non-virtual targets split over-tessellated patches in-place.
		FClusterSplitTask SplitTask;

		uint NumVerts = 0;
		uint NumTris = 0;
		if( bTriValid && !bCanDice )
		{
			float3 SplitFactors = min( GetSplitFactors( TessFactors ), ImmediateSplitLimit );

			SplitTask.Init( SplitFactors, VisibleIndex, TriIndex );
			NumVerts = SplitTask.TessellatedPatch.GetNumVerts();
			NumTris = SplitTask.TessellatedPatch.GetNumTris();
		}

		DistributeWork( SplitTask, GroupThreadIndex, NumTris );
	}
	else if( bTriValid && !bCanDice )
	{
		// Virtual targets enqueue over-tessellated patches onto the split work queue instead.
		uint WriteOffset = SplitWorkQueue.Add();
		if( WriteOffset < SplitWorkQueue.Size )
		{
			uint4 Encoded;
			Encoded.x = ( VisibleIndex << 7 ) | TriIndex;
			Encoded.y = BarycentricMax;
			Encoded.z = BarycentricMax << 16;
			Encoded.w = 0;

			// ~0u is reserved as an invalid marker in the queue; the payload must never equal it.
			checkSlow(
				Encoded.x != ~0u &&
				Encoded.y != ~0u &&
				Encoded.z != ~0u &&
				Encoded.w != ~0u );

			SplitWorkQueue.DataBuffer_Store4( WriteOffset * 16, Encoded );
		}
	}

#elif NANITE_PIXEL_PROGRAMMABLE

	// We can assume wave size >= 32 here as we force HW raster for hardware that can use smaller wave sizes

	FCachedVertex TriangleVerts[3];
	FNaniteTransformedVert CachedTransformedVerts[2];

	// TODO: DXC doesn't manage to strip all the unused groupshared arrays, which is very bad for performance.
	// When manually stripped, the groupshared version is faster, so we should revisit once this has been fixed.
	const bool bGroupsharedCache = !COMPILER_DXC;

	uint NumCachedVerts = 0;
	// Process triangles 32 at a time (one per lane) with a rolling vertex cache.
	for( uint FirstTriIndex = 0; FirstTriIndex < TriRange.Num; FirstTriIndex += 32 )
	{
		const uint LocalTriIndex = FirstTriIndex + GroupThreadIndex;
		const uint TriIndex = TriRange.Start + LocalTriIndex;
		const bool bTriValid = LocalTriIndex < TriRange.Num;

		uint3 VertIndexes = 0;
		if( bTriValid )
		{
			VertIndexes = DecodeTriangleIndices(Cluster, TriIndex);
			if( bReverseWindingOrder )
				VertIndexes.yz = VertIndexes.zy;
		}

		// First attempt: read the three corners from whatever is already cached.
		// Corners not yet cached are fixed up after the refill loop below.
		UNROLL
		for( uint k = 0; k < 3; k++ )
		{
			const uint Index = VertIndexes[k];

			BRANCH
			if( bGroupsharedCache )
			{
				TriangleVerts[k] = LoadVertexFromLDS( Index );
			}
			else
			{
				// Register-based cache: the two most recent 32-vertex batches
				// live in CachedTransformedVerts[0] (newest) and [1] (older).
				const FNaniteTransformedVert A = WaveReadLaneAt( CachedTransformedVerts[0], Index & 31 );
				const FNaniteTransformedVert B = WaveReadLaneAt( CachedTransformedVerts[1], Index & 31 );

				FCachedVertex Vert;
				if( (Index - NumCachedVerts) & 32 )
					Vert.TransformedVert = A;
				else
					Vert.TransformedVert = B;

				Vert.PointSubpixelClip = VertexCache_PointSubpixelClip[Index & 63];

				TriangleVerts[k] = Vert;
			}
		}

		const uint MaxVertIndex = max( VertIndexes.y, VertIndexes.z );

		// Refill the rolling vertex cache (32 vertices per iteration) until
		// every corner referenced by any lane is available.
		while( WaveActiveAnyTrue( MaxVertIndex >= NumCachedVerts ) )
		{
			// Transform and store next batch of vertices
			{
				const uint LaneVertIndex = NumCachedVerts + GroupThreadIndex;

				FCachedVertex Vert;

				BRANCH
				if( LaneVertIndex < Cluster.NumVerts ) // Ideally, we would be testing against the number of verts for the range, not the whole cluster.
				{
					Vert.TransformedVert = FetchTransformedNaniteVertex( PrimitiveData, InstanceData, GetInstanceViewData(InstanceData.InstanceId, NaniteView.SceneRendererPrimaryViewId), MaterialShader.VertTransforms, Cluster, VisibleCluster, LaneVertIndex, bEvaluateWPO );
					Vert.PointSubpixelClip = CalculateSubpixelCoordinates( Raster, Vert.TransformedVert.PointClip );
				}

				GroupMemoryBarrierWithGroupSync();

				BRANCH
				if( bGroupsharedCache )
				{
					StoreVertexToLDS( LaneVertIndex, Vert );
				}
				else
				{
					// Shift the register cache: previous batch moves to [1], newest goes to [0].
					CachedTransformedVerts[1] = CachedTransformedVerts[0];
					CachedTransformedVerts[0] = Vert.TransformedVert;

					VertexCache_PointSubpixelClip[LaneVertIndex & 63] = Vert.PointSubpixelClip;
				}
				GroupMemoryBarrierWithGroupSync();
			}

			// Fix up any corner that was not yet cached when first sampled.
			UNROLL
			for( uint k = 0; k < 3; k++ )
			{
				const uint Index = VertIndexes[k];

				FCachedVertex Vert;
				if( bGroupsharedCache )
				{
					Vert = LoadVertexFromLDS( Index );
				}
				else
				{
					Vert.TransformedVert = WaveReadLaneAt( CachedTransformedVerts[0], Index & 31 ); // After refill any new vertex will be in CachedVertex[0]
					Vert.PointSubpixelClip = VertexCache_PointSubpixelClip[Index & 63];
				}

				if( Index >= NumCachedVerts )
					TriangleVerts[k] = Vert;
			}

			NumCachedVerts += 32;
		}

		float4 Verts[3];
		UNROLL
		for( uint k = 0; k < 3; k++ )
		{
			MaterialShader.TransformedTri.Verts[k] = TriangleVerts[k].TransformedVert;
			Verts[k] = TriangleVerts[k].PointSubpixelClip;
		}

		FRasterTri Tri = SetupTriangle< NANITE_SUBPIXEL_SAMPLES, !NANITE_TWO_SIDED >( Raster.ScissorRect, Verts );

		if( Tri.bIsValid && bTriValid )
		{
			// Pixel value packs (visible cluster index + 1) above the 7-bit triangle index.
			uint PixelValue = (VisibleIndex + 1) << 7;
			PixelValue |= TriIndex;

			uint2 VisualizeValues = GetVisualizeValues();

#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
			if (!Raster.bSinglePage)
			{
				// @lh-todo: Explicitly initialize structs with empty struct fields until DXC/SPIR-V can handle it properly
				TNaniteWritePixel< FMaterialShader, FCachedPageTable > NaniteWritePixel;
				NaniteWritePixel.Raster = Raster;
				NaniteWritePixel.Shader = MaterialShader;
				NaniteWritePixel.PixelValue = PixelValue;
				NaniteWritePixel.VisualizeValues = VisualizeValues;
				RasterizeTri_Adaptive( Tri, NaniteWritePixel );
			}
			else
#endif
			{
				// @lh-todo: Explicitly initialize structs with empty struct fields until DXC/SPIR-V can handle it properly
				TNaniteWritePixel< FMaterialShader > NaniteWritePixel;
				NaniteWritePixel.Raster = Raster;
				NaniteWritePixel.Shader = MaterialShader;
				NaniteWritePixel.PixelValue = PixelValue;
				NaniteWritePixel.VisualizeValues = VisualizeValues;
				RasterizeTri_Adaptive( Tri, NaniteWritePixel );
			}
		}
	}

#else
	// Fixed-function path: transform all cluster vertices into LDS first,
	// then rasterize the triangle range.
	UNROLL
	for( uint i = 0; i < VERTEX_CACHE_SIZE; i += THREADGROUP_SIZE )
	{
		const uint VertIndex = GroupThreadIndex + i;

		BRANCH
		if (VertIndex >= Cluster.NumVerts)
			break;

		// Transform vertex and store in group shared memory.
		FNanitePostDeformVertex InputVert = FetchAndDeformLocalNaniteVertex(PrimitiveData, InstanceData, GetInstanceViewData(InstanceData.InstanceId, NaniteView.SceneRendererPrimaryViewId), Cluster, VisibleCluster, VertIndex, NANITE_NUM_TEXCOORDS_TO_DECODE);
#if MATERIAL_SHADER_HAS_DISPLACEMENT
		MaterialShader.ApplyFallbackDisplacement(InputVert);
#endif

		float3 WorldPositionOffset = 0.0f;
#if NANITE_VERTEX_PROGRAMMABLE
		BRANCH
		if (bEvaluateWPO)
		{
			MaterialShader.InitVertexParameters(InputVert);
			WorldPositionOffset = MaterialShader.EvaluateWorldPositionOffset();
		}
#endif

		const float3 PointTranslatedWorld = mul( float4( InputVert.Position, 1 ), InstanceDynamicData.LocalToTranslatedWorld ).xyz + WorldPositionOffset;
		const float4 PointClip = mul( float4( PointTranslatedWorld, 1 ), NaniteView.TranslatedWorldToClip );

		GroupVerts[VertIndex] = CalculateSubpixelCoordinates( Raster, PointClip ).xyz;
	}

	GroupMemoryBarrierWithGroupSync();

	UNROLL
	for( uint j = 0; j < NANITE_MAX_CLUSTER_TRIANGLES; j += THREADGROUP_SIZE )
	{
		const uint ThreadIndex = GroupThreadIndex + j;
		const uint TriIndex = ThreadIndex + TriRange.Start;

		uint3 VertIndexes = DecodeTriangleIndices(Cluster, TriIndex);
		if( bReverseWindingOrder )
			VertIndexes.yz = VertIndexes.zy;

		float4 Verts[3];
		Verts[0] = float4( GroupVerts[ VertIndexes.x ], 1 );
		Verts[1] = float4( GroupVerts[ VertIndexes.y ], 1 );
		Verts[2] = float4( GroupVerts[ VertIndexes.z ], 1 );

		BRANCH
		if (ThreadIndex >= TriRange.Num)
			break;

		FRasterTri Tri = SetupTriangle< NANITE_SUBPIXEL_SAMPLES, !NANITE_TWO_SIDED >( Raster.ScissorRect, Verts );

		if( Tri.bIsValid )
		{
			// Pixel value packs (visible cluster index + 1) above the 7-bit triangle index.
			uint PixelValue = (VisibleIndex + 1) << 7;
			PixelValue |= TriIndex;

			uint2 VisualizeValues = GetVisualizeValues();

#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
			if (!Raster.bSinglePage)
			{
				// @lh-todo: Explicitly initialize structs with empty struct fields until DXC/SPIR-V can handle it properly
				TNaniteWritePixel< FMaterialShader, FCachedPageTable > NaniteWritePixel;
				NaniteWritePixel.Raster = Raster;
				NaniteWritePixel.Shader = MaterialShader;
				NaniteWritePixel.PixelValue = PixelValue;
				NaniteWritePixel.VisualizeValues = VisualizeValues;
				RasterizeTri_Adaptive( Tri, NaniteWritePixel );
			}
			else
#endif
			{
				// @lh-todo: Explicitly initialize structs with empty struct fields until DXC/SPIR-V can handle it properly
				TNaniteWritePixel< FMaterialShader > NaniteWritePixel;
				NaniteWritePixel.Raster = Raster;
				NaniteWritePixel.Shader = MaterialShader;
				NaniteWritePixel.PixelValue = PixelValue;
				NaniteWritePixel.VisualizeValues = VisualizeValues;
				RasterizeTri_Adaptive( Tri, NaniteWritePixel );
			}
		}
	}
#endif
}
|
|
|
|
// Software rasterization entry point for tessellated patches (NANITE_TESSELLATION).
// Each group consumes up to MaxPatchesPerGroup patches from the current raster bin.
// Setup runs wave-parallel with three lanes per patch (one lane per patch corner);
// per-corner results stay in registers and are fetched later with WaveReadLaneAt.
// Each patch is then tessellated, its verts evaluated through the material domain
// shader into subpixel coordinates (stored in groupshared GroupVerts), and its
// micro-triangles rasterized by the whole group cooperatively.
void PatchRasterize( uint GroupID, uint GroupThreadIndex )
{
#if NANITE_TESSELLATION
	if(GroupThreadIndex >= WaveGetLaneCount()) // Workaround for wave sizes smaller than 32
	{
		return;
	}
	// Effective cooperative group size, clamped to the wave size (see workaround above).
	const uint ThreadGroupSize = min(THREADGROUP_SIZE, WaveGetLaneCount());

	const uint TotalPatches = RasterBinMeta[GetRasterBin()].BinSWCount;

	// Patch range this group is responsible for, clamped against the bin's total.
	const uint PatchStartIndex = min(GroupID * MaxPatchesPerGroup, TotalPatches);
	const uint PatchEndIndex = min(PatchStartIndex + MaxPatchesPerGroup, TotalPatches);
	const uint NumPatches = PatchEndIndex - PatchStartIndex;

	// Stuff that gets calculated during the patch setup phase
	// (kept in per-lane registers; read back cross-lane below via WaveReadLaneAt)
	uint4 Patches_EncodedPatch;
	bool Patches_bReverseWindingOrders;
	FInstanceSceneData Patches_InstanceData;
	FInstanceDynamicData Patches_InstanceDynamicData;
	FSplitPatch Patches_SplitPatch;
	FTessellatedPatch Patches_TessellatedPatch;
	FNaniteVertTransforms Patches_VertTransforms;
	FNaniteTransformedVert Patches_Verts;
	float4 Patches_UVDensities;

	// Setup phase: 3 lanes per patch, one lane per patch corner.
	if (GroupThreadIndex < NumPatches * 3u)
	{
		const uint LocalPatchIndex = GroupThreadIndex / 3u;
		const uint PatchCornerIndex = GroupThreadIndex - LocalPatchIndex * 3u;

		const uint PatchIndex = PatchStartIndex + LocalPatchIndex;
		const uint PatchStartLane = LocalPatchIndex * 3;

		const uint4 RasterBin = FetchSWRasterBin(PatchIndex);
		const uint VisibleIndex = RasterBin.x;

#if NANITE_TESSELLATION_PATCH_REFS
		// Indirect path: the visible patch references an entry in the split work queue.
		const uint2 VisiblePatch = VisiblePatches.Load2(VisibleIndex * 8);
		Patches_EncodedPatch = SplitWorkQueue.DataBuffer_Load4(VisiblePatch.x * 16);
#else
		Patches_EncodedPatch = VisiblePatches.Load4(VisibleIndex * 16);
#endif

		Patches_SplitPatch.Decode(Patches_EncodedPatch);

		const FVisibleCluster VisibleCluster = GetVisibleCluster(Patches_SplitPatch.VisibleClusterIndex, VIRTUAL_TEXTURE_TARGET);
		const FCluster Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);

		FPrimitiveSceneData PrimitiveData;
		GetNaniteMaterialSceneData(VisibleCluster, PrimitiveData, Patches_InstanceData);

		const FNaniteView NaniteView = GetNaniteView(VisibleCluster.ViewId);

		float LowTessDistance = 0.0f;
#if USE_DISPLACEMENT
		LowTessDistance = CalcDisplacementLowTessDistance(PrimitiveData, Patches_InstanceData, NaniteView);
#endif

#if NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE
		// Material evaluation reads the global ResolvedView.
		ResolvedView = ResolveView(NaniteView);
#endif

		Patches_InstanceDynamicData = CalculateInstanceDynamicData(NaniteView, Patches_InstanceData);

		Patches_bReverseWindingOrders = ReverseWindingOrder(NaniteView, PrimitiveData, Patches_InstanceData);

#if NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE
		Patches_VertTransforms = CalculateNaniteVertexTransforms(Patches_InstanceData, Patches_InstanceDynamicData, NaniteView);
#endif

#if ALWAYS_EVALUATE_WORLD_POSITION_OFFSET
		const bool bEvaluateWPO = true;
#else
		// WPO evaluation is gated on a per-cluster culling flag.
		const bool bEvaluateWPO = (VisibleCluster.Flags & NANITE_CULLING_FLAG_ENABLE_WPO) != 0;
#endif

		// Fetch and transform this lane's corner of the base triangle.
		const uint3 VertIndexes = DecodeTriangleIndices(Cluster, Patches_SplitPatch.TriIndex);
		Patches_Verts = FetchTransformedNaniteVertex(PrimitiveData, Patches_InstanceData, GetInstanceViewData(Patches_InstanceData.InstanceId, NaniteView.SceneRendererPrimaryViewId), Patches_VertTransforms, Cluster, VisibleCluster, VertIndexes[PatchCornerIndex], bEvaluateWPO);

		Patches_UVDensities = GetMaterialUVDensities(Cluster, Patches_InstanceData.PrimitiveId, Patches_SplitPatch.TriIndex);

#if NANITE_TESSELLATION_PATCH_REFS
		Patches_TessellatedPatch.Init(VisiblePatch.y, false);
#else

		// View-space position of this lane's outer (base triangle) corner.
		const float3 OuterPatchCornersView = mul(float4(Patches_Verts.PointWorld, 1), NaniteView.TranslatedWorldToView).xyz;

		// Interpolate this lane's inner (split patch) corner from the three outer
		// corners using the split-patch barycentrics, fetched cross-lane.
		const float3 InnerPatchCornersView = WaveReadLaneAt(OuterPatchCornersView, PatchStartLane + 0) * Patches_SplitPatch.Barycentrics[PatchCornerIndex].x +
			WaveReadLaneAt(OuterPatchCornersView, PatchStartLane + 1) * Patches_SplitPatch.Barycentrics[PatchCornerIndex].y +
			WaveReadLaneAt(OuterPatchCornersView, PatchStartLane + 2) * Patches_SplitPatch.Barycentrics[PatchCornerIndex].z;

		// Gather all three inner corners so every lane can compute tess factors.
		float3 CornersView[3];
		CornersView[0] = WaveReadLaneAt(InnerPatchCornersView, PatchStartLane + 0);
		CornersView[1] = WaveReadLaneAt(InnerPatchCornersView, PatchStartLane + 1);
		CornersView[2] = WaveReadLaneAt(InnerPatchCornersView, PatchStartLane + 2);

		const float3 TessFactors = GetTessFactors(NaniteView, CornersView, LowTessDistance);
		Patches_TessellatedPatch.Init(TessFactors, Patches_EncodedPatch.yzw, false);
		// NOTE(review): the split patch is re-decoded after Init — presumably Init
		// rewrites the encoded patch data (inout yzw); confirm against NaniteTessellation.ush.
		Patches_SplitPatch.Decode(Patches_EncodedPatch);
#endif
	}

	// Rasterization phase: the whole group cooperates on one patch at a time.
	for (uint i = 0; i < NumPatches; i++)
	{
		const uint PatchStartLane = i * 3;

		// Read values from patch setup
		const bool bReverseWindingOrder = WaveReadLaneAt(Patches_bReverseWindingOrders, PatchStartLane);
		const FSplitPatch SplitPatch = WaveReadLaneAt(Patches_SplitPatch, PatchStartLane);
		const FTessellatedPatch TessellatedPatch = WaveReadLaneAt(Patches_TessellatedPatch, PatchStartLane);
		const float4 UVDensities = WaveReadLaneAt(Patches_UVDensities, PatchStartLane);

		// The following values can be used in a shader, but will most likely be dead code eliminated
		const FInstanceSceneData InstanceData = WaveReadLaneAt(Patches_InstanceData, PatchStartLane);
		const FInstanceDynamicData InstanceDynamicData = WaveReadLaneAt(Patches_InstanceDynamicData, PatchStartLane);
		const FNaniteVertTransforms VertTransforms = WaveReadLaneAt(Patches_VertTransforms, PatchStartLane);

#if VISUALIZE
		const uint4 PatchEncoded = WaveReadLaneAt(Patches_EncodedPatch, PatchStartLane);
#endif

		const FVisibleCluster VisibleCluster = GetVisibleCluster(SplitPatch.VisibleClusterIndex, VIRTUAL_TEXTURE_TARGET);
		const FCluster Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);

		const FNaniteView NaniteView = GetNaniteView(VisibleCluster.ViewId);
#if NANITE_VERTEX_PROGRAMMABLE || NANITE_PIXEL_PROGRAMMABLE
		ResolvedView = ResolveView(NaniteView);
#endif

		// Material evaluation context for the domain/pixel shading below.
		FMaterialShader MaterialShader;
		MaterialShader.PrimitiveData = GetPrimitiveData(InstanceData.PrimitiveId);
		MaterialShader.InstanceData = InstanceData;
		MaterialShader.InstanceDynamicData = InstanceDynamicData;
		MaterialShader.NaniteView = NaniteView;
		MaterialShader.Cluster = Cluster;
		MaterialShader.VisibleCluster = VisibleCluster;
		MaterialShader.VertTransforms = VertTransforms;
		MaterialShader.TransformedTri = MakeTransformedNaniteTriangle(Patches_Verts, PatchStartLane + uint3(0, 1, 2));

#if MATERIAL_SHADER_HAS_DISPLACEMENT
		MaterialShader.InitDisplacement(RasterBinMeta[GetRasterBin()].MaterialDisplacementParams);
#endif

		// Visible cluster index in the high bits; low 7 bits carry the base triangle index (OR'd in at write time).
		uint PixelValue = (SplitPatch.VisibleClusterIndex + 1) << 7;

		uint NumVerts = TessellatedPatch.GetNumVerts();
		uint NumTris = TessellatedPatch.GetNumTris();

		FRaster Raster = CreateRaster( NaniteView, VisibleCluster );

		// Don't overwrite GroupVerts while the previous patch may still be reading it.
		GroupMemoryBarrierWithGroupSync();

#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
		// Cooperatively cache the VSM page table entries this cluster covers.
		if (!Raster.bSinglePage)
		{
			for (uint Offset = 0; Offset < NANITE_VSM_PAGE_TABLE_CACHE_DIM * NANITE_VSM_PAGE_TABLE_CACHE_DIM; Offset += ThreadGroupSize)
			{
				FetchAndCachePageTableEntry(NaniteView, VisibleCluster.vPage, VisibleCluster.vPageEnd, Offset + GroupThreadIndex);
			}
			GroupMemoryBarrierWithGroupSync();
		}
#endif

		// Evaluate tessellated verts through the material domain shader into subpixel coordinates.
		for( uint VertIndex = GroupThreadIndex; VertIndex < NumVerts; VertIndex += ThreadGroupSize )
		{
			FBarycentrics Barycentrics;
			Barycentrics.Value = TessellatedPatch.GetVert( VertIndex );
			Barycentrics.Value_dx = 0;//float3( -1, 1, 0 ) / TessFactors.x;
			Barycentrics.Value_dy = 0;//float3( 0, -1, 1 ) / TessFactors.y;

			// Map tessellated-patch barycentrics through the split-patch transform.
			Barycentrics = SplitPatch.TransformBarycentrics( Barycentrics );

			GroupVerts[ VertIndex ] = CalculateSubpixelCoordinates( Raster, MaterialShader.EvaluateDomain( UVDensities, Barycentrics ) ).xyz;
		}

		GroupMemoryBarrierWithGroupSync();

		// Rasterize the micro-triangles of the tessellated patch.
		for( uint TriIndex = GroupThreadIndex; TriIndex < NumTris; TriIndex += ThreadGroupSize )
		{
			uint3 VertIndexes = TessellatedPatch.GetIndexes( TriIndex );

			if( bReverseWindingOrder )
				VertIndexes.yz = VertIndexes.zy;

			float4 Verts[3];
			Verts[0] = float4( GroupVerts[ VertIndexes.x ], 1 );
			Verts[1] = float4( GroupVerts[ VertIndexes.y ], 1 );
			Verts[2] = float4( GroupVerts[ VertIndexes.z ], 1 );

			FRasterTri Tri = SetupTriangle< NANITE_SUBPIXEL_SAMPLES, !NANITE_TWO_SIDED >( Raster.ScissorRect, Verts );

			// Cull triangles with any vert at depth > 1 (presumably crossing the
			// near plane under reverse-Z — confirm).
			if( max3( Verts[0].z, Verts[1].z, Verts[2].z ) > 1 )
				Tri.bIsValid = false;

			if( Tri.bIsValid )
			{
#if VISUALIZE
				const uint SubPatch = (Rand3DPCG32(PatchEncoded.yzw).x & 0xff0000u) >> 16u;
				const uint MicroTri = TriIndex & 0xffu;
				const uint2 VisualizeValues = GetVisualizeValues(1u /* AddValue */, SubPatch, MicroTri);
#else
				const uint2 VisualizeValues = uint2(0, 0);
#endif

				RasterizeDicedTri(
					Tri,
					Raster,
					MaterialShader,
					PixelValue | SplitPatch.TriIndex,
					VisualizeValues );
			}
		}
	}
#endif
}
|
|
|
|
#if NANITE_VOXELS
|
|
#include "Voxel/Voxel.ush"
|
|
|
|
// Ray vs. axis-aligned box slab test.
// Returns true if the ray's line pierces the box. OutIntersectionTime receives
// the entry time (the latest per-axis slab entry), which may be negative when
// the origin is inside the box or the box lies behind the ray.
bool IntersectBox(float3 RayOrigin, float3 RayDir, float3 BoxCenter, float3 BoxHalfSize, inout float OutIntersectionTime)
{
	const float3 OneOverDir = rcp(RayDir);

	// Box center expressed relative to the ray origin.
	const float3 RelCenter = BoxCenter - RayOrigin;

	// Per-axis times at which the ray crosses the two slab planes.
	const float3 SlabT0 = (RelCenter - BoxHalfSize) * OneOverDir;
	const float3 SlabT1 = (RelCenter + BoxHalfSize) * OneOverDir;

	const float3 EntryT = min(SlabT0, SlabT1);
	const float3 ExitT = max(SlabT0, SlabT1);

	// The ray is inside all three slabs between the latest entry and the earliest exit.
	const float LatestEntry = max3(EntryT.x, EntryT.y, EntryT.z);
	const float EarliestExit = min3(ExitT.x, ExitT.y, ExitT.z);

	OutIntersectionTime = LatestEntry;

	return LatestEntry < EarliestExit;
}
|
|
|
|
// Writes a single visibility-buffer pixel (pixel value + device Z).
// When rasterizing to a multi-page virtual shadow map target, the virtual pixel
// position is first translated through the cached page table; the write is
// dropped if the translation fails.
void PlotPixel(FRaster Raster, int2 PixelCoord, uint PixelValue, float DeviceZ)
{
	FVisBufferPixel Pixel = CreateVisBufferPixel(PixelCoord, PixelValue, DeviceZ);

#if VISUALIZE
	Pixel.VisualizeValues = GetVisualizeValues();
#endif

#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
	// Translate virtual page position -> physical position in the page pool.
	Pixel.PhysicalPosition.xy = Pixel.Position;
	Pixel.PhysicalPosition.z = Raster.ArrayIndex;
	if (!Raster.bSinglePage)
	{
		FCachedPageTable PageTranslation;
		if (!PageTranslation(Pixel))
		{
			// Page not available in the cache: drop the pixel.
			return;
		}
	}
#endif
	Pixel.WriteOverdraw();
	Pixel.Write();
}
|
|
|
|
// Builds the camera ray through the given SV position, expressed in the
// instance's local space.
// Orthographic: the origin varies per pixel (near point) and the direction is
// the near->far vector. Perspective: the origin is the camera position and the
// direction is the (unnormalized) back-projection of the pixel; intersection
// times are therefore in units of that vector's length.
FRay GetLocalRay( FNaniteView NaniteView, FInstanceSceneData InstanceData, float4 SvPosition, bool bIsOrtho )
{
	FDFVector3 RayWorldOrigin;
	float3 RayWorldDirection;
#if 1
	if( bIsOrtho )
	{
		// z=1 back-projects to the near point, z=0 to the far point (reverse-Z).
		float3 NearPoint = mul( float4( SvPosition.xy, 1, 1 ), NaniteView.SVPositionToTranslatedWorld ).xyz;
		float3 FarPoint = mul( float4( SvPosition.xy, 0, 1 ), NaniteView.SVPositionToTranslatedWorld ).xyz;
		RayWorldOrigin = DFFastSubtract( NearPoint, NaniteView.PreViewTranslation );
		RayWorldDirection = FarPoint - NearPoint;
	}
	else
	{
		RayWorldOrigin = NaniteView.WorldCameraOrigin;
		// Direction is deliberately left unnormalized.
		RayWorldDirection = mul( float4( SvPosition.xy, 0, 1 ), NaniteView.SVPositionToTranslatedWorld ).xyz;
	}
#else
	// Alternative generic path: derive origin/direction from the homogeneous
	// near/far points (works for both projection types).
	float4 NearPoint = mul( float4( SvPosition.xy, 1, 1 ), NaniteView.SVPositionToTranslatedWorld );
	float4 FarPoint = mul( float4( SvPosition.xy, 0, 1 ), NaniteView.SVPositionToTranslatedWorld );

	RayWorldOrigin = DFFastSubtract( NearPoint.xyz / NearPoint.w, NaniteView.PreViewTranslation );
	RayWorldDirection = normalize( NearPoint.w * FarPoint.xyz - FarPoint.w * NearPoint.xyz );
#endif

	// Transform the world-space ray into the instance's local space.
	// DF* ops handle the double-float large-world coordinates.
	FRay RayLocal;
	RayLocal.Origin = DFMultiplyDemote( RayWorldOrigin, InstanceData.WorldToLocal );
	RayLocal.Direction = DFMultiplyVector( RayWorldDirection, InstanceData.WorldToLocal );
	RayLocal.Time[0] = 0; // TODO NaniteView.NearPlane
	RayLocal.Time[1] = 1e24;
	return RayLocal;
}
|
|
|
|
// Traces a single pixel's ray through one voxel brick and writes the visibility
// buffer on a hit.
// The ray is expected in brick-local voxel units, relative to the brick's min
// corner. ReverseBrickBits is the brick's 64-bit occupancy mask with bit order
// reversed, so that shifting left by the current voxel index places the tested
// bit in the sign bit (hit test becomes a sign check).
void ProcessBrickPixel(
	FNaniteView NaniteView,
	FRaster Raster,
	FInstanceSceneData InstanceData,
	FRay Ray,
	bool bIsOrtho,
	int2 PixelPos,
	uint PixelValue,
	uint2 ReverseBrickBits,
	float3 LocalVoxelBoundsExtent,
	float VoxelSize,
	float RcpVoxelSize,
	float Bias
)
{
	FVisBufferPixel Pixel = CreateVisBufferPixel( PixelPos, PixelValue, 0.0f /*Unused*/ );

#if VIRTUAL_TEXTURE_TARGET
	// Translate virtual page position -> physical position in the page pool.
	Pixel.PhysicalPosition.xy = Pixel.Position;
	Pixel.PhysicalPosition.z = Raster.ArrayIndex;
	if (!Raster.bSinglePage)
	{
		FCachedPageTable PageTranslation;
		PageTranslation(Pixel);
	}
#endif

	Ray.Time = float2(0, 1e24f); // TODO NaniteView.NearPlane

	// Avoid divide-by-zero in the slab test / DDA by forcing near-zero
	// direction components to a small epsilon.
	const float Epsilon = 1e-8;
#if 1
	Ray.Direction = select( abs( Ray.Direction ) < Epsilon, Epsilon, Ray.Direction );
#elif 0
	// Sign-preserving variant: clamp the magnitude, keep the original sign bit.
	Ray.Direction = asfloat( asuint( max( abs( Ray.Direction ), Epsilon ) ) | ( asuint( Ray.Direction ) & 0x80000000u ) ); // v_max, v_and_or
#elif 0
	float3 Replacement = select( Ray.Direction > 0, Epsilon, -Epsilon );
	Ray.Direction = select( abs( Ray.Direction ) < Epsilon, Replacement, Ray.Direction );
#endif

	// Clip the ray to the brick bounds. The box is centered at its extent since
	// the ray is relative to the brick's min corner.
	Ray.Time = Intersect( Ray, LocalVoxelBoundsExtent, LocalVoxelBoundsExtent );


#if VISUALIZE
	Pixel.VisualizeValues = GetVisualizeValues();
#endif

	if( Ray.Time[0] < Ray.Time[1] )
	{
#if 1
		// Shrink the interval slightly to avoid precision issues at brick faces.
		Ray.Time += float2( Bias, -Bias );
#elif 0
		Ray.Time = float2( lerp( Ray.Time[0], Ray.Time[1], 0.05 ), lerp( Ray.Time[1], Ray.Time[0], 0.05 ) );
#elif 0
		if( bIsOrtho )
			Ray.Time += float2(1e-7, -1e-7);
		else
			Ray.Time += float2(5e-4, -5e-4);
#endif

		FDDA DDA = InitDDA( Ray );
		StartDDA( DDA, 1, Ray );

		const UlongType ReverseVoxelMask64 = PackUlongType( ReverseBrickBits );

		int Hit = 0; // Negative means hit
		UNROLL
		// 3*3 + 1 = 10 steps: matches the maximum number of voxels a ray can
		// cross in a 4x4x4 brick (4+4+4-2), presumably the brick dimensions
		// implied by the 64-bit mask — confirm against Voxel.ush.
		for( uint Tests = 0; Tests < 3*3 + 1; Tests++ )
		{
			// Shift the reversed mask so the current voxel's occupancy bit lands
			// in the sign bit of the high word.
#if COMPILER_SUPPORTS_ULONG_TYPES
			Hit = UnpackUlongType( ReverseVoxelMask64 << DDA.VoxelIndex ).y;
#else
			Hit = ( DDA.VoxelIndex < 32 ? ReverseBrickBits.y : ReverseBrickBits.x ) << ( DDA.VoxelIndex & 31 );
#endif
			BRANCH
			if( Hit < 0 ) break;

			StepDDA( DDA, 1 );
			BRANCH
			// Ray exited the brick without hitting an occupied voxel.
			if (DDA.Time[0] >= DDA.Time[1]) break;
		}

		if( Hit < 0 )
		{
			// Report depth at the middle of the hit voxel's traversal interval.
			DDA.Time[0] = 0.5 * ( DDA.Time[0] + NextTime( DDA ) );
			if( bIsOrtho )
				Pixel.Depth = 1 - DDA.Time[0];
			else
				// Convert hit distance to device Z via the projection matrix terms.
				Pixel.Depth = NaniteView.ViewToClip[3][2] / DDA.Time[0] + NaniteView.ViewToClip[2][2];

			Pixel.WriteOverdraw();
			Pixel.Write();
		}
	}
}
|
|
|
|
// Groupshared scratch used by the brick-trace work redistribution
// (see ProcessBrickPixelBatchFromQueue and ClusterTraceBricks):
groupshared uint GroupWorkEnd[32];					// Per-slot work-end marker; 0xFFFFFFFF means unused
groupshared uint3 GroupBrickData[32];				// Per-lane brick data: (rect mul values, rect max Z as uint, packed min pixel)
groupshared uint GroupSourceLaneAndPixelPos[64];	// Ring buffer of packed (source lane | pixel position) work items
|
|
|
|
// Consumes one batch of (up to) 32 queued pixels from the groupshared work
// queue and traces each against its source brick.
// Each queue entry packs the lane that produced the work together with the
// pixel position; per-brick values are fetched from that lane with
// WaveReadLaneAt, and the per-pixel ray is reconstructed from the source
// lane's base ray plus the pixel-position derivatives.
void ProcessBrickPixelBatchFromQueue(
	inout int QueueNumElements,
	inout uint QueueReadOffset,
	// Uniform inputs
	bool bIsOrtho,
	FNaniteView NaniteView,
	FRaster Raster,
	FInstanceSceneData InstanceData,
	FCluster Cluster,
	uint VisibleIndex,
	float VoxelSize,
	float RcpVoxelSize,
	float Bias,
	// Uniform or variable depending on mode
	float3 RayDirection,
	float3 RayDirection_dx,
	float3 RayDirection_dy,
	float3 RayOrigin,
	float3 RayOrigin_dx,
	float3 RayOrigin_dy,
	// Variable inputs
	uint2 ReverseBrickBits,
	uint BrickMax_VertIndex,
	float CenterPixelClipW,
	uint GroupThreadIndex
)
{
	// The queue is a 64-entry ring buffer: wrap the read index.
	const uint ReadIndex = ( QueueReadOffset + GroupThreadIndex ) & 63;

	// Packed layout: bits [0:4] source lane, [5:18] pixel x (14 bits), [19:31] pixel y (13 bits).
	const uint PackedSourceLaneAndPixelPos = GroupSourceLaneAndPixelPos[ ReadIndex ];
	const uint SourceLane = PackedSourceLaneAndPixelPos & 31u;

	const int2 PixelPos = int2( BitFieldExtractU32( PackedSourceLaneAndPixelPos, 14, 5 ),
								BitFieldExtractU32( PackedSourceLaneAndPixelPos, 13, 19 ) );

	// Fetch the per-brick values from the lane that enqueued this pixel.
	const float3 SourceRayOrigin = WaveReadLaneAt( RayOrigin, SourceLane );
	const uint2 SourceReverseBrickBits = WaveReadLaneAt( ReverseBrickBits, SourceLane );
	const uint SourceBrickMax_VertIndex = WaveReadLaneAt( BrickMax_VertIndex, SourceLane );

	// Brick half-extent: 8 bits per axis in bits [0:23].
	const float3 SourceHalfBrickMax = float3( BitFieldExtractU32( SourceBrickMax_VertIndex, 8, 0 ),
											  BitFieldExtractU32( SourceBrickMax_VertIndex, 8, 8 ),
											  BitFieldExtractU32( SourceBrickMax_VertIndex, 8, 16 ) ) * 0.5f;

	// Brick vertex index in bits [24:31]; tags the low 7 bits of the pixel value.
	const uint SourceVertIndex = SourceBrickMax_VertIndex >> 24;
	const uint SourcePixelValue = ( ( VisibleIndex + 1 ) << 7 ) | SourceVertIndex;

	// Rebuild the per-pixel ray. With a constant direction (CONSTANT_DIR or
	// ortho) the origin varies per pixel; otherwise the origin is fixed and the
	// direction varies per pixel.
	FRay Ray;
	BRANCH
	if( CONSTANT_DIR || bIsOrtho )
	{
#if NANITE_PER_VOXEL_BRICK_SKINNING
		// Per-brick skinning: derivatives differ per lane, fetch them cross-lane too.
		const float3 SourceRayDirection = WaveReadLaneAt( RayDirection, SourceLane );
		const float3 SourceRayOrigin_dx = WaveReadLaneAt( RayOrigin_dx, SourceLane );
		const float3 SourceRayOrigin_dy = WaveReadLaneAt( RayOrigin_dy, SourceLane );

		Ray.Origin = SourceRayOrigin + PixelPos.x * SourceRayOrigin_dx + PixelPos.y * SourceRayOrigin_dy;
		Ray.Direction = SourceRayDirection;
#elif CONSTANT_DIR
		const float3 SourceRayDirection = WaveReadLaneAt( RayDirection, SourceLane );
		const float SourceCenterPixelClipW = WaveReadLaneAt( CenterPixelClipW, SourceLane );
		// Origin deltas are scaled by the brick-center depth (clip W) so the
		// fixed direction still passes through the right point at mid-brick depth.
		Ray.Origin = SourceRayOrigin + ( PixelPos.x * SourceCenterPixelClipW ) * RayDirection_dx + ( PixelPos.y * SourceCenterPixelClipW ) * RayDirection_dy;
		Ray.Direction = SourceRayDirection;
#else
		Ray.Origin = SourceRayOrigin + PixelPos.x * RayOrigin_dx + PixelPos.y * RayOrigin_dy;
		Ray.Direction = RayDirection;
#endif
	}
	else
	{
		Ray.Origin = SourceRayOrigin;

#if NANITE_PER_VOXEL_BRICK_SKINNING
		const float3 SourceRayDirection = WaveReadLaneAt( RayDirection, SourceLane );
		const float3 SourceRayDirection_dx = WaveReadLaneAt( RayDirection_dx, SourceLane );
		const float3 SourceRayDirection_dy = WaveReadLaneAt( RayDirection_dy, SourceLane );

		Ray.Direction = SourceRayDirection + SourceRayDirection_dx * PixelPos.x + SourceRayDirection_dy * PixelPos.y;
#else
		Ray.Direction = RayDirection + RayDirection_dx * PixelPos.x + RayDirection_dy * PixelPos.y;
#endif
	}

	// Lanes beyond the queued element count ran the setup above (keeps the wave
	// ops uniform) but must not trace or write.
	if( GroupThreadIndex < QueueNumElements )
	{
		ProcessBrickPixel(NaniteView, Raster, InstanceData,
			Ray, bIsOrtho,
			PixelPos, SourcePixelValue, SourceReverseBrickBits, SourceHalfBrickMax,
			VoxelSize, RcpVoxelSize,
			Bias
		);
	}

	// One batch of 32 entries consumed.
	QueueNumElements -= 32;
	QueueReadOffset += 32;
}
|
|
|
|
// Returns true if a pixel at PixelPos with the given depth survives the early
// depth test. For multi-page virtual shadow map targets, the pixel is first
// translated through the cached page table; a failed translation counts as
// occluded (nothing to rasterize into).
bool OcclusionTestPixel( FRaster Raster, int2 PixelPos, float Depth )
{
	FVisBufferPixel Pixel = CreateVisBufferPixel( PixelPos, 0, Depth );

#if VIRTUAL_TEXTURE_TARGET
	// Resolve the physical location in the page pool before depth testing.
	Pixel.PhysicalPosition.xy = Pixel.Position;
	Pixel.PhysicalPosition.z = Raster.ArrayIndex;
	if( !Raster.bSinglePage )
	{
		FCachedPageTable PageTranslation;
		if( !PageTranslation( Pixel ) )
		{
			return false;
		}
	}
#endif

	return Pixel.EarlyDepthTest();
}
|
|
|
|
void ClusterTraceBricks( uint VisibleIndex, uint GroupThreadIndex )
|
|
{
|
|
GetIndexAndTriRangeSW( VisibleIndex );
|
|
|
|
FVisibleCluster VisibleCluster = GetVisibleCluster( VisibleIndex, VIRTUAL_TEXTURE_TARGET );
|
|
FInstanceSceneData InstanceData = GetInstanceSceneDataUnchecked( VisibleCluster.InstanceId );
|
|
FNaniteView NaniteView = GetNaniteView( VisibleCluster.ViewId );
|
|
FCluster Cluster = GetCluster( VisibleCluster.PageIndex, VisibleCluster.ClusterIndex );
|
|
FRaster Raster = CreateRaster( NaniteView, VisibleCluster );
|
|
|
|
FInstanceDynamicData InstanceDynamicData = CalculateInstanceDynamicData( NaniteView, InstanceData );
|
|
|
|
|
|
float4 SvPositionStart = float4( 0.5, 0.5, 0, 1 );
|
|
|
|
#if VIRTUAL_TEXTURE_TARGET
|
|
SvPositionStart.xy -= Raster.vTranslation.xy;
|
|
#endif
|
|
// TODO: optimize for perspective main view?
|
|
bool bIsOrtho = IsOrthoProjection( NaniteView.ViewToClip );
|
|
|
|
const float RcpVoxelSize = rcp( Cluster.LODError );
|
|
|
|
// Calculate ray in voxel space of local cluster
|
|
FRay RayBase = GetLocalRay( NaniteView, InstanceData, SvPositionStart, bIsOrtho );
|
|
|
|
float3 RayDirection_dx, RayDirection_dy;
|
|
float3 RayOrigin_dx, RayOrigin_dy;
|
|
|
|
{
|
|
float3 Ray_dx = DFMultiplyVector( NaniteView.SVPositionToTranslatedWorld[0].xyz, InstanceData.WorldToLocal ) * RcpVoxelSize;
|
|
float3 Ray_dy = DFMultiplyVector( NaniteView.SVPositionToTranslatedWorld[1].xyz, InstanceData.WorldToLocal ) * RcpVoxelSize;
|
|
|
|
BRANCH
|
|
if( bIsOrtho )
|
|
{
|
|
RayOrigin_dx = Ray_dx;
|
|
RayOrigin_dy = Ray_dy;
|
|
RayDirection_dx = 0;
|
|
RayDirection_dy = 0;
|
|
}
|
|
else
|
|
{
|
|
RayOrigin_dx = 0;
|
|
RayOrigin_dy = 0;
|
|
RayDirection_dx = Ray_dx;
|
|
RayDirection_dy = Ray_dy;
|
|
}
|
|
}
|
|
|
|
float4x4 LocalToClip = mul( InstanceDynamicData.LocalToTranslatedWorld, NaniteView.TranslatedWorldToClip );
|
|
float4x4 LocalVoxelToPixelClip = LocalToClip;
|
|
|
|
LocalVoxelToPixelClip._m00_m10_m20_m30 = Raster.ViewportScale.x * LocalVoxelToPixelClip._m00_m10_m20_m30 + Raster.ViewportBias.x * LocalVoxelToPixelClip._m03_m13_m23_m33;
|
|
LocalVoxelToPixelClip._m01_m11_m21_m31 = Raster.ViewportScale.y * LocalVoxelToPixelClip._m01_m11_m21_m31 + Raster.ViewportBias.y * LocalVoxelToPixelClip._m03_m13_m23_m33;
|
|
|
|
#if USE_SKINNING
|
|
FNaniteSkinningHeader SkinningHeader = LoadNaniteSkinningHeader(InstanceData.PrimitiveId);
|
|
FBoneInfluenceHeader BoneInfluenceHeader = GetBoneInfluenceHeader(Cluster);
|
|
|
|
#if !NANITE_PER_VOXEL_BRICK_SKINNING
|
|
{
|
|
const float4x3 SkinningTransform4x3 = SampleVoxelSkinningTransform( InstanceData, Cluster, SkinningHeader );
|
|
|
|
const float3x3 InvSkinningTransform3x3 = Inverse( float3x3( SkinningTransform4x3[0], SkinningTransform4x3[1], SkinningTransform4x3[2] ) );
|
|
|
|
const float4x4 SkinningTransform4x4 = float4x4( float4( SkinningTransform4x3[0], 0 ),
|
|
float4( SkinningTransform4x3[1], 0 ),
|
|
float4( SkinningTransform4x3[2], 0 ),
|
|
float4( SkinningTransform4x3[3], 1 ) );
|
|
|
|
const float3x3 SkinningTransform3x3 = float3x3( SkinningTransform4x3[0], SkinningTransform4x3[1], SkinningTransform4x3[2] );
|
|
|
|
LocalVoxelToPixelClip = mul( SkinningTransform4x4, LocalVoxelToPixelClip );
|
|
|
|
RayBase.Origin = mul( RayBase.Origin - SkinningTransform4x3[3], InvSkinningTransform3x3 );
|
|
RayBase.Direction = mul( RayBase.Direction, InvSkinningTransform3x3 );
|
|
|
|
RayDirection_dx = mul( RayDirection_dx, InvSkinningTransform3x3 );
|
|
RayDirection_dy = mul( RayDirection_dy, InvSkinningTransform3x3 );
|
|
|
|
RayOrigin_dx = mul( RayOrigin_dx, InvSkinningTransform3x3 );
|
|
RayOrigin_dy = mul( RayOrigin_dy, InvSkinningTransform3x3 );
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
RayBase.Origin *= RcpVoxelSize;
|
|
RayBase.Direction *= RcpVoxelSize;
|
|
|
|
const float Bias = 0.04 / length(RayBase.Direction); // VOXELTODO: Get approximate ray length from matrix directly?
|
|
|
|
LocalVoxelToPixelClip[0] *= Cluster.LODError;
|
|
LocalVoxelToPixelClip[1] *= Cluster.LODError;
|
|
LocalVoxelToPixelClip[2] *= Cluster.LODError;
|
|
|
|
LocalVoxelToPixelClip[0] = ToScalarMemory( LocalVoxelToPixelClip[0] );
|
|
LocalVoxelToPixelClip[1] = ToScalarMemory( LocalVoxelToPixelClip[1] );
|
|
LocalVoxelToPixelClip[2] = ToScalarMemory( LocalVoxelToPixelClip[2] );
|
|
LocalVoxelToPixelClip[3] = ToScalarMemory( LocalVoxelToPixelClip[3] );
|
|
|
|
#if VIRTUAL_TEXTURE_TARGET && NANITE_LATE_VSM_PAGE_TRANSLATION
|
|
if (!Raster.bSinglePage)
|
|
{
|
|
UNROLL
|
|
for (uint Offset = 0; Offset < NANITE_VSM_PAGE_TABLE_CACHE_DIM * NANITE_VSM_PAGE_TABLE_CACHE_DIM; Offset += THREADGROUP_SIZE)
|
|
{
|
|
FetchAndCachePageTableEntry(NaniteView, VisibleCluster.vPage, VisibleCluster.vPageEnd, Offset + GroupThreadIndex);
|
|
}
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
#endif
|
|
|
|
|
|
#if 0
|
|
Cluster.NumVerts = min( Cluster.NumVerts, 4096 );
|
|
|
|
for( uint BrickIndex = GroupThreadIndex; BrickIndex < Cluster.NumVerts; BrickIndex += THREADGROUP_SIZE )
|
|
{
|
|
const uint PixelValue = ((VisibleIndex + 1) << 7) | (BrickIndex & 127);
|
|
|
|
const float3 BoundsCenter = FetchLocalNaniteVertexPosition( InstanceData, Cluster, VisibleCluster, BrickIndex );
|
|
const float3 BoundsExtent = Cluster.LODError * 0.5f;
|
|
|
|
FFrustumCullData FrustumCull = BoxCullFrustum( BoundsCenter, BoundsExtent, LocalToClip, NaniteView.ViewToClip, bIsOrtho, !bIsOrtho, true );
|
|
float4 Rect = ( float4( FrustumCull.RectMin.xy, FrustumCull.RectMax.xy ) * Raster.ViewportScale.xyxy + Raster.ViewportBias.xyxy ).xwzy;
|
|
|
|
// Round to nearest pixel
|
|
int2 MinPixels = (int2)floor( Rect.xy + 0.5 );
|
|
int2 MaxPixels = (int2)floor( Rect.zw - 0.5 ); // inclusive!
|
|
|
|
// Scissor
|
|
MinPixels = max( MinPixels, Raster.ScissorRect.xy );
|
|
MaxPixels = min( MaxPixels, Raster.ScissorRect.zw - 1 );
|
|
|
|
// Limit the rasterizer bounds to a sensible max.
|
|
MaxPixels = min( MaxPixels, MinPixels + 16 );
|
|
|
|
for( int y = MinPixels.y; y < MaxPixels.y; y++ )
|
|
{
|
|
for( int x = MinPixels.x; x < MaxPixels.x; x++ )
|
|
{
|
|
int2 PixelPos = int2(x,y);
|
|
|
|
float4 SvPosition = SvPositionStart;
|
|
SvPosition.xy += PixelPos;
|
|
|
|
FRay Ray = GetLocalRay( NaniteView, InstanceData, SvPosition, bIsOrtho );
|
|
|
|
const float Epsilon = 1e-8;
|
|
Ray.Direction = select( abs( Ray.Direction ) < Epsilon, Epsilon, Ray.Direction );
|
|
|
|
#if 1
|
|
Ray.Time = Intersect( Ray, BoundsCenter, BoundsExtent );
|
|
|
|
if( Ray.Time[0] >= Ray.Time[1] )
|
|
continue;
|
|
|
|
float DeviceZ;
|
|
if( bIsOrtho )
|
|
DeviceZ = 1 - Ray.Time[0];
|
|
else
|
|
DeviceZ = NaniteView.ViewToClip[3][2] / Ray.Time[0] + NaniteView.ViewToClip[2][2];
|
|
#else
|
|
float DeviceZ = FrustumCull.RectMax.z;
|
|
#endif
|
|
|
|
PlotPixel( Raster, PixelPos, PixelValue, DeviceZ );
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
#endif
|
|
|
|
for( uint BrickIndexBase = 0; BrickIndexBase < NANITE_MAX_CLUSTER_TRIANGLES; BrickIndexBase += THREADGROUP_SIZE )
|
|
{
|
|
BRANCH
|
|
if( BrickIndexBase >= Cluster.BrickDataNum )
|
|
break;
|
|
|
|
const uint BrickIndex = BrickIndexBase + GroupThreadIndex;
|
|
const uint FetchBrickIndex = min( BrickIndex, Cluster.BrickDataNum - 1 );
|
|
|
|
const FBrick Brick = DecodeBrick( Cluster, FetchBrickIndex );
|
|
|
|
const float3 LocalVoxelPosition = (float3)Brick.StartPos;
|
|
const float3 LocalVoxelBoundsExtent = Brick.BrickMax * 0.5f;
|
|
const float3 LocalVoxelBoundsCenter = LocalVoxelPosition + LocalVoxelBoundsExtent;
|
|
|
|
float4x4 Brick_LocalVoxelToPixelClip = LocalVoxelToPixelClip;
|
|
FRay Brick_RayBase = RayBase;
|
|
float3 Brick_RayDirection_dx = RayDirection_dx;
|
|
float3 Brick_RayDirection_dy = RayDirection_dy;
|
|
|
|
float3 Brick_RayOrigin_dx = RayOrigin_dx;
|
|
float3 Brick_RayOrigin_dy = RayOrigin_dy;
|
|
|
|
#if USE_SKINNING && NANITE_PER_VOXEL_BRICK_SKINNING
|
|
const float4x3 SkinningTransform4x3 = SampleSkinningTransform( InstanceData, SkinningHeader, BoneInfluenceHeader, Brick.VertOffset );
|
|
const float3 SkinningTranslation = SkinningTransform4x3[3] * RcpVoxelSize;
|
|
|
|
const float3x3 InvSkinningTransform3x3 = Inverse( float3x3( SkinningTransform4x3[0], SkinningTransform4x3[1], SkinningTransform4x3[2] ) );
|
|
|
|
const float4x4 SkinningTransform4x4 = float4x4( float4( SkinningTransform4x3[0], 0 ),
|
|
float4( SkinningTransform4x3[1], 0 ),
|
|
float4( SkinningTransform4x3[2], 0 ),
|
|
float4( SkinningTranslation, 1 ) );
|
|
|
|
const float3x3 SkinningTransform3x3 = float3x3( SkinningTransform4x3[0], SkinningTransform4x3[1], SkinningTransform4x3[2] );
|
|
|
|
Brick_LocalVoxelToPixelClip = mul( SkinningTransform4x4, Brick_LocalVoxelToPixelClip );
|
|
|
|
Brick_RayBase.Origin = mul( Brick_RayBase.Origin - SkinningTranslation, InvSkinningTransform3x3 );
|
|
Brick_RayBase.Direction = mul( Brick_RayBase.Direction, InvSkinningTransform3x3 );
|
|
|
|
Brick_RayDirection_dx = mul( Brick_RayDirection_dx, InvSkinningTransform3x3 );
|
|
Brick_RayDirection_dy = mul( Brick_RayDirection_dy, InvSkinningTransform3x3 );
|
|
|
|
Brick_RayOrigin_dx = mul( Brick_RayOrigin_dx, InvSkinningTransform3x3 );
|
|
Brick_RayOrigin_dy = mul( Brick_RayOrigin_dy, InvSkinningTransform3x3 );
|
|
#endif
|
|
|
|
const float4 CenterPixelClip = mul( float4( LocalVoxelBoundsCenter, 1.0 ), Brick_LocalVoxelToPixelClip );
|
|
const float3 CenterPixel = CenterPixelClip.xyz / CenterPixelClip.w;
|
|
|
|
#if CONSTANT_DIR
|
|
// 0.5 to counter the half pixel shift from SvPositionStart
|
|
float2 CenterPixelXY = CenterPixel.xy - 0.5f;
|
|
|
|
// Constant direction picked as brick center
|
|
Brick_RayBase.Direction += Brick_RayDirection_dx * CenterPixelXY.x;
|
|
Brick_RayBase.Direction += Brick_RayDirection_dy * CenterPixelXY.y;
|
|
|
|
// Make ray with fixed direction hit same point at mid brick depth, CenterClip.w.
|
|
// Position = Origin + Direction * Time, Time = w.
|
|
Brick_RayOrigin_dx = Brick_RayDirection_dx * CenterPixelClip.w;
|
|
Brick_RayOrigin_dy = Brick_RayDirection_dy * CenterPixelClip.w;
|
|
|
|
Brick_RayBase.Origin -= LocalVoxelPosition + CenterPixelXY.x * Brick_RayOrigin_dx + CenterPixelXY.y * Brick_RayOrigin_dy;
|
|
#else
|
|
Brick_RayBase.Origin -= LocalVoxelPosition;
|
|
#endif
|
|
|
|
|
|
#if CONSTANT_DIR_RECT
|
|
// Apply shear to counter ray direction
|
|
const float2 RayShear = CenterPixel.xy;
|
|
const float2 ExtentClipXY =
|
|
abs( LocalVoxelBoundsExtent.x * ( Brick_LocalVoxelToPixelClip[0].xy - Brick_LocalVoxelToPixelClip[0].w * RayShear ) ) +
|
|
abs( LocalVoxelBoundsExtent.y * ( Brick_LocalVoxelToPixelClip[1].xy - Brick_LocalVoxelToPixelClip[1].w * RayShear ) ) +
|
|
abs( LocalVoxelBoundsExtent.z * ( Brick_LocalVoxelToPixelClip[2].xy - Brick_LocalVoxelToPixelClip[2].w * RayShear ) );
|
|
|
|
const float ExtentClipW =
|
|
LocalVoxelBoundsExtent.x * Brick_LocalVoxelToPixelClip[0].w +
|
|
LocalVoxelBoundsExtent.y * Brick_LocalVoxelToPixelClip[1].w +
|
|
LocalVoxelBoundsExtent.z * Brick_LocalVoxelToPixelClip[2].w;
|
|
|
|
const float MinW = CenterPixelClip.w - ExtentClipW;
|
|
const float MaxW = CenterPixelClip.w + ExtentClipW;
|
|
|
|
FFrustumCullData FrustumCull;
|
|
|
|
#if CONSTANT_DIR
|
|
FrustumCull.RectMin.xy = CenterPixel.xy - ExtentClipXY / CenterPixelClip.w;
|
|
FrustumCull.RectMax.xy = CenterPixel.xy + ExtentClipXY / CenterPixelClip.w;
|
|
#else
|
|
// Project near face of skewed box for conservative rect
|
|
const float2 Center = CenterPixelClip.xy + ( MinW - CenterPixelClip.w ) * RayShear;
|
|
FrustumCull.RectMin.xy = ( Center - ExtentClipXY ) / MinW;
|
|
FrustumCull.RectMax.xy = ( Center + ExtentClipXY ) / MinW;
|
|
#endif
|
|
|
|
const float MinZ = NaniteView.ViewToClip[2][2] + NaniteView.ViewToClip[3][2] / MaxW;
|
|
const float MaxZ = NaniteView.ViewToClip[2][2] + NaniteView.ViewToClip[3][2] / MinW;
|
|
|
|
FrustumCull.RectMin.z = MinZ;
|
|
FrustumCull.RectMax.z = MaxZ;
|
|
const float4 Rect = float4( FrustumCull.RectMin.xy, FrustumCull.RectMax.xy );
|
|
#else
|
|
// TODO: Unify with existing Frustum culling functions
|
|
FFrustumCullData FrustumCull;
|
|
const float3 Extent = LocalVoxelBoundsExtent;
|
|
|
|
BRANCH
|
|
if( bIsOrtho )
|
|
{
|
|
const float3 PixelClipDelta = abs( Extent.x * Brick_LocalVoxelToPixelClip[0].xyz ) +
|
|
abs( Extent.y * Brick_LocalVoxelToPixelClip[1].xyz ) +
|
|
abs( Extent.z * Brick_LocalVoxelToPixelClip[2].xyz );
|
|
|
|
FrustumCull.RectMin = CenterPixelClip.xyz - PixelClipDelta;
|
|
FrustumCull.RectMax = CenterPixelClip.xyz + PixelClipDelta;
|
|
}
|
|
else
|
|
{
|
|
const float4 DeltaX = ( 2.0f * Extent.x ) * Brick_LocalVoxelToPixelClip[0];
|
|
const float4 DeltaY = ( 2.0f * Extent.y ) * Brick_LocalVoxelToPixelClip[1];
|
|
const float4 DeltaZ = ( 2.0f * Extent.z ) * Brick_LocalVoxelToPixelClip[2];
|
|
|
|
float MinW = +INFINITE_FLOAT;
|
|
float MaxW = -INFINITE_FLOAT;
|
|
|
|
FrustumCull.RectMin.xy = +INFINITE_FLOAT;
|
|
FrustumCull.RectMax.xy = -INFINITE_FLOAT;
|
|
|
|
#define EVAL_X01( _PointClip ) \
|
|
{ \
|
|
const float4 Clip0 = ( _PointClip ); \
|
|
const float4 Clip1 = ( _PointClip ) + DeltaX; \
|
|
const float2 Screen0 = Clip0.xy / Clip0.w; \
|
|
const float2 Screen1 = Clip1.xy / Clip1.w; \
|
|
MinW = min3( MinW, Clip0.w, Clip1.w ); \
|
|
MaxW = max3( MaxW, Clip0.w, Clip1.w ); \
|
|
FrustumCull.RectMin.xy = min3( FrustumCull.RectMin.xy, Screen0, Screen1 ); \
|
|
FrustumCull.RectMax.xy = max3( FrustumCull.RectMax.xy, Screen0, Screen1 ); \
|
|
}
|
|
|
|
const float4 Clip000 = CenterPixelClip - 0.5f * ( DeltaX + DeltaY + DeltaZ );
|
|
EVAL_X01( Clip000 );
|
|
EVAL_X01( Clip000 + DeltaY );
|
|
EVAL_X01( Clip000 + DeltaZ );
|
|
EVAL_X01( Clip000 + DeltaY + DeltaZ );
|
|
|
|
const float MinZ = MaxW * NaniteView.ViewToClip[2][2] + NaniteView.ViewToClip[3][2];
|
|
const float MaxZ = MinW * NaniteView.ViewToClip[2][2] + NaniteView.ViewToClip[3][2];
|
|
|
|
FrustumCull.RectMin.z = MinZ / MaxW;
|
|
FrustumCull.RectMax.z = MaxZ / MinW;
|
|
|
|
#undef EVAL_X01
|
|
}
|
|
const float4 Rect = float4( FrustumCull.RectMin.xy, FrustumCull.RectMax.xy );
|
|
#endif
|
|
|
|
// Round to nearest pixel
|
|
int2 MinPixels = (int2)floor( Rect.xy + 0.5 );
|
|
int2 MaxPixels = (int2)floor( Rect.zw - 0.5 ); // inclusive!
|
|
|
|
// Scissor
|
|
MinPixels = max( MinPixels, Raster.ScissorRect.xy );
|
|
MaxPixels = min( MaxPixels, Raster.ScissorRect.zw - 1 );
|
|
MaxPixels = min( MaxPixels, MinPixels + ( BRICK_TRACE_APPROXIMATE_DIVIDE ? 30 : 128 ) );
|
|
|
|
|
|
#if BRICK_TRACE_WORK_REDISTRIBUTION
|
|
uint QueueReadOffset = 0;
|
|
int QueueNumElements = 0;
|
|
|
|
const int2 RectSize = max( MaxPixels - MinPixels + 1, 0 );
|
|
const uint BrickMax_BrickIndex = ( Brick.BrickMax.x ) | (Brick.BrickMax.y << 8 ) | (Brick.BrickMax.z << 16 ) | ( BrickIndex << 24 );
|
|
|
|
#if BRICK_TRACE_TRANSPOSE
|
|
const int NumPixels = MulU24( RectSize.x, RectSize.y );
|
|
|
|
const uint PixelStartOffset = WavePrefixSum( NumPixels );
|
|
const uint TotalPixels = WaveReadLaneLast( PixelStartOffset + NumPixels );
|
|
|
|
const uint LaneMask = 0xFFFFFFFFu << GroupThreadIndex;
|
|
const uint PixelEndOffset = PixelStartOffset + NumPixels - 1u;
|
|
|
|
const uint PackedMinPixels = MinPixels.x | ( MinPixels.y << 16 );
|
|
|
|
#if BRICK_TRACE_APPROXIMATE_DIVIDE
|
|
const uint IntRcpRectWidth = ceil(0x8000u * (1.0f / RectSize.x));
|
|
const uint RectMulValues = IntRcpRectWidth | ( -RectSize.x << 16 );
|
|
#else
|
|
const uint RectMulValues = -RectSize.x;
|
|
#endif
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
GroupBrickData[ GroupThreadIndex ] = uint3( RectMulValues, asuint( FrustumCull.RectMax.z ), PackedMinPixels );
|
|
|
|
const uint AcceptThreshold = NumPixels ? 31 + (int)NumPixels : 0;
|
|
const uint QueueWriteValue = GroupThreadIndex | ( PixelStartOffset << 8 );
|
|
|
|
for( uint PixelIndexBase = 0; PixelIndexBase < TotalPixels; PixelIndexBase += 32 )
|
|
{
|
|
const uint PixelIndex = PixelIndexBase + GroupThreadIndex;
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
GroupWorkEnd[ GroupThreadIndex ] = 0xFFFFFFFFu;
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
const int RelativeIndex = int( PixelEndOffset - PixelIndexBase );
|
|
if( (uint)RelativeIndex < AcceptThreshold )
|
|
GroupWorkEnd[ min( RelativeIndex, 31 ) ] = QueueWriteValue;
|
|
|
|
const uint MarkBufferValue = GroupWorkEnd[ GroupThreadIndex ];
|
|
const uint BrickStartMask = WaveBallot( MarkBufferValue != 0xFFFFFFFFu ).x;
|
|
const int BrickStartIndex = firstbitlow( BrickStartMask & LaneMask );
|
|
const uint BrickLaneData = WaveReadLaneAt( MarkBufferValue, BrickStartIndex );
|
|
|
|
const uint BrickLane = BrickLaneData & 0xFFu;
|
|
const uint BrickThread = PixelIndex - ( BrickLaneData >> 8 );
|
|
|
|
const uint3 BrickData = GroupBrickData[ BrickLane ];
|
|
|
|
#if BRICK_TRACE_APPROXIMATE_DIVIDE
|
|
const int BrickY = MulU24( BrickThread, BrickData.x & 0xFFFFu ) >> 15;
|
|
const int BrickX = MadI24( (int)BrickY, ( (int)BrickData.x >> 16 ), BrickThread);
|
|
#else
|
|
const int BrickY = floor( ( BrickThread + 0.5f ) / -(int)BrickData.x );
|
|
const int BrickX = MadI24( BrickY, BrickData.x, BrickThread );
|
|
#endif
|
|
|
|
const float BrickRectMaxZ = asfloat( BrickData.y );
|
|
const int2 BrickPixelPos = int2( BrickData.z & 0xFFFF, BrickData.z >> 16 ) + int2( BrickX, BrickY );
|
|
|
|
bool bActive = PixelIndex < TotalPixels;
|
|
|
|
BRANCH
|
|
if( bActive )
|
|
{
|
|
bActive = OcclusionTestPixel( Raster, BrickPixelPos, BrickRectMaxZ );
|
|
}
|
|
|
|
BRANCH
|
|
if( WaveActiveAnyTrue( bActive ) )
|
|
{
|
|
if( bActive )
|
|
{
|
|
const uint TaskIndex = QueueReadOffset + QueueNumElements + WavePrefixCountBits( true );
|
|
const uint WriteIndex = TaskIndex & 63;
|
|
GroupSourceLaneAndPixelPos[ WriteIndex ] = BrickLane | ( BrickPixelPos.x << 5 ) | ( BrickPixelPos.y << 19 );
|
|
}
|
|
|
|
QueueNumElements += WaveActiveCountBits( bActive );
|
|
|
|
BRANCH
|
|
if( QueueNumElements >= 32 )
|
|
{
|
|
GroupMemoryBarrierWithGroupSync();
|
|
ProcessBrickPixelBatchFromQueue( QueueNumElements, QueueReadOffset,
|
|
bIsOrtho, NaniteView, Raster, InstanceData, Cluster,
|
|
VisibleIndex, Cluster.LODError, RcpVoxelSize, Bias,
|
|
Brick_RayBase.Direction, Brick_RayDirection_dx, Brick_RayDirection_dy,
|
|
Brick_RayBase.Origin, Brick_RayOrigin_dx, Brick_RayOrigin_dy,
|
|
Brick.ReverseBrickBits, BrickMax_BrickIndex, CenterPixelClip.w, GroupThreadIndex );
|
|
}
|
|
}
|
|
}
|
|
#else // !BRICK_TRACE_TRANSPOSE
|
|
|
|
int2 PixelPos = MinPixels;
|
|
|
|
bool bLaneActive = BrickIndex < Cluster.BrickDataNum;
|
|
while( WaveActiveAnyTrue( bLaneActive ) )
|
|
{
|
|
bool bActive = bLaneActive;
|
|
|
|
BRANCH
|
|
if( bActive )
|
|
{
|
|
bActive = OcclusionTestPixel( Raster, PixelPos, FrustumCull.RectMax.z );
|
|
}
|
|
|
|
BRANCH
|
|
if( WaveActiveAnyTrue( bActive ) )
|
|
{
|
|
if( bActive )
|
|
{
|
|
const uint TaskIndex = QueueReadOffset + QueueNumElements + WavePrefixCountBits( bActive );
|
|
const uint WriteIndex = TaskIndex & 63;
|
|
|
|
GroupSourceLaneAndPixelPos[ WriteIndex ] = GroupThreadIndex | ( PixelPos.x << 5 ) | ( PixelPos.y << 19 );
|
|
}
|
|
|
|
QueueNumElements += WaveActiveCountBits( bActive );
|
|
BRANCH
|
|
if (QueueNumElements >= 32)
|
|
{
|
|
GroupMemoryBarrierWithGroupSync();
|
|
ProcessBrickPixelBatchFromQueue( QueueNumElements, QueueReadOffset,
|
|
bIsOrtho, NaniteView, Raster, InstanceData, Cluster,
|
|
VisibleIndex, Cluster.LODError, RcpVoxelSize, Bias,
|
|
Brick_RayBase.Direction, Brick_RayDirection_dx, Brick_RayDirection_dy,
|
|
Brick_RayBase.Origin, Brick_RayOrigin_dx, Brick_RayOrigin_dy,
|
|
Brick.ReverseBrickBits, BrickMax_BrickIndex, CenterPixelClip.w, GroupThreadIndex );
|
|
GroupMemoryBarrierWithGroupSync();
|
|
}
|
|
}
|
|
|
|
if( PixelPos.x < MaxPixels.x )
|
|
{
|
|
PixelPos.x++;
|
|
}
|
|
else if( PixelPos.y < MaxPixels.y )
|
|
{
|
|
PixelPos.y++;
|
|
PixelPos.x = MinPixels.x;
|
|
}
|
|
else
|
|
{
|
|
bLaneActive = false;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
BRANCH
|
|
if( QueueNumElements > 0 )
|
|
{
|
|
GroupMemoryBarrierWithGroupSync();
|
|
ProcessBrickPixelBatchFromQueue( QueueNumElements, QueueReadOffset,
|
|
bIsOrtho, NaniteView, Raster, InstanceData, Cluster,
|
|
VisibleIndex, Cluster.LODError, RcpVoxelSize, Bias,
|
|
Brick_RayBase.Direction, Brick_RayDirection_dx, Brick_RayDirection_dy,
|
|
Brick_RayBase.Origin, Brick_RayOrigin_dx, Brick_RayOrigin_dy,
|
|
Brick.ReverseBrickBits, BrickMax_BrickIndex, CenterPixelClip.w, GroupThreadIndex );
|
|
}
|
|
|
|
#else // !BRICK_TRACE_WORK_REDISTRIBUTION
|
|
BRANCH
|
|
if( BrickIndex >= Cluster.BrickDataNum )
|
|
break;
|
|
|
|
const uint PixelValue = ( ( VisibleIndex + 1 ) << 7 ) | BrickIndex;
|
|
int2 PixelPos = MinPixels;
|
|
while( true )
|
|
{
|
|
FVisBufferPixel Pixel = CreateVisBufferPixel( PixelPos, PixelValue, FrustumCull.RectMax.z );
|
|
|
|
bool bDepthPassed = true;
|
|
#if VIRTUAL_TEXTURE_TARGET
|
|
Pixel.PhysicalPosition.xy = Pixel.Position;
|
|
Pixel.PhysicalPosition.z = Raster.ArrayIndex;
|
|
if( !Raster.bSinglePage )
|
|
{
|
|
FCachedPageTable PageTranslation;
|
|
if( !PageTranslation( Pixel ) )
|
|
bDepthPassed = false;
|
|
}
|
|
#endif
|
|
|
|
if( bDepthPassed )
|
|
bDepthPassed = Pixel.EarlyDepthTest();
|
|
|
|
BRANCH
|
|
if( bDepthPassed )
|
|
{
|
|
FRay Ray = Brick_RayBase;
|
|
|
|
if( CONSTANT_DIR || bIsOrtho )
|
|
{
|
|
Ray.Origin += Brick_RayOrigin_dx * PixelPos.x + Brick_RayOrigin_dy * PixelPos.y;
|
|
}
|
|
else
|
|
{
|
|
Ray.Direction += Brick_RayDirection_dx * PixelPos.x + Brick_RayDirection_dy * PixelPos.y;
|
|
}
|
|
|
|
ProcessBrickPixel( NaniteView, Raster, InstanceData,
|
|
Ray, bIsOrtho,
|
|
PixelPos, PixelValue, Brick.ReverseBrickBits, LocalVoxelBoundsExtent,
|
|
Cluster.LODError, RcpVoxelSize, Bias );
|
|
}
|
|
|
|
if( PixelPos.x < MaxPixels.x )
|
|
{
|
|
PixelPos.x++;
|
|
}
|
|
else if( PixelPos.y < MaxPixels.y )
|
|
{
|
|
PixelPos.y++;
|
|
PixelPos.x = MinPixels.x;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
// Compute-shader (or work-graph node) entry point for Nanite SW micropoly rasterization.
// Thin dispatcher: selects the compile-time rasterization path for this permutation —
// voxel brick tracing (NANITE_VOXELS), tessellated patch rasterization (PATCHES),
// or regular cluster rasterization (default).
#if WORKGRAPH_NODE
[Shader("node")]
[NodeLaunch("broadcasting")]
[NodeMaxDispatchGrid(65535,1,1)]
#endif
[numthreads(THREADGROUP_SIZE, 1, 1)]
void MicropolyRasterize(
	uint DispatchThreadID : SV_DispatchThreadID,
	uint GroupID : SV_GroupID,
	uint GroupIndex : SV_GroupIndex
#if WORKGRAPH_NODE
	, DispatchNodeInputRecord<FShaderBundleNodeRecord> InputRecord
#endif
	)
{
#if NANITE_VOXELS
	ClusterTraceBricks( GroupID, GroupIndex );
#elif PATCHES
	PatchRasterize( GroupID, GroupIndex );
#else
	ClusterRasterize( GroupID, GroupIndex );
#endif
}
|
|
|
|
|
|
#define VERTEX_TO_TRIANGLE_MASKS (NANITE_PRIM_SHADER && (!DEPTH_ONLY || NANITE_PIXEL_PROGRAMMABLE))
|
|
|
|
#ifndef NANITE_ALLOW_SV_BARYCENTRICS
|
|
#define NANITE_ALLOW_SV_BARYCENTRICS 1
|
|
#endif
|
|
|
|
// Use barycentric intrinsics when available, otherwise prefer SV_Barycentrics.
|
|
// If all else fails export them explicitly (incompatible with vertex reuse).
|
|
#define BARYCENTRIC_MODE_NONE (!NANITE_PIXEL_PROGRAMMABLE)
|
|
#define BARYCENTRIC_MODE_INTRINSICS (!BARYCENTRIC_MODE_NONE && (NANITE_MESH_SHADER || NANITE_PRIM_SHADER) && COMPILER_SUPPORTS_BARYCENTRIC_INTRINSICS)
|
|
#define BARYCENTRIC_MODE_SV_BARYCENTRICS (!BARYCENTRIC_MODE_NONE && NANITE_MESH_SHADER && NANITE_ALLOW_SV_BARYCENTRICS && !COMPILER_SUPPORTS_BARYCENTRIC_INTRINSICS)
|
|
#define BARYCENTRIC_MODE_EXPORT (!BARYCENTRIC_MODE_NONE && !BARYCENTRIC_MODE_INTRINSICS && !BARYCENTRIC_MODE_SV_BARYCENTRICS)
|
|
|
|
// Unpacked per-primitive attributes carried from the HW raster vertex stages to the
// pixel shader. Packed into a single uint4 by PackPrimitiveAttributes / restored by
// UnpackPrimitiveAttributes.
struct PrimitiveAttributes
{
	// Packed visible-cluster/triangle ID written to the visibility buffer.
	uint PixelValue;
	// Index of the FNaniteView this primitive was rasterized for.
	uint ViewId;
	// Swap the V and W barycentrics in the PS when the triangle winding was reversed
	// (see MakePrimitiveAttributes).
	bool bSwapVW;
	// Virtual-shadow-map target only: target mip level.
	uint MipLevel;
	// Virtual-shadow-map target only: target array index (static-cache array when the
	// cluster is cached as static).
	uint ArrayIndex;
	// Virtual-shadow-map target only: packed page table level offset.
	uint LevelOffset;
	// View rectangle in pixels (min.xy, max.xy); for VSM targets this is the page rect.
	uint4 ViewRect;
};
|
|
|
|
// Wire format for PrimitiveAttributes: a single nointerpolation uint4.
// See PackPrimitiveAttributes for the exact bit layout.
struct PrimitiveAttributesPacked
{
	// Use uint4 to prevent compiler from erroneously packing per-vertex and per-prim attributes together
	nointerpolation uint4 PixelValue_ViewId_SwapVW_Mip_ArrayIndex_LevelOffset_ViewRect : TEXCOORD1;
};
|
|
|
|
// Vertex stage output / pixel shader input for the Nanite HW raster paths.
// Members are conditionally compiled per pipeline configuration, so the layout is
// an ABI between the VS/MS/prim-shader permutations and the matching PS.
struct VSOut
{
#if NANITE_HW_RASTER_INTERPOLATE_DEPTH
	// Clip-space z/w, interpolated to reconstruct depth in depth-only passes.
	float2 ClipZW : TEXCOORD0;
#endif

#if !NANITE_MESH_SHADER
	// Mesh shaders export per-primitive attributes through a dedicated channel;
	// all other paths carry them in the vertex output instead.
	PrimitiveAttributesPacked PrimitivePacked;
#endif

#if VERTEX_TO_TRIANGLE_MASKS
#if NANITE_VERT_REUSE_BATCH
	// x: bit mask of the batch triangles referencing this vertex, y: first triangle of the range.
	CUSTOM_INTERPOLATION uint2 ToTriangleMask_TriRangeStart : TEXCOORD3;
#else
	// 128-bit vertex->triangle reference mask (4 dwords).
	CUSTOM_INTERPOLATION uint4 ToTriangleMasks : TEXCOORD3;
#endif
#endif

#if BARYCENTRIC_MODE_INTRINSICS
	// Cluster-local vertex index, consumed by barycentric intrinsics in the PS.
	CUSTOM_INTERPOLATION uint VertexID : TEXCOORD4;
#elif BARYCENTRIC_MODE_SV_BARYCENTRICS && PIXELSHADER
	float3 Barycentrics : SV_Barycentrics;
#elif BARYCENTRIC_MODE_EXPORT
	// Explicitly exported barycentrics (incompatible with vertex reuse).
	float2 BarycentricsUV : TEXCOORD4;
#endif

#if NANITE_PIXEL_PROGRAMMABLE
	// Up to two UV sets: xy = UV0, zw = UV1 (see CommonRasterizerVS).
	float4 TexCoords : TEXCOORD5;
#endif

	float4 Position : SV_Position;

#if USE_GLOBAL_CLIP_PLANE && !PIXELSHADER
	float OutGlobalClipPlaneDistance : SV_ClipDistance;
#endif
};
|
|
|
|
// Packs PrimitiveAttributes into a single uint4 for per-primitive export.
// Layout (must stay in sync with UnpackPrimitiveAttributes):
//   x : PixelValue (full 32 bits)
//   y : ViewId [0:15], bSwapVW [16], (VSM only) MipLevel [18:22], ArrayIndex [23:31]
//   z : (VSM) packed page table level offset; else ViewRect.xy as 16:16
//   w : (VSM) vPage.x [0:12], vPage.y [13:25], vPageDelta.x [26:28], vPageDelta.y [29:31];
//       else ViewRect.zw as 16:16
PrimitiveAttributesPacked PackPrimitiveAttributes(PrimitiveAttributes In)
{
	uint4 PackedData = uint4(In.PixelValue, In.ViewId, 0u, 0u);

	PackedData.y |= (In.bSwapVW ? (1u << 16) : 0u);

#if VIRTUAL_TEXTURE_TARGET
	PackedData.y |= (In.MipLevel << 18) | (In.ArrayIndex << 23);
	PackedData.z = In.LevelOffset;

	// xy: VisibleCluster.vPage * VSM_PAGE_SIZE
	// zw: VisibleCluster.vPageEnd * VSM_PAGE_SIZE + VSM_PAGE_SIZE
	const uint2 vPage = In.ViewRect.xy / VSM_PAGE_SIZE;
	const uint2 vPageEnd = (In.ViewRect.zw - VSM_PAGE_SIZE) / VSM_PAGE_SIZE;
	const uint2 vPageDelta = vPageEnd - vPage;

	// 3-bit delta. This must match the logic in UnpackVisibleCluster() in NaniteDecode.ush
	PackedData.w = ((vPageDelta.y << 29u) | (vPageDelta.x << 26u) | (vPage.y << 13u) | vPage.x);
#else
	PackedData.zw = uint2((In.ViewRect.y << 16u) | In.ViewRect.x, (In.ViewRect.w << 16u) | In.ViewRect.z);
#endif

	PrimitiveAttributesPacked Out;
	Out.PixelValue_ViewId_SwapVW_Mip_ArrayIndex_LevelOffset_ViewRect = PackedData;
	return Out;
}
|
|
|
|
// Inverse of PackPrimitiveAttributes: restores PrimitiveAttributes from the packed uint4.
// Bit layout must stay in sync with PackPrimitiveAttributes.
PrimitiveAttributes UnpackPrimitiveAttributes(PrimitiveAttributesPacked In)
{
	const uint4 PackedData = In.PixelValue_ViewId_SwapVW_Mip_ArrayIndex_LevelOffset_ViewRect;

	PrimitiveAttributes Out = (PrimitiveAttributes)0;
	Out.PixelValue = PackedData.x;
	Out.ViewId = BitFieldExtractU32(PackedData.y, 16, 0);
	Out.bSwapVW = BitFieldExtractU32(PackedData.y, 1, 16);

#if VIRTUAL_TEXTURE_TARGET
	Out.MipLevel = BitFieldExtractU32(PackedData.y, 5, 18);
	Out.ArrayIndex = PackedData.y >> 23;
	Out.LevelOffset = PackedData.z;

	// Reconstruct the page rect from the 13-bit page coordinates and 3-bit page deltas.
	const uint2 vPage = uint2(BitFieldExtractU32(PackedData.w, 13, 0), BitFieldExtractU32(PackedData.w, 13, 13));
	const uint2 vPageDelta = uint2(BitFieldExtractU32(PackedData.w, 3, 26), BitFieldExtractU32(PackedData.w, 3, 29));
	const uint2 vPageEnd = vPage + vPageDelta;
	Out.ViewRect = uint4(vPage * VSM_PAGE_SIZE, vPageEnd * VSM_PAGE_SIZE + VSM_PAGE_SIZE);
#else
	// Non-VSM targets store the view rect directly as four 16-bit values.
	Out.ViewRect.x = BitFieldExtractU32(PackedData.z, 16, 0);
	Out.ViewRect.y = BitFieldExtractU32(PackedData.z, 16, 16);
	Out.ViewRect.z = BitFieldExtractU32(PackedData.w, 16, 0);
	Out.ViewRect.w = BitFieldExtractU32(PackedData.w, 16, 16);
#endif

	return Out;
}
|
|
|
|
// Builds the per-primitive attributes for one rasterized primitive from the view and
// visible-cluster data. PixelValue is the packed visibility-buffer ID for this primitive.
PrimitiveAttributes MakePrimitiveAttributes(FNaniteView NaniteView, FVisibleCluster VisibleCluster, uint PixelValue, bool bReverseWindingOrder)
{
	PrimitiveAttributes Out = (PrimitiveAttributes)0;

	Out.PixelValue = PixelValue;
	Out.ViewId = VisibleCluster.ViewId;

#if BARYCENTRIC_MODE_SV_BARYCENTRICS || BARYCENTRIC_MODE_EXPORT
	// Set SwapVW flag to indicate that the V and W barycentrics need to be swapped in the PS to compensate for the swapping of the i1 and i2 vertices.
	// BARYCENTRIC_MODE_EXPORT doesn't need this as it compensates by flipping the exported barycentrics instead.
	Out.bSwapVW = bReverseWindingOrder;
#endif

#if VIRTUAL_TEXTURE_TARGET
	Out.MipLevel = NaniteView.TargetMipLevel;
	const bool bCacheAsStatic = (VisibleCluster.Flags & NANITE_CULLING_FLAG_CACHE_AS_STATIC) != 0u;
	Out.ArrayIndex = bCacheAsStatic ? GetVirtualShadowMapStaticArrayIndex() : 0;
	Out.LevelOffset = CalcPageTableLevelOffset(NaniteView.TargetLayerIndex, NaniteView.TargetMipLevel).GetPacked();
	// Page rect in pixels covered by this visible cluster.
	Out.ViewRect = uint4(VisibleCluster.vPage * VSM_PAGE_SIZE, VisibleCluster.vPageEnd * VSM_PAGE_SIZE + VSM_PAGE_SIZE);
#else
	Out.ViewRect = NaniteView.ViewRect;
#endif

	return Out;
}
|
|
|
|
// Shared vertex-stage body used by all HW raster paths (mesh shader, primitive shader,
// plain VS). Fetches and deforms the cluster vertex, optionally applies World Position
// Offset, transforms to clip space (remapped into the per-view / VSM page viewport),
// and fills the interpolants the pixel shader consumes.
// VertIndex is the cluster-local vertex index; PixelValue is the packed visibility ID
// (ignored on the mesh-shader path, which exports primitive attributes separately).
VSOut CommonRasterizerVS(FNaniteView NaniteView, FPrimitiveSceneData PrimitiveData, FInstanceSceneData InstanceData, FVisibleCluster VisibleCluster, FCluster Cluster, uint VertIndex, uint PixelValue, bool bReverseWindingOrder)
{
	VSOut Out;

	FNanitePostDeformVertex InputVert = FetchAndDeformLocalNaniteVertex(PrimitiveData, InstanceData, GetInstanceViewData(InstanceData.InstanceId, NaniteView.SceneRendererPrimaryViewId), Cluster, VisibleCluster, VertIndex, NANITE_NUM_TEXCOORDS_TO_DECODE_HW_VS);

	float3 WorldPositionOffset = 0.0f;

	// Set up the material shader context used for vertex-stage material evaluation
	// (WPO, displacement, customized UVs).
	FMaterialShader MaterialShader;
	MaterialShader.PrimitiveData = PrimitiveData;
	MaterialShader.InstanceData = InstanceData;
	MaterialShader.InstanceDynamicData = CalculateInstanceDynamicData(NaniteView, InstanceData);
	MaterialShader.NaniteView = NaniteView;
	MaterialShader.Cluster = Cluster;
	MaterialShader.VisibleCluster = VisibleCluster;

#if MATERIAL_SHADER_HAS_DISPLACEMENT
	MaterialShader.InitDisplacement(RasterBinMeta[GetRasterBin()].MaterialDisplacementParams);
	MaterialShader.ApplyFallbackDisplacement(InputVert);
#endif

	MaterialShader.InitVertexParameters(InputVert);

	// Only evaluate World Position Offset when culling flagged it enabled for this cluster.
	BRANCH
	if ( (VisibleCluster.Flags & NANITE_CULLING_FLAG_ENABLE_WPO) != 0 )
	{
		WorldPositionOffset = MaterialShader.EvaluateWorldPositionOffset();
	}

	const float3 PointTranslatedWorld = DFTransformLocalToTranslatedWorld(InputVert.Position, InstanceData.LocalToWorld, NaniteView.PreViewTranslation).xyz + WorldPositionOffset;
	float4 PointClip = mul( float4( PointTranslatedWorld, 1 ), NaniteView.TranslatedWorldToClip );
#if VIRTUAL_TEXTURE_TARGET
	/*
	float2 vUV = PointClip.xy * float2(0.5, -0.5) + 0.5 * PointClip.w;
	float2 vPixels = vUV * NaniteView.ViewSizeAndInvSize.xy;
	float2 LocalPixels = vPixels - VisibleCluster.vPage * VSM_PAGE_SIZE * PointClip.w;
	float2 LocalUV = LocalPixels / ( 4 * VSM_PAGE_SIZE );
	float2 LocalClip = LocalUV * float2(2, -2) + float2(-1, 1) * PointClip.w;
	PointClip.xy = LocalClip;
	*/
	PointClip.xy = NaniteView.ClipSpaceScaleOffset.xy * PointClip.xy + NaniteView.ClipSpaceScaleOffset.zw * PointClip.w;

	// Offset 0,0 to be at vPage for a 0, VSM_PAGE_SIZE * VSM_RASTER_WINDOW_PAGES viewport.
	PointClip.xy += PointClip.w * ( float2(-2, 2) / VSM_RASTER_WINDOW_PAGES ) * VisibleCluster.vPage;
#else
	PointClip.xy = NaniteView.ClipSpaceScaleOffset.xy * PointClip.xy + NaniteView.ClipSpaceScaleOffset.zw * PointClip.w;
#endif

#if !NANITE_MESH_SHADER
	// Non-mesh-shader paths carry the per-primitive attributes through the vertex output.
	Out.PrimitivePacked = PackPrimitiveAttributes(MakePrimitiveAttributes(NaniteView, VisibleCluster, PixelValue, bReverseWindingOrder));
#endif

#if NANITE_PIXEL_PROGRAMMABLE && NUM_TEX_COORD_INTERPOLATORS > 0
	float2 CustomizedUVs[NUM_TEX_COORD_INTERPOLATORS];
	MaterialShader.GetCustomizedUVs(CustomizedUVs);
#endif

#if NANITE_PIXEL_PROGRAMMABLE
	// Pack up to two UV sets into one interpolator; fall back to the raw decoded
	// attribute UVs for any slot without a customized UV.
#if NUM_TEX_COORD_INTERPOLATORS > 1
	Out.TexCoords.xy = CustomizedUVs[0];
	Out.TexCoords.zw = CustomizedUVs[1];
#elif NUM_TEX_COORD_INTERPOLATORS > 0
	Out.TexCoords.xy = CustomizedUVs[0];
	Out.TexCoords.zw = InputVert.RawAttributeData.TexCoords[1];
#else
	Out.TexCoords.xy = InputVert.RawAttributeData.TexCoords[0];
	Out.TexCoords.zw = InputVert.RawAttributeData.TexCoords[1];
#endif
#endif

#if MATERIAL_CACHE
	// Material cache rendering replaces the view projection with an unwrapped-UV layout.
#if NUM_MATERIAL_OUTPUTS_GETMATERIALCACHE > 0
	float2 MaterialCacheUV = GetMaterialCache1(MaterialShader.VertexParameters);
#else // NUM_MATERIAL_OUTPUTS_GETMATERIALCACHE > 0
	float2 MaterialCacheUV = InputVert.RawAttributeData.TexCoords[0];
#endif // NUM_MATERIAL_OUTPUTS_GETMATERIALCACHE

	PointClip = GetMaterialCacheUnwrapClipPosition(MaterialCacheUV, NaniteView.MaterialCacheUnwrapMinAndInvSize, NaniteView.MaterialCachePageAdvanceAndInvCount.xy);
#endif // MATERIAL_CACHE

#if !PIXELSHADER
	Out.Position = PointClip;

#if NANITE_HW_RASTER_INTERPOLATE_DEPTH
	Out.ClipZW = PointClip.zw;
#endif

	const bool bNearClip = ((NaniteView.Flags & NANITE_VIEW_FLAG_NEAR_CLIP) != 0u);
	if (!bNearClip)
	{
		// Shader workaround to avoid HW depth clipping. Should be replaced with rasterizer state ideally.
		Out.Position.z = 0.5f * Out.Position.w;
	}
#endif

#if BARYCENTRIC_MODE_INTRINSICS
	Out.VertexID = VertIndex;
#endif

#if USE_GLOBAL_CLIP_PLANE && !PIXELSHADER
	Out.OutGlobalClipPlaneDistance = GetGlobalClipPlaneDistance(NaniteView, PointTranslatedWorld);
#endif

	return Out;
}
|
|
|
|
#if NANITE_PRIM_SHADER
|
|
|
|
#pragma argument(realtypes)
|
|
|
|
// Input to the AMD primitive-shader path. Index is only initialized for lane 0 of the
// subgroup; HWRasterizeVS broadcasts it to the other lanes (via LDS, or a wave read on
// the vert-reuse-batch path).
struct PrimitiveInput
{
	uint Index : PRIM_SHADER_SEM_VERT_INDEX;
#if !NANITE_VERT_REUSE_BATCH
	// Wave index within the subgroup; combined with the lane index to form GroupThreadID.
	uint WaveIndex : PRIM_SHADER_SEM_WAVE_INDEX;
#endif
};
|
|
|
|
// Output of the primitive-shader path: the per-vertex attributes plus the per-lane
// primitive export word and the vertex/primitive counts for the export.
struct PrimitiveOutput
{
	VSOut Out;

	// Packed triangle vertex indices (see PackTriangleExport).
	uint PrimExport : PRIM_SHADER_SEM_PRIM_EXPORT;
	uint VertCount : PRIM_SHADER_SEM_VERT_COUNT;
	uint PrimCount : PRIM_SHADER_SEM_PRIM_COUNT;
};
|
|
|
|
// Packs three cluster-local vertex indices into one dword for primitive export:
// x in bits [0:9], y in bits [10:19], z in bits [20:29].
uint PackTriangleExport(uint3 TriangleIndices)
{
	uint Packed = TriangleIndices.z;
	Packed = (Packed << 10) | TriangleIndices.y;
	Packed = (Packed << 10) | TriangleIndices.x;
	return Packed;
}
|
|
|
|
// Extracts the three 10-bit vertex indices packed by PackTriangleExport.
// Note: the top component is intentionally left unmasked (only 30 bits are ever used).
uint3 UnpackTriangleExport(uint Packed)
{
	uint3 Indices;
	Indices.x = Packed & 0x3FF;
	Indices.y = (Packed >> 10) & 0x3FF;
	Indices.z = Packed >> 20;
	return Indices;
}
|
|
|
|
// Number of 32-bit dwords needed for one bit per cluster vertex.
#define NUM_VERTEX_MASKS ((NANITE_MAX_CLUSTER_VERTICES + 31)/32)

// Groupshared scratch for the primitive-shader path. The vertex->triangle mask table
// is unioned with the vertex-compaction scratch (S): their lifetimes do not overlap
// (HWRasterizeVS places a group sync between the two uses).
groupshared union
{
#if VERTEX_TO_TRIANGLE_MASKS
	// Per exported vertex: 128-bit mask of the triangles referencing it.
	uint VertexToTriangleMasks[NANITE_MAX_CLUSTER_VERTICES][4];
#endif
	struct
	{
		uint ClusterIndex; // NOTE: Overlapping ClusterIndex with VertexToTriangleMasks reduces peak LDS usage because of allocation granularity.
		// Bit per cluster vertex: referenced by at least one exported triangle.
		uint ReferencedVerticesMasks[NUM_VERTEX_MASKS];
		// Prefix sums of set bits per mask dword, used to assign compact indices.
		uint ReferencedVerticesPrefixSums[NUM_VERTEX_MASKS];
		// Compact (export) vertex index -> original cluster vertex index.
		uchar NewToOldVertex[NANITE_MAX_CLUSTER_VERTICES];
		// Original cluster vertex index -> compact (export) vertex index.
		uchar OldToNewVertex[NANITE_MAX_CLUSTER_VERTICES];
	} S;
} LDS;

// Per-vertex triangle masks for the 32-wide vert-reuse-batch path.
groupshared uint GroupVertToTriMasks[32];
|
|
|
|
// AMD primitive-shader entry point for Nanite HW rasterization.
// Each subgroup processes one visible cluster (or one triangle range of it): decodes
// triangle indices, optionally compacts/deduplicates vertices, runs CommonRasterizerVS
// per exported vertex, and exports packed triangle connectivity. The vert-reuse-batch
// variant works on 32-triangle batches with wave-level vertex deduplication; the
// default variant exports whole clusters and compacts vertices through LDS when only
// a sub-range of triangles is rendered.
PRIM_SHADER_OUTPUT_TRIANGLES
PRIM_SHADER_PRIM_COUNT(1)
PRIM_SHADER_VERT_COUNT(1)
#if NANITE_VERT_REUSE_BATCH
PRIM_SHADER_VERT_LIMIT(32)
PRIM_SHADER_AMP_FACTOR(32)
#else
PRIM_SHADER_VERT_LIMIT(256)
PRIM_SHADER_AMP_FACTOR(128)
#endif
PRIM_SHADER_AMP_ENABLE
PrimitiveOutput HWRasterizeVS(PrimitiveInput Input)
{
	const uint LaneIndex = WaveGetLaneIndex();
	const uint LaneCount = WaveGetLaneCount();

#if NANITE_VERT_REUSE_BATCH
	const uint GroupThreadID = LaneIndex;
	// Broadcast lane 0's input index to the whole wave.
	uint VisibleIndex = WaveReadLaneAt(Input.Index, 0);
#else
	const uint GroupThreadID = LaneIndex + Input.WaveIndex * LaneCount;

	if (GroupThreadID == 0)
	{
		// Input index is only initialized for lane 0, so we need to manually communicate it to all other threads in subgroup (not just wavefront).
		LDS.S.ClusterIndex = Input.Index;
	}

	GroupMemoryBarrierWithGroupSync();
	uint VisibleIndex = LDS.S.ClusterIndex;
#endif

	FTriRange TriRange = GetIndexAndTriRangeHW( VisibleIndex );

	// Should be all scalar.
	FVisibleCluster VisibleCluster = GetVisibleCluster( VisibleIndex, VIRTUAL_TEXTURE_TARGET );

	FPrimitiveSceneData PrimitiveData;
	FInstanceSceneData InstanceData;
	GetNaniteMaterialSceneData(VisibleCluster, PrimitiveData, InstanceData);

	FNaniteView NaniteView = GetNaniteView( VisibleCluster.ViewId );
	const bool bReverseWindingOrder = ReverseWindingOrder(NaniteView, PrimitiveData, InstanceData);

#if NANITE_VERTEX_PROGRAMMABLE
	ResolvedView = ResolveView(NaniteView);
#endif

	FCluster Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);
	// A zero-length range means "whole cluster".
	if( TriRange.Num == 0 )
		TriRange.Num = Cluster.NumTris;

#if NANITE_VERT_REUSE_BATCH
#if VERTEX_TO_TRIANGLE_MASKS
	GroupVertToTriMasks[GroupThreadID] = 0;
#endif

	const uint TriIndex = TriRange.Start + GroupThreadID;

	bool bTriValid = GroupThreadID < TriRange.Num;

	uint3 VertIndexes = 0;
	if (bTriValid)
	{
		VertIndexes = DecodeTriangleIndices(Cluster, TriIndex);

		if( bReverseWindingOrder )
			VertIndexes.yz = VertIndexes.zy;
	}

	// Wave-level dedup: maps cluster vertex indices to unique per-lane slots.
	uint NumUniqueVerts;
	uint3 VertLaneIndexes;
	uint LaneVertIndex;
	DeduplicateVertIndexes(VertIndexes, GroupThreadID, bTriValid, NumUniqueVerts, LaneVertIndex, VertLaneIndexes);

	PrimitiveOutput PrimOutput;
	PrimOutput.VertCount = NumUniqueVerts;
	PrimOutput.PrimCount = TriRange.Num;

	if (GroupThreadID < NumUniqueVerts)
	{
		const uint PixelValue = (VisibleIndex + 1) << 7;
		PrimOutput.Out = CommonRasterizerVS(NaniteView, PrimitiveData, InstanceData, VisibleCluster, Cluster, LaneVertIndex, PixelValue, bReverseWindingOrder);
	}

	if (bTriValid)
	{
		PrimOutput.PrimExport = PackTriangleExport(VertLaneIndexes);
	}

#if VERTEX_TO_TRIANGLE_MASKS
	// Accumulate, per unique vertex, the mask of batch triangles referencing it.
	if (bTriValid)
	{
		InterlockedOr(GroupVertToTriMasks[VertLaneIndexes.x], 1 << GroupThreadID);
		InterlockedOr(GroupVertToTriMasks[VertLaneIndexes.y], 1 << GroupThreadID);
		InterlockedOr(GroupVertToTriMasks[VertLaneIndexes.z], 1 << GroupThreadID);
	}

	GroupMemoryBarrier();

	if (GroupThreadID < NumUniqueVerts)
	{
		PrimOutput.Out.ToTriangleMask_TriRangeStart = uint2(GroupVertToTriMasks[GroupThreadID], TriRange.Start);
	}
#endif

#else // !NANITE_VERT_REUSE_BATCH
	uint NumExportVertices = Cluster.NumVerts;
	bool bNeedsCompaction = (TriRange.Num != Cluster.NumTris);

	uint SrcVertexIndex = GroupThreadID;
	uint3 VertIndexes;
	if (GroupThreadID < TriRange.Num)
	{
		VertIndexes = DecodeTriangleIndices(Cluster, TriRange.Start + GroupThreadID);
		if( bReverseWindingOrder )
			VertIndexes.yz = VertIndexes.zy;
	}

	BRANCH
	if (bNeedsCompaction)
	{
		// Programmable raster renders a single material at a time, so clusters with multiple materials need to only
		// export triangles from the current material. Unreferenced vertices are not allowed in primitive shaders,
		// so we need to compact the vertices and remap any references.

		// The expectation is that this path is going to be rare as most clusters will have just a single material and
		// most materials will not need programmable raster.

		if (GroupThreadID < NUM_VERTEX_MASKS)
		{
			// Clear vertex reference masks
			LDS.S.ReferencedVerticesMasks[GroupThreadID] = 0u;
		}
		GroupMemoryBarrierWithGroupSync();
		if (GroupThreadID < TriRange.Num)
		{
			// Mark referenced vertices
			InterlockedOr(LDS.S.ReferencedVerticesMasks[VertIndexes.x >> 5], 1u << (VertIndexes.x & 31));
			InterlockedOr(LDS.S.ReferencedVerticesMasks[VertIndexes.y >> 5], 1u << (VertIndexes.y & 31));
			InterlockedOr(LDS.S.ReferencedVerticesMasks[VertIndexes.z >> 5], 1u << (VertIndexes.z & 31));
		}

		GroupMemoryBarrierWithGroupSync();
		if (GroupThreadID < NUM_VERTEX_MASKS)
		{
			// Calculate dword prefix sums
			const uint NumMaskBits = countbits(LDS.S.ReferencedVerticesMasks[GroupThreadID]);
			LDS.S.ReferencedVerticesPrefixSums[GroupThreadID] = WavePrefixSum(NumMaskBits);
		}
		GroupMemoryBarrierWithGroupSync();

		// Update export vertices to number of referenced vertices
		NumExportVertices = LDS.S.ReferencedVerticesPrefixSums[NUM_VERTEX_MASKS - 1] + countbits(LDS.S.ReferencedVerticesMasks[NUM_VERTEX_MASKS - 1]);

		if (GroupThreadID < Cluster.NumVerts)
		{
			const uint DwordIndex = GroupThreadID >> 5;
			const uint BitIndex = GroupThreadID & 31;
			if (LDS.S.ReferencedVerticesMasks[DwordIndex] & (1u << BitIndex))
			{
				// Fill mappings between old and new (compact) vertex indices
				const uint NewVertexIndex = LDS.S.ReferencedVerticesPrefixSums[DwordIndex] + countbits(BitFieldExtractU32(LDS.S.ReferencedVerticesMasks[DwordIndex], BitIndex, 0));
				LDS.S.OldToNewVertex[GroupThreadID] = (uchar)NewVertexIndex;
				LDS.S.NewToOldVertex[NewVertexIndex] = (uchar)GroupThreadID;
			}
		}

		GroupMemoryBarrierWithGroupSync();
		if (GroupThreadID < TriRange.Num)
		{
			// Remap triangles to new vertex indices
			VertIndexes = uint3(LDS.S.OldToNewVertex[VertIndexes.x], LDS.S.OldToNewVertex[VertIndexes.y], LDS.S.OldToNewVertex[VertIndexes.z]);
		}
		if (GroupThreadID < NumExportVertices)
		{
			// Remap source vertex from compact to old
			SrcVertexIndex = LDS.S.NewToOldVertex[GroupThreadID];
		}
	}

	PrimitiveOutput PrimOutput;
	PrimOutput.VertCount = NumExportVertices;
	PrimOutput.PrimCount = TriRange.Num;

	if (GroupThreadID < TriRange.Num)
	{
		PrimOutput.PrimExport = PackTriangleExport(VertIndexes);
	}

	if (GroupThreadID < NumExportVertices)
	{
		const uint PixelValue = ((VisibleIndex + 1) << 7);
		PrimOutput.Out = CommonRasterizerVS(NaniteView, PrimitiveData, InstanceData, VisibleCluster, Cluster, SrcVertexIndex, PixelValue, bReverseWindingOrder);
	}

#if VERTEX_TO_TRIANGLE_MASKS
	GroupMemoryBarrierWithGroupSync(); // Sync to make sure there is no lifetime overlap with LDS.S

	if (GroupThreadID < NumExportVertices)
	{
		LDS.VertexToTriangleMasks[GroupThreadID][0] = 0;
		LDS.VertexToTriangleMasks[GroupThreadID][1] = 0;
		LDS.VertexToTriangleMasks[GroupThreadID][2] = 0;
		LDS.VertexToTriangleMasks[GroupThreadID][3] = 0;
	}

	GroupMemoryBarrierWithGroupSync();
	if (GroupThreadID < TriRange.Num)
	{
		// Set this triangle's bit on the mask of each of its three vertices.
		const uint TriangleID = TriRange.Start + GroupThreadID;
		const uint DwordIndex = (TriangleID >> 5) & 3;
		const uint TriangleMask = 1 << (TriangleID & 31);
		InterlockedOr(LDS.VertexToTriangleMasks[VertIndexes.x][DwordIndex], TriangleMask);
		InterlockedOr(LDS.VertexToTriangleMasks[VertIndexes.y][DwordIndex], TriangleMask);
		InterlockedOr(LDS.VertexToTriangleMasks[VertIndexes.z][DwordIndex], TriangleMask);
	}

	GroupMemoryBarrierWithGroupSync();
	if (GroupThreadID < NumExportVertices)
	{
		PrimOutput.Out.ToTriangleMasks = uint4( LDS.VertexToTriangleMasks[GroupThreadID][0],
												LDS.VertexToTriangleMasks[GroupThreadID][1],
												LDS.VertexToTriangleMasks[GroupThreadID][2],
												LDS.VertexToTriangleMasks[GroupThreadID][3]);
	}
#endif
#endif // NANITE_VERT_REUSE_BATCH

	return PrimOutput;
}
|
|
|
|
#elif NANITE_MESH_SHADER
|
|
|
|
#if MESHSHADER || WORKGRAPH_NODE
|
|
|
|
// Mesh-shader (or work-graph mesh node) entry point for Nanite HW rasterization.
// One group per visible cluster (or triangle range of it): decodes triangles, writes
// the triangle connectivity and packed per-primitive attributes, and runs
// CommonRasterizerVS per exported vertex. With a 128-thread group size each thread
// exports up to two vertices.
#if WORKGRAPH_NODE
[Shader("node")]
[NodeLaunch("mesh")]
[NodeMaxDispatchGrid(65535,1,1)]
#endif
MESH_SHADER_TRIANGLE_ATTRIBUTES(NANITE_MESH_SHADER_TG_SIZE)
void HWRasterizeMS(
	uint GroupThreadID : SV_GroupThreadID,
	uint3 GroupID : SV_GroupID,
#if WORKGRAPH_NODE
	DispatchNodeInputRecord<FShaderBundleNodeRecord> InputRecord,
#endif
#if NANITE_VERT_REUSE_BATCH
	MESH_SHADER_VERTEX_EXPORT(VSOut, 32),
	MESH_SHADER_TRIANGLE_EXPORT(32),
	MESH_SHADER_PRIMITIVE_EXPORT(PrimitiveAttributesPacked, 32)
#else
	MESH_SHADER_VERTEX_EXPORT(VSOut, 256),
	MESH_SHADER_TRIANGLE_EXPORT(128),
	MESH_SHADER_PRIMITIVE_EXPORT(PrimitiveAttributesPacked, 128)
#endif
)
{
	bool bValidIndex = true;

#if PLATFORM_REQUIRES_UNWRAPPED_MESH_SHADER_ARGS
	uint VisibleIndex = GroupID.x;
#else
	// Avoid overflowing the 64k limit on single dimension of SV_GroupID
	uint VisibleIndex = GetUnWrappedDispatchGroupId(GroupID);
	BRANCH
	if (GroupID.y > 0 || GroupID.z > 0)
	{
		// Due to wrapping, the visible index can be out of range
		bValidIndex = (VisibleIndex < RasterBinMeta[GetRasterBin()].BinHWCount);
	}
#endif

	// NOTE: Doing a simple early out here doesn't work. Likely because divergent control
	// flow is not allowed around SetMeshOutputCounts, even if the condition is uniform for
	// the group. The compiler succeeds but corruption occurs.

	FTriRange TriRange;
	FVisibleCluster VisibleCluster;
	FInstanceSceneData InstanceData;
	FPrimitiveSceneData PrimitiveData;
	FNaniteView NaniteView;

	// Defaults chosen so an invalid index yields a zero-sized output.
	uint NumUniqueVerts = 0;
	uint3 VertIndexes = 0;
	TriRange.Num = 0;
	uint TriIndex = 0;
	FCluster Cluster;
	uint LaneVertIndex = 0;
	bool bReverseWindingOrder = false;

	BRANCH
	if (bValidIndex)
	{
		TriRange = GetIndexAndTriRangeHW(VisibleIndex);
		VisibleCluster = GetVisibleCluster(VisibleIndex, VIRTUAL_TEXTURE_TARGET);
		GetNaniteMaterialSceneData(VisibleCluster, PrimitiveData, InstanceData);
		NaniteView = GetNaniteView(VisibleCluster.ViewId);
		bReverseWindingOrder = ReverseWindingOrder(NaniteView, PrimitiveData, InstanceData);

#if NANITE_VERTEX_PROGRAMMABLE
		ResolvedView = ResolveView(NaniteView);
#endif

		Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);
		// A zero-length range means "whole cluster".
		if( TriRange.Num == 0 )
			TriRange.Num = Cluster.NumTris;

		TriIndex = TriRange.Start + GroupThreadID;

		bool bTriValid = GroupThreadID < TriRange.Num;

		if (bTriValid)
		{
			VertIndexes = DecodeTriangleIndices(Cluster, TriIndex);

			if( bReverseWindingOrder )
				VertIndexes.yz = VertIndexes.zy;
		}

#if NANITE_VERT_REUSE_BATCH
		// Wave-level dedup: remaps VertIndexes to unique export slots in-place.
		DeduplicateVertIndexes(VertIndexes, GroupThreadID, bTriValid, NumUniqueVerts, LaneVertIndex, VertIndexes);
#else
		LaneVertIndex = GroupThreadID;
		NumUniqueVerts = Cluster.NumVerts;
#endif
	}

	SetMeshOutputCounts(NumUniqueVerts, TriRange.Num);

	BRANCH
	if (bValidIndex)
	{
		uint PrimExportIndex = GroupThreadID;
		if (PrimExportIndex < TriRange.Num)
		{
			MESH_SHADER_WRITE_TRIANGLE(PrimExportIndex, VertIndexes);

			const uint PixelValue = ((VisibleIndex + 1) << 7) | TriIndex;
			PrimitiveAttributes Attributes = MakePrimitiveAttributes(NaniteView, VisibleCluster, PixelValue, bReverseWindingOrder);
			PrimitiveAttributesPacked AttributesPacked = PackPrimitiveAttributes(Attributes);
			MESH_SHADER_WRITE_PRIMITIVE(PrimExportIndex, AttributesPacked);
		}

		uint VertExportIndex = GroupThreadID;
		if (VertExportIndex < Cluster.NumVerts)
		{
			// PixelValue is 0 here: the MS path exports it via the per-primitive attributes above.
			VSOut VertexOutput = CommonRasterizerVS(NaniteView, PrimitiveData, InstanceData, VisibleCluster, Cluster, LaneVertIndex, 0u, bReverseWindingOrder);
			MESH_SHADER_WRITE_VERTEX(VertExportIndex, VertexOutput);
		}

#if NANITE_MESH_SHADER_TG_SIZE == 128
		// Second vertex per thread when the group is half the max vertex count.
		VertExportIndex += 128;
		if (VertExportIndex < Cluster.NumVerts)
		{
			VSOut VertexOutput = CommonRasterizerVS(NaniteView, PrimitiveData, InstanceData, VisibleCluster, Cluster, LaneVertIndex + 128, 0u, bReverseWindingOrder);
			MESH_SHADER_WRITE_VERTEX(VertExportIndex, VertexOutput);
		}
#endif
	}
}
|
|
|
|
#endif // MESHSHADER || WORKGRAPH_NODE
|
|
|
|
#else // NANITE_MESH_SHADER / NANITE_PRIM_SHADER
|
|
|
|
// Vertex shader for the non-mesh-shader hardware rasterization path.
// One instance per visible cluster (or per triangle sub-range); every three
// consecutive vertex IDs form one triangle of that cluster.
VSOut HWRasterizeVS(
	uint VertexID : SV_VertexID,
	uint VisibleIndex : SV_InstanceID
)
{
	FTriRange TriRange = GetIndexAndTriRangeHW( VisibleIndex );

	// Split the flat vertex ID into a local triangle index and a corner index (0..2).
	const uint LocalTriIndex = VertexID / 3;
	const uint Corner = VertexID - LocalTriIndex * 3;

	VSOut Out;
#if !PIXELSHADER
	// No pixel shader bound: still emit a well-defined position so triangles
	// beyond the valid range are degenerate rather than undefined.
	Out.Position = float4( 0, 0, 0, 1 );
#endif

	FVisibleCluster VisibleCluster = GetVisibleCluster( VisibleIndex, VIRTUAL_TEXTURE_TARGET );

	FPrimitiveSceneData PrimitiveData;
	FInstanceSceneData InstanceData;
	GetNaniteMaterialSceneData( VisibleCluster, PrimitiveData, InstanceData );

	FNaniteView NaniteView = GetNaniteView( VisibleCluster.ViewId );
	const bool bReverseWindingOrder = ReverseWindingOrder( NaniteView, PrimitiveData, InstanceData );

#if NANITE_VERTEX_PROGRAMMABLE
	ResolvedView = ResolveView( NaniteView );
#endif

	FCluster Cluster = GetCluster( VisibleCluster.PageIndex, VisibleCluster.ClusterIndex );

	// A zero-length range is the encoding for "rasterize the entire cluster".
	if( TriRange.Num == 0 )
	{
		TriRange.Num = Cluster.NumTris;
	}

	BRANCH
	if( LocalTriIndex < TriRange.Num )
	{
		const uint TriIndex = TriRange.Start + LocalTriIndex;

		uint3 VertIndexes = DecodeTriangleIndices( Cluster, TriIndex );
		if( bReverseWindingOrder )
		{
			VertIndexes.yz = VertIndexes.zy;
		}

		// Visible-index (biased by 1) in the high bits, triangle index in the low 7 bits.
		const uint PixelValue = ((VisibleIndex + 1) << 7) | TriIndex;
		Out = CommonRasterizerVS( NaniteView, PrimitiveData, InstanceData, VisibleCluster, Cluster, VertIndexes[ Corner ], PixelValue, bReverseWindingOrder );

	#if BARYCENTRIC_MODE_EXPORT
		// Export U for corner 0 and V for the (possibly winding-swapped) second corner;
		// W is reconstructed as 1 - U - V on the pixel shader side.
		const uint VIndex = bReverseWindingOrder ? 2 : 1;
		Out.BarycentricsUV = float2( Corner == 0, Corner == VIndex );
	#endif
	}

	return Out;
}
|
|
|
|
#endif // NANITE_PRIM_SHADER
|
|
|
|
// Returns true if Expr is true on any of the four lanes of the current pixel quad.
// Emulation of SM6.7 QuadAny() built from SM6.0 quad-read intrinsics:
// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_7_QuadAny_QuadAll.html
//
// NOTE: From that spec, this approach is somewhat blessed, but the docs for
// QuadReadAcrossX state that the result is undefined when reading an inactive lane.
// So, according to the docs, this could potentially give false positives, but never
// false negatives.
//
// Helper lanes are defined to be active, so this should only ever be an issue if the
// lanes of a quad are made partially inactive by an earlier branch. For platforms where
// the undefined value isn't just zero, this could result in false positives, which
// should still be safe in the context of how this is currently used.
bool QuadActiveAnyTrue(bool Expr)
{
	const uint LaneValue = Expr ? 1u : 0u;

	const uint QuadOr =
		LaneValue |
		QuadReadAcrossX( LaneValue ) |
		QuadReadAcrossY( LaneValue ) |
		QuadReadAcrossDiagonal( LaneValue );

	return QuadOr != 0u;
}
|
|
|
|
// Pixel shader for the Nanite hardware rasterization path.
// Recovers the per-pixel visibility value (visible cluster + triangle index), applies
// manual scissoring against the view rect, optionally evaluates programmable material
// code (pixel depth offset / opacity masking), and writes the pixel through FVisBufferPixel.
// On the mesh shader path per-primitive data arrives as a separate packed input;
// otherwise it is carried inside the interpolated VSOut.
void HWRasterizePS(VSOut In
#if NANITE_MESH_SHADER
	, PrimitiveAttributesPacked PrimitivePacked
#endif
#if MATERIAL_TWOSIDED
	, bool bFrontFace : SV_IsFrontFace
#endif
)
{
#if NANITE_HW_RASTER_INTERPOLATE_DEPTH
	// Interpolating SV_Position attributes manually can be significantly faster than having the hardware set up the registers.
	// Unfortunately, it has also shown to have precision problems on some hardware for extremely long and narrow trinagles.

	// The compromise is to always use SV_Position for .xy, so it is guaranteed to always hit the right pixels,
	// but interpolate depth for shadow rendering, which is usually the more HW raster heavy pass.
	// For visibility buffer rendering the depth imprecision alone has shown to cause issues for extremely narrow triangles (UE-177564),
	// so there SV_Position is also used for depth.

	// TODO: Have the builder detect and fix the problematic cases, so we can always safely interpolate?

	// Reconstruct depth as z/w from the interpolated clip-space ZW pair.
	float4 SvPosition = float4(In.Position.xy, In.ClipZW.x / In.ClipZW.y, In.ClipZW.y);
#else
	float4 SvPosition = In.Position;
#endif

	uint2 PixelPos = (uint2)SvPosition.xy;

	PrimitiveAttributes Primitive;
#if NANITE_MESH_SHADER
	Primitive = UnpackPrimitiveAttributes(PrimitivePacked);
#else
	Primitive = UnpackPrimitiveAttributes(In.PrimitivePacked);
#endif

	uint PixelValue = Primitive.PixelValue;

#if VERTEX_TO_TRIANGLE_MASKS
	// PixelValue carries only the cluster part here; the triangle index is recovered
	// by intersecting (AND-ing) the per-vertex triangle membership masks of the three
	// vertices of the current primitive and taking the lowest set bit.
#if NANITE_VERT_REUSE_BATCH
	uint2 Mask_TriRangeStart = GetAttributeAtVertex0( In.ToTriangleMask_TriRangeStart );
	uint Mask0 = Mask_TriRangeStart.x;
	uint Mask1 = GetAttributeAtVertex1( In.ToTriangleMask_TriRangeStart ).x;
	uint Mask2 = GetAttributeAtVertex2( In.ToTriangleMask_TriRangeStart ).x;
	uint Mask = Mask0 & Mask1 & Mask2;
	uint TriangleIndex = Mask_TriRangeStart.y + firstbitlow(Mask);
	PixelValue += TriangleIndex;
#else
	// 128-bit mask variant: four 32-bit words cover up to 128 triangles per cluster.
	uint4 Masks0 = GetAttributeAtVertex0( In.ToTriangleMasks );
	uint4 Masks1 = GetAttributeAtVertex1( In.ToTriangleMasks );
	uint4 Masks2 = GetAttributeAtVertex2( In.ToTriangleMasks );

	uint4 Masks = Masks0 & Masks1 & Masks2;
	uint TriangleIndex = Masks.x ? firstbitlow( Masks.x ) :
						 Masks.y ? firstbitlow( Masks.y ) + 32 :
						 Masks.z ? firstbitlow( Masks.z ) + 64 :
						 firstbitlow( Masks.w ) + 96;

	PixelValue += TriangleIndex;
#endif
#endif

#if VIRTUAL_TEXTURE_TARGET
	// Virtual target: view rect .xy is an offset into the virtual address space;
	// only the upper bound needs testing after the offset is applied.
	PixelPos += Primitive.ViewRect.xy;
	if (all(PixelPos < Primitive.ViewRect.zw))
#else
	// In multi-view mode every view has its own scissor, so we have to scissor manually.
	if( all( (PixelPos >= Primitive.ViewRect.xy) & (PixelPos < Primitive.ViewRect.zw) ) )
#endif
	{
		const uint ViewId = Primitive.ViewId;
		const bool bSwapVW = Primitive.bSwapVW;

		// Stays at 1.0 unless masked blending computes a real mask below,
		// so the final >= 0 test passes unconditionally for non-masked materials.
		float MaterialMask = 1.0f;

		FVisBufferPixel Pixel = CreateVisBufferPixel( PixelPos, PixelValue, SvPosition.z );
#if VISUALIZE
		Pixel.VisualizeValues = GetVisualizeValues();
#endif

#if VIRTUAL_TEXTURE_TARGET
		const uint MipLevel = Primitive.MipLevel;
		const uint ArrayIndex = Primitive.ArrayIndex;
		const uint LevelOffset = Primitive.LevelOffset;

		// Translate the virtual texel to its backing physical page texel.
		if( !VirtualToPhysicalTexelForRendering( FVirtualSMLevelOffset::Unpack(LevelOffset), MipLevel, Pixel.Position, Pixel.PhysicalPosition.xy ) )
		{
			// Not committed or should not be rendered into
			return;
		}

		Pixel.PhysicalPosition.z = ArrayIndex;
#endif

		Pixel.WriteOverdraw();

#if ENABLE_EARLY_Z_TEST
		// Quad-uniform early out: only return when the whole quad fails the depth test,
		// so the remaining lanes keep valid neighbors for the ddx/ddy derivatives below.
		BRANCH
		if( !QuadActiveAnyTrue( Pixel.EarlyDepthTest() ) )
		{
			return;
		}
#endif

		// Note: NANITE_PIXEL_PROGRAMMABLE is currently too conservative and PDO / Masking needs to be checked explicitly to remove unused code
		// See ShouldCompileProgrammablePermutation in NaniteCullRaster.cpp
#if NANITE_PIXEL_PROGRAMMABLE && (WANT_PIXEL_DEPTH_OFFSET || MATERIALBLENDING_MASKED)
		const FNaniteView NaniteView = GetNaniteView(ViewId);

		ResolvedView = ResolveView(NaniteView);

		const uint DepthInt = asuint(SvPosition.z);
		const UlongType PackedPixel = PackUlongType(uint2(PixelValue, DepthInt));

		FVertexFactoryInterpolantsVSToPS Interpolants = (FVertexFactoryInterpolantsVSToPS)0;

		// Material parameter inputs
		FBarycentrics Barycentrics = (FBarycentrics)0;

		bool bCalcVertIndexes = true;
		uint3 VertIndexes = 0;
#if BARYCENTRIC_MODE_INTRINSICS
		const uint VertexID0 = GetAttributeAtVertex0(In.VertexID);
		const uint VertexID1 = GetAttributeAtVertex1(In.VertexID);
		const uint VertexID2 = GetAttributeAtVertex2(In.VertexID);
		VertIndexes = uint3(VertexID0, VertexID1, VertexID2);

		// Recover barycentrics from hardware ViVj:
		// v = v0 + I (v1 - v0) + J (v2 - v0) = (1 - I - J) v0 + I v1 + J v2
		const float2 ViVj = GetViVjPerspectiveCenter();
		const float3 UVW = float3(1.0f - ViVj.x - ViVj.y, ViVj);

		// The vertex order can be rotated during the rasterization process,
		// so the original order needs to be recovered to make sense of the barycentrics.

		// Fortunately, for compression purposes, triangle indices already have the form (base, base+a, base+b), where a,b>0.
		// This turns out to be convenient as it allows us to recover the original vertex order by simply rotating
		// the lowest vertex index into the first position. This saves an export compared to the usual provoking vertex trick
		// that compares with an additional nointerpolation export.
		const uint MinVertexID = min3(VertexID0, VertexID1, VertexID2);

		Barycentrics.Value = (MinVertexID == VertexID1) ? UVW.yzx :
							 (MinVertexID == VertexID2) ? UVW.zxy :
							 UVW;

		// As we already have the indices on hand, so we might as well use them instead of decoding them again from memory
		VertIndexes = (MinVertexID == VertexID1) ? VertIndexes.yzx :
					  (MinVertexID == VertexID2) ? VertIndexes.zxy :
					  VertIndexes;

		// Undo the winding flip that was applied at raster time (see ReverseWindingOrder in the VS paths).
		if (bSwapVW)
		{
			Barycentrics.Value.yz = Barycentrics.Value.zy;
			VertIndexes.yz = VertIndexes.zy;
		}

		bCalcVertIndexes = false;
#elif BARYCENTRIC_MODE_SV_BARYCENTRICS && PIXELSHADER
		Barycentrics.Value = In.Barycentrics;
		if (bSwapVW)
		{
			Barycentrics.Value.yz = Barycentrics.Value.zy;
		}
#elif BARYCENTRIC_MODE_EXPORT
		// Only U and V are exported; reconstruct W = 1 - U - V.
		Barycentrics.Value = float3(In.BarycentricsUV, 1.0f - In.BarycentricsUV.x - In.BarycentricsUV.y);
#endif

		FMaterialPixelParameters MaterialParameters = FetchNaniteMaterialPixelParameters(NaniteView, PackedPixel, VIRTUAL_TEXTURE_TARGET, Barycentrics, false, VertIndexes, bCalcVertIndexes, Interpolants, SvPosition );
#if MATERIAL_TWOSIDED
		MaterialParameters.TwoSidedSign = bFrontFace ? -1.0f : 1.0f;
#endif

#if NUM_TEX_COORD_INTERPOLATORS > 0
		// Up to two UV sets are packed into a single float4 interpolator.
		MaterialParameters.TexCoords[0] = In.TexCoords.xy;
		MaterialParameters.TexCoords_DDX[0] = ddx( In.TexCoords.xy );
		MaterialParameters.TexCoords_DDY[0] = ddy( In.TexCoords.xy );
#endif

#if NUM_TEX_COORD_INTERPOLATORS > 1
		MaterialParameters.TexCoords[1] = In.TexCoords.zw;
		MaterialParameters.TexCoords_DDX[1] = ddx( In.TexCoords.zw );
		MaterialParameters.TexCoords_DDY[1] = ddy( In.TexCoords.zw );
#endif

		FPixelMaterialInputs PixelMaterialInputs;
#if USE_WORLD_POSITION_EXCLUDING_SHADER_OFFSETS
		CalcMaterialParametersEx(MaterialParameters, PixelMaterialInputs, SvPosition, MaterialParameters.ScreenPosition, true, MaterialParameters.WorldPosition_CamRelative, MaterialParameters.WorldPosition_NoOffsets_CamRelative);
#else
		CalcMaterialParameters(MaterialParameters, PixelMaterialInputs, SvPosition, true /*bIsFrontFace*/);
#endif

		// NOTE: Disable PDO in shadow passes (it does undesirable things and has always been disabled in these passes in Unreal)
#if WANT_PIXEL_DEPTH_OFFSET && SHADOW_DEPTH_SHADER == 0
		ApplyPixelDepthOffsetToMaterialParameters(MaterialParameters, PixelMaterialInputs, Pixel.Depth);
#endif

#if MATERIALBLENDING_MASKED
		MaterialMask = GetMaterialMask(PixelMaterialInputs);
#endif
#endif // NANITE_PIXEL_PROGRAMMABLE && (WANT_PIXEL_DEPTH_OFFSET || MATERIALBLENDING_MASKED)

		// Presumably follows the engine's clip() convention: negative mask means the
		// pixel is clipped by the opacity mask, so only non-negative values are written.
		BRANCH
		if (MaterialMask >= 0)
		{
			Pixel.Write();
		}
	}
}
|
|
|