Files
UnrealEngine/Engine/Plugins/Experimental/VirtualHeightfieldMesh/Shaders/Private/VirtualHeightfieldMesh.usf
2025-05-18 13:04:45 +08:00

417 lines
14 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "/Engine/Private/Common.ush"
#include "/Engine/Private/MortonCode.ush"
//#include "/Engine/Private/ShaderPrint.ush"
#include "VirtualHeightfieldMesh.ush"
// Number of tasks pulled by this thread group during the current iteration of the CollectQuadsCS loop.
groupshared uint NumGroupTasks;
// Work queue counters (Read, Write, NumActive). Only element 0 is used in this file.
RWStructuredBuffer<WorkerQueueInfo> RWQueueInfo;
// Queue storage of packed QuadItem entries. Indices are wrapped with QueueBufferSizeMask.
RWCoherentBuffer(uint) RWQueueBuffer;
// Mask ANDed with queue read/write positions (presumably BufferSize - 1 for a power-of-two size -- confirm on the CPU side).
uint QueueBufferSizeMask;
// Packed QuadRenderItem list written by CollectQuadsCS...
RWBuffer<uint2> RWQuadBuffer;
// ...and the read-only view of it consumed by CullInstancesCS.
Buffer<uint2> QuadBuffer;
// Indirect args written by InitBuffersCS/CollectQuadsCS and by InitInstanceBufferCS/CullInstancesCS.
RWBuffer<uint> RWIndirectArgsBuffer;
// Read-only indirect args; element 3 is read by CullInstancesCS as the number of quads to process.
Buffer<uint> IndirectArgsBufferSRV;
// Final instance list appended to by CullInstancesCS.
RWStructuredBuffer<QuadRenderInstance> RWInstanceBuffer;
// Read-only instance list. Not referenced by the functions in this file section.
StructuredBuffer<QuadRenderInstance> InstanceBuffer;
// Virtual texture feedback: element 0 is the item count, packed feedback items follow from element 1.
RWBuffer<uint> RWFeedbackBuffer;
// Page table; loaded at (Pos, Level) to fetch the packed physical address of a page.
Texture2D<uint> PageTableTexture;
// Packed min/max height per quad (decoded by UnPackMinMaxHeight).
Texture2D<float4> HeightMinMaxTexture;
// Packed min/max LOD bias per quad (decoded by UnPackMinMaxLodBias).
Texture2D<float4> LodBiasMinMaxTexture;
// Sampler used for both min/max textures.
SamplerState MinMaxTextureSampler;
// Offset added to the quad level to select the mip of the min/max textures (result clamped to >= 0).
int MinMaxLevelOffset;
// Occlusion texture; a non-zero value marks a location as occluded (see OcclusionTest).
Texture2D<float> OcclusionTexture;
// Offset subtracted from the quad level to select the occlusion texture mip.
int OcclusionLevelOffset;
// Level of the root quad item that seeds the work queue.
uint MaxLevel;
// Number of LODs whose pages are unconditionally written to the feedback buffer by InitBuffersCS.
uint NumForceLoadLods;
// Bits ORed into every feedback item written by this file.
uint PageTableFeedbackId;
// Number of bits used for each page coordinate inside a packed physical address.
uint NumPhysicalAddressBits;
// Only .zw is used here, as the per-page UV scale (presumably the reciprocal page table size -- confirm).
float4 PageTableSize;
// Factors passed to VirtualToPhysicalUV: x = page scale, y = in-page UV scale, z = border offset, w = half texel offset.
float4 PhysicalPageTransform;
// Parameters forwarded to CalculateDistanceLod.
float4 LodDistances;
// Scale forwarded to CalculateBiasLod.
float LodBiasScale;
// View position used for distance-to-bounds tests against UV-space boxes.
float3 ViewOrigin;
// Culling planes tested against UV-space boxes by PlaneTestAABB.
float4 FrustumPlanes[5];
// UV-space to world transform; only used for debug drawing in this file.
float4x4 UVToWorld;
// Per-axis scale applied to UV-space deltas before measuring distances.
float3 UVToWorldScale;
// Value written to element 0 of the draw indirect args by InitInstanceBufferCS.
int NumIndices;
/** Extract the virtual level stored in the low 4 bits of a page table PhysicalAddress entry. */
uint GetVirtualLevelFromPhysicalAddress(uint InPhysicalAddress)
{
    // The packing is defined in PageTableUpdate.usf: the level occupies the bottom nibble.
    const uint LevelMask = 0xf;
    return InPhysicalAddress & LevelMask;
}
/**
 * Compute physical UV from virtual UV in the tile with the given PhysicalAddress.
 * InVirtualUV        UV inside the page.
 * InPhysicalAddress  Packed page table entry: bits [0,4) level, then InNumAddressBits bits of page X, then page Y.
 * InTransformFactors x = scale from page coordinate to UV, y = scale applied to the in-page UV,
 *                    z = border offset added, w = half texel offset subtracted.
 * InNumAddressBits   Number of bits used to store the page X coordinate.
 */
float2 VirtualToPhysicalUV(float2 InVirtualUV, uint InPhysicalAddress, float4 InTransformFactors, uint InNumAddressBits)
{
    // See packing in PageTableUpdate.usf
    float PageX = (float)((InPhysicalAddress >> 4) & ((1u << InNumAddressBits) - 1));
    float PageY = (float)(InPhysicalAddress >> (4 + InNumAddressBits));

    // Origin of the physical page, then the offset inside it.
    float2 BaseUV = float2(PageX, PageY) * InTransformFactors.x;
    float2 PageUV = InVirtualUV * InTransformFactors.y;
    // Scalars are splatted to both components.
    float2 BorderUV = InTransformFactors.z;
    float2 HalfTexelUV = InTransformFactors.w;

    return BaseUV + PageUV + BorderUV - HalfTexelUV;
}
/**
 * Build the virtual to physical UV transform for the quad at InPos/InLevel inside the page given by InPhysicalAddress.
 * The result packs the UV bias in .xy and a uniform UV scale in .z.
 */
float3 GetVirtualToPhysicalUVTransform(uint2 InPos, uint InLevel, uint InPhysicalAddress, float4 InTransformFactors, uint InNumAddressBits)
{
    // The page may be stored at a coarser level than the quad, in which case the quad covers only part of it.
    const int LevelDelta = (int)GetVirtualLevelFromPhysicalAddress(InPhysicalAddress) - (int)InLevel;
    const uint LodShift = (uint)max(LevelDelta, 0);
    const float QuadSizeInPage = 1.f / (float)(1u << LodShift);

    // Quad corners expressed as UVs inside the page.
    const float2 QuadMinUV = frac((float2)InPos * QuadSizeInPage);
    const float2 QuadMaxUV = QuadMinUV + QuadSizeInPage;

    const float2 PhysicalMin = VirtualToPhysicalUV(QuadMinUV, InPhysicalAddress, InTransformFactors, InNumAddressBits);
    const float2 PhysicalMax = VirtualToPhysicalUV(QuadMaxUV, InPhysicalAddress, InTransformFactors, InNumAddressBits);

    // Only the X span is used for the scale; the Y span is assumed to be equal.
    const float UniformScale = PhysicalMax.x - PhysicalMin.x;
    return float3(PhysicalMin, UniformScale);
}
/** Visibility query against the occlusion texture: returns true when the location is NOT marked as occluded. */
bool OcclusionTest(uint2 InPos, int InLevel)
{
    const int OcclusionLevel = InLevel - OcclusionLevelOffset;
    if (OcclusionLevel >= 0)
    {
        // A zero texel means nothing is flagged as occluded here.
        return OcclusionTexture.Load(uint3(InPos, OcclusionLevel)) == 0;
    }
    // Levels that map below mip 0 of the occlusion texture are always treated as visible.
    return true;
}
/**
 * Decode min/max height from the packed 8888 format of the MinMaxHeight texture.
 * .xy hold the high/low bytes of the first 16 bit value, .zw the high/low bytes of the second.
 * Returns both values normalized by 65535.
 */
float2 UnPackMinMaxHeight(float4 InPacked)
{
    // Back to integer byte values.
    const uint4 Bytes = (uint4)floor(InPacked * 255.f);
    // Reassemble the two 16 bit values.
    const uint First16 = (Bytes.x << 8) | Bytes.y;
    const uint Second16 = (Bytes.z << 8) | Bytes.w;
    return (float2)uint2(First16, Second16) / 65535.f;
}
/** Decode the two LOD bias values stored in .x and .y of a MinMaxLodBias texel by running each through CalculateBiasLod. */
float2 UnPackMinMaxLodBias(float4 InPacked, float InLodBiasScale)
{
    float2 LodBias;
    LodBias.x = CalculateBiasLod(InPacked.x, InLodBiasScale);
    LodBias.y = CalculateBiasLod(InPacked.y, InLodBiasScale);
    return LodBias;
}
/**
 * Test an AABB (center + extent) against 5 planes.
 * Returns false as soon as the box lies completely on the negative side of any plane, true otherwise.
 */
bool PlaneTestAABB(float4 InPlanes[5], float3 InCenter, float3 InExtent)
{
    bool bInsideAllPlanes = true;

    [unroll]
    for (uint Index = 0; Index < 5; ++Index)
    {
        const float4 Plane = InPlanes[Index];

        // Pick the box corner that lies furthest along the plane normal.
        const float3 NormalSigns = float3(
            Plane.x >= 0.f ? 1.f : -1.f,
            Plane.y >= 0.f ? 1.f : -1.f,
            Plane.z >= 0.f ? 1.f : -1.f);
        const float3 FurthestCorner = InCenter + InExtent * NormalSigns;

        // If even that corner is not in front of the plane then the whole box is outside.
        const float SignedDistance = dot(Plane, float4(FurthestCorner, 1.0f));
        bInsideAllPlanes = bInsideAllPlanes && (SignedDistance > 0.f);
    }

    return bInsideAllPlanes;
}
/* Return the squared distance from a point to the closest point of a bounding box. Per-axis deltas are multiplied by InScale before measuring. Zero when the point is inside the box. */
float SquaredMinDistanceToAABB(float3 InPos, float3 InMin, float3 InMax, float3 InScale)
{
    // How far the point is below the min face and above the max face on each axis (clamped at zero).
    const float3 BelowMin = max(InMin - InPos, 0) * InScale;
    const float3 AboveMax = max(InPos - InMax, 0) * InScale;
    const float BelowSq = dot(BelowMin, BelowMin);
    const float AboveSq = dot(AboveMax, AboveMax);
    return BelowSq + AboveSq;
}
/* Return the squared distance from a point to the furthest corner of a bounding box. Per-axis deltas are multiplied by InScale before measuring. */
float SquaredMaxDistanceToAABB(float3 InPos, float3 InMin, float3 InMax, float3 InScale)
{
    // On each axis the furthest corner is whichever face is further away, so both deltas need their absolute value.
    // Without the abs on the max face term the result is too small whenever the point lies below InMin on an axis.
    float3 D = max(abs(InPos - InMin), abs(InPos - InMax)) * InScale;
    return dot(D, D);
}
/** Debug draw a UV-space bounding box, transformed by InTransform, using the ShaderDrawDebug system. Compiled out by default. */
void DebugDrawUVBox(float3 InUVMin, float3 InUVMax, float4x4 InTransform, float4 InColor)
{
#if 0 // Enable only if ShaderDrawDebug is enabled
    // Corner index bits select min/max per axis: bit 0 -> X, bit 1 -> Y, bit 2 -> Z.
    float3 Corners[8];
    for (uint CornerIndex = 0; CornerIndex < 8; ++CornerIndex)
    {
        float3 CornerUV;
        CornerUV.x = (CornerIndex & 1) != 0 ? InUVMax.x : InUVMin.x;
        CornerUV.y = (CornerIndex & 2) != 0 ? InUVMax.y : InUVMin.y;
        CornerUV.z = (CornerIndex & 4) != 0 ? InUVMax.z : InUVMin.z;
        Corners[CornerIndex] = mul(float4(CornerUV, 1), InTransform).xyz;
    }

    // Bottom and top faces.
    AddQuadWS(Corners[0], Corners[2], Corners[3], Corners[1], InColor);
    AddQuadWS(Corners[4], Corners[6], Corners[7], Corners[5], InColor);

    // Vertical edges joining the two faces.
    for (uint EdgeIndex = 0; EdgeIndex < 4; ++EdgeIndex)
    {
        AddLineWS(Corners[EdgeIndex], Corners[EdgeIndex + 4], InColor, InColor);
    }
#endif
}
/**
 * Single thread compute shader that resets all buffers used by the collect pass.
 * Seeds the work queue with the root quad, resets the CullInstances indirect args and primes the feedback buffer.
 */
[numthreads(1, 1, 1)]
void InitBuffersCS()
{
    // The queue starts with exactly one item: address 0 at the top level.
    RWQueueBuffer[0] = Pack(InitQuadItem(0, MaxLevel));
    RWQueueInfo[0].Read = 0;
    RWQueueInfo[0].Write = 1;
    RWQueueInfo[0].NumActive = 1;

    // CullInstances indirect args.
    RWIndirectArgsBuffer[0] = 0; // Group count X, raised during CollectQuadsCS.
    RWIndirectArgsBuffer[1] = 1;
    RWIndirectArgsBuffer[2] = 1;
    RWIndirectArgsBuffer[3] = 0; // Instance counter, incremented during CollectQuadsCS.

    // Prime the feedback buffer with every page of the first NumForceLoadLods levels.
    // Slot 0 is reserved for the item count so items are written from slot 1 onwards.
    uint NumFeedbackItems = 0;
    for (uint LodIndex = 0; LodIndex < NumForceLoadLods; ++LodIndex)
    {
        const uint PagesPerSide = 1u << LodIndex;
        // Feedback stores Level+1, counted down from the top level.
        const uint LevelPlusOneBits = (1 + MaxLevel - LodIndex) << 24;

        for (uint PageY = 0; PageY < PagesPerSide; ++PageY)
        {
            for (uint PageX = 0; PageX < PagesPerSide; ++PageX)
            {
                NumFeedbackItems += 1;
                RWFeedbackBuffer[NumFeedbackItems] = PageX | (PageY << 12) | LevelPlusOneBits | PageTableFeedbackId;
            }
        }
    }

    // With NumForceLoadLods == 0 this simply clears the counter.
    RWFeedbackBuffer[0] = NumFeedbackItems;
}
/**
 * Compute shader to traverse the virtual texture page table and generate an array of items to potentially render for a view.
 *
 * Each thread repeatedly pulls one quad from the shared work queue and either pushes its four children back onto the
 * queue (subdivide) or emits it to RWQuadBuffer together with the virtual texture feedback for the pages it may sample.
 * A thread group keeps looping until none of its threads pulled a task in an iteration.
 */
#if COMPILER_SUPPORTS_WAVE_SIZE
WAVESIZE(32)
#endif
[numthreads(64, 1, 1)]
void CollectQuadsCS(
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint GroupIndex : SV_GroupIndex )
{
    // Persistent threads stay alive until the work queue is drained.
    bool bExit = false;
    while (!bExit)
    {
        // Sync and init group task count.
        NumGroupTasks = 0;
        GroupMemoryBarrierWithGroupSync();

        // Try and pull a task.
        int NumActive;
        InterlockedAdd(RWQueueInfo[0].NumActive, -1, NumActive);

        if (NumActive <= 0)
        {
            // No task pulled. Rewind.
            InterlockedAdd(RWQueueInfo[0].NumActive, 1, NumActive);
        }
        else
        {
            // Increment group task count for this loop.
            uint Dummy;
            InterlockedAdd(NumGroupTasks, 1, Dummy);

            // Read item to process from queue.
            uint Read;
            InterlockedAdd(RWQueueInfo[0].Read, 1, Read);
            uint PackedItem = RWQueueBuffer[Read & QueueBufferSizeMask];
            QuadItem Item = UnpackQuadItem(PackedItem);
            uint Address = Item.Address;
            uint2 Pos = MortonDecode(Address);
            uint Level = Item.Level;

            // Check if occluded.
            bool bOcclude = !OcclusionTest(Pos, Level);

            // Get UV bounding box
            float2 Scale = (float)(1u << Level) * PageTableSize.zw;
            float2 UV0 = ((float2)Pos + float2(0, 0)) * Scale;
            float2 UV1 = ((float2)Pos + float2(1, 1)) * Scale;

            float MinMaxTextureLevel = max((float)Level + (float)MinMaxLevelOffset, 0);
            float2 MinMaxHeight = UnPackMinMaxHeight(HeightMinMaxTexture.SampleLevel(MinMaxTextureSampler, UV0, MinMaxTextureLevel));
            float2 MinMaxLodBias = UnPackMinMaxLodBias(LodBiasMinMaxTexture.SampleLevel(MinMaxTextureSampler, UV0, MinMaxTextureLevel), LodBiasScale);

            float3 UVMin = float3(UV0, MinMaxHeight.x);
            float3 UVMax = float3(UV1, MinMaxHeight.y);
            float3 UVCenter = (UVMax + UVMin) * 0.5f;
            float3 UVExtent = UVMax - UVCenter;

            // Range of continuous lod values that the quad can take across its bounds.
            float MinDistanceSq = SquaredMinDistanceToAABB(ViewOrigin, UVMin, UVMax, UVToWorldScale);
            float MaxDistanceSq = SquaredMaxDistanceToAABB(ViewOrigin, UVMin, UVMax, UVToWorldScale);
            float MinDistanceLod = CalculateDistanceLod(MinDistanceSq, LodDistances);
            float MaxDistanceLod = CalculateDistanceLod(MaxDistanceSq, LodDistances);

            // Check if frustum culled
            bool bCull = !PlaneTestAABB(FrustumPlanes, UVCenter, UVExtent);

            bool bSubdivide = false;
            if (bCull || bOcclude)
            {
                // Store, but don't subdivide.
                DebugDrawUVBox(UVMin, UVMax, UVToWorld, float4(0, 0, 1, 1));
            }
            else if (Level > 0)
            {
                // Subdivide if minimum continuous lod can be less than the current level.
                bSubdivide = MinDistanceLod - MinMaxLodBias.y < (float)Level;
            }

            if (bSubdivide)
            {
                // Add children to queue.
                uint Write;
                InterlockedAdd(RWQueueInfo[0].Write, 4, Write);

                RWQueueBuffer[(Write + 0) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 0, Level - 1));
                RWQueueBuffer[(Write + 1) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 1, Level - 1));
                RWQueueBuffer[(Write + 2) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 2, Level - 1));
                RWQueueBuffer[(Write + 3) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 3, Level - 1));

                // Publish the new items only after they have been written.
                InterlockedAdd(RWQueueInfo[0].NumActive, 4, NumActive);
            }
            else
            {
                // Add to output list.
                uint PhysicalAddress = PageTableTexture.Load(int3(Pos, Level));

                uint Write;
                InterlockedAdd(RWIndirectArgsBuffer[3], 1, Write);
                // Keep the CullInstances group count in step with the item count (64 threads per group).
                InterlockedMax(RWIndirectArgsBuffer[0], ((Write + 1) + 63) / 64);

                RWQuadBuffer[Write] = Pack(InitQuadRenderItem(Pos, Level, PhysicalAddress, bCull || bOcclude));

                // Add all possible pages that vertex shader might read to the virtual texture feedback buffer.
                int MinFeedbackLevel = (int)floor(clamp(MinDistanceLod - MinMaxLodBias.y, Level, MaxLevel));
                int MaxFeedbackLevel = (int)ceil(clamp(MaxDistanceLod - MinMaxLodBias.x, Level, MaxLevel));
                uint NumFeedbackItems = (uint)max(MaxFeedbackLevel - MinFeedbackLevel + 1, 0);

                // Reserve NumFeedbackItems consecutive entries. FeedbackPos receives the item count before the add.
                uint FeedbackPos;
                InterlockedAdd(RWFeedbackBuffer[0], NumFeedbackItems, FeedbackPos);

                for (int FeedbackLevel = MinFeedbackLevel; FeedbackLevel <= MaxFeedbackLevel; ++FeedbackLevel)
                {
                    // Note that our general virtual texture feedback buffer convention is to write Level+1
                    uint LevelPlusOne = FeedbackLevel + 1;
                    uint LodShift = FeedbackLevel - Level;

                    // Slot 0 holds the counter, so item N lives at index N + 1. Index relative to the first level
                    // written so that the writes stay inside the range reserved above.
                    uint FeedbackIndex = FeedbackPos + 1 + (uint)(FeedbackLevel - MinFeedbackLevel);
                    RWFeedbackBuffer[FeedbackIndex] = (Pos.x >> LodShift) | ((Pos.y >> LodShift) << 12) | (LevelPlusOne << 24) | PageTableFeedbackId;
                }

                // Debug draw the bounds.
                if (!(bCull || bOcclude))
                {
                    DebugDrawUVBox(UVMin, UVMax, UVToWorld, float4(1, 0, 0, 1));
                }
            }
        }

        // Exit if no work was found.
        // All threads in the group must have finished updating NumGroupTasks before it is read, and every thread
        // must take the same exit decision so that the group sync at the top of the loop is not reached divergently.
        // This also flushes the queue writes made above.
        AllMemoryBarrierWithGroupSync();
        if (NumGroupTasks == 0)
        {
            bExit = true;
        }
    }
}
/**
 * Single thread compute shader that resets the indirect args for the final culled indirect draw call.
 * Element 0 receives NumIndices; elements 1 to 4 are zeroed.
 */
[numthreads(1, 1, 1)]
void InitInstanceBufferCS()
{
    RWIndirectArgsBuffer[0] = NumIndices;

    // Element 1 is the counter that CullInstancesCS increments. The remaining elements stay at zero.
    [unroll]
    for (uint ArgIndex = 1; ArgIndex <= 4; ++ArgIndex)
    {
        RWIndirectArgsBuffer[ArgIndex] = 0;
    }
}
/**
 * Per-view culling of the items gathered by the collect pass.
 * One thread per QuadBuffer entry; every surviving item is appended to RWInstanceBuffer and counted in RWIndirectArgsBuffer[1].
 */
[numthreads(64, 1, 1)]
void CullInstancesCS( uint3 DispatchThreadId : SV_DispatchThreadID )
{
    const uint QuadIndex = DispatchThreadId.x;

    // Element 3 of the indirect args holds the number of valid quads.
    const uint NumQuads = IndirectArgsBufferSRV[3];
    if (QuadIndex >= NumQuads)
    {
        return;
    }

    QuadRenderItem Item = UnpackQuadRenderItem(QuadBuffer[QuadIndex]);
    const uint2 Pos = Item.Pos;
    const uint Level = Item.Level;

#if REUSE_CULL
    // The main view can take the culling flag that was stored in the Collect pass.
    bool bCull = Item.bCull;
#else
    // Other views rebuild the UV-space bounds and test them against this view's planes.
    float2 Scale = (float)(1u << Level) * PageTableSize.zw;
    float2 UV0 = ((float2)Pos + float2(0, 0)) * Scale;
    float2 UV1 = ((float2)Pos + float2(1, 1)) * Scale;

    float MinMaxTextureLevel = max((float)Level + (float)MinMaxLevelOffset, 0);
    float2 MinMaxHeight = UnPackMinMaxHeight(HeightMinMaxTexture.SampleLevel(MinMaxTextureSampler, UV0, MinMaxTextureLevel));

    float3 UVMin = float3(UV0, MinMaxHeight.x);
    float3 UVMax = float3(UV1, MinMaxHeight.y);
    float3 UVCenter = (UVMax + UVMin) * 0.5f;
    float3 UVExtent = UVMax - UVCenter;

    bool bCull = !PlaneTestAABB(FrustumPlanes, UVCenter, UVExtent);
#endif

    if (bCull)
    {
        return;
    }

    // Add to final render instance list.
    QuadRenderInstance OutInstance;
    // Resolve the physical address into a full virtual to physical UV transform here so that the vertex shader can use it directly.
    OutInstance.UVTransform.xyz = GetVirtualToPhysicalUVTransform(Pos, Level, Item.PhysicalAddress, PhysicalPageTransform, NumPhysicalAddressBits);
    OutInstance.PosLevelPacked = Pos.x | (Pos.y << 12) | (Level << 24);

    uint InstanceIndex;
    InterlockedAdd(RWIndirectArgsBuffer[1], 1, InstanceIndex);
    RWInstanceBuffer[InstanceIndex] = OutInstance;
}