// Copyright Epic Games, Inc. All Rights Reserved.

#include "/Engine/Private/Common.ush"
#include "/Engine/Private/MortonCode.ush"
//#include "/Engine/Private/ShaderPrint.ush"
#include "VirtualHeightfieldMesh.ush"

// Per-group count of threads that pulled a task in the current CollectQuadsCS loop iteration.
groupshared uint NumGroupTasks;

// NOTE(review): several resource declarations below carry no element type
// (e.g. `RWStructuredBuffer RWQueueInfo`). Confirm against source control whether
// template arguments were stripped from this copy of the file.

// Work queue state (Read / Write / NumActive) and the ring buffer of packed quad items.
RWStructuredBuffer RWQueueInfo;
RWCoherentBuffer(uint) RWQueueBuffer;
// Mask applied to queue indices before addressing RWQueueBuffer.
uint QueueBufferSizeMask;

// Packed render items written by CollectQuadsCS and read back by CullInstancesCS.
RWBuffer RWQuadBuffer;
Buffer QuadBuffer;

RWBuffer RWIndirectArgsBuffer;
Buffer IndirectArgsBufferSRV;

RWStructuredBuffer RWInstanceBuffer;
StructuredBuffer InstanceBuffer;

// Element 0 is the item count; feedback items follow it.
RWBuffer RWFeedbackBuffer;

Texture2D PageTableTexture;
Texture2D HeightMinMaxTexture;
Texture2D LodBiasMinMaxTexture;
SamplerState MinMaxTextureSampler;
int MinMaxLevelOffset;
Texture2D OcclusionTexture;
int OcclusionLevelOffset;

uint MaxLevel;
uint NumForceLoadLods;
uint PageTableFeedbackId;
uint NumPhysicalAddressBits;
float4 PageTableSize;
float4 PhysicalPageTransform;
float4 LodDistances;
float LodBiasScale;
float3 ViewOrigin;
float4 FrustumPlanes[5];
float4x4 UVToWorld;
float3 UVToWorldScale;
int NumIndices;

/** Extract the virtual level held in the low four bits of a page table PhysicalAddress entry. */
uint GetVirtualLevelFromPhysicalAddress(uint InPhysicalAddress)
{
	// See packing in PageTableUpdate.usf
	const uint VirtualLevelMask = 0xf;
	return InPhysicalAddress & VirtualLevelMask;
}

/** Compute physical UV from virtual UV in the tile with the given PhysicalAddress.
*/
// InTransformFactors: .x scales the page index to a base UV, .y scales the virtual UV within
// the page, .z is the border offset that is added and .w is the half texel offset that is subtracted.
// InNumAddressBits: number of bits used for the page X index inside InPhysicalAddress.
float2 VirtualToPhysicalUV(float2 InVirtualUV, uint InPhysicalAddress, float4 InTransformFactors, uint InNumAddressBits)
{
	// See packing in PageTableUpdate.usf
	// The low 4 bits hold the virtual level, followed by InNumAddressBits bits of page X, then page Y.
	float PageX = (float)((InPhysicalAddress >> 4) & ((1u << InNumAddressBits) - 1));
	float PageY = (float)(InPhysicalAddress >> (4 + InNumAddressBits));

	float2 BaseUV = float2(PageX, PageY) * InTransformFactors.x;
	float2 PageUV = InVirtualUV * InTransformFactors.y;
	float2 BorderUV = InTransformFactors.z;
	float2 HalfTexelUV = InTransformFactors.w;

	return BaseUV + PageUV + BorderUV - HalfTexelUV;
}

/**
 * Returns transform from virtual to physical UV in the tile with the given PhysicalAddress.
 * Returns float3 where .xy is bias and .z is scale.
 * InPos and InLevel identify the quad. When the physical page is stored at a higher virtual level
 * than InLevel, the quad only covers a 1 / 2^(level difference) sub-rectangle of that page.
 */
float3 GetVirtualToPhysicalUVTransform(uint2 InPos, uint InLevel, uint InPhysicalAddress, float4 InTransformFactors, uint InNumAddressBits)
{
	// Number of levels between the requested level and the level actually resident (never negative).
	uint LodShift = (uint)max((int)GetVirtualLevelFromPhysicalAddress(InPhysicalAddress) - (int)InLevel, 0);
	float PosDivider = 1.f / (float)(1u << LodShift);

	// Location of the quad inside the resident page, in [0, 1).
	float2 MinVirtualUV = frac((float2)InPos * PosDivider);
	float2 MaxVirtualUV = MinVirtualUV + PosDivider;

	float2 MinPhysicalUV = VirtualToPhysicalUV(MinVirtualUV, InPhysicalAddress, InTransformFactors, InNumAddressBits);
	float2 MaxPhysicalUV = VirtualToPhysicalUV(MaxVirtualUV, InPhysicalAddress, InTransformFactors, InNumAddressBits);

	// Assume Max.y - Min.y == Max.x - Min.x
	return float3(MinPhysicalUV, MaxPhysicalUV.x - MinPhysicalUV.x);
}

/**
 * Return false if location is marked as occluded in the occlusion texture.
 * Levels below the first level stored in the occlusion texture (InLevel < OcclusionLevelOffset)
 * are always reported as not occluded.
 */
bool OcclusionTest(uint2 InPos, int InLevel)
{
	int OcclusionLevel = InLevel - OcclusionLevelOffset;
	if (OcclusionLevel < 0)
	{
		return true;
	}

	// A zero texel means "not occluded".
	return OcclusionTexture.Load(uint3(InPos, OcclusionLevel)) == 0;
}

/** Unpack the values from the MinMaxHeight texture from the packed 8888 format.
*/
// Returns float2 where .x is built from channels xy and .y from channels zw, each a 16 bit
// value (high byte first) normalized to [0, 1].
float2 UnPackMinMaxHeight(float4 InPacked)
{
	// Scale without modifying the input parameter.
	uint4 PackedScaled = (uint4)floor(InPacked * 255.f);
	uint2 UnPackedScaled = uint2(PackedScaled.x << 8 | PackedScaled.y, PackedScaled.z << 8 | PackedScaled.w);
	float2 UnPacked = (float2)UnPackedScaled / 65535.f;
	return UnPacked;
}

/**
 * Unpack the values from the MinMaxLodBias texture from the packed 8888 format.
 * Channels x and y are each converted with CalculateBiasLod() using InLodBiasScale.
 */
float2 UnPackMinMaxLodBias(float4 InPacked, float InLodBiasScale)
{
	return float2(CalculateBiasLod(InPacked.x, InLodBiasScale), CalculateBiasLod(InPacked.y, InLodBiasScale));
}

/**
 * Return false if the AABB is completely outside one of the planes.
 * For each plane only the box corner furthest along the plane normal is tested: if that corner
 * is not on the positive side then no part of the box is.
 */
bool PlaneTestAABB(float4 InPlanes[5], float3 InCenter, float3 InExtent)
{
	bool bPlaneTest = true;

	[unroll]
	for (uint PlaneIndex = 0; PlaneIndex < 5; ++PlaneIndex)
	{
		// Select the corner by matching the sign of each normal component.
		float3 PlaneSigns;
		PlaneSigns.x = InPlanes[PlaneIndex].x >= 0.f ? 1.f : -1.f;
		PlaneSigns.y = InPlanes[PlaneIndex].y >= 0.f ? 1.f : -1.f;
		PlaneSigns.z = InPlanes[PlaneIndex].z >= 0.f ? 1.f : -1.f;

		bool bInsidePlane = dot(InPlanes[PlaneIndex], float4(InCenter + InExtent * PlaneSigns, 1.0f)) > 0.f;
		bPlaneTest = bPlaneTest && bInsidePlane;
	}

	return bPlaneTest;
}

/*
 * Return squared distance of closest distance between a point and a bounding box.
 * InScale is applied per axis to the offset before squaring. Returns 0 when the point is inside the box.
 */
float SquaredMinDistanceToAABB(float3 InPos, float3 InMin, float3 InMax, float3 InScale)
{
	// On each axis at most one of these is non-zero.
	float3 D1 = max(InMin - InPos, 0) * InScale;
	float3 D2 = max(InPos - InMax, 0) * InScale;
	return dot(D1, D1) + dot(D2, D2);
}

/*
 * Return squared distance of furthest distance between a point and a bounding box.
 * InScale is applied per axis to the offset before squaring.
 */
float SquaredMaxDistanceToAABB(float3 InPos, float3 InMin, float3 InMax, float3 InScale)
{
	// The furthest corner on each axis is whichever of the two faces is further away, so both
	// offsets must be compared by magnitude. Without the abs() on the InMax term the result is
	// too small whenever InPos is below InMax on an axis.
	float3 D = max(abs(InPos - InMin), abs(InPos - InMax)) * InScale;
	return dot(D, D);
}

/** Draw a bounding box using the ShaderDrawDebug system.
*/
// The box corners InUVMin/InUVMax are transformed by InTransform before drawing.
// The body is compiled out unless the #if below is enabled.
void DebugDrawUVBox(float3 InUVMin, float3 InUVMax, float4x4 InTransform, float4 InColor)
{
#if 0
	// Enable only if ShaderDrawDebug is enabled
	float3 WorldPos[8];
	WorldPos[0] = mul(float4(InUVMin.x, InUVMin.y, InUVMin.z, 1), InTransform);
	WorldPos[1] = mul(float4(InUVMax.x, InUVMin.y, InUVMin.z, 1), InTransform);
	WorldPos[2] = mul(float4(InUVMin.x, InUVMax.y, InUVMin.z, 1), InTransform);
	WorldPos[3] = mul(float4(InUVMax.x, InUVMax.y, InUVMin.z, 1), InTransform);
	WorldPos[4] = mul(float4(InUVMin.x, InUVMin.y, InUVMax.z, 1), InTransform);
	WorldPos[5] = mul(float4(InUVMax.x, InUVMin.y, InUVMax.z, 1), InTransform);
	WorldPos[6] = mul(float4(InUVMin.x, InUVMax.y, InUVMax.z, 1), InTransform);
	WorldPos[7] = mul(float4(InUVMax.x, InUVMax.y, InUVMax.z, 1), InTransform);

	AddQuadWS(WorldPos[0], WorldPos[2], WorldPos[3], WorldPos[1], InColor);
	AddQuadWS(WorldPos[4], WorldPos[6], WorldPos[7], WorldPos[5], InColor);
	AddLineWS(WorldPos[0], WorldPos[4], InColor, InColor);
	AddLineWS(WorldPos[1], WorldPos[5], InColor, InColor);
	AddLineWS(WorldPos[2], WorldPos[6], InColor, InColor);
	AddLineWS(WorldPos[3], WorldPos[7], InColor, InColor);
#endif
}

/**
 * Compute shader to initialize all buffers, including adding the lowest mip page(s) to the QuadBuffer.
 */
[numthreads(1, 1, 1)]
void InitBuffersCS()
{
	// Seed with one item in the queue.
	RWQueueInfo[0].Read = 0;
	RWQueueInfo[0].Write = 1;
	RWQueueInfo[0].NumActive = 1;
	RWQueueBuffer[0] = Pack(InitQuadItem(0, MaxLevel));

	// CullInstances indirect args
	RWIndirectArgsBuffer[0] = 0; // Increment this wave counter during CollectQuadsCS.
	RWIndirectArgsBuffer[1] = 1;
	RWIndirectArgsBuffer[2] = 1;
	RWIndirectArgsBuffer[3] = 0; // Increment this instance counter during CollectQuadsCS.

	// Clear virtual texture feedback counter or prime feedback buffer with first n levels.
	// Element 0 of the feedback buffer is the item count, so items are written from element 1.
	uint FeedbackCount = 0;
	for (uint Level = 0; Level < NumForceLoadLods; ++Level)
	{
		for (uint Y = 0; Y < (1u << Level); ++Y)
		{
			for (uint X = 0; X < (1u << Level); ++X)
			{
				// Level 0 of this loop is the coarsest level (MaxLevel); feedback stores Level+1.
				uint LevelPlusOne = 1 + MaxLevel - Level;
				uint Feedback = X | (Y << 12) | (LevelPlusOne << 24) | PageTableFeedbackId;
				RWFeedbackBuffer[++FeedbackCount] = Feedback;
			}
		}
	}
	RWFeedbackBuffer[0] = FeedbackCount;
}

/**
 * Compute shader to traverse the virtual texture page table and generate an array of items to potentially render for a view.
 * Each thread repeatedly pulls one quad from the shared work queue and then either pushes its four
 * children back onto the queue or emits it to RWQuadBuffer together with virtual texture feedback requests.
 */
#if COMPILER_SUPPORTS_WAVE_SIZE
WAVESIZE(32)
#endif
[numthreads(64, 1, 1)]
void CollectQuadsCS(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint GroupIndex : SV_GroupIndex
)
{
	// Persistent threads stay alive until the work queue is drained.
	bool bExit = false;
	while (!bExit)
	{
		// Sync and init group task count.
		NumGroupTasks = 0;
		GroupMemoryBarrierWithGroupSync();

		// Try and pull a task.
		int NumActive;
		InterlockedAdd(RWQueueInfo[0].NumActive, -1, NumActive);

		if (NumActive <= 0)
		{
			// No task pulled. Rewind.
			InterlockedAdd(RWQueueInfo[0].NumActive, 1, NumActive);
		}
		else
		{
			// Increment group task count for this loop.
			uint Dummy;
			InterlockedAdd(NumGroupTasks, 1, Dummy);

			// Read item to process from queue.
			uint Read;
			InterlockedAdd(RWQueueInfo[0].Read, 1, Read);
			uint PackedItem = RWQueueBuffer[Read & QueueBufferSizeMask];
			QuadItem Item = UnpackQuadItem(PackedItem);
			uint Address = Item.Address;
			uint2 Pos = MortonDecode(Address);
			uint Level = Item.Level;

			// Check if occluded.
			bool bOcclude = !OcclusionTest(Pos, Level);

			// Get UV bounding box
			float2 Scale = (float)(1u << Level) * PageTableSize.zw;
			float2 UV0 = ((float2)Pos + float2(0, 0)) * Scale;
			float2 UV1 = ((float2)Pos + float2(1, 1)) * Scale;

			float MinMaxTextureLevel = max((float)Level + (float)MinMaxLevelOffset, 0);
			float2 MinMaxHeight = UnPackMinMaxHeight(HeightMinMaxTexture.SampleLevel(MinMaxTextureSampler, UV0, MinMaxTextureLevel));
			float2 MinMaxLodBias = UnPackMinMaxLodBias(LodBiasMinMaxTexture.SampleLevel(MinMaxTextureSampler, UV0, MinMaxTextureLevel), LodBiasScale);

			float3 UVMin = float3(UV0, MinMaxHeight.x);
			float3 UVMax = float3(UV1, MinMaxHeight.y);
			float3 UVCenter = (UVMax + UVMin) * 0.5f;
			float3 UVExtent = UVMax - UVCenter;

			float MinDistanceSq = SquaredMinDistanceToAABB(ViewOrigin, UVMin, UVMax, UVToWorldScale);
			float MaxDistanceSq = SquaredMaxDistanceToAABB(ViewOrigin, UVMin, UVMax, UVToWorldScale);
			float MinDistanceLod = CalculateDistanceLod(MinDistanceSq, LodDistances);
			float MaxDistanceLod = CalculateDistanceLod(MaxDistanceSq, LodDistances);

			// Check if frustum culled
			bool bCull = !PlaneTestAABB(FrustumPlanes, UVCenter, UVExtent);

			bool bSubdivide = false;
			if (bCull || bOcclude)
			{
				// Store, but don't subdivide.
				DebugDrawUVBox(UVMin, UVMax, UVToWorld, float4(0, 0, 1, 1));
			}
			else if (Level > 0)
			{
				// Subdivide if minimum continuous lod can be less than the current level.
				bSubdivide = MinDistanceLod - MinMaxLodBias.y < (float)Level;
			}

			if (bSubdivide)
			{
				// Add children to queue.
				uint Write;
				InterlockedAdd(RWQueueInfo[0].Write, 4, Write);

				RWQueueBuffer[(Write + 0) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 0, Level - 1));
				RWQueueBuffer[(Write + 1) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 1, Level - 1));
				RWQueueBuffer[(Write + 2) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 2, Level - 1));
				RWQueueBuffer[(Write + 3) & QueueBufferSizeMask] = Pack(InitQuadItem(Address * 4 + 3, Level - 1));

				// Publish the new items only after they have been written.
				InterlockedAdd(RWQueueInfo[0].NumActive, 4, NumActive);
			}
			else
			{
				// Add to output list.
				uint PhysicalAddress = PageTableTexture.Load(int3(Pos, Level));

				uint Write;
				InterlockedAdd(RWIndirectArgsBuffer[3], 1, Write);
				// Keep the CullInstancesCS group count at ceil(NumItems / 64).
				InterlockedMax(RWIndirectArgsBuffer[0], ((Write + 1) + 63) / 64);

				RWQuadBuffer[Write] = Pack(InitQuadRenderItem(Pos, Level, PhysicalAddress, bCull || bOcclude));

				// Add all possible pages that vertex shader might read to the virtual texture feedback buffer.
				int MinFeedbackLevel = (int)floor(clamp(MinDistanceLod - MinMaxLodBias.y, Level, MaxLevel));
				int MaxFeedbackLevel = (int)ceil(clamp(MaxDistanceLod - MinMaxLodBias.x, Level, MaxLevel));
				uint NumFeedbackItems = (uint)max(MaxFeedbackLevel - MinFeedbackLevel + 1, 0);

				// Reserve NumFeedbackItems consecutive slots. FeedbackPos receives the previous item count.
				uint FeedbackPos;
				InterlockedAdd(RWFeedbackBuffer[0], NumFeedbackItems, FeedbackPos);

				for (int FeedbackLevel = MinFeedbackLevel; FeedbackLevel <= MaxFeedbackLevel; ++FeedbackLevel)
				{
					// Note that our general virtual texture feedback buffer convention is to write Level+1
					uint LevelPlusOne = FeedbackLevel + 1;
					uint LodShift = FeedbackLevel - Level;
					// Element 0 is the counter, so the reserved range is [FeedbackPos + 1, FeedbackPos + NumFeedbackItems].
					// Indexing by FeedbackLevel instead would write outside the reservation (and could overwrite the counter).
					RWFeedbackBuffer[++FeedbackPos] = (Pos.x >> LodShift) | ((Pos.y >> LodShift) << 12) | (LevelPlusOne << 24) | PageTableFeedbackId;
				}

				// Debug draw the bounds.
				if (!(bCull || bOcclude))
				{
					DebugDrawUVBox(UVMin, UVMax, UVToWorld, float4(1, 0, 0, 1));
				}
			}
		}

		// Exit if no work was found.
		// NumGroupTasks is groupshared, so every thread in the group must have finished its increment
		// before any thread reads it. A plain DeviceMemoryBarrier() neither synchronizes the group nor
		// covers groupshared memory. The barrier also makes queue writes visible before the next pull.
		// NOTE(review): the reset of NumGroupTasks at the top of the next iteration is not separated
		// from this read by a barrier — confirm whether an additional group sync is wanted here.
		AllMemoryBarrierWithGroupSync();
		if (NumGroupTasks == 0)
		{
			bExit = true;
		}
	}
}

/**
 * Initialise the indirect args for the final culled indirect draw call.
*/
[numthreads(1, 1, 1)]
void InitInstanceBufferCS()
{
	// Element 0 receives the index count.
	RWIndirectArgsBuffer[0] = NumIndices;

	// Elements 1..4 start at zero. Element 1 is the counter incremented during CullInstancesCS.
	[unroll]
	for (uint ArgIndex = 1; ArgIndex < 5; ++ArgIndex)
	{
		RWIndirectArgsBuffer[ArgIndex] = 0;
	}
}

/**
 * Per-view culling pass over the render items produced by the collect pass.
 * One thread handles one item and appends the survivors to the final instance buffer.
 */
[numthreads(64, 1, 1)]
void CullInstancesCS(
	uint3 DispatchThreadId : SV_DispatchThreadID
)
{
	// Threads past the number of collected items have nothing to do.
	const uint QuadIndex = DispatchThreadId.x;
	if (QuadIndex >= IndirectArgsBufferSRV[3])
	{
		return;
	}

	const uint2 PackedItem = QuadBuffer[QuadIndex];
	const QuadRenderItem Item = UnpackQuadRenderItem(PackedItem);
	const uint2 Pos = Item.Pos;
	const uint Level = Item.Level;

#if REUSE_CULL
	// Reuse main view culling flag set in the Collect pass.
	const bool bCull = Item.bCull;
#else
	// Cull against planes for this view.
	// Rebuild the quad bounds in UV space: XY from the page position, Z from the height min/max texture.
	const float2 Scale = (float)(1u << Level) * PageTableSize.zw;
	const float2 UV0 = ((float2)Pos + float2(0, 0)) * Scale;
	const float2 UV1 = ((float2)Pos + float2(1, 1)) * Scale;

	const float MinMaxTextureLevel = max((float)Level + (float)MinMaxLevelOffset, 0);
	const float2 MinMaxHeight = UnPackMinMaxHeight(HeightMinMaxTexture.SampleLevel(MinMaxTextureSampler, UV0, MinMaxTextureLevel));

	const float3 UVMin = float3(UV0, MinMaxHeight.x);
	const float3 UVMax = float3(UV1, MinMaxHeight.y);
	const float3 UVCenter = (UVMax + UVMin) * 0.5f;
	const float3 UVExtent = UVMax - UVCenter;

	const bool bCull = !PlaneTestAABB(FrustumPlanes, UVCenter, UVExtent);
#endif

	if (bCull)
	{
		return;
	}

	// Add to final render instance list.
	QuadRenderInstance OutInstance;
	OutInstance.PosLevelPacked = Pos.x | (Pos.y << 12) | (Level << 24);
	// Unpack physical address for first sample into full LocalToPhysicalUV here ready for use by vertex shader.
	OutInstance.UVTransform.xyz = GetVirtualToPhysicalUVTransform(Pos, Level, Item.PhysicalAddress, PhysicalPageTransform, NumPhysicalAddressBits);

	// Claim the next free slot; the counter is element 1 of the indirect args.
	uint Write;
	InterlockedAdd(RWIndirectArgsBuffer[1], 1, Write);
	RWInstanceBuffer[Write] = OutInstance;
}