// HLSL shader source (908 lines, 30 KiB).
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "/Engine/Private/Common.ush"
|
|
#include "/Engine/Private/PackUnpack.ush"
|
|
#include "WaterQuadTreeCommon.ush"
|
|
|
|
/**
 * Packs a four-component node descriptor into a single 32-bit word.
 *
 * Bit layout, least significant bits first:
 *   x : 11 bits, y : 11 bits, z : 5 bits, w : 5 bits.
 * Each component is masked to its field width, so bits outside the field are dropped.
 * UnpackNodeCoord() is the inverse.
 */
uint PackNodeCoord(uint4 InNode)
{
    const uint FieldX = (InNode.x & 0x7FFu);
    const uint FieldY = (InNode.y & 0x7FFu) << 11u;
    const uint FieldZ = (InNode.z & 0x1Fu) << 22u;
    const uint FieldW = (InNode.w & 0x1Fu) << 27u;
    return FieldX | FieldY | FieldZ | FieldW;
}
|
|
|
|
/**
 * Expands a word produced by PackNodeCoord() back into its four components.
 *
 * x = bits [0, 11), y = bits [11, 22), z = bits [22, 27), w = bits [27, 32).
 */
uint4 UnpackNodeCoord(uint InPacked)
{
    return uint4(
        InPacked & 0x7FFu,
        (InPacked >> 11u) & 0x7FFu,
        (InPacked >> 22u) & 0x1Fu,
        (InPacked >> 27u) & 0x1Fu);
}
|
|
|
|
/** Result of GetWaterLODParams(): a continuous LOD value split into integer and fractional parts. */
struct FWaterLODParams
{
    // Integer part (floor) of the continuous LOD value, clamped to [0, InTreeDepth - 1].
    int LowestLOD;
    // Fractional part of the continuous LOD value, in [0, 1).
    float HeightLODFactor;
};
|
|
|
|
/**
 * Derives the lowest LOD level and its fractional remainder from the observer's vertical distance to the water.
 *
 * @param InObserverHeight    Observer height, in the same space as InWaterHeightForLOD.
 * @param InWaterHeightForLOD Water height used as the reference for the LOD computation.
 * @param InLODScale          Divisor applied to the height difference before the LOD is computed.
 * @param InTreeDepth         Upper bound of the LOD range; the result is clamped to [0, InTreeDepth - 1].
 * @return LowestLOD = floor of the continuous LOD, HeightLODFactor = its fractional part.
 */
FWaterLODParams GetWaterLODParams(float InObserverHeight, float InWaterHeightForLOD, float InLODScale, uint InTreeDepth)
{
    // Height difference in units of InLODScale; the first two units do not raise the LOD at all.
    const float ScaledHeightDelta = abs(InObserverHeight - InWaterHeightForLOD) / InLODScale;
    const float DistToWater = max(ScaledHeightDelta - 2.0f, 0.0f) * 2.0f;

    // log2(0) yields -infinity, which the clamp maps to LOD 0.
    const float ContinuousLOD = clamp(log2(DistToWater), 0.0f, InTreeDepth - 1.0f);

    FWaterLODParams Result;
    Result.LowestLOD = clamp(floor(ContinuousLOD), 0, InTreeDepth - 1);
    Result.HeightLODFactor = frac(ContinuousLOD);
    return Result;
}
|
|
|
|
/**
 * Computes the 2D bounds of a quadtree node.
 *
 * @param InNodeCoord        xy = node index within its level, z = level (node edge length is InLeafSize * 2^z).
 * @param InQuadTreePosition Origin of the quadtree; only xy is used.
 * @param InLeafSize         Edge length of a level 0 node.
 * @return float4(MinX, MinY, MaxX, MaxY).
 */
float4 GetNodeAABB2D(uint3 InNodeCoord, float3 InQuadTreePosition, float InLeafSize)
{
    const float NodeSize = (1u << InNodeCoord.z) * InLeafSize;
    const float4 UnitBounds = float4(InNodeCoord.xy, InNodeCoord.xy + 1.0f);
    return UnitBounds * NodeSize + InQuadTreePosition.xyxy;
}
|
|
|
|
#ifdef INITIALIZE_INDIRECT_ARGS
|
|
|
|
// Fall back to the non-precise path when the define is not supplied.
#ifndef PRECISE_OCCLUSION_QUERIES
#define PRECISE_OCCLUSION_QUERIES 0
#endif

// Indirect draw arguments: 5 uints per entry, indexed by (NumDrawBuckets * ViewIndex + BucketIndex).
RWBuffer<uint> IndirectArgs;
#if PRECISE_OCCLUSION_QUERIES
// Indirect draw arguments for the occlusion query boxes: 5 uints per view.
RWBuffer<uint> OcclusionQueryArgs;
#endif
// Number of draw buckets per view.
uint NumDrawBuckets;
// Number of views to initialize arguments for.
uint NumViews;
// The index count per instance is NumQuads * NumQuads * 6.
uint NumQuads;
|
|
|
|
/**
 * Resets the indirect draw arguments. One thread handles one draw bucket and loops over all views.
 * With PRECISE_OCCLUSION_QUERIES, thread 0 additionally resets the per-view occlusion query arguments.
 */
[numthreads(64, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint BucketIndex = DispatchThreadId.x;
    if (BucketIndex >= NumDrawBuckets)
    {
        return;
    }

    // Six indices per quad, NumQuads x NumQuads quads per instance.
    const uint IndexCountPerInstance = NumQuads * NumQuads * 6;

    for (uint ViewIndex = 0; ViewIndex < NumViews; ++ViewIndex)
    {
        const uint ArgBase = (NumDrawBuckets * ViewIndex + BucketIndex) * 5;
        IndirectArgs[ArgBase + 0] = IndexCountPerInstance;
        IndirectArgs[ArgBase + 1] = 0; // InstanceCount
        IndirectArgs[ArgBase + 2] = 0; // StartIndexLocation
        IndirectArgs[ArgBase + 3] = 0; // BaseVertexLocation
        IndirectArgs[ArgBase + 4] = 0; // StartInstanceLocation
    }

#if PRECISE_OCCLUSION_QUERIES
    if (all(DispatchThreadId == 0))
    {
        for (uint QueryViewIndex = 0; QueryViewIndex < NumViews; ++QueryViewIndex)
        {
            const uint QueryArgBase = QueryViewIndex * 5;
            OcclusionQueryArgs[QueryArgBase + 0] = 36; // IndexCountPerInstance; 12 triangles per cube
            OcclusionQueryArgs[QueryArgBase + 1] = 0;  // InstanceCount
            OcclusionQueryArgs[QueryArgBase + 2] = 0;  // StartIndexLocation
            OcclusionQueryArgs[QueryArgBase + 3] = 0;  // BaseVertexLocation
            OcclusionQueryArgs[QueryArgBase + 4] = 0;  // StartInstanceLocation
        }
    }
#endif // PRECISE_OCCLUSION_QUERIES
}
|
|
|
|
#endif // INITIALIZE_INDIRECT_ARGS
|
|
|
|
#ifdef CLEAR_PER_VIEW_BUFFERS
|
|
|
|
// One uint counter per draw bucket.
RWByteAddressBuffer BucketCounts;
// Node list whose first uint (byte offset 0) is the element counter.
RWByteAddressBuffer PackedNodes;
// Number of counters in BucketCounts to clear.
uint NumDrawBuckets;

/**
 * Zeroes the PackedNodes counter and every bucket counter. One thread clears one bucket counter.
 */
[numthreads(64, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint BucketIndex = DispatchThreadId.x;

    // PackedNodes stores a counter at index 0 which we need to clear; a single thread does it.
    if (all(DispatchThreadId == 0))
    {
        PackedNodes.Store(0, 0);
    }

    if (BucketIndex < NumDrawBuckets)
    {
        const uint CounterByteOffset = BucketIndex << 2u;
        BucketCounts.Store(CounterByteOffset, 0);
    }
}
|
|
|
|
#endif // CLEAR_PER_VIEW_BUFFERS
|
|
|
|
#ifdef QUAD_TREE_TRAVERSE
|
|
|
|
#include "/Engine/Private/Nanite/NaniteHZBCull.ush" // BoxCullFrustum()

// Fall back to the non-precise path when the define is not supplied.
#ifndef PRECISE_OCCLUSION_QUERIES
#define PRECISE_OCCLUSION_QUERIES 0
#endif

// Output node list: uint counter at byte offset 0, followed by one packed node word per emitted node.
RWByteAddressBuffer PackedNodes;

#if PRECISE_OCCLUSION_QUERIES
// Two float4 per emitted node: (box center, crosses-near-plane flag) and (box extent, 0).
RWBuffer<float4> OcclusionQueryBoxes;
// One uint per emitted node: 1 if the box crosses the near plane, 0 otherwise.
RWBuffer<uint> OcclusionVisibility;
// Indirect args of the occlusion query draw; element ViewIndex * 5 + 1 (InstanceCount) is incremented per node.
RWBuffer<uint> OcclusionQueryArgs;
#endif // PRECISE_OCCLUSION_QUERIES

// Quadtree nodes, loaded with (x, y, LOD level) and decoded via WaterQuadTreeUnpackNodeRGBA8().
Texture2D<float4> QuadTreeTexture;
// Per-node water Z data, loaded with (x, y, LOD level). xy gives the Z bounds; z is read as the water height for LOD selection.
Texture2D<float4> WaterZBoundsTexture;
// Per water body render data, indexed by the node's WaterBodyRenderDataIndex.
StructuredBuffer<FWaterBodyRenderData> WaterBodyRenderData;
// 2D culling bounds (min xy, max zw), relative to QuadTreePosition.xy.
float4 CullingBoundsAABB;
// Origin of the quadtree.
float3 QuadTreePosition;
// Observer position used for distance based LOD selection.
float3 ObserverPosition;
// Number of nodes along X / Y at LOD level 0.
uint QuadTreeResolutionX;
uint QuadTreeResolutionY;
// View slot used to index OcclusionQueryArgs.
uint ViewIndex;
// Edge length of a LOD level 0 node.
float LeafSize;
// Scale applied to the per-level LOD distances.
float LODScale;
// Scale applied to values read from WaterZBoundsTexture.
float CaptureDepthRange;
// Density level threshold passed to CanRender().
int ForceCollapseDensityLevel;
// Number of LOD levels in the quadtree.
uint NumLODs;
// Threads with an index at or above this value exit immediately.
uint NumDispatchedThreads;
// Non-zero enables the HZB occlusion test.
uint bHZBOcclusionCullingEnabled;
|
|
|
|
/**
 * Squared distance from a point to a 2D axis-aligned box.
 *
 * @param InBox2DMin Minimum corner of the box.
 * @param InBox2DMax Maximum corner of the box.
 * @param InPoint    Query point.
 * @return 0 if the point lies inside the box, otherwise the squared distance to the closest box point.
 */
float ComputeSquaredDistanceToPoint(float2 InBox2DMin, float2 InBox2DMax, float2 InPoint)
{
    // Per axis: signed offset to the violated face, or zero when the point is within the slab.
    float OffsetX = 0.0f;
    if (InPoint.x < InBox2DMin.x)
    {
        OffsetX = InPoint.x - InBox2DMin.x;
    }
    else if (InPoint.x > InBox2DMax.x)
    {
        OffsetX = InPoint.x - InBox2DMax.x;
    }

    float OffsetY = 0.0f;
    if (InPoint.y < InBox2DMin.y)
    {
        OffsetY = InPoint.y - InBox2DMin.y;
    }
    else if (InPoint.y > InBox2DMax.y)
    {
        OffsetY = InPoint.y - InBox2DMax.y;
    }

    return Square(OffsetX) + Square(OffsetY);
}
|
|
|
|
/**
 * Distance associated with a LOD level: 2^(InLODLevel + 1) * InLODScale.
 * InLODLevel may be -1, which yields InLODScale.
 */
float GetLODDistance(int InLODLevel, float InLODScale)
{
    const float Exponent = (float)(InLODLevel + 1);
    return pow(2.0f, Exponent) * InLODScale;
}
|
|
|
|
/**
 * Decides whether a quadtree node can be drawn as a single tile at the given density level.
 *
 * A node can render when all of the following hold:
 *  - it references a water body render data entry (index > 0) whose MaterialIndex is >= 0,
 *  - all of its descendants belong to the same water body,
 *  - the density level is above the force collapse level, or its subtree is complete.
 */
bool CanRender(FWaterQuadTreeNode Node, int InDensityLevel, int InForceCollapseDensityLevel)
{
    // There is a dummy entry at index 0 that is known to have no valid MaterialIndex, so it is rejected without a lookup.
    if (!(Node.WaterBodyRenderDataIndex > 0))
    {
        return false;
    }

    const int MaterialIndex = WaterBodyRenderData[Node.WaterBodyRenderDataIndex].MaterialIndex;
    if (MaterialIndex < 0)
    {
        return false;
    }

    const bool bDensityPermitsRender = (InDensityLevel > InForceCollapseDensityLevel) || Node.bHasCompleteSubtree;
    return Node.bIsSubtreeSameWaterBody && bDensityPermitsRender;
}
|
|
|
|
/**
 * Quadtree traversal. Each thread maps to exactly one node (across all LOD levels), runs bounds, frustum and
 * optional HZB culling on it, decides whether the node should be drawn and at which density level, and if so
 * appends the packed node to PackedNodes. With PRECISE_OCCLUSION_QUERIES, the node's bounding box is also
 * recorded for the occlusion query pass.
 */
[numthreads(64, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint ThreadIndex = DispatchThreadId.x;
    if (ThreadIndex >= NumDispatchedThreads)
    {
        return;
    }

    ResolvedView = ResolveView();

    // Nodes are numbered level by level, starting at LOD 0. Walk the levels until the one containing this
    // thread's index is found, then convert the remainder into a 2D coordinate within that level.
    uint3 NodeCoord = 0;
    {
        uint LevelIndex = 0;
        uint2 LevelResolution = uint2(QuadTreeResolutionX, QuadTreeResolutionY);
        uint NumNodesInFinerLevels = 0;
        while (LevelIndex < NumLODs && ThreadIndex >= (LevelResolution.x * LevelResolution.y + NumNodesInFinerLevels))
        {
            NumNodesInFinerLevels += LevelResolution.x * LevelResolution.y;
            LevelResolution = uint2(max(1u, LevelResolution.x / 2), max(1u, LevelResolution.y / 2));
            ++LevelIndex;
        }

        const uint IndexWithinLevel = ThreadIndex - NumNodesInFinerLevels;
        NodeCoord = uint3(IndexWithinLevel % LevelResolution.x, IndexWithinLevel / LevelResolution.x, LevelIndex);
    }

    const uint LODLevel = NodeCoord.z;
    const float4 NodeAABB2D = GetNodeAABB2D(NodeCoord, QuadTreePosition, LeafSize);

    // Bounds culling: reject nodes that lie entirely outside the 2D culling bounds.
    {
        const float4 CullingBoundsTWS = CullingBoundsAABB + QuadTreePosition.xyxy;
        const bool bBelowBoundsMin = any(NodeAABB2D.zw < CullingBoundsTWS.xy);
        const bool bAboveBoundsMax = any(NodeAABB2D.xy > CullingBoundsTWS.zw);
        if (bBelowBoundsMin || bAboveBoundsMax)
        {
            return;
        }
    }

    // Frustum and HZB occlusion culling
    const float2 WaterZBounds = WaterZBoundsTexture.Load(NodeCoord).xy * CaptureDepthRange + QuadTreePosition.z;
    bool bCrossesNearPlane = false;
    {
        const float3 BoxMin = float3(NodeAABB2D.xy, WaterZBounds.x);
        const float3 BoxMax = float3(NodeAABB2D.zw, WaterZBounds.y);
        const float3 BoxCenter = (BoxMin + BoxMax) * 0.5f;
        // A small epsilon is added to the Z half extent.
        const float3 BoxExtent = float3((BoxMax.xy - BoxMin.xy) * 0.5f, (BoxMax.z - BoxMin.z) * 0.5f + 1e-5f);

        const bool bIsOrtho = IsOrthoProjection(ResolvedView.ViewToClip);
        FFrustumCullData Cull = BoxCullFrustum(BoxCenter, BoxExtent, ResolvedView.TranslatedWorldToClip, ResolvedView.ViewToClip, bIsOrtho, true /* near clip */, false /* skip culling */);

        // The HZB test is skipped for boxes that cross the near plane.
        if (bHZBOcclusionCullingEnabled && !Cull.bCrossesNearPlane)
        {
            const FScreenRect Rect = GetScreenRect(int4(0, 0, HZBViewSize), Cull, 4);
            Cull.bIsVisible = IsVisibleHZB(Rect, true);
        }

        if (!Cull.bIsVisible)
        {
            return;
        }

        bCrossesNearPlane = Cull.bCrossesNearPlane;
    }

    // 2D distance between the observer and the tile. Zero if the observer is within the tile.
    const float ClosestDistanceToTile = sqrt(ComputeSquaredDistanceToPoint(NodeAABB2D.xy, NodeAABB2D.zw, ObserverPosition.xy));

    // Lowest LOD we want to render, based on the observer height above the water surface.
    const float3 ObserverPos = ObserverPosition - QuadTreePosition;
    const int2 TextureLoadCoord = (int2)clamp(ObserverPos.xy / LeafSize, 0.0f, float2(QuadTreeResolutionX, QuadTreeResolutionY) - 0.5f);
    const float WaterHeightForLOD = WaterZBoundsTexture.Load(int3(TextureLoadCoord, 0)).z * CaptureDepthRange;
    const FWaterLODParams WaterLODParams = GetWaterLODParams(ObserverPos.z, WaterHeightForLOD, LODScale, NumLODs - 1);

    uint DensityLevel = 0;
    bool bShouldRender = false;

    // A tile may have to be drawn even though it is outside the LOD range of its quadtree level: if tiles at a
    // higher LOD can't be rendered, the higher LOD tile is emulated with multiple lower LOD tiles drawn at a
    // lower density/tessellation.
    const float LODDistanceOfThisLevel = GetLODDistance(LODLevel, LODScale);
    const bool bOutsideLODRange = ClosestDistanceToTile > LODDistanceOfThisLevel || LODLevel < WaterLODParams.LowestLOD;
    const bool bHasParentLevel = LODLevel < (NumLODs - 1);
    if (bHasParentLevel && bOutsideLODRange)
    {
        // Does the parent tile intersect the LOD range of tiles at this LOD?
        const uint3 ParentCoord = uint3(NodeCoord.xy / 2, NodeCoord.z + 1);
        const float4 ParentAABB2D = GetNodeAABB2D(ParentCoord, QuadTreePosition, LeafSize);
        const float ClosestDistanceToParentTile = sqrt(ComputeSquaredDistanceToPoint(ParentAABB2D.xy, ParentAABB2D.zw, ObserverPosition.xy));
        const bool bParentTileIntersectsThisLOD = ClosestDistanceToParentTile <= LODDistanceOfThisLevel && LODLevel >= WaterLODParams.LowestLOD;

        // Is the parent tile renderable? (DensityLevel is still 0 at this point.)
        const FWaterQuadTreeNode ParentNode = WaterQuadTreeUnpackNodeRGBA8(QuadTreeTexture.Load(ParentCoord));
        const bool bParentTileCanRender = CanRender(ParentNode, DensityLevel, ForceCollapseDensityLevel);

        // If the parent intersects this LOD, its children are rendered instead of it. If the parent can't render
        // at all, it is emulated by rendering its children at the highest renderable LOD.
        if (bParentTileIntersectsThisLOD || !bParentTileCanRender)
        {
            // Either way, find the density that makes this tile look like the higher LOD tile it stands in for:
            // walk up the ancestors until one falls inside its own LOD range.
            for (uint AncestorLOD = LODLevel + 1; AncestorLOD < NumLODs; ++AncestorLOD)
            {
                DensityLevel = AncestorLOD - LODLevel;

                const uint3 AncestorCoord = uint3(NodeCoord.xy >> DensityLevel, AncestorLOD);
                const float4 AncestorAABB2D = GetNodeAABB2D(AncestorCoord, QuadTreePosition, LeafSize);
                const float AncestorDistance = sqrt(ComputeSquaredDistanceToPoint(AncestorAABB2D.xy, AncestorAABB2D.zw, ObserverPosition.xy));

                if (AncestorDistance <= GetLODDistance(AncestorLOD, LODScale) && AncestorLOD >= WaterLODParams.LowestLOD)
                {
                    break;
                }
            }
            bShouldRender = true;
        }
    }
    // The tile is fully within its LOD range and does not intersect the LOD range of its children
    // (Distance <= GetLODDistance(LODLevel) && Distance > GetLODDistance(LODLevel - 1)), so it renders as is.
    else if (LODLevel == WaterLODParams.LowestLOD || ClosestDistanceToTile > GetLODDistance(LODLevel - 1, LODScale))
    {
        bShouldRender = true;
    }

    if (!bShouldRender)
    {
        return;
    }

    const FWaterQuadTreeNode Node = WaterQuadTreeUnpackNodeRGBA8(QuadTreeTexture.Load(NodeCoord));
    if (!CanRender(Node, DensityLevel, ForceCollapseDensityLevel))
    {
        return;
    }

    // Reserve a slot and store the packed node. The element count lives at index 0, so all writes are offset by 1.
    uint WritePos = 0;
    PackedNodes.InterlockedAdd(0, 1, WritePos);
    PackedNodes.Store((WritePos + 1) << 2u, PackNodeCoord(uint4(NodeCoord, DensityLevel)));

#if PRECISE_OCCLUSION_QUERIES
    // Increase the InstanceCount of the occlusion query draw by 1.
    uint Dummy = 0;
    InterlockedAdd(OcclusionQueryArgs[ViewIndex * 5 + 1], 1, Dummy);

    // Write out the bounding box and a flag indicating whether the box crosses the near plane.
    const float3 QueryBoxMin = float3(NodeAABB2D.xy, WaterZBounds.x);
    const float3 QueryBoxMax = float3(NodeAABB2D.zw, WaterZBounds.y);
    const float3 QueryBoxCenter = (QueryBoxMin + QueryBoxMax) * 0.5f;
    const float3 QueryBoxExtent = float3((QueryBoxMax.xy - QueryBoxMin.xy) * 0.5f, (QueryBoxMax.z - QueryBoxMin.z) * 0.5f + 1e-5f);
    OcclusionQueryBoxes[WritePos * 2 + 0] = float4(QueryBoxCenter, bCrossesNearPlane ? 1.0f : 0.0f);
    OcclusionQueryBoxes[WritePos * 2 + 1] = float4(QueryBoxExtent, 0.0f);

    // Boxes crossing the near plane are marked as always visible; all others start at 0.
    OcclusionVisibility[WritePos] = bCrossesNearPlane ? 1 : 0;
#endif // PRECISE_OCCLUSION_QUERIES
}
|
|
|
|
#endif // QUAD_TREE_TRAVERSE
|
|
|
|
#ifdef OCCLUSION_QUERY_RASTER_VS
|
|
|
|
// Two float4 per query instance: (box center, crosses-near-plane flag) and (box extent, unused).
Buffer<float4> OcclusionQueryBoxes;

/**
 * Vertex shader for the occlusion query boxes. InPosition is scaled by the instance's box extent and offset
 * by its center, then projected. The instance index is forwarded as the query index.
 */
void MainVS(
    in float3 InPosition : ATTRIBUTE0,
    in uint InInstanceId : SV_InstanceID,
    out nointerpolation uint OutQueryIndex : QUERY_INDEX,
    out float4 OutPosition : SV_Position
    )
{
    ResolvedView = ResolveView();

    OutQueryIndex = InInstanceId;

    const uint BoxBaseIndex = InInstanceId * 2;
    const float4 CenterAndCrossesNearPlane = OcclusionQueryBoxes[BoxBaseIndex + 0];
    const bool bCrossesNearPlane = CenterAndCrossesNearPlane.w != 0.0f;

    if (bCrossesNearPlane)
    {
        // Kill the instance by setting all vertices to NaN. Boxes crossing the near plane were already
        // marked as visible in the result buffer.
        OutPosition = asfloat(0xFFFFFFFF).xxxx;
    }
    else
    {
        const float3 BoxCenter = CenterAndCrossesNearPlane.xyz;
        const float3 BoxExtent = OcclusionQueryBoxes[BoxBaseIndex + 1].xyz;
        const float3 TranslatedWorldPosition = (InPosition * BoxExtent) + BoxCenter;
        OutPosition = mul(float4(TranslatedWorldPosition, 1.0f), ResolvedView.TranslatedWorldToClip);
    }
}
|
|
|
|
#endif // OCCLUSION_QUERY_RASTER_VS
|
|
|
|
#ifdef OCCLUSION_QUERY_RASTER_PS
|
|
|
|
// One uint per query; set to 1 by any pixel shader invocation of that query's box.
RWBuffer<uint> Visibility;

/**
 * Pixel shader for the occlusion query boxes: every invocation flags its query as visible.
 * Declared with EARLYDEPTHSTENCIL.
 */
EARLYDEPTHSTENCIL
void MainPS(
    in uint InQueryIndex : QUERY_INDEX,
    out float4 OutColor : SV_Target0
    )
{
    OutColor = float4(1.0f, 0.0f, 0.0f, 1.0f);
    Visibility[InQueryIndex] = 1;
}
|
|
|
|
#endif // OCCLUSION_QUERY_RASTER_PS
|
|
|
|
#ifdef COMPUTE_BUCKET_COUNTS
|
|
|
|
// Fall back to the non-precise path when the define is not supplied.
#ifndef PRECISE_OCCLUSION_QUERIES
#define PRECISE_OCCLUSION_QUERIES 0
#endif

// One uint counter per bucket, indexed by material index.
RWByteAddressBuffer BucketCounts;
// Quadtree nodes, loaded with (x, y, LOD level) and decoded via WaterQuadTreeUnpackNodeRGBA8().
Texture2D<float4> QuadTreeTexture;
// Per water body render data, indexed by the node's render data indices.
StructuredBuffer<FWaterBodyRenderData> WaterBodyRenderData;
// Node list: uint counter at byte offset 0, followed by one packed node word per node.
ByteAddressBuffer PackedNodes;
#if PRECISE_OCCLUSION_QUERIES
// One uint per node in PackedNodes; zero means the node is skipped.
Buffer<uint> OcclusionResults;
#endif // PRECISE_OCCLUSION_QUERIES
// Stride of the per-thread loop over the node list.
uint NumDispatchedThreads;
// Number of density levels; node density indices are clamped to NumDensities - 1.
uint NumDensities;
// Quads per node edge at density 0; halved for each density level.
uint NumQuadsLOD0;
// Quads per tile edge covered by a single instance.
uint NumQuadsPerDraw;
|
|
|
|
/**
 * Counts, per material bucket, how many tile instances the nodes in PackedNodes will produce.
 * Each thread strides over the node list, resolves the node's material (including river transitions)
 * and atomically adds the node's instance count to the matching bucket counter.
 */
[numthreads(64, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Number of nodes emitted by the traversal pass (counter at byte offset 0).
    const uint NumDraws = PackedNodes.Load(0);

    // We are limited to 65535 threads, but might end up with more draws than that
    for (uint DrawIndex = DispatchThreadId.x; DrawIndex < NumDraws; DrawIndex += NumDispatchedThreads)
    {
#if PRECISE_OCCLUSION_QUERIES
        const bool bIsVisible = OcclusionResults[DrawIndex] != 0;
        if (!bIsVisible)
        {
            continue;
        }
#endif

        // Load packed node coord and density index (node words start after the counter, hence + 1)
        const uint4 NodeCoordAndDensity = UnpackNodeCoord(PackedNodes.Load((DrawIndex + 1) << 2));
        const uint DensityIndexClamped = min(NodeCoordAndDensity.w, NumDensities - 1);

        // Sample the quadtree to access the water body render data
        const float4 QuadTreeSample = QuadTreeTexture.Load(NodeCoordAndDensity.xyz);
        const FWaterQuadTreeNode Node = WaterQuadTreeUnpackNodeRGBA8(QuadTreeSample);
        const FWaterBodyRenderData WBRenderData = WaterBodyRenderData[Node.WaterBodyRenderDataIndex];

        // Determine the material
        uint MaterialIndex = WBRenderData.MaterialIndex;
        // Rivers can have transitions to other water bodies
        if (WBRenderData.WaterBodyType == WATER_BODY_TYPE_RIVER && Node.TransitionWaterBodyRenderDataIndex > 0)
        {
            const FWaterBodyRenderData TransitionWBRenderData = WaterBodyRenderData[Node.TransitionWaterBodyRenderDataIndex];
            if (TransitionWBRenderData.WaterBodyType == WATER_BODY_TYPE_LAKE)
            {
                MaterialIndex = WBRenderData.RiverToLakeMaterialIndex;
            }
            else if (TransitionWBRenderData.WaterBodyType == WATER_BODY_TYPE_OCEAN)
            {
                MaterialIndex = WBRenderData.RiverToOceanMaterialIndex;
            }
        }

        // Increment bucket counter by the number of tile instances this node expands to.
        // Named NumInstances (not NumDraws) so it does not shadow the loop bound declared above.
        const uint BucketIndex = MaterialIndex;
        const uint NumTilesPerEdge = max(NumQuadsPerDraw, NumQuadsLOD0 >> DensityIndexClamped) / NumQuadsPerDraw;
        const uint NumInstances = NumTilesPerEdge * NumTilesPerEdge;
        uint Dummy = 0;
        BucketCounts.InterlockedAdd(BucketIndex << 2, NumInstances, Dummy);
    }
}
|
|
|
|
#endif // COMPUTE_BUCKET_COUNTS
|
|
|
|
#ifdef COMPUTE_BUCKET_PREFIX_SUM
|
|
|
|
// Fall back to the single-threaded path when the define is not supplied.
#ifndef PARALLEL_PREFIX_SUM
#define PARALLEL_PREFIX_SUM 0
#endif

// Output: exclusive prefix sums of BucketCounts, written starting at element OutputOffset.
RWBuffer<uint> BucketPrefixSums;
// Input: one uint count per bucket.
ByteAddressBuffer BucketCounts;
// Number of buckets to scan.
uint NumBuckets;
// First element of this view's range in BucketPrefixSums. When greater than 0, the element at
// OutputOffset is read as the running total of all previous views.
uint OutputOffset;
// Non-zero: write the running total to element OutputOffset + NumBuckets.
uint bWriteTotalSumAtBufferEnd;

#if PARALLEL_PREFIX_SUM

#include "/Engine/Private/WaveOpUtil.ush"

// Thread group size; each thread handles two elements, so a block covers ARRAY_SIZE buckets.
#define GROUP_SIZE 128
#define ARRAY_SIZE (GROUP_SIZE * 2)
// Scratch array for the in-place scan of one block.
groupshared uint SharedData[ARRAY_SIZE];
// Sum of all elements in previously processed blocks (plus the total carried over from previous views).
groupshared uint BlockPrefixSum;
// Running total of all elements, including the total carried over from previous views.
groupshared uint TotalGlobalSum;
|
|
|
|
/** Returns the count of bucket InIndex, or 0 when InIndex is not a valid bucket index. */
uint LoadFromBuffer(uint InIndex)
{
    if (InIndex >= NumBuckets)
    {
        return 0;
    }

    const uint ByteOffset = InIndex << 2;
    return BucketCounts.Load(ByteOffset);
}
|
|
|
|
/**
 * Writes SharedData[InIndex] + InGlobalPrefixSum to BucketPrefixSums[InIndex + InWriteOffset].
 * Nothing is written when InIndex >= NumBuckets. Note that the guard tests InIndex alone; the
 * destination element InIndex + InWriteOffset is not range-checked here.
 */
void WriteToBuffer(uint InIndex, uint InWriteOffset, uint InGlobalPrefixSum)
{
    if (InIndex >= NumBuckets)
    {
        return;
    }

    const uint DestinationIndex = InIndex + InWriteOffset;
    BucketPrefixSums[DestinationIndex] = SharedData[InIndex] + InGlobalPrefixSum;
}
|
|
|
|
#endif
|
|
|
|
/**
 * Computes the exclusive prefix sum of BucketCounts into BucketPrefixSums[OutputOffset .. OutputOffset + NumBuckets).
 *
 * When OutputOffset > 0, the scan starts from the running total that the previous view stored at element
 * OutputOffset. When bWriteTotalSumAtBufferEnd is non-zero, the new running total is stored at element
 * OutputOffset + NumBuckets, which is element 0 of the next view's range.
 *
 * PARALLEL_PREFIX_SUM: up-sweep / down-sweep scan in group shared memory, processing the input in blocks of
 * ARRAY_SIZE elements (two per thread). Otherwise: a single thread scans serially.
 */
#if PARALLEL_PREFIX_SUM
[numthreads(GROUP_SIZE, 1, 1)]
#else
[numthreads(1, 1, 1)]
#endif
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
#if PARALLEL_PREFIX_SUM

    // NOTE(review): the dispatch thread id is used to index group shared memory, which presumably relies on
    // a single thread group being dispatched - confirm against the dispatch code.
    const uint ThreadID = DispatchThreadId.x;
    const uint NumBlocks = (NumBuckets + (ARRAY_SIZE - 1)) / ARRAY_SIZE;

    if (ThreadID == 0)
    {
        // Apply global offset if this is not the first view. All views share the same prefix sum and instance data buffers.
        if (OutputOffset > 0)
        {
            // Previous view must have written the total sum after the end of its range. This corresponds to the first element in this range.
            BlockPrefixSum = BucketPrefixSums[OutputOffset];
        }
        else
        {
            BlockPrefixSum = 0;
        }
        TotalGlobalSum = BlockPrefixSum;
    }

    // Process array in blocks of fixed size, keeping track of the sum of all elements in previous blocks
    for (uint BlockIndex = 0; BlockIndex < NumBlocks; ++BlockIndex)
    {
        // Makes the shared totals (initial values or the previous block's additions) visible to all threads.
        GroupMemoryBarrierWithGroupSync();

        const uint BlockOffset = BlockIndex * ARRAY_SIZE;
        const uint LocalIndex0 = 2 * ThreadID + 0;
        const uint LocalIndex1 = 2 * ThreadID + 1;
        uint Offset = 1;

        // Load into LDS; out-of-range elements read as 0.
        const uint Value0 = LoadFromBuffer(LocalIndex0 + BlockOffset);
        const uint Value1 = LoadFromBuffer(LocalIndex1 + BlockOffset);
        const uint LocalSum = Value0 + Value1;
        SharedData[LocalIndex0] = Value0;
        SharedData[LocalIndex1] = Value1;

        WaveInterlockedAdd(TotalGlobalSum, LocalSum);

        // Up-sweep
        for (uint NumActiveUp = ARRAY_SIZE >> 1; NumActiveUp > 0; NumActiveUp >>= 1)
        {
            GroupMemoryBarrierWithGroupSync();

            if (ThreadID < NumActiveUp)
            {
                const uint IndexA = Offset * (2 * ThreadID + 1) - 1;
                const uint IndexB = Offset * (2 * ThreadID + 2) - 1;

                SharedData[IndexB] += SharedData[IndexA];
            }
            Offset <<= 1;
        }

        // Clear the last element
        if (ThreadID == 0)
        {
            SharedData[ARRAY_SIZE - 1] = 0;
        }

        // Down-sweep
        for (uint NumActiveDown = 1; NumActiveDown < ARRAY_SIZE; NumActiveDown <<= 1)
        {
            Offset >>= 1;
            GroupMemoryBarrierWithGroupSync();

            if (ThreadID < NumActiveDown)
            {
                const uint IndexA = Offset * (2 * ThreadID + 1) - 1;
                const uint IndexB = Offset * (2 * ThreadID + 2) - 1;

                const uint Temp = SharedData[IndexA];
                SharedData[IndexA] = SharedData[IndexB];
                SharedData[IndexB] += Temp;
            }
        }

        // Snapshot the sum of all previous blocks before any thread adds this block's values to it.
        const uint GlobalPrefixSum = BlockPrefixSum;

        GroupMemoryBarrierWithGroupSync();

        // Write results to output buffer. WriteToBuffer() only range-checks the block-local index, so the
        // global bucket index is checked here; otherwise the last block of a multi-block scan would write
        // past element OutputOffset + NumBuckets.
        if (BlockOffset + LocalIndex0 < NumBuckets)
        {
            WriteToBuffer(LocalIndex0, BlockOffset + OutputOffset, GlobalPrefixSum);
        }
        if (BlockOffset + LocalIndex1 < NumBuckets)
        {
            WriteToBuffer(LocalIndex1, BlockOffset + OutputOffset, GlobalPrefixSum);
        }

        if (NumBlocks > 1)
        {
            WaveInterlockedAdd(BlockPrefixSum, LocalSum);
        }
    }

    // Write total of all values (including those of prior views) at element 0 of the next view.
    // This way we can propagate the prefix sum offsets across views.
    if (bWriteTotalSumAtBufferEnd != 0)
    {
        if (ThreadID == 0)
        {
            BucketPrefixSums[OutputOffset + NumBuckets] = TotalGlobalSum;
        }
    }

#else

    uint PrefixSum = 0;

    // Apply global offset if this is not the first view. All views share the same prefix sum and instance data buffers.
    if (OutputOffset > 0)
    {
        // Previous view must have written the total sum after the end of its range. This corresponds to the first element in this range.
        PrefixSum = BucketPrefixSums[OutputOffset];
    }

    for (uint BucketIndex = 0; BucketIndex < NumBuckets; ++BucketIndex)
    {
        BucketPrefixSums[OutputOffset + BucketIndex] = PrefixSum;
        const uint Count = BucketCounts.Load(BucketIndex << 2);
        PrefixSum += Count;
    }

    // Write total of all values (including those of prior views) at element 0 of the next view.
    // This way we can propagate the prefix sum offsets across views.
    if (bWriteTotalSumAtBufferEnd != 0)
    {
        BucketPrefixSums[OutputOffset + NumBuckets] = PrefixSum;
    }

#endif // PARALLEL_PREFIX_SUM
}
|
|
|
|
#endif // COMPUTE_BUCKET_PREFIX_SUM
|
|
|
|
#ifdef GENERATE_INSTANCE_DATA
|
|
|
|
// Fall back to the non-precise path when the define is not supplied.
#ifndef PRECISE_OCCLUSION_QUERIES
#define PRECISE_OCCLUSION_QUERIES 0
#endif

// Indirect draw arguments, 5 uints per bucket: InstanceCount (+1) is accumulated and
// StartInstanceLocation (+4) is set here.
RWBuffer<uint> IndirectArgs;
// Per-instance data streams, one uint per instance each.
// Data0: packed node coord, LOD level and density index.
RWBuffer<uint> InstanceData0;
// Data1: half precision water surface base height plus tile X/Y.
RWBuffer<uint> InstanceData1;
// Data2: height LOD factor (8 bit unorm) plus water body index.
RWBuffer<uint> InstanceData2;
// Data3: hit proxy color / selection state, only written when bWithWaterSelectionSupport is set.
RWBuffer<uint> InstanceData3;
// Quadtree nodes, loaded with (x, y, LOD level) and decoded via WaterQuadTreeUnpackNodeRGBA8().
Texture2D<float4> QuadTreeTexture;
// Per-node water Z data; the z channel is read as the water surface height.
Texture2D<float4> WaterZBoundsTexture;
// Per water body render data, indexed by the node's render data indices.
StructuredBuffer<FWaterBodyRenderData> WaterBodyRenderData;
// Node list: uint counter at byte offset 0, followed by one packed node word per node.
ByteAddressBuffer PackedNodes;
// First instance data element of each bucket.
Buffer<uint> InstanceDataOffsets;
#if PRECISE_OCCLUSION_QUERIES
// One uint per node in PackedNodes; zero means the node is skipped.
Buffer<uint> OcclusionResults;
#endif // PRECISE_OCCLUSION_QUERIES

// Origin of the quadtree.
float3 QuadTreePosition;
// Observer position used to derive the lowest LOD.
float3 ObserverPosition;
// Number of nodes along X / Y at LOD level 0.
uint QuadTreeResolutionX;
uint QuadTreeResolutionY;
// Number of density levels; node density indices are clamped to NumDensities - 1.
uint NumDensities;
uint NumMaterials;
// Stride of the per-thread loop over the node list.
uint NumDispatchedThreads;
// Added to the material index to obtain the bucket index.
uint BucketIndexOffset;
// Number of LOD levels in the quadtree.
uint NumLODs;
// Quads per node edge at density 0; halved for each density level.
uint NumQuadsLOD0;
// Quads per tile edge covered by a single instance.
uint NumQuadsPerDraw;
// Edge length of a LOD level 0 node.
float LeafSize;
// Scale applied to the LOD distances.
float LODScale;
// Scale applied to values read from WaterZBoundsTexture.
float CaptureDepthRange;
// Multiplier applied to the instance count written to the indirect args.
uint StereoPassInstanceFactor;
// Non-zero: also write InstanceData3.
uint bWithWaterSelectionSupport;
uint bLODMorphingEnabled;
// Non-zero: StartInstanceLocation is left at 0.
uint bInstancedStereoRendering;
|
|
|
|
/**
 * Expands every node in PackedNodes into one or more tile instances. For each node the material bucket is
 * resolved, a contiguous range of instances is reserved in that bucket via an atomic add on the bucket's
 * InstanceCount, and the packed per-instance data is written to InstanceData0..2 (and InstanceData3 when
 * water selection support is enabled).
 */
[numthreads(64, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Number of nodes emitted by the traversal pass (counter at byte offset 0).
    const uint NumDraws = PackedNodes.Load(0);

    // The lowest LOD only depends on the observer, not on the node, so it is computed once up front.
    const float3 ObserverPos = ObserverPosition - QuadTreePosition;
    const int2 TextureLoadCoord = (int2)clamp(ObserverPos.xy / LeafSize, 0.0f, float2(QuadTreeResolutionX, QuadTreeResolutionY) - 0.5f);
    const float WaterHeightForLOD = WaterZBoundsTexture.Load(int3(TextureLoadCoord, 0)).z * CaptureDepthRange;
    const FWaterLODParams WaterLODParams = GetWaterLODParams(ObserverPos.z, WaterHeightForLOD, LODScale, NumLODs - 1);

    // We are limited to 65535 threads, but might end up with more draws than that
    for (uint DrawIndex = DispatchThreadId.x; DrawIndex < NumDraws; DrawIndex += NumDispatchedThreads)
    {
#if PRECISE_OCCLUSION_QUERIES
        const bool bIsVisible = OcclusionResults[DrawIndex] != 0;
        if (!bIsVisible)
        {
            continue;
        }
#endif

        // Load packed node coord and density index (node words start after the counter, hence + 1)
        const uint4 NodeCoordAndDensity = UnpackNodeCoord(PackedNodes.Load((DrawIndex + 1) << 2));
        const uint2 NodeCoord = NodeCoordAndDensity.xy;
        const uint LODLevel = NodeCoordAndDensity.z;
        const uint DensityIndex = NodeCoordAndDensity.w;
        const uint DensityIndexClamped = min(DensityIndex, NumDensities - 1);

        // Sample the quadtree to access the water body render data
        const float4 QuadTreeSample = QuadTreeTexture.Load(int3(NodeCoord, LODLevel));
        const FWaterQuadTreeNode Node = WaterQuadTreeUnpackNodeRGBA8(QuadTreeSample);
        const FWaterBodyRenderData WBRenderData = WaterBodyRenderData[Node.WaterBodyRenderDataIndex];

        // Determine the material
        uint MaterialIndex = WBRenderData.MaterialIndex;
        uint WaterBodyIndex = WBRenderData.WaterBodyIndex;
        // Rivers can have transitions to other water bodies
        if (WBRenderData.WaterBodyType == WATER_BODY_TYPE_RIVER && Node.TransitionWaterBodyRenderDataIndex > 0)
        {
            const FWaterBodyRenderData TransitionWBRenderData = WaterBodyRenderData[Node.TransitionWaterBodyRenderDataIndex];
            if (TransitionWBRenderData.WaterBodyType == WATER_BODY_TYPE_LAKE)
            {
                MaterialIndex = WBRenderData.RiverToLakeMaterialIndex;
                WaterBodyIndex = TransitionWBRenderData.WaterBodyIndex;
            }
            else if (TransitionWBRenderData.WaterBodyType == WATER_BODY_TYPE_OCEAN)
            {
                MaterialIndex = WBRenderData.RiverToOceanMaterialIndex;
                WaterBodyIndex = TransitionWBRenderData.WaterBodyIndex;
            }
        }

        const uint BucketIndex = MaterialIndex + BucketIndexOffset;

        const uint BucketInstanceDataOffset = InstanceDataOffsets.Load(BucketIndex);
        const uint NumTilesPerEdge = max(NumQuadsPerDraw, NumQuadsLOD0 >> DensityIndexClamped) / NumQuadsPerDraw;
        const uint NumInstances = NumTilesPerEdge * NumTilesPerEdge;
        const uint NumInstancesStereo = NumInstances * StereoPassInstanceFactor;

        // Increment InstanceCount; the previous value is this node's first instance within the bucket.
        uint LocalInstanceDataOffsetStereo;
        InterlockedAdd(IndirectArgs[BucketIndex * 5 + 1], NumInstancesStereo, LocalInstanceDataOffsetStereo);
        const uint LocalInstanceDataOffset = LocalInstanceDataOffsetStereo / StereoPassInstanceFactor;
        const uint InstanceDataBaseIndex = BucketInstanceDataOffset + LocalInstanceDataOffset;

        // On the first write to this bucket, set StartInstanceLocation to offset where the per-instance vertex attributes are read from.
        // With ISR, we use the InstanceId to manually fetch instance data buffers in the vertex factory.
        // StartInstanceLocation affects InstanceId differently on different platforms, so when using InstanceId, StartInstanceLocation must be 0.
        if (LocalInstanceDataOffset == 0 && bInstancedStereoRendering == 0)
        {
            IndirectArgs[BucketIndex * 5 + 4] = BucketInstanceDataOffset;
        }

        // Write packed instance data
        {
            // Data0 (same layout as PackNodeCoord):
            // uint NodeCoord.x : 11;
            // uint NodeCoord.y : 11;
            // uint LODLevel : 5;
            // uint DensityIndex : 5;
            //
            // Data1:
            // half WaterSurfaceBaseHeight;
            // uint TileX : 8;
            // uint TileY : 8;
            //
            // Data2:
            // uint HeightLODFactorUnorm : 8;
            // uint WaterBodyIndex : 24; // Could potentially use fewer bits for this to cram additional data in here in the future

            const uint LogicalLODLevel = LODLevel + DensityIndex;

            // Lowest LOD isn't always 0, this increases with the height distance
            const bool bIsLowestLOD = (LogicalLODLevel == WaterLODParams.LowestLOD);
            const float HeightLODFactor = bIsLowestLOD ? WaterLODParams.HeightLODFactor : 0.0f;
            const uint HeightLODFactorUnorm = uint(saturate(HeightLODFactor) * 255.0f);
            const float WaterSurfaceBaseHeight = WaterZBoundsTexture.Load(int3(NodeCoord, LODLevel)).z;

            // Reuse the shared packing helper so the bit layout is defined in one place only.
            const uint Data0 = PackNodeCoord(uint4(NodeCoord, LODLevel, DensityIndex));
            const uint WaterHeightF16 = f32tof16(WaterSurfaceBaseHeight);
            const uint Data2 = (HeightLODFactorUnorm & 0xFFu) | (WaterBodyIndex << 8u);

            // Render a single quadtree node by drawing one or multiple tile instances (row-major tile order)
            uint TileX = 0;
            uint TileY = 0;
            for (uint InstanceIndex = 0; InstanceIndex < NumInstances; ++InstanceIndex)
            {
                const uint Data1 = WaterHeightF16 | ((TileX & 0xFFu) << 16u) | ((TileY & 0xFFu) << 24u);
                InstanceData0[InstanceDataBaseIndex + InstanceIndex] = Data0;
                InstanceData1[InstanceDataBaseIndex + InstanceIndex] = Data1;
                InstanceData2[InstanceDataBaseIndex + InstanceIndex] = Data2;

                TileY = (TileX + 1) >= NumTilesPerEdge ? (TileY + 1) : TileY;
                TileX = (TileX + 1) >= NumTilesPerEdge ? 0 : (TileX + 1);
            }
        }

        // Instance Hit Proxy ID
        if (bWithWaterSelectionSupport)
        {
            for (uint InstanceIndex = 0; InstanceIndex < NumInstances; ++InstanceIndex)
            {
                InstanceData3[InstanceDataBaseIndex + InstanceIndex] = WBRenderData.HitProxyColorAndIsSelected;
            }
        }
    }
}
|
|
|
|
#endif // GENERATE_INSTANCE_DATA
|
|
|
|
#if DEBUG_SHOW_TILES
|
|
|
|
#include "/Engine/Private/ShaderPrint.ush"

// Fall back to the non-precise path when the define is not supplied.
#ifndef PRECISE_OCCLUSION_QUERIES
#define PRECISE_OCCLUSION_QUERIES 0
#endif

// Quadtree nodes, loaded with (x, y, LOD level) and decoded via WaterQuadTreeUnpackNodeRGBA8().
Texture2D<float4> QuadTreeTexture;
// Per-node water Z data; xy gives the Z bounds of the debug box.
Texture2D<float4> WaterZBoundsTexture;
// Per water body render data, indexed by the node's render data indices.
StructuredBuffer<FWaterBodyRenderData> WaterBodyRenderData;
// Node list: uint counter at byte offset 0, followed by one packed node word per node.
ByteAddressBuffer PackedNodes;
#if PRECISE_OCCLUSION_QUERIES
// One uint per node in PackedNodes; zero means the node is skipped.
Buffer<uint> OcclusionResults;
#endif // PRECISE_OCCLUSION_QUERIES

// Origin of the quadtree.
float3 QuadTreePosition;
// Stride of the per-thread loop over the node list.
uint NumDispatchedThreads;
// Edge length of a LOD level 0 node.
float LeafSize;
// Scale applied to values read from WaterZBoundsTexture.
float CaptureDepthRange;
|
|
|
|
/**
 * Debug visualization: draws one box per node in PackedNodes, colored by water body type
 * (river = red, lake = green, ocean = blue, anything else = white; river-to-lake transition = yellow,
 * river-to-ocean transition = purple).
 */
[numthreads(64, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Number of nodes emitted by the traversal pass (counter at byte offset 0).
    const uint NumDraws = PackedNodes.Load(0);

    // We are limited to 65535 threads, but might end up with more draws than that
    for (uint DrawIndex = DispatchThreadId.x; DrawIndex < NumDraws; DrawIndex += NumDispatchedThreads)
    {
#if PRECISE_OCCLUSION_QUERIES
        const bool bIsVisible = OcclusionResults[DrawIndex] != 0;
        if (!bIsVisible)
        {
            continue;
        }
#endif

        // Load packed node coord; the density index (w) is not needed here
        const uint3 NodeCoord = UnpackNodeCoord(PackedNodes.Load((DrawIndex + 1) << 2)).xyz;

        // Sample the quadtree to access the water body render data
        const FWaterQuadTreeNode Node = WaterQuadTreeUnpackNodeRGBA8(QuadTreeTexture.Load(int3(NodeCoord)));
        const FWaterBodyRenderData WBRenderData = WaterBodyRenderData[Node.WaterBodyRenderDataIndex];

        // TODO: Support the two missing modes for visualizing the LOD level and the density
        float4 DebugColor = ColorWhite;
        if (WBRenderData.WaterBodyType == WATER_BODY_TYPE_RIVER)
        {
            DebugColor = ColorRed;
        }
        else if (WBRenderData.WaterBodyType == WATER_BODY_TYPE_LAKE)
        {
            DebugColor = ColorGreen;
        }
        else if (WBRenderData.WaterBodyType == WATER_BODY_TYPE_OCEAN)
        {
            DebugColor = ColorBlue;
        }

        // Rivers transitioning into another water body get a dedicated color.
        if (WBRenderData.WaterBodyType == WATER_BODY_TYPE_RIVER && Node.TransitionWaterBodyRenderDataIndex > 0)
        {
            const FWaterBodyRenderData TransitionWBRenderData = WaterBodyRenderData[Node.TransitionWaterBodyRenderDataIndex];
            if (TransitionWBRenderData.WaterBodyType == WATER_BODY_TYPE_LAKE)
            {
                DebugColor = ColorYellow;
            }
            else if (TransitionWBRenderData.WaterBodyType == WATER_BODY_TYPE_OCEAN)
            {
                DebugColor = ColorPurple;
            }
        }

        // Box of the node, shrunk by 20 units on each side in X and Y.
        const float4 NodeAABB2D = GetNodeAABB2D(NodeCoord, QuadTreePosition, LeafSize);
        const float2 WaterZBounds = WaterZBoundsTexture.Load(NodeCoord).xy * CaptureDepthRange + QuadTreePosition.z;
        const float3 BoxMin = float3(NodeAABB2D.xy + 20.0f, WaterZBounds.x);
        const float3 BoxMax = float3(NodeAABB2D.zw - 20.0f, WaterZBounds.y);

        // Identity transform: the box is already axis aligned.
        const float4x4 IdentityTransform = float4x4(
            1, 0, 0, 0,
            0, 1, 0, 0,
            0, 0, 1, 0,
            0, 0, 0, 1);
        AddOBBTWS(InitShaderPrintContext(), BoxMin, BoxMax, DebugColor, IdentityTransform);
    }
}
|
|
|
|
#endif // DEBUG_SHOW_TILES |