Files
UnrealEngine/Engine/Shaders/Private/DistanceFieldShadowing.usf
2025-05-18 13:04:45 +08:00

866 lines
31 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
DistanceFieldShadowing.usf
=============================================================================*/
#include "Common.ush"
#include "ComputeShaderUtils.ush"
#include "DeferredShadingCommon.ush"
#include "DistanceFieldLightingShared.ush"
#include "DistanceFieldShadowingShared.ush"
#include "Substrate/Substrate.ush"
#ifdef UPSAMPLE_PASS
# include "ShadowFactorsUpsampleCommon.ush"
#endif
// Culling mode selectors derived from the CULLING_TYPE permutation value.
#define SCATTER_TILE_CULLING (CULLING_TYPE == 0)
#define POINT_LIGHT (CULLING_TYPE == 2)
// Per-primitive-type tuning constants.
// MAX_TRACE_SPHERE_RADIUS bounds the trace sphere radius and scales the object bounds expansion in GetObjectViewSpaceBox.
// SELF_SHADOW_VERTICAL_BIAS / SELF_SHADOW_VIEW_BIAS offset the shaded position before tracing in DistanceFieldShadowingCS.
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
#define MAX_TRACE_SPHERE_RADIUS 100
#define SELF_SHADOW_VERTICAL_BIAS 0
#define SELF_SHADOW_VIEW_BIAS 0
#else
#define MAX_TRACE_SPHERE_RADIUS 500
#define SELF_SHADOW_VERTICAL_BIAS 100
#define SELF_SHADOW_VIEW_BIAS 50
#endif
// Written to RWObjectIndirectArguments[0] (the IndexCount slot) by CullObjectsForShadowCS.
uint ObjectBoundingGeometryIndexCount;
// Scales MAX_TRACE_SPHERE_RADIUS when expanding the XY bounds of an object in GetObjectViewSpaceBox.
float ObjectExpandScale;
// Culling planes: a point P is rejected when dot(xyz, P) - w exceeds its radius / projected extent. Only the first NumShadowHullPlanes entries are read.
float4 ShadowConvexHull[12];
// xyz = sphere center (compared against translated world positions), w = radius. w == 0 selects the convex hull test, w > 0 selects the sphere test.
float4 ShadowBoundingSphere;
// Number of valid entries in ShadowConvexHull.
uint NumShadowHullPlanes;
// When zero, objects flagged bIsNaniteMesh are culled in CullObjectsForShadowCS.
uint bDrawNaniteMeshes;
// When non-zero, heightfields whose bounds are not flagged bInAtlas are culled in CullObjectsForShadowCS.
uint bCullHeighfieldsNotInAtlas;
/**
 * Tests a sphere against the shadow convex hull.
 * Returns false as soon as the sphere center is further than SphereRadius on the positive side of any hull plane, true otherwise.
 */
bool ShadowConvexHullIntersectSphere(float3 SphereOrigin, float SphereRadius)
{
	for (uint HullPlaneIndex = 0; HullPlaneIndex < NumShadowHullPlanes; HullPlaneIndex++)
	{
		// xyz holds the plane normal, w the plane offset along that normal
		const float4 HullPlane = ShadowConvexHull[HullPlaneIndex];
		const float SignedDistanceToPlane = dot(HullPlane.xyz, SphereOrigin) - HullPlane.w;

		// The whole sphere is on the outer side of this plane
		if (SignedDistanceToPlane > SphereRadius)
		{
			return false;
		}
	}

	// No plane separates the sphere from the hull
	return true;
}
/**
 * Tests an axis aligned box (center + half extent) against the shadow convex hull.
 * Returns false as soon as the box center is further than the box's projected extent on the positive side of any hull plane, true otherwise.
 */
bool ShadowConvexHullIntersectBox(float3 BoxOrigin, float3 BoxExtent)
{
	for (uint HullPlaneIndex = 0; HullPlaneIndex < NumShadowHullPlanes; ++HullPlaneIndex)
	{
		const float4 HullPlane = ShadowConvexHull[HullPlaneIndex];
		const float SignedDistanceToPlane = dot(HullPlane.xyz, BoxOrigin) - HullPlane.w;
		// Extent of the box projected onto the plane normal
		const float ProjectedExtent = dot(abs(HullPlane.xyz), BoxExtent);

		// The whole box is on the outer side of this plane
		if (SignedDistanceToPlane > ProjectedExtent)
		{
			return false;
		}
	}

	// No plane separates the box from the hull
	return true;
}
/** Number of scene objects for the primitive type this permutation was compiled for. */
uint GetNumSceneObjects()
{
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
	const uint SceneObjectCount = NumSceneObjects;
#else
	const uint SceneObjectCount = NumSceneHeightfieldObjects;
#endif
	return SceneObjectCount;
}
/**
 * Culls scene objects against the shadow bounds, one object per thread.
 * Objects that pass are appended to RWCulledObjectIndices; the append counter lives in RWObjectIndirectArguments[1],
 * and RWObjectIndirectArguments[0] receives ObjectBoundingGeometryIndexCount.
 * With USE_FRUSTUM_CULLING disabled every scene object is passed through unculled.
 */
[numthreads(UPDATEOBJECTS_THREADGROUP_SIZE, 1, 1)]
void CullObjectsForShadowCS(
uint GroupIndex : SV_GroupIndex,
uint3 GroupId : SV_GroupID)
{
// Flattened thread index across the whole dispatch, used directly as the object index
const uint ThreadIndex = GetUnWrappedDispatchThreadId(GroupId, GroupIndex, UPDATEOBJECTS_THREADGROUP_SIZE);
const uint ObjectIndex = ThreadIndex;
#define USE_FRUSTUM_CULLING 1
#if USE_FRUSTUM_CULLING
if (ThreadIndex == 0)
{
// RWObjectIndirectArguments is zeroed by a clear before this shader, only need to set things that are non-zero (and are not read by this shader as that would be a race condition)
// IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
RWObjectIndirectArguments[0] = ObjectBoundingGeometryIndexCount;
}
GroupMemoryBarrierWithGroupSync();
// Threads past the end of the object list do nothing
if (ObjectIndex < GetNumSceneObjects())
{
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
// Signed distance field objects: bounding sphere tested against either the convex hull or the shadow bounding sphere
const FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
const float3 TranslatedCenter = DFFastToTranslatedWorld(DFObjectBounds.Center, PrimaryView.PreViewTranslation);
const float3 CenterToShadowBoundingSphere = ShadowBoundingSphere.xyz - TranslatedCenter;
// A zero radius in ShadowBoundingSphere.w selects the convex hull test, a positive radius selects the sphere-sphere test
const bool bConvexHullIntersect = ShadowBoundingSphere.w == 0 && ShadowConvexHullIntersectSphere(TranslatedCenter, DFObjectBounds.SphereRadius);
const bool bBoundingSphereIntersect = ShadowBoundingSphere.w > 0 && dot(CenterToShadowBoundingSphere, CenterToShadowBoundingSphere) < Square(ShadowBoundingSphere.w + DFObjectBounds.SphereRadius);
if (DFObjectBounds.bCastShadow
&& (bConvexHullIntersect || bBoundingSphereIntersect)
&& (bDrawNaniteMeshes || !DFObjectBounds.bIsNaniteMesh))
{
FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
// Assume ObjectBoundingSphere is located at (0, 0, 0) in local space
const float3 TranslatedWorldViewOrigin = DFFastToTranslatedWorld(PrimaryView.WorldViewOrigin, PrimaryView.PreViewTranslation);
float ViewDist2 = length2(TranslatedCenter - TranslatedWorldViewOrigin);
// Draw distance range check on squared distances; a bound below 0.0001 is treated as unset and always passes
if ((DFObjectData.MinMaxDrawDistance2.x < 0.0001 || ViewDist2 > DFObjectData.MinMaxDrawDistance2.x)
&& (DFObjectData.MinMaxDrawDistance2.y < 0.0001 || ViewDist2 < DFObjectData.MinMaxDrawDistance2.y))
{
// Reserve a slot in the culled list by bumping the instance count, then store the object index there
uint DestIndex;
InterlockedAdd(RWObjectIndirectArguments[1], 1U, DestIndex);
RWCulledObjectIndices[DestIndex] = ObjectIndex;
}
}
#else
// Heightfield objects: box bounds tested against the convex hull only
FHeightfieldObjectBounds Bounds = LoadHeightfieldObjectBounds(ObjectIndex);
const float3 TranslatedBoxOrigin = DFFastToTranslatedWorld(Bounds.BoxOrigin, PrimaryView.PreViewTranslation);
if ((Bounds.bInAtlas || !bCullHeighfieldsNotInAtlas) && ShadowConvexHullIntersectBox(TranslatedBoxOrigin, Bounds.BoxExtent))
{
// Reserve a slot in the culled list by bumping the instance count, then store the object index there
uint DestIndex;
InterlockedAdd(RWObjectIndirectArguments[1], 1U, DestIndex);
RWCulledObjectIndices[DestIndex] = ObjectIndex;
}
#endif
}
#else
// Culling disabled: the culled list is the identity mapping over all scene objects
if (ThreadIndex == 0)
{
// IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
RWObjectIndirectArguments[0] = ObjectBoundingGeometryIndexCount;
RWObjectIndirectArguments[1] = GetNumSceneObjects();
}
if (ObjectIndex < GetNumSceneObjects())
{
RWCulledObjectIndices[ObjectIndex] = ObjectIndex;
}
#endif
}
// Element 0 is a running allocation counter: each thread group atomically adds its object total and receives its base offset.
RWBuffer<uint> RWNextStartOffset;
// Per-tile start offset written by ComputeCulledTilesStartOffsetCS.
RWBuffer<uint> RWShadowTileStartOffsets;
// Sum of the culled object counts of all valid tiles handled by the current thread group.
groupshared uint GroupNumIntersectingObjects;
// Base offset of the current thread group, as returned by the atomic add on RWNextStartOffset[0].
groupshared uint GroupStartOffset;
// Thread group edge length for ComputeCulledTilesStartOffsetCS; defaults to 1 when the permutation does not define it.
#ifndef COMPUTE_START_OFFSET_GROUP_SIZE
#define COMPUTE_START_OFFSET_GROUP_SIZE 1
#endif
/**
 * Assigns every shadow tile a start offset, one tile per thread.
 * Tiles first reserve a range inside their thread group through a groupshared counter,
 * then the group reserves one contiguous range from the global counter in RWNextStartOffset[0].
 */
[numthreads(COMPUTE_START_OFFSET_GROUP_SIZE, COMPUTE_START_OFFSET_GROUP_SIZE, 1)]
void ComputeCulledTilesStartOffsetCS(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const bool bGroupLeader = all(GroupThreadId.xy == 0);
	const uint2 TileXY = DispatchThreadId.xy;
	const bool bTileInRange = all(TileXY < ShadowTileListGroupSize);
	const uint LinearTileIndex = TileXY.y * ShadowTileListGroupSize.x + TileXY.x;

	// Step 1: the group leader resets the group wide counter
	if (bGroupLeader)
	{
		GroupNumIntersectingObjects = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Step 2: each valid tile reserves its range inside the group and remembers where that range begins
	uint OffsetWithinGroup = 0;

	if (bTileInRange)
	{
		const uint TileObjectCount = ShadowTileNumCulledObjects[LinearTileIndex];
		InterlockedAdd(GroupNumIntersectingObjects, TileObjectCount, OffsetWithinGroup);
	}

	GroupMemoryBarrierWithGroupSync();

	// Step 3: the group leader reserves the whole group's range from the global counter
	if (bGroupLeader)
	{
		InterlockedAdd(RWNextStartOffset[0], GroupNumIntersectingObjects, GroupStartOffset);
	}

	GroupMemoryBarrierWithGroupSync();

	// Step 4: final offset = offset inside the group + base offset of the group
	if (bTileInRange)
	{
		RWShadowTileStartOffsets[LinearTileIndex] = OffsetWithinGroup + GroupStartOffset;
	}
}
// Per-tile counter, atomically incremented for every object found to intersect the tile.
RWBuffer<uint> RWShadowTileNumCulledObjects;
// Per-tile lists of intersecting object indices, written by HandleShadowTileObjectIntersection.
RWBuffer<uint> RWShadowTileArrayData;
/** Vertex to pixel shader payload of the scatter culling pass. */
struct FShadowObjectCullVertexOutput
{
// Index of the scene object being rasterized, passed through without interpolation
nointerpolation uint ObjectIndex : TEXCOORD0;
};
// Amount by which object bounds are enlarged in ShadowObjectCullVS before projection.
float MinExpandRadius;
/**
 * Used when culling objects into screenspace tile lists.
 * Transforms one vertex of the bounding geometry of a culled object with TranslatedWorldToShadow,
 * after enlarging the object's bounds by MinExpandRadius.
 *
 * @param InPosition	Vertex of the bounding geometry; xyz scales the object's volume / box extents
 * @param InstanceIndex	Index into CulledObjectIndices selecting the object to draw
 * @param Output		Receives the object index for the pixel shader
 * @param OutPosition	Receives the projected vertex position
 */
void ShadowObjectCullVS(
float4 InPosition : ATTRIBUTE0,
uint InstanceIndex : SV_InstanceID,
out FShadowObjectCullVertexOutput Output,
out float4 OutPosition : SV_POSITION
)
{
uint ObjectIndex = CulledObjectIndices[InstanceIndex];
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
float4x4 ObjectOBBToTranslatedWorld = DFFastToTranslatedWorld(DFObjectData.VolumeToWorld, PrimaryView.PreViewTranslation);
// Per axis scale that lengthens each of the first three matrix rows by MinExpandRadius
float ObjectScaleX = (1.0f + MinExpandRadius / length(ObjectOBBToTranslatedWorld[0].xyz));
float ObjectScaleY = (1.0f + MinExpandRadius / length(ObjectOBBToTranslatedWorld[1].xyz));
float ObjectScaleZ = (1.0f + MinExpandRadius / length(ObjectOBBToTranslatedWorld[2].xyz));
// This is written without [].xyz *= Scale; to workaround a bad codegen on Switch
ObjectOBBToTranslatedWorld[0] = float4(ObjectOBBToTranslatedWorld[0].xyz * ObjectScaleX, ObjectOBBToTranslatedWorld[0].w);
ObjectOBBToTranslatedWorld[1] = float4(ObjectOBBToTranslatedWorld[1].xyz * ObjectScaleY, ObjectOBBToTranslatedWorld[1].w);
ObjectOBBToTranslatedWorld[2] = float4(ObjectOBBToTranslatedWorld[2].xyz * ObjectScaleZ, ObjectOBBToTranslatedWorld[2].w);
float3 TranslatedWorldPosition = mul(float4(InPosition.xyz, 1.0f), ObjectOBBToTranslatedWorld).xyz;
#else
// Heightfield: axis aligned box, extent enlarged by MinExpandRadius on every axis
FHeightfieldObjectBounds Bounds = LoadHeightfieldObjectBounds(ObjectIndex);
float3 BoxTranslatedOrigin = DFFastToTranslatedWorld(Bounds.BoxOrigin, PrimaryView.PreViewTranslation);
float3 BoxExtent = Bounds.BoxExtent;
BoxExtent += MinExpandRadius;
float3 TranslatedWorldPosition = InPosition.xyz * BoxExtent + BoxTranslatedOrigin;
#endif
OutPosition = mul(float4(TranslatedWorldPosition, 1.0), TranslatedWorldToShadow);
// Clamp the vertex to the near plane if it is in front of the near plane
// Only z and w are overwritten, x and y keep their projected values
if (OutPosition.z > 1)
{
OutPosition.z = 0.999999f;
OutPosition.w = 1.0f;
}
Output.ObjectIndex = ObjectIndex;
}
/**
 * Records that an object intersects a shadow tile.
 * In the count pass only the per-tile counter is incremented; otherwise the object index is also appended to the tile's list.
 */
void HandleShadowTileObjectIntersection(uint TileIndex, uint ObjectIndex)
{
#if SCATTER_CULLING_COUNT_PASS
	// Counting only, the lists are filled by a later pass
	InterlockedAdd(RWShadowTileNumCulledObjects[TileIndex], 1U);
#else
	// Where this tile's list begins inside RWShadowTileArrayData
	#if COMPACT_CULLED_SHADOW_OBJECTS
	const uint TileListBase = ShadowTileStartOffsets[TileIndex];
	#else
	const uint TileListBase = TileIndex * ShadowMaxObjectsPerTile;
	#endif

	// The previous counter value is the slot reserved for this object
	uint SlotInTileList;
	InterlockedAdd(RWShadowTileNumCulledObjects[TileIndex], 1U, SlotInTileList);

	RWShadowTileArrayData[SlotInTileList + TileListBase] = ObjectIndex;
#endif
}
/** Bounds of one object after transformation by TranslatedWorldToShadow, as produced by GetObjectViewSpaceBox. */
struct FObjectViewSpaceBox
{
// Component-wise minimum of the transformed box corners, with xy pushed outward by the trace expansion
float3 Min;
// Component-wise maximum of the transformed box corners, with xy pushed outward by the trace expansion
float3 Max;
// Half edge vectors of the transformed box divided by their squared length (clamped to .0001)
float3 XAxis;
float3 YAxis;
float3 ZAxis;
};
/**
 * Computes the bounds of an object in the space produced by TranslatedWorldToShadow.
 *
 * The eight corners of the object's box are transformed and reduced to an axis aligned min / max,
 * whose xy extent is then enlarged by ObjectExpandScale * MAX_TRACE_SPHERE_RADIUS.
 * The returned axes are the transformed half edge vectors divided by their squared length, so that
 * projecting an offset onto an axis yields a distance measured in box half extents along that edge.
 *
 * @param ObjectIndex	Index of the scene object (distance field object or heightfield, depending on the permutation)
 * @return Axis aligned bounds plus the three scaled box axes
 */
FObjectViewSpaceBox GetObjectViewSpaceBox(uint ObjectIndex)
{
	// The object data does not depend on the corner being processed, so it is fetched once here rather than once per corner
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
	FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
#else
	FHeightfieldObjectBounds HeightfieldObjectBounds = LoadHeightfieldObjectBounds(ObjectIndex);
	float3 TranslatedBoxOrigin = DFFastToTranslatedWorld(HeightfieldObjectBounds.BoxOrigin, PrimaryView.PreViewTranslation);
#endif

	// Start from an inverted box so that the first corner initializes both bounds
	float3 MinViewSpacePosition = float3(2000000, 2000000, 2000000);
	float3 MaxViewSpacePosition = float3(-2000000, -2000000, -2000000);
	float3 ViewSpaceBoundsVertices[8];

	for (uint i = 0; i < 8; i++)
	{
		// Bits 0, 1 and 2 of the corner index select the +/- side on x, y and z
		float3 UnitBoxVertex;
		UnitBoxVertex.x = i & 0x1 ? 1.0f : -1.0f;
		UnitBoxVertex.y = i & 0x2 ? 1.0f : -1.0f;
		UnitBoxVertex.z = i & 0x4 ? 1.0f : -1.0f;

#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
		float3 TranslatedWorldBoundsPosition = DFTransformLocalToTranslatedWorld(UnitBoxVertex * DFObjectData.VolumePositionExtent, DFObjectData.VolumeToWorld, PrimaryView.PreViewTranslation).xyz;
#else
		float3 TranslatedWorldBoundsPosition = UnitBoxVertex * HeightfieldObjectBounds.BoxExtent + TranslatedBoxOrigin;
#endif

		float3 ViewSpacePosition = mul(float4(TranslatedWorldBoundsPosition, 1.0f), TranslatedWorldToShadow).xyz;
		MinViewSpacePosition = min(MinViewSpacePosition, ViewSpacePosition);
		MaxViewSpacePosition = max(MaxViewSpacePosition, ViewSpacePosition);
		ViewSpaceBoundsVertices[i] = ViewSpacePosition;
	}

	// Corners 1, 2 and 4 differ from corner 0 along exactly one local axis, so half the difference is the half edge vector
	float3 ObjectXAxis = (ViewSpaceBoundsVertices[1] - ViewSpaceBoundsVertices[0]) / 2.0f;
	float3 ObjectYAxis = (ViewSpaceBoundsVertices[2] - ViewSpaceBoundsVertices[0]) / 2.0f;
	float3 ObjectZAxis = (ViewSpaceBoundsVertices[4] - ViewSpaceBoundsVertices[0]) / 2.0f;

	// Enlarge the bounds in xy only
	MinViewSpacePosition.xy -= ObjectExpandScale * MAX_TRACE_SPHERE_RADIUS;
	MaxViewSpacePosition.xy += ObjectExpandScale * MAX_TRACE_SPHERE_RADIUS;

	FObjectViewSpaceBox ViewBox;
	ViewBox.Min = MinViewSpacePosition;
	ViewBox.Max = MaxViewSpacePosition;
	// Divide by the squared length (clamped to avoid dividing by zero for degenerate boxes)
	ViewBox.XAxis = ObjectXAxis / max(dot(ObjectXAxis, ObjectXAxis), .0001f);
	ViewBox.YAxis = ObjectYAxis / max(dot(ObjectYAxis, ObjectYAxis), .0001f);
	ViewBox.ZAxis = ObjectZAxis / max(dot(ObjectZAxis, ObjectZAxis), .0001f);
	return ViewBox;
}
/**
 * Intersects a single object with the tile and adds to the intersection list if needed.
 * Each pixel corresponds to one shadow tile; the object is first tested with its axis aligned bounds,
 * then with its oriented box axes, and recorded through HandleShadowTileObjectIntersection when both tests pass.
 */
void ShadowObjectCullPS(
	FShadowObjectCullVertexOutput Input,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	OutColor = 0;

	const uint2 TileXY = (uint2)SVPos.xy;
	const uint LinearTileIndex = TileXY.y * ShadowTileListGroupSize.x + TileXY.x;

#define OBJECT_OBB_INTERSECTION 1
#if OBJECT_OBB_INTERSECTION
	// Tile rows are counted from the opposite side for the culling volume
	const float2 FlippedTileXY = float2(TileXY.x, ShadowTileListGroupSize.y - 1 - TileXY.y);
	const float2 TileCountXY = (float2)ShadowTileListGroupSize;

	// Tile footprint remapped to [-1, 1] in xy
	// Extrude toward light to avoid culling objects between the light and the shadow frustum
	const float3 TileBoundsMin = float3((FlippedTileXY + 0.0f) / TileCountXY * 2 - 1, 0);
	const float3 TileBoundsMax = float3((FlippedTileXY + 1.0f) / TileCountXY * 2 - 1, 1000);

	FObjectViewSpaceBox ObjectBox = GetObjectViewSpaceBox(Input.ObjectIndex);

	// Separating axis test on the AABB
	// Note: don't clip by near plane, objects closer to the light can still cast into the frustum
	BRANCH
	if (all(ObjectBox.Max > TileBoundsMin) && all(ObjectBox.Min.xy < TileBoundsMax.xy))
	{
		const float3 BoxCenter = .5f * (ObjectBox.Min + ObjectBox.Max);

		// Range covered by the tile volume along each of the object's box axes
		float3 LowestProjection = 500000;
		float3 HighestProjection = -500000;

		UNROLL
		for (int i = 0; i < 8; i++)
		{
			// Bits 0, 1 and 2 of the corner index pick the min or max side of the tile volume on x, y and z
			const float3 TileCorner = float3(
				(i & 1) ? TileBoundsMax.x : TileBoundsMin.x,
				(i & 2) ? TileBoundsMax.y : TileBoundsMin.y,
				(i & 4) ? TileBoundsMax.z : TileBoundsMin.z);

			const float3 CenterToCorner = TileCorner - BoxCenter;
			const float3 CornerProjection = float3(
				dot(CenterToCorner, ObjectBox.XAxis),
				dot(CenterToCorner, ObjectBox.YAxis),
				dot(CenterToCorner, ObjectBox.ZAxis));

			LowestProjection = min(LowestProjection, CornerProjection);
			HighestProjection = max(HighestProjection, CornerProjection);
		}

		// Separating axis test on the OBB
		BRANCH
		if (all(LowestProjection < 1) && all(HighestProjection > -1))
		{
			HandleShadowTileObjectIntersection(LinearTileIndex, Input.ObjectIndex);
		}
	}
#else
	{
		// No intersection test: every rasterized tile records the object
		HandleShadowTileObjectIntersection(LinearTileIndex, Input.ObjectIndex);
	}
#endif
}
// Output of DistanceFieldShadowingCS: x = shadow factor, y = scene depth of the shaded pixel.
RWTexture2D<float2> RWShadowFactors;
// Number of thread groups per axis, used to derive the screen space size of one tile in CullObjectsToTileWithGather.
float2 NumGroups;
/** From point being shaded toward light, for directional lights. */
float3 LightDirection;
// xyz = light position (translated world space), w = inverse of the light radius.
float4 LightTranslatedPositionAndInvRadius;
// Divided by the distance to the light to get the tangent of the light's angle for point lights.
float LightSourceRadius;
// Scales SceneDepth when computing the offset applied to the start of the shadow ray.
float RayStartOffsetDepthScale;
// x = tangent of the light angle (directional lights), z = trace distance (directional lights). y is not read in this section.
float3 TanLightAngleAndNormalThreshold;
// xy = scissor rect minimum, zw = scissor rect size.
int4 ScissorRectMinAndSize;
/** Min and Max depth for this tile. */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;
/** Inner Min and Max depth for this tile. */
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;
/** Number of objects affecting the tile, after culling. */
// One counter per depth group: 0 covers [MinZ, inner MaxZ2], 1 covers [inner MinZ2, MaxZ]
groupshared uint TileNumObjects0;
groupshared uint TileNumObjects1;
// Used to project tile corners into view space when STEREO_RENDERING is enabled.
float4x4 ScreenToView;
/**
 * Builds, for the calling thread group's screen tile, two lists of objects whose bounding spheres may shadow the tile.
 * The tile's depth range is split at its midpoint into two groups (near and far), each bounded by a sphere that is
 * swept toward the light and tested against every culled object.
 * Contains group barriers, so it must be called by every thread of the group.
 *
 * @param SceneDepth				Depth of the pixel handled by this thread
 * @param ThreadIndex				Flattened index of the thread inside its group
 * @param GroupId					Tile coordinate of the thread group
 * @param TraceDistance				Ray length used for directional lights
 * @param MinDepth					Pixels at or below this depth do not contribute to the tile's depth bounds
 * @param MaxDepth					Pixels at or beyond this depth do not contribute to the tile's depth bounds
 * @param NumIntersectingObjects	Receives the size of the list this pixel should use, clamped to MAX_INTERSECTING_OBJECTS
 * @param GroupIndex				Receives which of the two lists (0 = near, 1 = far) this pixel should use
 */
void CullObjectsToTileWithGather(
float SceneDepth,
uint ThreadIndex,
uint2 GroupId,
float TraceDistance,
float MinDepth,
float MaxDepth,
out uint NumIntersectingObjects,
out uint GroupIndex)
{
// Initialize per-tile variables
if (ThreadIndex == 0)
{
// 0x7F7FFFFF is the bit pattern of the largest finite float
IntegerTileMinZ = 0x7F7FFFFF;
IntegerTileMaxZ = 0;
IntegerTileMinZ2 = 0x7F7FFFFF;
IntegerTileMaxZ2 = 0;
TileNumObjects0 = 0;
TileNumObjects1 = 0;
}
GroupMemoryBarrierWithGroupSync();
if (SceneDepth > MinDepth && SceneDepth < MaxDepth)
{
// Use shared memory atomics to build the depth bounds for this tile
// Each thread is assigned to a pixel at this point
//@todo - move depth range computation to a central point where it can be reused by all the frame's tiled deferred passes!
// Depths are compared through their bit patterns, which orders floats correctly as long as they are non-negative
InterlockedMin(IntegerTileMinZ, asuint(SceneDepth));
InterlockedMax(IntegerTileMaxZ, asuint(SceneDepth));
}
GroupMemoryBarrierWithGroupSync();
float MinTileZ = asfloat(IntegerTileMinZ);
float MaxTileZ = asfloat(IntegerTileMaxZ);
float HalfZ = .5f * (MinTileZ + MaxTileZ);
// Compute a second min and max Z, clipped by HalfZ, so that we get two depth bounds per tile
// This results in more conservative tile depth bounds and fewer intersections
if (SceneDepth >= HalfZ && SceneDepth < MaxDepth)
{
InterlockedMin(IntegerTileMinZ2, asuint(SceneDepth));
}
if (SceneDepth <= HalfZ && SceneDepth > MinDepth)
{
InterlockedMax(IntegerTileMaxZ2, asuint(SceneDepth));
}
GroupMemoryBarrierWithGroupSync();
float MinTileZ2 = asfloat(IntegerTileMinZ2);
float MaxTileZ2 = asfloat(IntegerTileMaxZ2);
// Group 0 spans [MinTileZ, MaxTileZ2] (near half), group 1 spans [MinTileZ2, MaxTileZ] (far half)
float3 ViewTileMin;
float3 ViewTileMax;
float3 ViewTileMin2;
float3 ViewTileMax2;
// No extra padding is currently applied to the tile bounds
float ExpandRadius = 0;
// We operate within both a view rect (for multiple side-by-side views) and a scissor rect relative to the view rect (circumscribing the light's attenuation radius on screen)
float2 TileSize = 2 * ScissorRectMinAndSize.zw / ((float2)View_ViewSizeAndInvSize.xy * NumGroups);
float2 ScissorRectMin = float2(-1,-1) + 2 * (ScissorRectMinAndSize.xy - View_ViewRectMin.xy) * View_ViewSizeAndInvSize.zw;
float2 ScreenTileMin = (GroupId.xy * TileSize + ScissorRectMin) * float2(1, -1);
float2 ScreenTileMax = ((GroupId.xy + 1) * TileSize + ScissorRectMin) * float2(1, -1);
// Get the bounding box for this tile in view space
// Project the corners into view space using both MinZ and MaxZ, and ensure the bounding box contains both results
#if STEREO_RENDERING
// Stereo rendering has asymmetrical FOVs for each eye so we need to account for off-center projection and use full view matrices when projecting into view space
ViewTileMin.xy = min(mul(float4(ScreenTileMin * MinTileZ, MinTileZ, 1), ScreenToView).xy, mul(float4(ScreenTileMin * MaxTileZ2, MaxTileZ2, 1), ScreenToView).xy) - ExpandRadius;
ViewTileMax.xy = max(mul(float4(ScreenTileMax * MinTileZ, MinTileZ, 1), ScreenToView).xy, mul(float4(ScreenTileMax * MaxTileZ2, MaxTileZ2, 1), ScreenToView).xy) + ExpandRadius;
ViewTileMin2.xy = min(mul(float4(ScreenTileMin * MinTileZ2, MinTileZ2, 1), ScreenToView).xy, mul(float4(ScreenTileMin * MaxTileZ, MaxTileZ, 1), ScreenToView).xy) - ExpandRadius;
ViewTileMax2.xy = max(mul(float4(ScreenTileMax * MinTileZ2, MinTileZ2, 1), ScreenToView).xy, mul(float4(ScreenTileMax * MaxTileZ, MaxTileZ, 1), ScreenToView).xy) + ExpandRadius;
#else
// If we can assume centered projection (symmetrical FOV), we can do a fast trig projection into view space using tan(FOV/2)
// We start with coordinates in screen space ranging from (-1,-1) to (1,1)
// The (x,y,z) coordinates for a given screen space point (x,y) in view space are (x * z * tan(H_FOV/2), y * z * tan(V_FOV/2), z)
float2 TanViewFOV = GetTanHalfFieldOfView();
ViewTileMin.xy = min(MinTileZ * ScreenTileMin * TanViewFOV, MaxTileZ2 * ScreenTileMin * TanViewFOV) - ExpandRadius;
ViewTileMax.xy = max(MinTileZ * ScreenTileMax * TanViewFOV, MaxTileZ2 * ScreenTileMax * TanViewFOV) + ExpandRadius;
ViewTileMin2.xy = min(MinTileZ2 * ScreenTileMin * TanViewFOV, MaxTileZ * ScreenTileMin * TanViewFOV) - ExpandRadius;
ViewTileMax2.xy = max(MinTileZ2 * ScreenTileMax * TanViewFOV, MaxTileZ * ScreenTileMax * TanViewFOV) + ExpandRadius;
#endif
ViewTileMin.z = MinTileZ - ExpandRadius;
ViewTileMax.z = MaxTileZ2 + ExpandRadius;
ViewTileMin2.z = MinTileZ2 - ExpandRadius;
ViewTileMax2.z = MaxTileZ + ExpandRadius;
// Convert the view space bounding box to a world space bounding sphere
// The sphere is centered on the box and reaches its max corner
float3 ViewGroup0Center = (ViewTileMax + ViewTileMin) / 2;
float3 TranslatedWorldGroup0Center = mul(float4(ViewGroup0Center, 1), View.ViewToTranslatedWorld).xyz;
float Group0BoundingRadius = length(ViewGroup0Center - ViewTileMax);
float3 ViewGroup1Center = (ViewTileMax2 + ViewTileMin2) / 2;
float3 TranslatedWorldGroup1Center = mul(float4(ViewGroup1Center, 1), View.ViewToTranslatedWorld).xyz;
float Group1BoundingRadius = length(ViewGroup1Center - ViewTileMax2);
#if POINT_LIGHT
// Point light: each group traces from its center to the light position
float3 LightVector0 = LightTranslatedPositionAndInvRadius.xyz - TranslatedWorldGroup0Center;
float LightVector0Length = length(LightVector0);
float3 LightVector1 = LightTranslatedPositionAndInvRadius.xyz - TranslatedWorldGroup1Center;
float LightVector1Length = length(LightVector1);
float3 LightDirection0 = LightVector0 / LightVector0Length;
float3 LightDirection1 = LightVector1 / LightVector1Length;;
float RayLength0 = LightVector0Length;
float RayLength1 = LightVector1Length;
// Don't operate on tiles completely outside of the light's influence
bool bTileShouldComputeShadowing = LightVector0Length < 1.0f / LightTranslatedPositionAndInvRadius.w + Group0BoundingRadius
|| LightVector1Length < 1.0f / LightTranslatedPositionAndInvRadius.w + Group1BoundingRadius;
#else
// Directional light: both groups trace along the same direction for TraceDistance
float3 LightDirection0 = LightDirection;
float3 LightDirection1 = LightDirection;
float RayLength0 = TraceDistance;
float RayLength1 = TraceDistance;
// Don't operate on tiles completely outside of the [MinDepth, MaxDepth] range
bool bTileShouldComputeShadowing = MaxTileZ > MinDepth && MinTileZ < MaxDepth;
#endif
BRANCH
if (bTileShouldComputeShadowing)
{
uint NumCulledObjects = GetCulledNumObjects();
// Compute per-tile lists of affecting objects through bounds culling
// Each thread now operates on a sample instead of a pixel
LOOP
for (uint IndexInCulledList = ThreadIndex; IndexInCulledList < NumCulledObjects; IndexInCulledList += THREADGROUP_TOTALSIZE)
{
const uint ObjectIndex = CulledObjectIndices[IndexInCulledList];
const FDFObjectBounds Bounds = LoadDFObjectBounds(ObjectIndex);
const float3 TranslatedBoundsCenter = DFFastToTranslatedWorld(Bounds.Center, PrimaryView.PreViewTranslation);
// The object's sphere is inflated by the group's bounding radius so that a single ray from the group center stands in for the whole group
BRANCH
if (RaySegmentHitSphere(TranslatedWorldGroup0Center, LightDirection0, RayLength0, TranslatedBoundsCenter, Bounds.SphereRadius + Group0BoundingRadius))
{
uint ListIndex;
InterlockedAdd(TileNumObjects0, 1U, ListIndex);
// Don't write out of bounds on overflow (entries past the capacity all land in the last slot)
ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_OBJECTS - 1));
IntersectingObjectIndices[MAX_INTERSECTING_OBJECTS * 0 + ListIndex] = ObjectIndex;
}
BRANCH
if (RaySegmentHitSphere(TranslatedWorldGroup1Center, LightDirection1, RayLength1, TranslatedBoundsCenter, Bounds.SphereRadius + Group1BoundingRadius))
{
uint ListIndex;
InterlockedAdd(TileNumObjects1, 1U, ListIndex);
// Don't write out of bounds on overflow
ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_OBJECTS - 1));
IntersectingObjectIndices[MAX_INTERSECTING_OBJECTS * 1 + ListIndex] = ObjectIndex;
}
}
}
GroupMemoryBarrierWithGroupSync();
// Pixels deeper than the near group's max depth use the far list
GroupIndex = SceneDepth > MaxTileZ2 ? 1 : 0;
// The counters keep counting past the capacity, so clamp to what was actually stored
NumIntersectingObjects = min(GroupIndex == 0 ? TileNumObjects0 : TileNumObjects1, (uint)MAX_INTERSECTING_OBJECTS);
}
// Scene depth range in which shadowing is computed; pixels outside it keep a shadow factor of 1.
float MinDepth;
float MaxDepth;
// Ratio between scene pixels and shadow factor texels along each axis; a value of 2 enables the gathered depth path.
uint DownsampleFactor;
// Inverse size of the shadow factor buffer, used to build UVs when the previous result is read through a texture.
float2 InvOutputBufferSize;
// Shadow factors of a previous pass, sampled when typed UAV loads are unavailable and by the upsample pass.
Texture2D ShadowFactorsTexture;
SamplerState ShadowFactorsSampler;
/**
 * Computes one shadow factor per (possibly downsampled) pixel by tracing a cone from the pixel toward the light
 * through the distance field or heightfield objects that survived culling.
 * Writes float2(shadow factor, scene depth) to RWShadowFactors; pixels outside [MinDepth, MaxDepth] or without
 * any intersecting object write a shadow factor of 1.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void DistanceFieldShadowingCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;
// Full resolution pixel center covered by this thread, offset by the scissor rect
float2 ScreenUV = float2((DispatchThreadId.xy * DownsampleFactor + ScissorRectMinAndSize.xy + .5f) * View.BufferSizeAndInvSize.zw);
float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
// SceneDepth is the depth that gets shaded, FullResFurthestSceneDepth is only used for the MinDepth rejection test
float SceneDepth;
float FullResFurthestSceneDepth;
{
int2 TopLeftPixelPosition = DispatchThreadId.xy * DownsampleFactor + ScissorRectMinAndSize.xy;
if (DownsampleFactor == 2)
{
#if CULLING_SUBSAMPLE_DEPTH
SceneDepth = FullResFurthestSceneDepth = CalcSceneDepth(ScreenUV);
#else
// cull shadow only if no full-resolution pixel falls within range, preventing edge artifacts
float4 SceneDepths = GatherDeviceZ(float2(TopLeftPixelPosition + 1) * View.BufferSizeAndInvSize.zw);
SceneDepth = ConvertFromDeviceZ(SceneDepths.x);
FullResFurthestSceneDepth = ConvertFromDeviceZ(FarthestDeviceDepth(FarthestDeviceDepth(SceneDepths.x, SceneDepths.y, SceneDepths.z), SceneDepths.w));
#endif
}
else
{
float SceneDepth00 = LookupDeviceZ(TopLeftPixelPosition);
SceneDepth = ConvertFromDeviceZ(SceneDepth00);
FullResFurthestSceneDepth = SceneDepth;
}
}
float3 OpaqueTranslatedWorldPosition = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, SceneDepth), SceneDepth, 1), PrimaryView.ScreenToTranslatedWorld).xyz;
// Distance for directional lights to trace
float TraceDistance = TanLightAngleAndNormalThreshold.z;
// Defaults used when no tile culling runs: every culled object is a candidate
uint NumIntersectingObjects = GetCulledNumObjects();
uint CulledDataParameter = 0;
bool bShouldComputeShadowing = FullResFurthestSceneDepth > MinDepth && SceneDepth < MaxDepth;
#define USE_CULLING 1
#if USE_CULLING
#if SCATTER_TILE_CULLING
if (bShouldComputeShadowing)
{
GetShadowTileCulledData(OpaqueTranslatedWorldPosition, CulledDataParameter, NumIntersectingObjects);
}
#else
// Called by every thread regardless of bShouldComputeShadowing, as the function synchronizes the whole thread group
CullObjectsToTileWithGather(SceneDepth, ThreadIndex, GroupId.xy, TraceDistance, MinDepth, MaxDepth, NumIntersectingObjects, CulledDataParameter);
#endif
#endif // USE_CULLING
// Value written when nothing is traced
float Result = 1.0;
#define COMPUTE_SHADOWING 1
#if COMPUTE_SHADOWING
BRANCH
if (bShouldComputeShadowing && NumIntersectingObjects > 0)
{
// Keeps result from going all the way sharp
float MinSphereRadius = .4f;
// Maintain reasonable culling bounds
float MaxSphereRadius = MAX_TRACE_SPHERE_RADIUS;
// Reduce shadowing when we are close to the ray origin (only used for heightfield)
float SelfShadowFadeDistance = 100;
// Mitigate self-shadow caused by discontinuity on heightfield borders (only used for heightfield)
float SelfShadowVerticalBias = SELF_SHADOW_VERTICAL_BIAS;
// Mitigate self-shadow on steep terrain surfaces
float SelfShadowViewBias = SELF_SHADOW_VIEW_BIAS;
OpaqueTranslatedWorldPosition.z += SelfShadowVerticalBias;
OpaqueTranslatedWorldPosition -= SelfShadowViewBias * View.ViewForward;
// World space offset along the start of the ray to avoid incorrect self-shadowing
float RayStartOffset = 2 + RayStartOffsetDepthScale * SceneDepth;
#if POINT_LIGHT
// Point light: trace from the pixel to the light position
float3 LightVector = LightTranslatedPositionAndInvRadius.xyz - OpaqueTranslatedWorldPosition;
float LightVectorLength = length(LightVector);
float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightVector / LightVectorLength * RayStartOffset;
float3 TranslatedWorldRayEnd = LightTranslatedPositionAndInvRadius.xyz;
float MaxRayTime = LightVectorLength;
// The cone angle is capped at 10 degrees
float MaxAngle = tan(10 * PI / 180.0f);
// Comparing tangents instead of angles, but tangent is always increasing in this range
float TanLightAngle = min(LightSourceRadius / LightVectorLength, MaxAngle);
#else
// Directional light: trace along LightDirection for TraceDistance
float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightDirection * RayStartOffset;
float3 TranslatedWorldRayEnd = OpaqueTranslatedWorldPosition + LightDirection * TraceDistance;
float MaxRayTime = TraceDistance;
float TanLightAngle = TanLightAngleAndNormalThreshold.x;
#endif
// Compile time switches forwarded to the trace functions as constant booleans
#if SCATTER_TILE_CULLING
bool bUseScatterTileCulling = true;
#else
bool bUseScatterTileCulling = false;
#endif
#if USE_CULLING
bool bUseCulling = true;
#else
bool bUseCulling = false;
#endif
bool bIsHeightField = DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_HeightField;
if (bIsHeightField)
{
Result = ShadowRayTraceThroughCulledHeightFieldObjects(
TranslatedWorldRayStart,
TranslatedWorldRayEnd,
TanLightAngle,
MaxSphereRadius,
SelfShadowFadeDistance,
CulledDataParameter,
NumIntersectingObjects,
bUseCulling,
bUseScatterTileCulling);
}
else
{
// Subsurface transmission is only looked up on the deferred, non-mobile path at higher shadow quality
float SubsurfaceDensity = 0;
bool bUseSubsurfaceTransmission = false;
#if !FORWARD_SHADING && DF_SHADOW_QUALITY > 1 && !SHADING_PATH_MOBILE
#if SUBTRATE_GBUFFER_FORMAT==1
const FSubstrateSubsurfaceHeader SSSHeader = SubstrateLoadSubsurfaceHeader(Substrate.MaterialTextureArray, Substrate.FirstSliceStoringSubstrateSSSData, ScreenPosition);
BRANCH
if (SubstrateSubSurfaceHeaderGetIsValid(SSSHeader))
{
SubsurfaceDensity = SubstrateSubSurfaceHeaderGetProfileRadiusScale(SSSHeader);
bUseSubsurfaceTransmission = true;
}
#else
FGBufferData GBufferData = GetGBufferData(ScreenUV);
BRANCH
if (IsSubsurfaceModel(GBufferData.ShadingModelID))
{
SubsurfaceDensity = SubsurfaceDensityFromOpacity(GBufferData.CustomData.a);
bUseSubsurfaceTransmission = true;
}
#endif
#endif
Result = ShadowRayTraceThroughCulledObjects(
TranslatedWorldRayStart,
TranslatedWorldRayEnd,
MaxRayTime,
TanLightAngle,
MinSphereRadius,
MaxSphereRadius,
SubsurfaceDensity,
CulledDataParameter,
NumIntersectingObjects,
bUseCulling,
bUseScatterTileCulling,
bUseSubsurfaceTransmission,
/*bExpandSurface*/ false);
}
}
#if HAS_PREVIOUS_OUTPUT
// Combine with the shadow factor of an earlier pass by keeping the smaller of the two
if (bShouldComputeShadowing)
{
# if PLATFORM_SUPPORTS_TYPED_UAV_LOAD
float PrevResult = RWShadowFactors[DispatchThreadId.xy].x;
# else
float2 PrevResultUV = (DispatchThreadId.xy + 0.5) * InvOutputBufferSize;
float PrevResult = Texture2DSampleLevel(ShadowFactorsTexture, ShadowFactorsSampler, PrevResultUV, 0).x;
# endif
Result = min(Result, PrevResult);
}
#endif
#else
// Debug output when shadowing is compiled out: visualizes the number of objects affecting the pixel
//Result = bShouldComputeShadowing;
Result = bShouldComputeShadowing ? NumIntersectingObjects / 256.0f : 0.0f;
#endif
#if METAL_ES3_1_PROFILE
// clamp max depth to avoid #inf
SceneDepth = min(SceneDepth, 65500.0f);
#endif
RWShadowFactors[DispatchThreadId.xy] = float2(Result, SceneDepth);
}
#ifdef UPSAMPLE_PASS
// Far fade: the shadow factor is blended toward 1 as SceneDepth goes from FadePlaneOffset over a range of 1 / InvFadePlaneLength.
float FadePlaneOffset;
float InvFadePlaneLength;
// Near fade: the shadow factor is blended in from 1 as SceneDepth goes from NearFadePlaneOffset over a range of 1 / InvNearFadePlaneLength.
float NearFadePlaneOffset;
float InvNearFadePlaneLength;
// Forwarded to UpsampleShadowFactors.
float OneOverDownsampleFactor;
// Forwarded to UpsampleShadowFactors.
float2 ShadowFactorsUVBilinearMax;
/**
 * Upsamples the shadow factors to the output resolution and fades them out toward 1 near both ends of the depth range.
 *
 * @param SVPos		Pixel position
 * @param OutColor	Receives the encoded light attenuation, with the shadow factor replicated in all four channels
 */
void DistanceFieldShadowingUpsamplePS(
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	float ShadowFactor;
	float PixelSceneDepth;
	UpsampleShadowFactors(SVPos, ScissorRectMinAndSize, OneOverDownsampleFactor, MinDepth, MaxDepth, ShadowFactorsTexture, ShadowFactorsSampler, ShadowFactorsUVBilinearMax, ShadowFactor, PixelSceneDepth);

	// 1 before the far fade plane, falling to 0 at the end of the far fade range
	const float FarFadeWeight = 1.0f - saturate((PixelSceneDepth - FadePlaneOffset) * InvFadePlaneLength);
	// 0 before the near fade plane, rising to 1 at the end of the near fade range
	const float NearFadeWeight = saturate((PixelSceneDepth - NearFadePlaneOffset) * InvNearFadePlaneLength);

	// A weight of 0 replaces the shadow factor with 1
	ShadowFactor = lerp(1, ShadowFactor, FarFadeWeight);
	ShadowFactor = lerp(1, ShadowFactor, NearFadeWeight);

	OutColor = EncodeLightAttenuation(half4(ShadowFactor, ShadowFactor, ShadowFactor, ShadowFactor));
}
#endif // UPSAMPLE_PASS
#ifdef SHADOW_TILE_VS
// One packed tile coordinate per instance, unpacked in ShadowTileVS with UnpackTileCoord12bits or UnpackTileCoord16bits.
Buffer<uint> TileListData;
/**
 * Emits one vertex of the screen space quad (two triangles, six vertices) covering a work tile.
 *
 * @param InstanceId	Index of the tile inside TileListData
 * @param VertexId		Vertex of the quad, in [0, 5]
 * @param Position		Receives the clip space position of the vertex
 */
void ShadowTileVS(
	in uint InstanceId : SV_InstanceID,
	in uint VertexId : SV_VertexID,
	out float4 Position : SV_POSITION)
{
	const uint PackedTileCoord = TileListData[InstanceId.x];

	// Pixel coordinate of the tile's first corner
#if PERMUTATION_TILE_TYPE == 1
	const uint2 TileFirstCorner = UnpackTileCoord12bits(PackedTileCoord) * WORK_TILE_SIZE;
#else
	const uint2 TileFirstCorner = UnpackTileCoord16bits(PackedTileCoord) * WORK_TILE_SIZE;
#endif

	// Vertices 1, 2, 4 sit one tile further along x, vertices 2, 4, 5 one tile further along y
	const bool bAdvanceX = VertexId == 1 || VertexId == 2 || VertexId == 4;
	const bool bAdvanceY = VertexId == 2 || VertexId == 4 || VertexId == 5;

	uint2 TileVertex = TileFirstCorner;
	if (bAdvanceX)
	{
		TileVertex.x += WORK_TILE_SIZE;
	}
	if (bAdvanceY)
	{
		TileVertex.y += WORK_TILE_SIZE;
	}

	// View port is set on the view rect. So no offset are needed.
	const float2 ClipXY = float2(TileVertex) * View.ViewSizeAndInvSize.zw * float2(2.0f, -2.0f) + float2(-1.0, 1.0f);
	Position = float4(ClipXY, 0.5f, 1.0f);
}
#endif // SHADOW_TILE_VS