866 lines
31 KiB
HLSL
866 lines
31 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DistanceFieldShadowing.usf
|
|
=============================================================================*/
|
|
|
|
#include "Common.ush"
|
|
#include "ComputeShaderUtils.ush"
|
|
#include "DeferredShadingCommon.ush"
|
|
#include "DistanceFieldLightingShared.ush"
|
|
#include "DistanceFieldShadowingShared.ush"
|
|
#include "Substrate/Substrate.ush"
|
|
|
|
#ifdef UPSAMPLE_PASS
|
|
# include "ShadowFactorsUpsampleCommon.ush"
|
|
#endif
|
|
|
|
// Culling mode selectors derived from the CULLING_TYPE permutation value.
#define SCATTER_TILE_CULLING (CULLING_TYPE == 0)
#define POINT_LIGHT (CULLING_TYPE == 2)

// Per-primitive-type tuning:
//  MAX_TRACE_SPHERE_RADIUS   - passed as the max sphere radius when tracing and used to pad object bounds during tile culling
//  SELF_SHADOW_VERTICAL_BIAS - added to the Z of the shaded position before tracing
//  SELF_SHADOW_VIEW_BIAS     - distance the shaded position is pulled back along View.ViewForward before tracing
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
	#define MAX_TRACE_SPHERE_RADIUS 100
	#define SELF_SHADOW_VERTICAL_BIAS 0
	#define SELF_SHADOW_VIEW_BIAS 0
#else
	#define MAX_TRACE_SPHERE_RADIUS 500
	#define SELF_SHADOW_VERTICAL_BIAS 100
	#define SELF_SHADOW_VIEW_BIAS 50
#endif
|
|
|
|
// Written to slot 0 (IndexCount) of the indirect draw arguments by CullObjectsForShadowCS.
uint ObjectBoundingGeometryIndexCount;
// Scales MAX_TRACE_SPHERE_RADIUS when padding object bounds in GetObjectViewSpaceBox.
float ObjectExpandScale;

// Planes of the shadow culling hull. A point P has signed distance dot(xyz, P) - w to a plane;
// bounds lying entirely on the positive side of any plane are rejected.
float4 ShadowConvexHull[12];
// xyz = center, w = radius. A radius of 0 selects the convex hull test instead of the sphere test.
float4 ShadowBoundingSphere;
// Number of valid entries in ShadowConvexHull.
uint NumShadowHullPlanes;
// When zero, objects flagged bIsNaniteMesh are culled.
uint bDrawNaniteMeshes;
// When non-zero, heightfields whose bounds are not flagged bInAtlas are culled.
uint bCullHeighfieldsNotInAtlas;
|
|
|
|
/**
 * Conservative sphere vs. shadow convex hull test.
 * Returns false as soon as the sphere lies completely on the positive side of one hull plane, true otherwise.
 */
bool ShadowConvexHullIntersectSphere(float3 SphereOrigin, float SphereRadius)
{
	uint HullPlane = 0;

	while (HullPlane < NumShadowHullPlanes)
	{
		const float4 Plane = ShadowConvexHull[HullPlane];
		const float SignedDistance = dot(Plane.xyz, SphereOrigin) - Plane.w;

		// Center is further outside this plane than the radius can reach
		if (SignedDistance > SphereRadius)
		{
			return false;
		}

		HullPlane++;
	}

	return true;
}
|
|
|
|
/**
 * Conservative axis-aligned box vs. shadow convex hull test.
 * The box is reduced to its projected radius along each plane normal; returns false when the box
 * is completely on the positive side of any hull plane, true otherwise.
 */
bool ShadowConvexHullIntersectBox(float3 BoxOrigin, float3 BoxExtent)
{
	uint HullPlane = 0;

	while (HullPlane < NumShadowHullPlanes)
	{
		const float4 Plane = ShadowConvexHull[HullPlane];
		const float CenterDistance = dot(Plane.xyz, BoxOrigin) - Plane.w;
		// Half-size of the box measured along the plane normal
		const float ProjectedExtent = dot(abs(Plane.xyz), BoxExtent);

		if (CenterDistance > ProjectedExtent)
		{
			return false;
		}

		++HullPlane;
	}

	return true;
}
|
|
|
|
/** Number of scene objects of the primitive type this permutation was compiled for. */
uint GetNumSceneObjects()
{
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
	const uint ObjectCount = NumSceneObjects;
#else
	const uint ObjectCount = NumSceneHeightfieldObjects;
#endif

	return ObjectCount;
}
|
|
|
|
/**
 * One thread per scene object: tests the object against the shadow culling volume and appends
 * survivors to RWCulledObjectIndices, counting them in slot 1 (NumInstances) of the indirect arguments.
 */
[numthreads(UPDATEOBJECTS_THREADGROUP_SIZE, 1, 1)]
void CullObjectsForShadowCS(
	uint GroupIndex : SV_GroupIndex,
	uint3 GroupId : SV_GroupID)
{
	const uint ThreadIndex = GetUnWrappedDispatchThreadId(GroupId, GroupIndex, UPDATEOBJECTS_THREADGROUP_SIZE);
	const uint ObjectIndex = ThreadIndex;

#define USE_FRUSTUM_CULLING 1
#if USE_FRUSTUM_CULLING

	// RWObjectIndirectArguments is zeroed by a clear before this shader runs, so only non-zero entries need writing
	// (and only entries this shader does not read, otherwise there would be a race).
	// Layout: IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
	if (ThreadIndex == 0)
	{
		RWObjectIndirectArguments[0] = ObjectBoundingGeometryIndexCount;
	}

	GroupMemoryBarrierWithGroupSync();

	if (ObjectIndex < GetNumSceneObjects())
	{
		bool bPassedCulling = false;

	#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
		const FDFObjectBounds ObjectBounds = LoadDFObjectBounds(ObjectIndex);
		const float3 TranslatedCenter = DFFastToTranslatedWorld(ObjectBounds.Center, PrimaryView.PreViewTranslation);

		// A zero-radius ShadowBoundingSphere selects the hull test, a positive radius the sphere test
		const float3 ToShadowSphere = ShadowBoundingSphere.xyz - TranslatedCenter;
		const bool bHitsHull = ShadowBoundingSphere.w == 0 && ShadowConvexHullIntersectSphere(TranslatedCenter, ObjectBounds.SphereRadius);
		const bool bHitsSphere = ShadowBoundingSphere.w > 0 && dot(ToShadowSphere, ToShadowSphere) < Square(ShadowBoundingSphere.w + ObjectBounds.SphereRadius);

		if (ObjectBounds.bCastShadow
			&& (bHitsHull || bHitsSphere)
			&& (bDrawNaniteMeshes || !ObjectBounds.bIsNaniteMesh))
		{
			FDFObjectData ObjectData = LoadDFObjectData(ObjectIndex);

			// Assume ObjectBoundingSphere is located at (0, 0, 0) in local space
			const float3 TranslatedWorldViewOrigin = DFFastToTranslatedWorld(PrimaryView.WorldViewOrigin, PrimaryView.PreViewTranslation);
			const float ViewDistSq = length2(TranslatedCenter - TranslatedWorldViewOrigin);

			// A (near) zero squared draw distance disables that side of the range check
			const bool bPastMinDistance = ObjectData.MinMaxDrawDistance2.x < 0.0001 || ViewDistSq > ObjectData.MinMaxDrawDistance2.x;
			const bool bBeforeMaxDistance = ObjectData.MinMaxDrawDistance2.y < 0.0001 || ViewDistSq < ObjectData.MinMaxDrawDistance2.y;

			bPassedCulling = bPastMinDistance && bBeforeMaxDistance;
		}
	#else
		FHeightfieldObjectBounds Bounds = LoadHeightfieldObjectBounds(ObjectIndex);
		const float3 TranslatedBoxOrigin = DFFastToTranslatedWorld(Bounds.BoxOrigin, PrimaryView.PreViewTranslation);

		bPassedCulling = (Bounds.bInAtlas || !bCullHeighfieldsNotInAtlas) && ShadowConvexHullIntersectBox(TranslatedBoxOrigin, Bounds.BoxExtent);
	#endif

		if (bPassedCulling)
		{
			// Reserve the next output slot and record the surviving object
			uint DestIndex;
			InterlockedAdd(RWObjectIndirectArguments[1], 1U, DestIndex);
			RWCulledObjectIndices[DestIndex] = ObjectIndex;
		}
	}

#else

	// Culling disabled: every object is passed through unchanged
	if (ThreadIndex == 0)
	{
		// IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
		RWObjectIndirectArguments[0] = ObjectBoundingGeometryIndexCount;
		RWObjectIndirectArguments[1] = GetNumSceneObjects();
	}

	if (ObjectIndex < GetNumSceneObjects())
	{
		RWCulledObjectIndices[ObjectIndex] = ObjectIndex;
	}

#endif
}
|
|
|
|
// Element 0 is a running total; each thread group atomically reserves its range of slots from it.
RWBuffer<uint> RWNextStartOffset;
// Per-tile start offset written by ComputeCulledTilesStartOffsetCS.
RWBuffer<uint> RWShadowTileStartOffsets;

// Sum of the per-tile culled object counts inside one thread group.
groupshared uint GroupNumIntersectingObjects;
// Start of the range reserved for the whole thread group in RWNextStartOffset.
groupshared uint GroupStartOffset;

// Fallback so the file compiles when the permutation does not supply a group size.
#ifndef COMPUTE_START_OFFSET_GROUP_SIZE
	#define COMPUTE_START_OFFSET_GROUP_SIZE 1
#endif
|
|
|
|
/**
 * One thread per shadow tile: turns the per-tile culled object counts into per-tile start offsets.
 * Tiles first reserve a range inside their thread group (groupshared atomic), then the group reserves
 * one contiguous range from the global counter, and each tile writes group offset + local offset.
 */
[numthreads(COMPUTE_START_OFFSET_GROUP_SIZE, COMPUTE_START_OFFSET_GROUP_SIZE, 1)]
void ComputeCulledTilesStartOffsetCS(
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const bool bGroupLeader = all(GroupThreadId.xy == 0);

	if (bGroupLeader)
	{
		GroupNumIntersectingObjects = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	const uint2 TileCoord = DispatchThreadId.xy;
	const bool bTileInRange = all(TileCoord < ShadowTileListGroupSize);
	const uint LinearTileIndex = TileCoord.y * ShadowTileListGroupSize.x + TileCoord.x;

	// Offset of this tile's range relative to the start of the group's range
	uint OffsetInGroup = 0;

	if (bTileInRange)
	{
		const uint TileObjectCount = ShadowTileNumCulledObjects[LinearTileIndex];
		InterlockedAdd(GroupNumIntersectingObjects, TileObjectCount, OffsetInGroup);
	}

	GroupMemoryBarrierWithGroupSync();

	// A single thread reserves the group's total from the global counter
	if (bGroupLeader)
	{
		InterlockedAdd(RWNextStartOffset[0], GroupNumIntersectingObjects, GroupStartOffset);
	}

	GroupMemoryBarrierWithGroupSync();

	if (bTileInRange)
	{
		RWShadowTileStartOffsets[LinearTileIndex] = OffsetInGroup + GroupStartOffset;
	}
}
|
|
|
|
// Per-tile counter, atomically incremented for every object that intersects the tile.
RWBuffer<uint> RWShadowTileNumCulledObjects;
// Object indices for all tiles; each tile writes into its own range.
RWBuffer<uint> RWShadowTileArrayData;

/** Data passed from ShadowObjectCullVS to ShadowObjectCullPS. */
struct FShadowObjectCullVertexOutput
{
	// Scene object index of the instance being rasterized; constant across the primitive
	nointerpolation uint ObjectIndex : TEXCOORD0;
};

// Amount by which object bounds are grown in ShadowObjectCullVS before rasterization.
float MinExpandRadius;
|
|
|
|
/** Used when culling objects into screenspace tile lists */
|
|
/**
 * Transforms one vertex of an object's bounding geometry into shadow clip space.
 * The bounds are grown by MinExpandRadius first, and the culled object index is forwarded to the pixel shader.
 */
void ShadowObjectCullVS(
	float4 InPosition : ATTRIBUTE0,
	uint InstanceIndex : SV_InstanceID,
	out FShadowObjectCullVertexOutput Output,
	out float4 OutPosition : SV_POSITION
	)
{
	const uint ObjectIndex = CulledObjectIndices[InstanceIndex];

#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)

	FDFObjectData ObjectData = LoadDFObjectData(ObjectIndex);
	float4x4 VolumeToTranslatedWorld = DFFastToTranslatedWorld(ObjectData.VolumeToWorld, PrimaryView.PreViewTranslation);

	// Per-axis scale that lengthens each basis row by MinExpandRadius
	const float3 AxisExpandScale = 1.0f + MinExpandRadius / float3(
		length(VolumeToTranslatedWorld[0].xyz),
		length(VolumeToTranslatedWorld[1].xyz),
		length(VolumeToTranslatedWorld[2].xyz));

	// Rows are rebuilt as whole float4s rather than using [].xyz *= Scale; to work around bad codegen on Switch
	VolumeToTranslatedWorld[0] = float4(VolumeToTranslatedWorld[0].xyz * AxisExpandScale.x, VolumeToTranslatedWorld[0].w);
	VolumeToTranslatedWorld[1] = float4(VolumeToTranslatedWorld[1].xyz * AxisExpandScale.y, VolumeToTranslatedWorld[1].w);
	VolumeToTranslatedWorld[2] = float4(VolumeToTranslatedWorld[2].xyz * AxisExpandScale.z, VolumeToTranslatedWorld[2].w);

	const float3 TranslatedWorldPosition = mul(float4(InPosition.xyz, 1.0f), VolumeToTranslatedWorld).xyz;

#else

	FHeightfieldObjectBounds Bounds = LoadHeightfieldObjectBounds(ObjectIndex);
	const float3 BoxTranslatedOrigin = DFFastToTranslatedWorld(Bounds.BoxOrigin, PrimaryView.PreViewTranslation);
	const float3 ExpandedBoxExtent = Bounds.BoxExtent + MinExpandRadius;

	const float3 TranslatedWorldPosition = InPosition.xyz * ExpandedBoxExtent + BoxTranslatedOrigin;

#endif

	OutPosition = mul(float4(TranslatedWorldPosition, 1.0), TranslatedWorldToShadow);

	// Clamp the vertex to the near plane if it is in front of the near plane
	if (OutPosition.z > 1)
	{
		OutPosition.z = 0.999999f;
		OutPosition.w = 1.0f;
	}

	Output.ObjectIndex = ObjectIndex;
}
|
|
|
|
/**
 * Records that ObjectIndex intersects the tile TileIndex.
 * In the count pass only the per-tile counter is incremented; otherwise the object index is also
 * stored at the slot the counter returned, inside the tile's range of RWShadowTileArrayData.
 */
void HandleShadowTileObjectIntersection(uint TileIndex, uint ObjectIndex)
{
#if SCATTER_CULLING_COUNT_PASS
	InterlockedAdd(RWShadowTileNumCulledObjects[TileIndex], 1U);
#else
	uint SlotInTile;
	InterlockedAdd(RWShadowTileNumCulledObjects[TileIndex], 1U, SlotInTile);

	// Compacted layout reads the tile's start offset; otherwise every tile owns a fixed-size range
	#if COMPACT_CULLED_SHADOW_OBJECTS
	const uint TileRangeStart = ShadowTileStartOffsets[TileIndex];
	#else
	const uint TileRangeStart = TileIndex * ShadowMaxObjectsPerTile;
	#endif

	RWShadowTileArrayData[(SlotInTile + TileRangeStart)] = ObjectIndex;
#endif
}
|
|
|
|
/** Object bounds expressed in the space produced by TranslatedWorldToShadow, as built by GetObjectViewSpaceBox. */
struct FObjectViewSpaceBox
{
	// Axis-aligned min / max of the eight transformed box corners (xy padded by the trace sphere radius)
	float3 Min;
	float3 Max;

	// Half-edge vectors of the oriented box, each divided by its own squared length
	float3 XAxis;
	float3 YAxis;
	float3 ZAxis;
};
|
|
|
|
/**
 * Builds the bounds of an object in the space produced by TranslatedWorldToShadow.
 *
 * The eight corners of the object's box are transformed and used to derive:
 *  - an axis-aligned Min / Max, whose xy is padded by ObjectExpandScale * MAX_TRACE_SPHERE_RADIUS
 *  - the three half-edge vectors of the oriented box, each divided by its squared length
 *    (clamped to .0001f) so that a dot product with them yields a coordinate in [-1, 1] inside the box.
 *
 * @param ObjectIndex	Index of the scene object (distance field object or heightfield, depending on the permutation)
 * @return				The populated FObjectViewSpaceBox
 */
FObjectViewSpaceBox GetObjectViewSpaceBox(uint ObjectIndex)
{
	// Object data does not depend on the corner being processed, so it is loaded once up front
	// instead of once per corner.
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
	FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
#else
	FHeightfieldObjectBounds HeightfieldObjectBounds = LoadHeightfieldObjectBounds(ObjectIndex);
	const float3 TranslatedBoxOrigin = DFFastToTranslatedWorld(HeightfieldObjectBounds.BoxOrigin, PrimaryView.PreViewTranslation);
#endif

	float3 MinViewSpacePosition = float3(2000000, 2000000, 2000000);
	float3 MaxViewSpacePosition = float3(-2000000, -2000000, -2000000);
	float3 ViewSpaceBoundsVertices[8];

	for (uint i = 0; i < 8; i++)
	{
		// Bits 0..2 of the corner index select the sign along x, y and z
		float3 UnitBoxVertex;
		UnitBoxVertex.x = (i & 0x1) ? 1.0f : -1.0f;
		UnitBoxVertex.y = (i & 0x2) ? 1.0f : -1.0f;
		UnitBoxVertex.z = (i & 0x4) ? 1.0f : -1.0f;

#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
		float3 TranslatedWorldBoundsPosition = DFTransformLocalToTranslatedWorld(UnitBoxVertex * DFObjectData.VolumePositionExtent, DFObjectData.VolumeToWorld, PrimaryView.PreViewTranslation).xyz;
#else
		float3 TranslatedWorldBoundsPosition = UnitBoxVertex * HeightfieldObjectBounds.BoxExtent + TranslatedBoxOrigin;
#endif

		float3 ViewSpacePosition = mul(float4(TranslatedWorldBoundsPosition, 1.0f), TranslatedWorldToShadow).xyz;
		MinViewSpacePosition = min(MinViewSpacePosition, ViewSpacePosition);
		MaxViewSpacePosition = max(MaxViewSpacePosition, ViewSpacePosition);
		ViewSpaceBoundsVertices[i] = ViewSpacePosition;
	}

	// Corners 1, 2 and 4 differ from corner 0 along exactly one local axis, so these are the half-edge vectors
	float3 ObjectXAxis = (ViewSpaceBoundsVertices[1] - ViewSpaceBoundsVertices[0]) / 2.0f;
	float3 ObjectYAxis = (ViewSpaceBoundsVertices[2] - ViewSpaceBoundsVertices[0]) / 2.0f;
	float3 ObjectZAxis = (ViewSpaceBoundsVertices[4] - ViewSpaceBoundsVertices[0]) / 2.0f;

	// Pad the footprint so that rays passing near the object still pick it up
	MinViewSpacePosition.xy -= ObjectExpandScale * MAX_TRACE_SPHERE_RADIUS;
	MaxViewSpacePosition.xy += ObjectExpandScale * MAX_TRACE_SPHERE_RADIUS;

	FObjectViewSpaceBox ViewBox;
	ViewBox.Min = MinViewSpacePosition;
	ViewBox.Max = MaxViewSpacePosition;
	// Dividing by the squared length makes dot(Offset, Axis) a normalized box coordinate; the clamp avoids dividing by zero
	ViewBox.XAxis = ObjectXAxis / max(dot(ObjectXAxis, ObjectXAxis), .0001f);
	ViewBox.YAxis = ObjectYAxis / max(dot(ObjectYAxis, ObjectYAxis), .0001f);
	ViewBox.ZAxis = ObjectZAxis / max(dot(ObjectZAxis, ObjectZAxis), .0001f);
	return ViewBox;
}
|
|
|
|
/** Intersects a single object with the tile and adds to the intersection list if needed. */
|
|
/**
 * Runs once per shadow tile covered by an object's bounding geometry.
 * Intersects the object with the tile and adds it to the tile's intersection list if needed.
 */
void ShadowObjectCullPS(
	FShadowObjectCullVertexOutput Input,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	OutColor = 0;

	const uint2 TilePosition = (uint2)SVPos.xy;
	const uint TileIndex = TilePosition.y * ShadowTileListGroupSize.x + TilePosition.x;

#define OBJECT_OBB_INTERSECTION 1
#if OBJECT_OBB_INTERSECTION

	// Tile rows are flipped vertically for the culling math
	const float2 TilePositionForCulling = float2(TilePosition.x, ShadowTileListGroupSize.y - 1 - TilePosition.y);

	// Tile footprint remapped to [-1, 1]
	float3 ShadowTileMin;
	float3 ShadowTileMax;
	ShadowTileMin.xy = (TilePositionForCulling + 0.0f) / (float2)ShadowTileListGroupSize * 2 - 1;
	ShadowTileMax.xy = (TilePositionForCulling + 1.0f) / (float2)ShadowTileListGroupSize * 2 - 1;

	// Extrude toward light to avoid culling objects between the light and the shadow frustum
	ShadowTileMin.z = 0;
	ShadowTileMax.z = 1000;

	const FObjectViewSpaceBox ObjectBox = GetObjectViewSpaceBox(Input.ObjectIndex);

	// Separating axis test on the AABB
	// Note: don't clip by near plane, objects closer to the light can still cast into the frustum
	BRANCH
	if (all(ObjectBox.Max > ShadowTileMin) && all(ObjectBox.Min.xy < ShadowTileMax.xy))
	{
		const float3 ObjectCenter = .5f * (ObjectBox.Min + ObjectBox.Max);
		float3 MinProjections = 500000;
		float3 MaxProjections = -500000;

		// Project the eight tile corners onto the object's axes and track the covered range per axis
		UNROLL
		for (int CornerIndex = 0; CornerIndex < 8; CornerIndex++)
		{
			// Bits 0..2 of the corner index pick min or max along x, y and z
			const float3 Corner = float3(
				(CornerIndex & 1) ? ShadowTileMax.x : ShadowTileMin.x,
				(CornerIndex & 2) ? ShadowTileMax.y : ShadowTileMin.y,
				(CornerIndex & 4) ? ShadowTileMax.z : ShadowTileMin.z);

			const float3 CenterToCorner = Corner - ObjectCenter;
			const float3 Projections = float3(
				dot(CenterToCorner, ObjectBox.XAxis),
				dot(CenterToCorner, ObjectBox.YAxis),
				dot(CenterToCorner, ObjectBox.ZAxis));

			MinProjections = min(MinProjections, Projections);
			MaxProjections = max(MaxProjections, Projections);
		}

		// Separating axis test on the OBB: the tile must overlap [-1, 1] on every object axis
		BRANCH
		if (all(MinProjections < 1) && all(MaxProjections > -1))
		{
			HandleShadowTileObjectIntersection(TileIndex, Input.ObjectIndex);
		}
	}

#else
	{
		HandleShadowTileObjectIntersection(TileIndex, Input.ObjectIndex);
	}
#endif
}
|
|
|
|
// Output of DistanceFieldShadowingCS: x = shadow factor, y = scene depth.
RWTexture2D<float2> RWShadowFactors;
// Used with the scissor rect size to derive the screen-space size of one tile.
float2 NumGroups;

/** From point being shaded toward light, for directional lights. */
float3 LightDirection;
// xyz = light position in translated world space, w = 1 / light radius.
float4 LightTranslatedPositionAndInvRadius;
// Divided by the distance to the light to obtain the cone tangent for point lights.
float LightSourceRadius;
// Scales scene depth into an additional offset of the ray start.
float RayStartOffsetDepthScale;
// x = tangent of the light angle (directional lights), z = trace distance (directional lights).
float3 TanLightAngleAndNormalThreshold;
// xy = scissor rect min in pixels, zw = scissor rect size in pixels.
int4 ScissorRectMinAndSize;

/** Min and Max depth for this tile. */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;

/** Inner Min and Max depth for this tile. */
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;

/** Number of objects affecting the tile, after culling. */
groupshared uint TileNumObjects0;
groupshared uint TileNumObjects1;

// Used to project tile corners into view space when STEREO_RENDERING is enabled.
float4x4 ScreenToView;
|
|
|
|
/**
 * Gather-style tile culling, executed cooperatively by a whole thread group.
 *
 * The tile's depth range is split at its midpoint into two slabs. Each slab gets a bounding sphere,
 * and every culled object whose bounding sphere is hit by the ray segment from the slab center toward
 * the light is appended to that slab's list in IntersectingObjectIndices.
 *
 * Contains group barriers, so every thread of the group must call it.
 *
 * @param SceneDepth				Depth of the pixel owned by this thread
 * @param ThreadIndex				Flattened index of the thread inside the group
 * @param GroupId					Tile coordinate
 * @param TraceDistance				Ray length used for directional lights
 * @param MinDepth / MaxDepth		Depth range in which shadowing is computed
 * @param NumIntersectingObjects	Object count of the slab this pixel falls in, clamped to MAX_INTERSECTING_OBJECTS
 * @param GroupIndex				0 or 1, the slab this pixel falls in
 */
void CullObjectsToTileWithGather(
	float SceneDepth,
	uint ThreadIndex,
	uint2 GroupId,
	float TraceDistance,
	float MinDepth,
	float MaxDepth,
	out uint NumIntersectingObjects,
	out uint GroupIndex)
{
	// Initialize per-tile variables
	if (ThreadIndex == 0)
	{
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		IntegerTileMinZ2 = 0x7F7FFFFF;
		IntegerTileMaxZ2 = 0;
		TileNumObjects0 = 0;
		TileNumObjects1 = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	const bool bAboveMinDepth = SceneDepth > MinDepth;
	const bool bBelowMaxDepth = SceneDepth < MaxDepth;
	// Depth bits reinterpreted as uint for the integer atomics
	const uint SceneDepthBits = asuint(SceneDepth);

	if (bAboveMinDepth && bBelowMaxDepth)
	{
		// Use shared memory atomics to build the depth bounds for this tile
		// Each thread is assigned to a pixel at this point
		//@todo - move depth range computation to a central point where it can be reused by all the frame's tiled deferred passes!
		InterlockedMin(IntegerTileMinZ, SceneDepthBits);
		InterlockedMax(IntegerTileMaxZ, SceneDepthBits);
	}

	GroupMemoryBarrierWithGroupSync();

	// Slab 0 covers [Slab0MinZ, Slab0MaxZ], slab 1 covers [Slab1MinZ, Slab1MaxZ]
	const float Slab0MinZ = asfloat(IntegerTileMinZ);
	const float Slab1MaxZ = asfloat(IntegerTileMaxZ);
	const float SplitZ = .5f * (Slab0MinZ + Slab1MaxZ);

	// Compute a second min and max Z, clipped by the midpoint, so that we get two depth bounds per tile
	// This gives two tighter depth ranges per tile and fewer intersections
	if (SceneDepth >= SplitZ && bBelowMaxDepth)
	{
		InterlockedMin(IntegerTileMinZ2, SceneDepthBits);
	}

	if (SceneDepth <= SplitZ && bAboveMinDepth)
	{
		InterlockedMax(IntegerTileMaxZ2, SceneDepthBits);
	}

	GroupMemoryBarrierWithGroupSync();

	const float Slab1MinZ = asfloat(IntegerTileMinZ2);
	const float Slab0MaxZ = asfloat(IntegerTileMaxZ2);

	float3 Slab0Min;
	float3 Slab0Max;
	float3 Slab1Min;
	float3 Slab1Max;

	const float ExpandRadius = 0;

	// We operate within both a view rect (for multiple side-by-side views) and a scissor rect relative to the view rect (circumscribing the light's attenuation radius on screen)
	const float2 TileSize = 2 * ScissorRectMinAndSize.zw / ((float2)View_ViewSizeAndInvSize.xy * NumGroups);
	const float2 ScissorRectMin = float2(-1,-1) + 2 * (ScissorRectMinAndSize.xy - View_ViewRectMin.xy) * View_ViewSizeAndInvSize.zw;

	const float2 ScreenTileMin = (GroupId.xy * TileSize + ScissorRectMin) * float2(1, -1);
	const float2 ScreenTileMax = ((GroupId.xy + 1) * TileSize + ScissorRectMin) * float2(1, -1);

	// Get the bounding box for each slab in view space
	// Project the corners into view space using both the slab's min and max Z, and ensure the bounding box contains both results
#if STEREO_RENDERING
	// Stereo rendering has asymmetrical FOVs for each eye so we need to account for off-center projection and use full view matrices when projecting into view space
	Slab0Min.xy = min(mul(float4(ScreenTileMin * Slab0MinZ, Slab0MinZ, 1), ScreenToView).xy, mul(float4(ScreenTileMin * Slab0MaxZ, Slab0MaxZ, 1), ScreenToView).xy) - ExpandRadius;
	Slab0Max.xy = max(mul(float4(ScreenTileMax * Slab0MinZ, Slab0MinZ, 1), ScreenToView).xy, mul(float4(ScreenTileMax * Slab0MaxZ, Slab0MaxZ, 1), ScreenToView).xy) + ExpandRadius;
	Slab1Min.xy = min(mul(float4(ScreenTileMin * Slab1MinZ, Slab1MinZ, 1), ScreenToView).xy, mul(float4(ScreenTileMin * Slab1MaxZ, Slab1MaxZ, 1), ScreenToView).xy) - ExpandRadius;
	Slab1Max.xy = max(mul(float4(ScreenTileMax * Slab1MinZ, Slab1MinZ, 1), ScreenToView).xy, mul(float4(ScreenTileMax * Slab1MaxZ, Slab1MaxZ, 1), ScreenToView).xy) + ExpandRadius;
#else
	// If we can assume centered projection (symmetrical FOV), we can do a fast trig projection into view space using tan(FOV/2)
	// We start with coordinates in screen space ranging from (-1,-1) to (1,1)
	// The (x,y,z) coordinates for a given screen space point (x,y) in view space are (x * z * tan(H_FOV/2), y * z * tan(V_FOV/2), z)
	const float2 TanViewFOV = GetTanHalfFieldOfView();
	Slab0Min.xy = min(Slab0MinZ * ScreenTileMin * TanViewFOV, Slab0MaxZ * ScreenTileMin * TanViewFOV) - ExpandRadius;
	Slab0Max.xy = max(Slab0MinZ * ScreenTileMax * TanViewFOV, Slab0MaxZ * ScreenTileMax * TanViewFOV) + ExpandRadius;
	Slab1Min.xy = min(Slab1MinZ * ScreenTileMin * TanViewFOV, Slab1MaxZ * ScreenTileMin * TanViewFOV) - ExpandRadius;
	Slab1Max.xy = max(Slab1MinZ * ScreenTileMax * TanViewFOV, Slab1MaxZ * ScreenTileMax * TanViewFOV) + ExpandRadius;
#endif
	Slab0Min.z = Slab0MinZ - ExpandRadius;
	Slab0Max.z = Slab0MaxZ + ExpandRadius;
	Slab1Min.z = Slab1MinZ - ExpandRadius;
	Slab1Max.z = Slab1MaxZ + ExpandRadius;

	// Convert the view space bounding boxes to world space bounding spheres
	const float3 ViewSlab0Center = (Slab0Max + Slab0Min) / 2;
	const float3 TranslatedWorldSlab0Center = mul(float4(ViewSlab0Center, 1), View.ViewToTranslatedWorld).xyz;
	const float Slab0BoundingRadius = length(ViewSlab0Center - Slab0Max);

	const float3 ViewSlab1Center = (Slab1Max + Slab1Min) / 2;
	const float3 TranslatedWorldSlab1Center = mul(float4(ViewSlab1Center, 1), View.ViewToTranslatedWorld).xyz;
	const float Slab1BoundingRadius = length(ViewSlab1Center - Slab1Max);

#if POINT_LIGHT
	// Rays run from each slab center to the light position
	const float3 ToLight0 = LightTranslatedPositionAndInvRadius.xyz - TranslatedWorldSlab0Center;
	const float ToLight0Length = length(ToLight0);
	const float3 ToLight1 = LightTranslatedPositionAndInvRadius.xyz - TranslatedWorldSlab1Center;
	const float ToLight1Length = length(ToLight1);
	const float3 RayDirection0 = ToLight0 / ToLight0Length;
	const float3 RayDirection1 = ToLight1 / ToLight1Length;
	const float RayLength0 = ToLight0Length;
	const float RayLength1 = ToLight1Length;

	// Don't operate on tiles completely outside of the light's influence
	const bool bTileShouldComputeShadowing = ToLight0Length < 1.0f / LightTranslatedPositionAndInvRadius.w + Slab0BoundingRadius
		|| ToLight1Length < 1.0f / LightTranslatedPositionAndInvRadius.w + Slab1BoundingRadius;
#else
	const float3 RayDirection0 = LightDirection;
	const float3 RayDirection1 = LightDirection;
	const float RayLength0 = TraceDistance;
	const float RayLength1 = TraceDistance;

	// Don't operate on tiles completely outside of the [MinDepth, MaxDepth] range
	const bool bTileShouldComputeShadowing = Slab1MaxZ > MinDepth && Slab0MinZ < MaxDepth;
#endif

	BRANCH
	if (bTileShouldComputeShadowing)
	{
		const uint NumCulledObjects = GetCulledNumObjects();

		// Compute per-tile lists of affecting objects through bounds culling
		// Each thread now operates on a sample instead of a pixel
		LOOP
		for (uint CulledListSlot = ThreadIndex; CulledListSlot < NumCulledObjects; CulledListSlot += THREADGROUP_TOTALSIZE)
		{
			const uint ObjectIndex = CulledObjectIndices[CulledListSlot];
			const FDFObjectBounds Bounds = LoadDFObjectBounds(ObjectIndex);
			const float3 TranslatedBoundsCenter = DFFastToTranslatedWorld(Bounds.Center, PrimaryView.PreViewTranslation);

			BRANCH
			if (RaySegmentHitSphere(TranslatedWorldSlab0Center, RayDirection0, RayLength0, TranslatedBoundsCenter, Bounds.SphereRadius + Slab0BoundingRadius))
			{
				uint ListIndex;
				InterlockedAdd(TileNumObjects0, 1U, ListIndex);
				// Don't write out of bounds on overflow: extra objects all land in the last slot
				ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_OBJECTS - 1));
				IntersectingObjectIndices[MAX_INTERSECTING_OBJECTS * 0 + ListIndex] = ObjectIndex;
			}

			BRANCH
			if (RaySegmentHitSphere(TranslatedWorldSlab1Center, RayDirection1, RayLength1, TranslatedBoundsCenter, Bounds.SphereRadius + Slab1BoundingRadius))
			{
				uint ListIndex;
				InterlockedAdd(TileNumObjects1, 1U, ListIndex);
				// Don't write out of bounds on overflow: extra objects all land in the last slot
				ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_OBJECTS - 1));
				IntersectingObjectIndices[MAX_INTERSECTING_OBJECTS * 1 + ListIndex] = ObjectIndex;
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Pixels beyond the far end of slab 0 read slab 1's list
	GroupIndex = SceneDepth > Slab0MaxZ ? 1 : 0;
	NumIntersectingObjects = min(GroupIndex == 0 ? TileNumObjects0 : TileNumObjects1, (uint)MAX_INTERSECTING_OBJECTS);
}
|
|
|
|
// Scene depth range in which shadowing is computed.
float MinDepth;
float MaxDepth;
// Number of full-resolution pixels per shadow factor texel along each axis.
uint DownsampleFactor;

// Reciprocal of the shadow factor buffer size, used to build UVs when the previous output is sampled.
float2 InvOutputBufferSize;
// Shadow factors texture and sampler: the previous result in DistanceFieldShadowingCS when typed UAV loads
// are unavailable, and the input of the upsample pass.
Texture2D ShadowFactorsTexture;
SamplerState ShadowFactorsSampler;
|
|
|
|
/**
 * Computes one shadow factor per (possibly downsampled) pixel by cone tracing through the culled
 * distance field or heightfield objects toward the light.
 * Writes float2(shadow factor, scene depth) to RWShadowFactors.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void DistanceFieldShadowingCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint FlatThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;

	const float2 ScreenUV = float2((DispatchThreadId.xy * DownsampleFactor + ScissorRectMinAndSize.xy + .5f) * View.BufferSizeAndInvSize.zw);
	const float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;

	// SceneDepth is the depth that gets shaded; FullResFurthestSceneDepth is the furthest depth of the
	// full-resolution pixels covered by this thread
	float SceneDepth;
	float FullResFurthestSceneDepth;
	{
		const int2 FirstFullResPixel = DispatchThreadId.xy * DownsampleFactor + ScissorRectMinAndSize.xy;

		if (DownsampleFactor != 2)
		{
			const float DeviceZ = LookupDeviceZ(FirstFullResPixel);
			SceneDepth = ConvertFromDeviceZ(DeviceZ);
			FullResFurthestSceneDepth = SceneDepth;
		}
		else
		{
#if CULLING_SUBSAMPLE_DEPTH
			SceneDepth = FullResFurthestSceneDepth = CalcSceneDepth(ScreenUV);
#else
			// cull shadow only if no full-resolution pixel falls within range, preventing edge artifacts
			const float4 DeviceZQuad = GatherDeviceZ(float2(FirstFullResPixel + 1) * View.BufferSizeAndInvSize.zw);
			SceneDepth = ConvertFromDeviceZ(DeviceZQuad.x);
			FullResFurthestSceneDepth = ConvertFromDeviceZ(FarthestDeviceDepth(FarthestDeviceDepth(DeviceZQuad.x, DeviceZQuad.y, DeviceZQuad.z), DeviceZQuad.w));
#endif
		}
	}

	float3 OpaqueTranslatedWorldPosition = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, SceneDepth), SceneDepth, 1), PrimaryView.ScreenToTranslatedWorld).xyz;

	// Distance for directional lights to trace
	const float TraceDistance = TanLightAngleAndNormalThreshold.z;

	uint NumIntersectingObjects = GetCulledNumObjects();
	uint CulledDataParameter = 0;
	const bool bShouldComputeShadowing = FullResFurthestSceneDepth > MinDepth && SceneDepth < MaxDepth;

#define USE_CULLING 1
#if USE_CULLING

	#if SCATTER_TILE_CULLING

	// Look up the tile list built by the scatter passes
	if (bShouldComputeShadowing)
	{
		GetShadowTileCulledData(OpaqueTranslatedWorldPosition, CulledDataParameter, NumIntersectingObjects);
	}

	#else

	// Build the tile list now; this call contains group barriers, so it is made unconditionally
	CullObjectsToTileWithGather(SceneDepth, FlatThreadIndex, GroupId.xy, TraceDistance, MinDepth, MaxDepth, NumIntersectingObjects, CulledDataParameter);

	#endif
#endif // USE_CULLING

	float Result = 1.0;

#define COMPUTE_SHADOWING 1
#if COMPUTE_SHADOWING
	BRANCH
	if (bShouldComputeShadowing && NumIntersectingObjects > 0)
	{
		// Keeps result from going all the way sharp
		const float MinSphereRadius = .4f;
		// Maintain reasonable culling bounds
		const float MaxSphereRadius = MAX_TRACE_SPHERE_RADIUS;

		// Reduce shadowing when we are close to the ray origin (only used for heightfield)
		const float SelfShadowFadeDistance = 100;
		// Mitigate self-shadow caused by discontinuity on heightfield borders (only used for heightfield)
		const float SelfShadowVerticalBias = SELF_SHADOW_VERTICAL_BIAS;
		// Mitigate self-shadow on steep terrain surfaces
		const float SelfShadowViewBias = SELF_SHADOW_VIEW_BIAS;

		OpaqueTranslatedWorldPosition.z += SelfShadowVerticalBias;
		OpaqueTranslatedWorldPosition -= SelfShadowViewBias * View.ViewForward;

		// World space offset along the start of the ray to avoid incorrect self-shadowing
		const float RayStartOffset = 2 + RayStartOffsetDepthScale * SceneDepth;

	#if POINT_LIGHT

		// Trace from the shaded point to the light position
		const float3 ToLight = LightTranslatedPositionAndInvRadius.xyz - OpaqueTranslatedWorldPosition;
		const float ToLightLength = length(ToLight);
		const float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + ToLight / ToLightLength * RayStartOffset;
		const float3 TranslatedWorldRayEnd = LightTranslatedPositionAndInvRadius.xyz;
		const float MaxRayTime = ToLightLength;
		const float MaxTanAngle = tan(10 * PI / 180.0f);
		// Comparing tangents instead of angles, but tangent is always increasing in this range
		const float TanLightAngle = min(LightSourceRadius / ToLightLength, MaxTanAngle);

	#else

		// Trace a fixed distance along the light direction
		const float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightDirection * RayStartOffset;
		const float3 TranslatedWorldRayEnd = OpaqueTranslatedWorldPosition + LightDirection * TraceDistance;
		const float MaxRayTime = TraceDistance;
		const float TanLightAngle = TanLightAngleAndNormalThreshold.x;

	#endif

	#if SCATTER_TILE_CULLING
		const bool bUseScatterTileCulling = true;
	#else
		const bool bUseScatterTileCulling = false;
	#endif

	#if USE_CULLING
		const bool bUseCulling = true;
	#else
		const bool bUseCulling = false;
	#endif

		if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_HeightField)
		{
			Result = ShadowRayTraceThroughCulledHeightFieldObjects(
				TranslatedWorldRayStart,
				TranslatedWorldRayEnd,
				TanLightAngle,
				MaxSphereRadius,
				SelfShadowFadeDistance,
				CulledDataParameter,
				NumIntersectingObjects,
				bUseCulling,
				bUseScatterTileCulling);
		}
		else
		{
			// Subsurface inputs stay disabled unless the material under this pixel reports a subsurface model
			float SubsurfaceDensity = 0;
			bool bUseSubsurfaceTransmission = false;

	#if !FORWARD_SHADING && DF_SHADOW_QUALITY > 1 && !SHADING_PATH_MOBILE
		#if SUBTRATE_GBUFFER_FORMAT==1
			const FSubstrateSubsurfaceHeader SSSHeader = SubstrateLoadSubsurfaceHeader(Substrate.MaterialTextureArray, Substrate.FirstSliceStoringSubstrateSSSData, ScreenPosition);
			BRANCH
			if (SubstrateSubSurfaceHeaderGetIsValid(SSSHeader))
			{
				SubsurfaceDensity = SubstrateSubSurfaceHeaderGetProfileRadiusScale(SSSHeader);
				bUseSubsurfaceTransmission = true;
			}
		#else
			FGBufferData GBufferData = GetGBufferData(ScreenUV);
			BRANCH
			if (IsSubsurfaceModel(GBufferData.ShadingModelID))
			{
				SubsurfaceDensity = SubsurfaceDensityFromOpacity(GBufferData.CustomData.a);
				bUseSubsurfaceTransmission = true;
			}
		#endif
	#endif

			Result = ShadowRayTraceThroughCulledObjects(
				TranslatedWorldRayStart,
				TranslatedWorldRayEnd,
				MaxRayTime,
				TanLightAngle,
				MinSphereRadius,
				MaxSphereRadius,
				SubsurfaceDensity,
				CulledDataParameter,
				NumIntersectingObjects,
				bUseCulling,
				bUseScatterTileCulling,
				bUseSubsurfaceTransmission,
				/*bExpandSurface*/ false);
		}
	}

	#if HAS_PREVIOUS_OUTPUT
	// Combine with the result already stored for this texel by keeping the darker of the two
	if (bShouldComputeShadowing)
	{
		#if PLATFORM_SUPPORTS_TYPED_UAV_LOAD
		float PrevResult = RWShadowFactors[DispatchThreadId.xy].x;
		#else
		float2 PrevResultUV = (DispatchThreadId.xy + 0.5) * InvOutputBufferSize;
		float PrevResult = Texture2DSampleLevel(ShadowFactorsTexture, ShadowFactorsSampler, PrevResultUV, 0).x;
		#endif

		Result = min(Result, PrevResult);
	}
	#endif

#else
	// Debug path: output the relative number of intersecting objects instead of shadowing
	//Result = bShouldComputeShadowing;
	Result = bShouldComputeShadowing ? NumIntersectingObjects / 256.0f : 0.0f;
#endif

#if METAL_ES3_1_PROFILE
	// clamp max depth to avoid #inf
	SceneDepth = min(SceneDepth, 65500.0f);
#endif
	RWShadowFactors[DispatchThreadId.xy] = float2(Result, SceneDepth);
}
|
|
|
|
#ifdef UPSAMPLE_PASS
|
|
|
|
// Far fade: the shadow fades out over 1 / InvFadePlaneLength depth units starting at FadePlaneOffset.
float FadePlaneOffset;
float InvFadePlaneLength;
// Near fade: the shadow fades in over 1 / InvNearFadePlaneLength depth units starting at NearFadePlaneOffset.
float NearFadePlaneOffset;
float InvNearFadePlaneLength;

// Forwarded unchanged to UpsampleShadowFactors.
float OneOverDownsampleFactor;
float2 ShadowFactorsUVBilinearMax;

/**
 * Upsamples the shadow factors to full resolution, applies the near and far depth fades
 * and writes the encoded light attenuation.
 */
void DistanceFieldShadowingUpsamplePS(
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	float ShadowFactor;
	float SceneDepth;
	UpsampleShadowFactors(SVPos, ScissorRectMinAndSize, OneOverDownsampleFactor, MinDepth, MaxDepth, ShadowFactorsTexture, ShadowFactorsSampler, ShadowFactorsUVBilinearMax, ShadowFactor, SceneDepth);

	// 1 before the far fade plane, falling to 0 (fully unshadowed) at its end
	const float FarFade = 1.0f - saturate((SceneDepth - FadePlaneOffset) * InvFadePlaneLength);
	ShadowFactor = lerp(1, ShadowFactor, FarFade);

	// 0 (fully unshadowed) before the near fade plane, rising to 1 at its end
	const float NearFade = saturate((SceneDepth - NearFadePlaneOffset) * InvNearFadePlaneLength);
	ShadowFactor = lerp(1, ShadowFactor, NearFade);

	OutColor = EncodeLightAttenuation(half4(ShadowFactor, ShadowFactor, ShadowFactor, ShadowFactor));
}
|
|
|
|
#endif // UPSAMPLE_PASS
|
|
|
|
#ifdef SHADOW_TILE_VS
|
|
|
|
// One packed tile coordinate per instance.
Buffer<uint> TileListData;

/**
 * Emits the six vertices (two triangles) of one screen tile.
 * The instance id selects the tile, the vertex id selects the corner.
 */
void ShadowTileVS(
	in uint InstanceId : SV_InstanceID,
	in uint VertexId : SV_VertexID,
	out float4 Position : SV_POSITION)
{
	const uint PackedTile = TileListData[InstanceId];
#if PERMUTATION_TILE_TYPE == 1
	const uint2 TileOrigin = UnpackTileCoord12bits(PackedTile) * WORK_TILE_SIZE;
#else
	const uint2 TileOrigin = UnpackTileCoord16bits(PackedTile) * WORK_TILE_SIZE;
#endif

	// Vertices 1, 2, 4 sit on the tile's far X edge; vertices 2, 4, 5 on its far Y edge
	const bool bFarX = VertexId == 1 || VertexId == 2 || VertexId == 4;
	const bool bFarY = VertexId == 2 || VertexId == 4 || VertexId == 5;

	uint2 TileVertex = TileOrigin;
	TileVertex.x += bFarX ? WORK_TILE_SIZE : 0;
	TileVertex.y += bFarY ? WORK_TILE_SIZE : 0;

	// View port is set on the view rect. So no offset are needed.
	Position = float4(float2(TileVertex) * View.ViewSizeAndInvSize.zw * float2(2.0f, -2.0f) + float2(-1.0, 1.0f), 0.5f, 1.0f);
}
|
|
|
|
#endif // SHADOW_TILE_VS
|