Files
UnrealEngine/Engine/Shaders/Private/CapsuleShadowShaders.usf
2025-05-18 13:04:45 +08:00

1208 lines
47 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
CapsuleShadowShaders.usf: Tiled deferred culling and shadowing from capsule shapes
=============================================================================*/
#include "Common.ush"
#include "DeferredShadingCommon.ush"
#include "FastMath.ush"
#include "DistanceFieldLightingShared.ush"
#include "SHCommon.ush"
#include "VolumetricLightmapShared.ush"
#include "ReflectionEnvironmentShared.ush"
#include "IntersectionUtils.ush"
#include "Substrate/Substrate.ush"
#ifdef UPSAMPLE_PASS
# include "ShadowFactorsUpsampleCommon.ush"
#endif
#ifndef THREADGROUP_SIZEX
# define THREADGROUP_SIZEX 1
#endif
#ifndef THREADGROUP_SIZEY
# define THREADGROUP_SIZEY 1
#endif
#ifndef LIGHT_SOURCE_MODE
#define LIGHT_SOURCE_MODE 0
#endif
// must match CapsuleShadowRendering.cpp
#define LIGHT_SOURCE_PUNCTUAL 0
#define LIGHT_SOURCE_FROM_CAPSULE 1
#define LIGHT_SOURCE_FROM_RECEIVER 2
/** One capsule shaped shadow caster, as stored in ShadowCapsuleShapes. Field order is part of the buffer layout. */
struct FCapsuleShape
{
// Center of the capsule. Compared directly against translated world space tile bounds and ray starts in this file.
float3 TranslatedCenter;
// Radius of the capsule around its axis segment.
float Radius;
// Direction of the capsule's axis. Presumably unit length (it is used as a basis vector) - confirm against the code that fills the buffer.
float3 Orientation;
// Length of the axis segment, whose end points are TranslatedCenter +/- .5 * Length * Orientation. A value of 0 makes the shape a sphere.
float Length;
};
// SUPPORT_CAPSULE_SHAPES
/** Number of capsules affecting the tile, after culling. */
// One counter per depth slice of the tile (0 and 1).
// The counters are incremented for every intersecting capsule and can therefore exceed MAX_INTERSECTING_SHAPES; readers clamp them.
groupshared uint TileNumCapsules0;
groupshared uint TileNumCapsules1;
// Capacity of the culled capsule list of a single depth slice
#define MAX_INTERSECTING_SHAPES 512
// Indices into ShadowCapsuleShapes. The first MAX_INTERSECTING_SHAPES entries belong to slice 0, the rest to slice 1.
groupshared uint IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * 2];
// Number of valid entries in ShadowCapsuleShapes
uint NumShadowCapsules;
StructuredBuffer<FCapsuleShape> ShadowCapsuleShapes;
// SUPPORT_MESH_DISTANCE_FIELDS
/** Number of distance fields affecting the tile, after culling. */
// One counter per depth slice of the tile (0 and 1). Like the capsule counters these can exceed the list capacity and are clamped when read.
groupshared uint TileNumDistanceFields0;
groupshared uint TileNumDistanceFields1;
// Capacity of the culled distance field caster list of a single depth slice
#define MAX_INTERSECTING_DISTANCE_FIELDS 64
// Indices into MeshDistanceFieldCasterIndices (not distance field object indices). First half belongs to slice 0, second half to slice 1.
groupshared uint IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * 2];
// Number of valid entries in MeshDistanceFieldCasterIndices
uint NumMeshDistanceFieldCasters;
// Maps a caster list index to the distance field object index used with LoadDFObjectBounds / LoadDFObjectData
StructuredBuffer<uint> MeshDistanceFieldCasterIndices;
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
StructuredBuffer<float4> LightDirectionData;
/**
 * Unpacks the two half floats stored in the bits of LightDirectionW.
 * The low 16 bits hold the light angle, the high 16 bits hold the minimum visibility.
 */
void DecodeLightDirectionW(float LightDirectionW, out float LightAngle, out float MinVisibility)
{
	const uint PackedBits = asuint(LightDirectionW);
	const uint MinVisibilityBits = PackedBits >> 16;

	// f16tof32 only looks at the low 16 bits, so the angle needs no masking
	LightAngle = f16tof32(PackedBits);
	MinVisibility = f16tof32(MinVisibilityBits);
}
/**
 * Packs LightAngle and MinVisibility as two half floats into the bits of a single float.
 * Inverse of DecodeLightDirectionW: the angle goes to the low 16 bits, the visibility to the high 16 bits.
 */
float EncodeLightDirectionW(float LightAngle, float MinVisibility)
{
	const uint LightAngleBits = f32tof16(LightAngle);
	const uint MinVisibilityBits = f32tof16(MinVisibility);
	const uint PackedBits = LightAngleBits | (MinVisibilityBits << 16);
	return asfloat(PackedBits);
}
/**
 * Fetches the light direction, light angle and minimum visibility of one shadow caster from LightDirectionData.
 *
 * @param ShapeIndex - index of the caster inside its own list (capsules or distance field casters)
 * @param bDistanceFieldCaster - selects the distance field caster section of the buffer instead of the capsule section
 */
void GetLightDirectionData(uint ShapeIndex, bool bDistanceFieldCaster, out float3 LightDirection, out float LightAngle, out float MinVisibility)
{
	// Light data for distance field casters is placed after light data for capsules in SetupIndirectCapsuleShadows
	uint SectionStart = 0;

	if (bDistanceFieldCaster)
	{
		SectionStart = NumShadowCapsules;
	}

	const float4 PackedEntry = LightDirectionData[ShapeIndex + SectionStart];

	LightDirection = PackedEntry.xyz;
	DecodeLightDirectionW(PackedEntry.w, LightAngle, MinVisibility);
}
// Parameters of ComputeLightDirectionFromVolumetricLightmapCS
// Selects where the light direction comes from: 2 = sky irradiance environment map, 1 = sky bent normal of the volumetric lightmap, anything else = volumetric lightmap irradiance
uint SkyLightMode;
// Light angle written for every entry unless SkyLightMode is 1
float CapsuleIndirectConeAngle;
// Scales the cone angle derived from the sky bent normal length (SkyLightMode 1)
float CapsuleSkyAngleScale;
// Lower bound of the cone angle for SkyLightMode 1, in degrees (converted with PI / 180 where it is used)
float CapsuleMinSkyAngle;
// Number of entries of LightDirectionData to process
uint NumLightDirectionData;
// Output of the pass: xyz = light direction, w = packed light angle and min visibility (see EncodeLightDirectionW)
RWStructuredBuffer<float4> RWComputedLightDirectionData;
/**
 * Converts every entry of LightDirectionData from (position, packed w) into (light direction, packed w).
 * The direction and cone angle are derived from the sky or the volumetric lightmap depending on SkyLightMode.
 * The min visibility stored in the input w is carried over into the output w unchanged.
 * One thread handles one entry.
 */
[numthreads(THREADGROUP_SIZEX, 1, 1)]
void ComputeLightDirectionFromVolumetricLightmapCS(
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint EntryIndex = DispatchThreadId.x;

	// Threads past the end of the buffer have nothing to do
	if (EntryIndex >= NumLightDirectionData)
	{
		return;
	}

	// On input, xyz is the position to sample lighting at
	const float4 InputEntry = LightDirectionData[EntryIndex];
	const float3 ObjectPosition = InputEntry.xyz;
	const float3 BrickTextureUVs = ComputeVolumetricLightmapBrickTextureUVs(ObjectPosition);

	float3 DominantDirection;
	float ConeAngle;

	if (SkyLightMode == 2)
	{
		// See ComputeSkyEnvMapDiffuseIrradianceCS for the coefficient and sign adaptation.
		// There is no need to rescale the coefficients because the direction is preserved.
		const float4 SignAdaptation = float4(1.0f, -1.0f, 1.0f, -1.0f);

		FTwoBandSHVectorRGB SkySH;
		SkySH.R.V = SkyIrradianceEnvironmentMap[0].wyzx * SignAdaptation;
		SkySH.G.V = SkyIrradianceEnvironmentMap[1].wyzx * SignAdaptation;
		SkySH.B.V = SkyIrradianceEnvironmentMap[2].wyzx * SignAdaptation;

		DominantDirection = GetMaximumDirection(GetLuminance(SkySH));
		ConeAngle = CapsuleIndirectConeAngle;
	}
	else if (SkyLightMode == 1)
	{
		// Stationary sky light shadowing: the cone follows the bent normal and widens with its length
		const float3 BentNormal = GetVolumetricLightmapSkyBentNormal(BrickTextureUVs);
		const float BentNormalLength = length(BentNormal);

		DominantDirection = BentNormal / max(BentNormalLength, .0001f);
		ConeAngle = max(BentNormalLength * CapsuleSkyAngleScale * .5f * PI, CapsuleMinSkyAngle * PI / 180.0f);
	}
	else
	{
		// Dominant direction of the indirect lighting stored in the volumetric lightmap
		const FTwoBandSHVectorRGB LightmapSH = GetVolumetricLightmapSH2(BrickTextureUVs);

		DominantDirection = GetMaximumDirection(GetLuminance(LightmapSH));
		ConeAngle = CapsuleIndirectConeAngle;
	}

	// Fall back to straight up when no usable direction could be extracted
	if (dot(DominantDirection, DominantDirection) < .1f)
	{
		DominantDirection = float3(0, 0, 1);
	}

	// Only the min visibility of the input is kept, the angle is replaced by the one computed above
	float IgnoredAngle;
	float MinVisibility;
	DecodeLightDirectionW(InputEntry.w, IgnoredAngle, MinVisibility);

	RWComputedLightDirectionData[EntryIndex] = float4(DominantDirection, EncodeLightDirectionW(ConeAngle, MinVisibility));
}
#endif
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
/** From point being shaded toward light, for directional lights. */
float3 LightDirection;
// xyz = light position, compared against translated world space tile bounds; w = 1 / radius of the light's influence
float4 LightTranslatedPositionAndInvRadius;
// Radius of the light source itself; determines the cone angle used for point lights
float LightSourceRadius;
// NOTE(review): not referenced in the part of the file visible here - presumably scales a ray start offset by depth, confirm at the use site
float RayStartOffsetDepthScale;
// x = light cone angle in radians (fed to cos / sin). NOTE(review): the remaining components are not read in the part of the file visible here
float3 LightAngleAndNormalThreshold;
#endif
// xy = min corner, zw = size of the rect the tiles are laid out over
uint4 ScissorRectMinAndSize;
// Number of thread groups (tiles) along x and y
float2 NumGroups;
/** Min and Max depth for this tile. */
// Stored as the bit patterns of floats so that they can be built with integer atomics
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;
/** Inner Min and Max depth for this tile. */
// IntegerTileMinZ2 is the smallest depth at or behind the midpoint of the tile's depth range, IntegerTileMaxZ2 the largest depth at or in front of it
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;
/** Culling bounds of one depth slice of a screen tile, filled in by SetupTileCullingData. */
struct FTileCullingData
{
// xyz = center of the slice's bounding sphere in translated world space, w = radius of the sphere
float4 TranslatedBoundingSphere;
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
// Axis of the light cone: the normalized vector from the sphere center to the light for point lights, LightDirection otherwise
float3 ConeAxis;
// Cosine and sine of the light cone angle
float ConeAngleCos;
float ConeAngleSin;
#endif
};
/**
 * Builds the culling bounds of the screen tile covered by this thread group.
 *
 * The depth range of the tile is split at its midpoint into a near slice (index 0) and a far slice (index 1).
 * Each slice gets its own bounding sphere in translated world space, and for punctual lights its own light cone.
 * This function contains group syncs and resets the shared per-tile counters, so it has to be executed by all threads of the group.
 *
 * @param SceneDepth - depth of the pixel handled by this thread
 * @param MaxDepth - pixels at or beyond this depth do not contribute to the depth bounds of the tile
 * @param ThreadIndex - index of the thread inside the group, thread 0 initializes the shared state
 * @param GroupId - 2d index of the tile
 * @param TileCullingData0 - [out] culling data of the near depth slice
 * @param TileCullingData1 - [out] culling data of the far depth slice
 * @param bTileShouldComputeShadowing - [out] false if no pixel contributed depth, or (point lights) if both slices are outside of the light's influence
 * @param GroupIndex - [out] depth slice that this thread's pixel belongs to
 */
void SetupTileCullingData(
	float SceneDepth,
	float MaxDepth,
	uint ThreadIndex,
	uint2 GroupId,
	out FTileCullingData TileCullingData0,
	out FTileCullingData TileCullingData1,
	out bool bTileShouldComputeShadowing,
	out uint GroupIndex)
{
	// Initialize per-tile variables
	if (ThreadIndex == 0)
	{
		// 0x7F7FFFFF is the bit pattern of the largest finite float
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		IntegerTileMinZ2 = 0x7F7FFFFF;
		IntegerTileMaxZ2 = 0;
		TileNumCapsules0 = 0;
		TileNumCapsules1 = 0;
		TileNumDistanceFields0 = 0;
		TileNumDistanceFields1 = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Use shared memory atomics to build the depth bounds for this tile
	// Each thread is assigned to a pixel at this point
	//@todo - move depth range computation to a central point where it can be reused by all the frame's tiled deferred passes!
	if (SceneDepth < MaxDepth)
	{
		InterlockedMin(IntegerTileMinZ, asuint(SceneDepth));
		InterlockedMax(IntegerTileMaxZ, asuint(SceneDepth));
	}

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ = asfloat(IntegerTileMinZ);
	float MaxTileZ = asfloat(IntegerTileMaxZ);
	float HalfZ = .5f * (MinTileZ + MaxTileZ);

	if (SceneDepth < MaxDepth)
	{
		// Compute a second min and max Z, clipped by HalfZ, so that we get two depth bounds per tile
		// This results in more conservative tile depth bounds and fewer intersections
		if (SceneDepth >= HalfZ)
		{
			InterlockedMin(IntegerTileMinZ2, asuint(SceneDepth));
		}

		if (SceneDepth <= HalfZ)
		{
			InterlockedMax(IntegerTileMaxZ2, asuint(SceneDepth));
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Slice 0 spans [MinTileZ, MaxTileZ2], slice 1 spans [MinTileZ2, MaxTileZ]
	float MinTileZ2 = asfloat(IntegerTileMinZ2);
	float MaxTileZ2 = asfloat(IntegerTileMaxZ2);

	bTileShouldComputeShadowing = true;

	// The bounds still hold their initial values, so no pixel of the tile passed the depth test
	if (IntegerTileMinZ == 0x7F7FFFFF && IntegerTileMaxZ == 0)
	{
		bTileShouldComputeShadowing = false;
	}

	float3 ViewTileMin;
	float3 ViewTileMax;
	float3 ViewTileMin2;
	float3 ViewTileMax2;

	bool bCenteredProjection = abs(View.ViewToClip[2][0]) < .00001f && abs(View.ViewToClip[2][1]) < .00001f;

	BRANCH
	// Off center projection path uses 37 more asm instructions
	if (bCenteredProjection)
	{
		float2 TanViewFOV = GetTanHalfFieldOfView();
		// tan(FOV) = HalfUnitPlaneWidth / 1, so TanViewFOV * 2 is the size of the whole unit view plane
		// We are operating on a subset of that defined by ScissorRectMinAndSize
		float2 TileSize = TanViewFOV * 2 * ScissorRectMinAndSize.zw / ((float2)View.ViewSizeAndInvSize.xy * NumGroups);
		float2 UnitPlaneMin = -TanViewFOV + TanViewFOV * 2 * (ScissorRectMinAndSize.xy - View.ViewRectMin.xy) * View.ViewSizeAndInvSize.zw;

		float2 UnitPlaneTileMin = (GroupId.xy * TileSize + UnitPlaneMin) * float2(1, -1);
		float2 UnitPlaneTileMax = ((GroupId.xy + 1) * TileSize + UnitPlaneMin) * float2(1, -1);

		// Scale the unit plane extents by the near and far depth of each slice and keep the larger footprint
		ViewTileMin.xy = min(MinTileZ * UnitPlaneTileMin, MaxTileZ2 * UnitPlaneTileMin);
		ViewTileMax.xy = max(MinTileZ * UnitPlaneTileMax, MaxTileZ2 * UnitPlaneTileMax);
		ViewTileMin.z = MinTileZ;
		ViewTileMax.z = MaxTileZ2;

		ViewTileMin2.xy = min(MinTileZ2 * UnitPlaneTileMin, MaxTileZ * UnitPlaneTileMin);
		ViewTileMax2.xy = max(MinTileZ2 * UnitPlaneTileMax, MaxTileZ * UnitPlaneTileMax);
		ViewTileMin2.z = MinTileZ2;
		ViewTileMax2.z = MaxTileZ;
	}
	else
	{
		// Off center projection: unproject the tile corners through ClipToView instead
		float2 TileSize = 2 * ScissorRectMinAndSize.zw / ((float2)View.ViewSizeAndInvSize.xy * NumGroups);
		float2 UnitPlaneMin = -1 + 2 * (ScissorRectMinAndSize.xy - View.ViewRectMin.xy) * View.ViewSizeAndInvSize.zw;

		float2 UnitPlaneTileMin = (GroupId.xy * TileSize + UnitPlaneMin) * float2(1, -1);
		float2 UnitPlaneTileMax = ((GroupId.xy + 1) * TileSize + UnitPlaneMin) * float2(1, -1);

		// Near slice
		{
			float MinTileDeviceZ = ConvertToDeviceZ(MinTileZ);
			float4 MinDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MinTileDeviceZ, 1), View.ClipToView);
			float4 MinDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MinTileDeviceZ, 1), View.ClipToView);

			float MaxTileDeviceZ = ConvertToDeviceZ(MaxTileZ2);
			float4 MaxDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MaxTileDeviceZ, 1), View.ClipToView);
			float4 MaxDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MaxTileDeviceZ, 1), View.ClipToView);

			ViewTileMin.xy = min(MinDepthMinCorner.xy / MinDepthMinCorner.w, MaxDepthMinCorner.xy / MaxDepthMinCorner.w);
			ViewTileMax.xy = max(MinDepthMaxCorner.xy / MinDepthMaxCorner.w, MaxDepthMaxCorner.xy / MaxDepthMaxCorner.w);
			ViewTileMin.z = MinTileZ;
			ViewTileMax.z = MaxTileZ2;
		}

		// Far slice
		{
			float MinTileDeviceZ = ConvertToDeviceZ(MinTileZ2);
			float4 MinDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MinTileDeviceZ, 1), View.ClipToView);
			float4 MinDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MinTileDeviceZ, 1), View.ClipToView);

			float MaxTileDeviceZ = ConvertToDeviceZ(MaxTileZ);
			float4 MaxDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MaxTileDeviceZ, 1), View.ClipToView);
			float4 MaxDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MaxTileDeviceZ, 1), View.ClipToView);

			ViewTileMin2.xy = min(MinDepthMinCorner.xy / MinDepthMinCorner.w, MaxDepthMinCorner.xy / MaxDepthMinCorner.w);
			ViewTileMax2.xy = max(MinDepthMaxCorner.xy / MinDepthMaxCorner.w, MaxDepthMaxCorner.xy / MaxDepthMaxCorner.w);
			ViewTileMin2.z = MinTileZ2;
			ViewTileMax2.z = MaxTileZ;
		}
	}

	// Bounding sphere of each slice: centered on the view space box, radius is half of the box diagonal
	float3 ViewGroup0Center = (ViewTileMax + ViewTileMin) / 2;
	TileCullingData0.TranslatedBoundingSphere.xyz = mul(float4(ViewGroup0Center, 1), View.ViewToTranslatedWorld).xyz;
	TileCullingData0.TranslatedBoundingSphere.w = length(ViewGroup0Center - ViewTileMax);

	float3 ViewGroup1Center = (ViewTileMax2 + ViewTileMin2) / 2;
	TileCullingData1.TranslatedBoundingSphere.xyz = mul(float4(ViewGroup1Center, 1), View.ViewToTranslatedWorld).xyz;
	// The radius has to be measured to the far slice's own corner; measuring to the near slice's corner (ViewTileMax) yields a sphere that does not bound the far slice
	TileCullingData1.TranslatedBoundingSphere.w = length(ViewGroup1Center - ViewTileMax2);

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
#if POINT_LIGHT
	// The cone of each slice points from the slice center to the light and covers the light source sphere
	float3 LightVector0 = LightTranslatedPositionAndInvRadius.xyz - TileCullingData0.TranslatedBoundingSphere.xyz;
	float LightVector0Length = length(LightVector0);
	float3 LightVector1 = LightTranslatedPositionAndInvRadius.xyz - TileCullingData1.TranslatedBoundingSphere.xyz;
	float LightVector1Length = length(LightVector1);
	TileCullingData0.ConeAxis = LightVector0 / LightVector0Length;
	TileCullingData1.ConeAxis = LightVector1 / LightVector1Length;
	float TanLightAngle0 = LightSourceRadius / LightVector0Length;
	float TanLightAngle1 = LightSourceRadius / LightVector1Length;
	// cos = 1 / sqrt(1 + tan^2), sin = cos * tan
	TileCullingData0.ConeAngleCos = 1.0f / sqrt(1 + TanLightAngle0 * TanLightAngle0);
	TileCullingData0.ConeAngleSin = TileCullingData0.ConeAngleCos * TanLightAngle0;
	TileCullingData1.ConeAngleCos = 1.0f / sqrt(1 + TanLightAngle1 * TanLightAngle1);
	TileCullingData1.ConeAngleSin = TileCullingData1.ConeAngleCos * TanLightAngle1;

	// Don't operate on tiles completely outside of the light's influence
	bool bTileInLightInfluenceBounds = LightVector0Length < 1.0f / LightTranslatedPositionAndInvRadius.w + TileCullingData0.TranslatedBoundingSphere.w
		|| LightVector1Length < 1.0f / LightTranslatedPositionAndInvRadius.w + TileCullingData1.TranslatedBoundingSphere.w;

	bTileShouldComputeShadowing = bTileShouldComputeShadowing && bTileInLightInfluenceBounds;
#else
	// Directional light: both slices share the same cone
	TileCullingData0.ConeAxis = TileCullingData1.ConeAxis = LightDirection;
	TileCullingData0.ConeAngleCos = TileCullingData1.ConeAngleCos = cos(LightAngleAndNormalThreshold.x);
	TileCullingData0.ConeAngleSin = TileCullingData1.ConeAngleSin = sin(LightAngleAndNormalThreshold.x);
#endif
#endif

	// Pixels behind the far end of the near slice use the culled lists of the far slice
	GroupIndex = SceneDepth > MaxTileZ2 ? 1 : 0;
}
// Scaled sphere intersection allows capsule shadows to blend together better when penumbras are large, so use for indirect.
// Otherwise an occluder sphere will be extracted from the capsule and used for shadowing.
// This maintains shadow silhouette shapes better but has a discontinuity when the capsule direction is nearly parallel to the light direction.
#define USE_SCALED_SPHERE_INTERSECTION (LIGHT_SOURCE_MODE != LIGHT_SOURCE_PUNCTUAL)
/**
 * Culls all shadow capsules against the two depth slices of the tile and stores the survivors in IntersectingShapeIndices.
 * The threads of the group split the capsules between them. Contains a group sync, so all threads of the group have to execute it.
 *
 * @param ThreadIndex - index of the thread inside the group, used as the first capsule index handled by this thread
 * @param GroupIndex - depth slice whose capsule count is returned
 * @param MaxOcclusionDistance - the capsule bounds are expanded by this distance in the sphere test
 * @return number of capsules in the list of depth slice GroupIndex, clamped to MAX_INTERSECTING_SHAPES
 */
uint CullCapsuleShapesToTile(
	uint ThreadIndex,
	uint GroupIndex,
	float MaxOcclusionDistance,
	FTileCullingData TileCullingData0,
	FTileCullingData TileCullingData1)
{
	const float4 TileSphere0 = TileCullingData0.TranslatedBoundingSphere;
	const float4 TileSphere1 = TileCullingData1.TranslatedBoundingSphere;

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	// The light cone does not depend on the capsule, fetch it once
	const float3 ConeAxis0 = TileCullingData0.ConeAxis;
	const float ConeAngleCos0 = TileCullingData0.ConeAngleCos;
	const float ConeAngleSin0 = TileCullingData0.ConeAngleSin;
	const float3 ConeAxis1 = TileCullingData1.ConeAxis;
	const float ConeAngleCos1 = TileCullingData1.ConeAngleCos;
	const float ConeAngleSin1 = TileCullingData1.ConeAngleSin;
#endif

	LOOP
	for (uint ShapeIndex = ThreadIndex; ShapeIndex < NumShadowCapsules; ShapeIndex += THREADGROUP_SIZEX * THREADGROUP_SIZEY)
	{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
		// Every capsule has its own light direction and angle, which both depth slices share
		float3 ConeAxis0;
		float CapsuleLightAngle;
		float IgnoredMinVisibility;
		GetLightDirectionData(ShapeIndex, false, ConeAxis0, CapsuleLightAngle, IgnoredMinVisibility);

		const float ConeAngleCos0 = cos(CapsuleLightAngle);
		const float ConeAngleSin0 = sin(CapsuleLightAngle);
		const float3 ConeAxis1 = ConeAxis0;
		const float ConeAngleCos1 = ConeAngleCos0;
		const float ConeAngleSin1 = ConeAngleSin0;
#endif

		const FCapsuleShape Capsule = ShadowCapsuleShapes[ShapeIndex];

		// Inputs of the intersection tests, in translated world space unless replaced below
		float3 CasterCenter = Capsule.TranslatedCenter;
		float CasterRadius = Capsule.Radius;
		float3 TileCenter0 = TileSphere0.xyz;
		float3 TileCenter1 = TileSphere1.xyz;

#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		float3 TestConeAxis0 = ConeAxis0;
		float3 TestConeAxis1 = ConeAxis1;
#endif

#if USE_SCALED_SPHERE_INTERSECTION
		float3 CapsuleSpaceX;
		float3 CapsuleSpaceY;
		float3 CapsuleSpaceZ = Capsule.Orientation;
		GenerateCoordinateSystem(CapsuleSpaceZ, CapsuleSpaceX, CapsuleSpaceY);

		// Scale required along the capsule's axis to turn it into a sphere (assuming it was originally a scaled sphere instead of a capsule)
		CapsuleSpaceZ *= Capsule.Radius / (.5f * Capsule.Length + Capsule.Radius);

		// In the scaled sphere space the capsule sits at the origin and is a sphere with the original radius
		CasterCenter = 0;
		CasterRadius = Capsule.Radius;

		// Bring the tile sphere centers into the scaled sphere space
		const float3 ToTileCenter0 = TileSphere0.xyz - Capsule.TranslatedCenter;
		TileCenter0 = float3(dot(ToTileCenter0, CapsuleSpaceX), dot(ToTileCenter0, CapsuleSpaceY), dot(ToTileCenter0, CapsuleSpaceZ));

		const float3 ToTileCenter1 = TileSphere1.xyz - Capsule.TranslatedCenter;
		TileCenter1 = float3(dot(ToTileCenter1, CapsuleSpaceX), dot(ToTileCenter1, CapsuleSpaceY), dot(ToTileCenter1, CapsuleSpaceZ));

#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		// Renormalize the cone axis as it went through a non-uniformly scaled transform
		TestConeAxis0 = normalize(float3(dot(ConeAxis0, CapsuleSpaceX), dot(ConeAxis0, CapsuleSpaceY), dot(ConeAxis0, CapsuleSpaceZ)));
		TestConeAxis1 = normalize(float3(dot(ConeAxis1, CapsuleSpaceX), dot(ConeAxis1, CapsuleSpaceY), dot(ConeAxis1, CapsuleSpaceZ)));
#endif
#else
		// Add half capsule length to bounding sphere
		CasterRadius = Capsule.Radius + .5f * Capsule.Length;
#endif

		// Depth slice 0
		bool bAffectsSlice0 = SphereIntersectSphere(float4(CasterCenter, CasterRadius + MaxOcclusionDistance), float4(TileCenter0, TileSphere0.w));
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		bAffectsSlice0 = bAffectsSlice0
			&& SphereIntersectConeWithMaxDistance(float4(CasterCenter, CasterRadius + TileSphere0.w), TileCenter0, TestConeAxis0, ConeAngleCos0, ConeAngleSin0, MaxOcclusionDistance);
#endif

		BRANCH
		if (bAffectsSlice0)
		{
			uint SlotIndex;
			InterlockedAdd(TileNumCapsules0, 1U, SlotIndex);
			// On overflow all further writes land in the last slot instead of past the end of the list
			SlotIndex = min(SlotIndex, (uint)(MAX_INTERSECTING_SHAPES - 1));
			IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * 0 + SlotIndex] = ShapeIndex;
		}

		// Depth slice 1
		bool bAffectsSlice1 = SphereIntersectSphere(float4(CasterCenter, CasterRadius + MaxOcclusionDistance), float4(TileCenter1, TileSphere1.w));
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		bAffectsSlice1 = bAffectsSlice1
			&& SphereIntersectConeWithMaxDistance(float4(CasterCenter, CasterRadius + TileSphere1.w), TileCenter1, TestConeAxis1, ConeAngleCos1, ConeAngleSin1, MaxOcclusionDistance);
#endif

		BRANCH
		if (bAffectsSlice1)
		{
			uint SlotIndex;
			InterlockedAdd(TileNumCapsules1, 1U, SlotIndex);
			// On overflow all further writes land in the last slot instead of past the end of the list
			SlotIndex = min(SlotIndex, (uint)(MAX_INTERSECTING_SHAPES - 1));
			IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * 1 + SlotIndex] = ShapeIndex;
		}
	}

	// Make the lists written by the other threads visible before they are consumed
	GroupMemoryBarrierWithGroupSync();

	// The counters keep counting on overflow, so clamp to what was actually stored
	const uint NumCulled = GroupIndex == 0 ? TileNumCapsules0 : TileNumCapsules1;
	return min(NumCulled, (uint)MAX_INTERSECTING_SHAPES);
}
// Approximate the area of intersection of two spherical caps, from 'Ambient Aperture Lighting'
// fRadius0 : First cap's radius (arc length in radians)
// fRadius1 : Second cap's radius (arc length in radians)
// fDist : Distance between the caps (radians between the centers of the caps)
float SphericalCapIntersectionAreaFast(float fRadius0, float fRadius1, float fDist)
{
	const float fTwoPi = 6.283185308f;
	const float fSmallRadius = min(fRadius0, fRadius1);
	const float fLargeRadius = max(fRadius0, fRadius1);

	if ( fDist <= fLargeRadius - fSmallRadius )
	{
		// The smaller cap lies completely inside the larger one, the intersection is the smaller cap
		return fTwoPi - fTwoPi * cos( fSmallRadius );
	}

	if ( fDist >= fRadius0 + fRadius1 )
	{
		// The caps are too far apart to touch
		return 0;
	}

	// Partial overlap: blend smoothly from the full area of the smaller cap (at containment) down to 0 (at separation)
	const float fDiff = abs(fRadius0 - fRadius1);
	const float fOverlapFraction = smoothstep(0.0f,
		1.0f,
		1.0f - saturate((fDist-fDiff)/(fRadius0+fRadius1-fDiff)));

	return fOverlapFraction * (fTwoPi - fTwoPi * cos( fSmallRadius ));
}
// CosFadeStartAngle in x, 1 / (1 - CosFadeStartAngle) in y
float2 CosFadeStartAngle;
/**
 * Shrinks CapsuleRadius depending on the z component of LightDirection, using the fade range stored in CosFadeStartAngle.
 * Only has an effect when LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE, the radius is left untouched otherwise.
 */
void ApplyFadeToCapsuleRadius(inout float CapsuleRadius, float3 LightDirection)
{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
	// Fade out when nearly vertical up due to self shadowing artifacts
	const float FadedOutFraction = saturate(2 * (-LightDirection.z - CosFadeStartAngle.x) * CosFadeStartAngle.y);
	CapsuleRadius *= 1 - FadedOutFraction;
#endif
}
/**
 * Computes the visibility of a light cone against a list of capsule shapes.
 * Every capsule is reduced to a sphere; the overlap between the spherical cap covered by that sphere and the spherical cap covered by the light determines how much light the capsule blocks.
 * The visibilities of all capsules are multiplied together.
 *
 * @param TranslatedWorldRayStart - start of the cone, in the same space as FCapsuleShape::TranslatedCenter
 * @param UnitRayDirection - axis of the cone; replaced by per-capsule data when LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
 * @param LightVectorLength - only read when POINT_LIGHT is set, where it is compared against the distance to the occluder
 * @param LightAngle - angular radius of the light's spherical cap in radians; replaced by per-capsule data when LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
 * @param InvMaxOcclusionDistance - 1 / max occlusion distance; occlusion fades out between 2/3 of that distance and the full distance
 * @param CulledDataParameter - with bUseCulling, the depth slice whose part of IntersectingShapeIndices is read
 * @param NumIntersectingCapsules - number of capsules to process
 * @param bUseCulling - true to fetch capsule indices from IntersectingShapeIndices, false to process ShadowCapsuleShapes[0 .. NumIntersectingCapsules - 1]
 * @return combined visibility, 1 meaning unoccluded
 */
float ShadowConeTraceAgainstCulledCapsuleShapes(
float3 TranslatedWorldRayStart,
float3 UnitRayDirection,
float LightVectorLength,
float LightAngle,
float InvMaxOcclusionDistance,
uint CulledDataParameter,
uint NumIntersectingCapsules,
uniform bool bUseCulling)
{
float ConeVisibility = 1;
// Area of the spherical cap covered by the light on the unit sphere: 2 * PI * (1 - cos(angle))
float AreaOfLight = 6.283185308f - 6.283185308f * cos(LightAngle);
LOOP
for (uint TileCulledObjectIndex = 0; TileCulledObjectIndex < NumIntersectingCapsules; TileCulledObjectIndex++)
{
uint ObjectIndex;
if (bUseCulling)
{
uint GroupIndex = CulledDataParameter;
ObjectIndex = IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * GroupIndex + TileCulledObjectIndex];
}
else
{
ObjectIndex = TileCulledObjectIndex;
}
float MinVisibility = 0.0f;
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
// The light comes from per-capsule data in this mode, overriding the values passed in
GetLightDirectionData(ObjectIndex, false, UnitRayDirection, LightAngle, MinVisibility);
AreaOfLight = 6.283185308f - 6.283185308f * cos(LightAngle);
#endif
#define OVERRIDE_LIGHT_DEBUG 0
#if OVERRIDE_LIGHT_DEBUG
//UnitRayDirection = normalize(float3(.2f, .2f, .8f));
UnitRayDirection = float3(0, 0, 1);
LightAngle = .3f;
#endif
// Local copy, the radius may be modified by the fade below
FCapsuleShape CapsuleShape = ShadowCapsuleShapes[ObjectIndex];
ApplyFadeToCapsuleRadius(CapsuleShape.Radius, UnitRayDirection);
float DistanceToShadowSphere;
float3 UnitVectorToShadowSphere;
// Ray direction in the space that UnitVectorToShadowSphere is expressed in (only differs in the scaled sphere path)
float3 UnitRayDirectionInCorrectSpace = UnitRayDirection;
BRANCH
if (CapsuleShape.Length > 0)
{
#if USE_SCALED_SPHERE_INTERSECTION
// Treat the capsule as a sphere that was stretched along its axis: work in a space where the axis is scaled down so that the shape becomes a sphere around the origin
float3 CapsuleSpaceX;
float3 CapsuleSpaceY;
float3 CapsuleSpaceZ = CapsuleShape.Orientation;
GenerateCoordinateSystem(CapsuleSpaceZ, CapsuleSpaceX, CapsuleSpaceY);
float CapsuleZScale = CapsuleShape.Radius / (.5f * CapsuleShape.Length + CapsuleShape.Radius);
CapsuleSpaceZ *= CapsuleZScale;
float3 CapsuleCenterToRayStart = TranslatedWorldRayStart - CapsuleShape.TranslatedCenter;
float3 CapsuleSpaceRayStart = float3(dot(CapsuleCenterToRayStart, CapsuleSpaceX), dot(CapsuleCenterToRayStart, CapsuleSpaceY), dot(CapsuleCenterToRayStart, CapsuleSpaceZ));
float3 CapsuleSpaceRayDirection = float3(dot(UnitRayDirection, CapsuleSpaceX), dot(UnitRayDirection, CapsuleSpaceY), dot(UnitRayDirection, CapsuleSpaceZ));
DistanceToShadowSphere = length(CapsuleSpaceRayStart);
UnitVectorToShadowSphere = -CapsuleSpaceRayStart / DistanceToShadowSphere;
// The direction went through a non-uniform scale and has to be normalized again
UnitRayDirectionInCorrectSpace = normalize(CapsuleSpaceRayDirection);
#else
float3 VectorToCapsuleCenter = CapsuleShape.TranslatedCenter - TranslatedWorldRayStart;
// Closest point on line segment to ray
// L0 and L1 are the end points of the capsule's axis segment relative to the ray start, L01 is the vector from L0 to L1
float3 L01 = CapsuleShape.Orientation * CapsuleShape.Length;
float3 L0 = VectorToCapsuleCenter - 0.5 * L01;
float3 L1 = VectorToCapsuleCenter + 0.5 * L01;
// The below is computing the shortest distance between capsule line segment and ray
float CapsuleOrientationProjectedOntoRay = dot(UnitRayDirection, L01);
// Vector that spans L01 perpendicular to the ray
float3 PerpendicularSpanningVector = CapsuleOrientationProjectedOntoRay * UnitRayDirection - L01;
// Length of PerpendicularSpanningVector using the right triangle formed by L01 and UnitRayDirection * CapsuleOrientationProjectedOntoRay
// Note that this is the squared length, no square root is taken
float PerpendicularDistance = Square(CapsuleShape.Length) - CapsuleOrientationProjectedOntoRay * CapsuleOrientationProjectedOntoRay;
// Project the vector to a capsule endpoint onto the perpendicular spanning vector, normalized
// NOTE(review): PerpendicularDistance approaches 0 when the ray is parallel to the capsule axis, so this divides by (nearly) zero - presumably tolerated because of the saturate, confirm
float t = saturate(dot(L0, PerpendicularSpanningVector) / PerpendicularDistance);
// Compute the vector to the shadow sphere which best approximates the capsule's shadowing
float3 VectorToShadowSphere = L0 + t * L01;
DistanceToShadowSphere = length(VectorToShadowSphere);
UnitVectorToShadowSphere = VectorToShadowSphere / DistanceToShadowSphere;
// The above 'best shadow sphere' calculation doesn't take into account the projected solid angle of the potential shadow spheres
// As a result, there's a discontinuity when the capsule and the ray point in nearly the same direction, where the far end of the capsule gets chosen
// Here we mitigate the effect by overriding the distance to shadow sphere if one of the capsule end points was closer
DistanceToShadowSphere = min(DistanceToShadowSphere, length(L0));
DistanceToShadowSphere = min(DistanceToShadowSphere, length(L1));
#endif
}
else
{
// Zero length capsule: the shape already is a sphere
DistanceToShadowSphere = length(CapsuleShape.TranslatedCenter - TranslatedWorldRayStart);
UnitVectorToShadowSphere = (CapsuleShape.TranslatedCenter - TranslatedWorldRayStart) / DistanceToShadowSphere;
}
// Angle between the direction to the occluder and the direction to the light
float AngleBetween = acosFast(dot(UnitVectorToShadowSphere, UnitRayDirectionInCorrectSpace));
// Overlap of the light's cap with the occluder's cap, whose angular radius is approximated by atan(Radius / Distance)
float IntersectionArea = SphericalCapIntersectionAreaFast(LightAngle, atanFastPos(CapsuleShape.Radius / DistanceToShadowSphere), AngleBetween);
#if POINT_LIGHT
// SphericalCapIntersectionAreaFast does not take the relative distance of the two sphere caps into account, which can cause shadows to be cast on the opposite direction
// Here we compare DistanceToShadowSphere and LightVectorLength to determine whether a shadow should be cast
// To prevent discontinuity, we use the ratio of the distance difference to the capsule's radius as a smooth factor
IntersectionArea = lerp(IntersectionArea, 0, saturate((DistanceToShadowSphere - LightVectorLength + CapsuleShape.Radius) / CapsuleShape.Radius));
#endif
// Fraction of the light that is not covered by the occluder
float ConeConeIntersection = 1 - saturate(IntersectionArea / AreaOfLight);
// 0 up to 2/3 of the max occlusion distance, 1 from the max occlusion distance on
float DistanceFadeAlpha = saturate(DistanceToShadowSphere * InvMaxOcclusionDistance * 3 - 2);
ConeConeIntersection = lerp(ConeConeIntersection, 1, DistanceFadeAlpha);
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_PUNCTUAL
// Apply to indirect shadows only
// Remaps the visibility from [0, 1] to [MinVisibility, 1]
ConeConeIntersection = lerp(MinVisibility, 1, ConeConeIntersection);
#endif
ConeVisibility *= ConeConeIntersection;
}
return ConeVisibility;
}
// Whether to actually sample the distance field while doing tile culling
#define USE_DISTANCE_FIELD_FOR_TILE_CULLING 1
/**
 * Tests whether a distance field caster can affect the given tile bounding sphere.
 * The caster's bounding sphere, expanded by MaxOcclusionDistance, is tested first.
 * With USE_DISTANCE_FIELD_FOR_TILE_CULLING the distance field is then sampled at the tile center to refine the result.
 * The tile cone parameters are currently not used, see the note in the body.
 *
 * @param ObjectIndex - distance field object index, as used by LoadDFObjectData
 * @param DFObjectBounds - bounds of that object
 * @param TileTranslatedBoundingSphere - xyz = center in translated world space, w = radius
 * @return true if the caster has to be added to the tile's list
 */
bool TileBoundsIntersectDistanceFieldCaster(
	uint ObjectIndex,
	FDFObjectBounds DFObjectBounds,
	float4 TileTranslatedBoundingSphere,
	float3 TileConeAxis,
	float TileConeAngleCos,
	float TileConeAngleSin,
	float MaxOcclusionDistance)
{
	const float3 CasterTranslatedCenter = DFFastToTranslatedWorld(DFObjectBounds.Center, PrimaryView.PreViewTranslation);
	const float4 ExpandedCasterSphere = float4(CasterTranslatedCenter, DFObjectBounds.SphereRadius + MaxOcclusionDistance);

	bool bCasterAffectsTile = false;

	// A 'can object cast on tile bounds' cone test (SphereIntersectConeWithMaxDistance) is intentionally not part of the condition below:
	// it has to stay disabled because the weight used to combine occlusion from multiple objects is computed based on distance to occluder only
	BRANCH
	if (SphereIntersectSphere(ExpandedCasterSphere, TileTranslatedBoundingSphere))
	{
#if USE_DISTANCE_FIELD_FOR_TILE_CULLING
		const FDFObjectData CasterData = LoadDFObjectData(ObjectIndex);
		const float4x4 TranslatedWorldToVolume = DFFastToTranslatedWorld(CasterData.WorldToVolume, PrimaryView.PreViewTranslation);

		// Sample the distance field at the tile center, clamped into the volume; the distance lost by clamping is added back
		const float3 TileCenterInVolume = mul(float4(TileTranslatedBoundingSphere.xyz, 1), TranslatedWorldToVolume).xyz;
		const float3 ClampedTileCenter = clamp(TileCenterInVolume, -CasterData.VolumePositionExtent, CasterData.VolumePositionExtent);
		const float ClampDistance = length(ClampedTileCenter - TileCenterInVolume);
		const float WorldDistanceToSurface = (DistanceToMeshSurfaceStandalone(ClampedTileCenter, CasterData) + ClampDistance) * CasterData.VolumeScale;

		const float ErrorTolerance = 1.1f;

		// The tile can only be affected by the object's shadow if the closest part of the object is less than MaxOcclusionDistance from the tile bounds
		bCasterAffectsTile = WorldDistanceToSurface - TileTranslatedBoundingSphere.w < MaxOcclusionDistance * ErrorTolerance;
#else
		bCasterAffectsTile = true;
#endif
	}

	return bCasterAffectsTile;
}
/**
 * Culls all mesh distance field casters against the two depth slices of the tile and stores the survivors in IntersectingMeshDistanceFieldIndices.
 * The stored values are indices into MeshDistanceFieldCasterIndices, not distance field object indices.
 * The threads of the group split the casters between them. Contains a group sync, so all threads of the group have to execute it.
 *
 * @param ThreadIndex - index of the thread inside the group, used as the first caster list index handled by this thread
 * @param GroupIndex - depth slice whose caster count is returned
 * @param MaxOcclusionDistance - base occlusion distance, extended per caster by half of its bounding sphere radius
 * @return number of casters in the list of depth slice GroupIndex, clamped to MAX_INTERSECTING_DISTANCE_FIELDS
 */
uint CullDistanceFieldCastersToTile(
	uint ThreadIndex,
	uint GroupIndex,
	float MaxOcclusionDistance,
	FTileCullingData TileCullingData0,
	FTileCullingData TileCullingData1)
{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	float3 ConeAxis0 = TileCullingData0.ConeAxis;
	float ConeAngleCos0 = TileCullingData0.ConeAngleCos;
	float ConeAngleSin0 = TileCullingData0.ConeAngleSin;
	float3 ConeAxis1 = TileCullingData1.ConeAxis;
	float ConeAngleCos1 = TileCullingData1.ConeAngleCos;
	float ConeAngleSin1 = TileCullingData1.ConeAngleSin;
#else
	// No per-tile cone in the other modes
	float3 ConeAxis0 = 0;
	float ConeAngleCos0 = 0;
	float ConeAngleSin0 = 0;
	float3 ConeAxis1 = 0;
	float ConeAngleCos1 = 0;
	float ConeAngleSin1 = 0;
#endif

	LOOP
	for (uint CasterListIndex = ThreadIndex; CasterListIndex < NumMeshDistanceFieldCasters; CasterListIndex += THREADGROUP_SIZEX * THREADGROUP_SIZEY)
	{
		const uint DFObjectIndex = MeshDistanceFieldCasterIndices[CasterListIndex];

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
		// Every caster has its own light direction and angle, which both depth slices share
		float CasterLightAngle;
		float IgnoredMinVisibility;
		GetLightDirectionData(CasterListIndex, true, ConeAxis0, CasterLightAngle, IgnoredMinVisibility);

		ConeAngleCos0 = cos(CasterLightAngle);
		ConeAngleSin0 = sin(CasterLightAngle);
		ConeAxis1 = ConeAxis0;
		ConeAngleCos1 = ConeAngleCos0;
		ConeAngleSin1 = ConeAngleSin0;
#endif

		const FDFObjectBounds CasterBounds = LoadDFObjectBounds(DFObjectIndex);

		// Larger objects get a larger occlusion distance
		const float CasterMaxOcclusionDistance = MaxOcclusionDistance + .5f * CasterBounds.SphereRadius;

		// Depth slice 0
		const bool bAffectsSlice0 = TileBoundsIntersectDistanceFieldCaster(DFObjectIndex, CasterBounds, TileCullingData0.TranslatedBoundingSphere, ConeAxis0, ConeAngleCos0, ConeAngleSin0, CasterMaxOcclusionDistance);

		BRANCH
		if (bAffectsSlice0)
		{
			uint SlotIndex;
			InterlockedAdd(TileNumDistanceFields0, 1U, SlotIndex);
			// On overflow all further writes land in the last slot instead of past the end of the list
			SlotIndex = min(SlotIndex, (uint)(MAX_INTERSECTING_DISTANCE_FIELDS - 1));
			IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * 0 + SlotIndex] = CasterListIndex;
		}

		// Depth slice 1
		const bool bAffectsSlice1 = TileBoundsIntersectDistanceFieldCaster(DFObjectIndex, CasterBounds, TileCullingData1.TranslatedBoundingSphere, ConeAxis1, ConeAngleCos1, ConeAngleSin1, CasterMaxOcclusionDistance);

		BRANCH
		if (bAffectsSlice1)
		{
			uint SlotIndex;
			InterlockedAdd(TileNumDistanceFields1, 1U, SlotIndex);
			// On overflow all further writes land in the last slot instead of past the end of the list
			SlotIndex = min(SlotIndex, (uint)(MAX_INTERSECTING_DISTANCE_FIELDS - 1));
			IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * 1 + SlotIndex] = CasterListIndex;
		}
	}

	// Make the lists written by the other threads visible before they are consumed
	GroupMemoryBarrierWithGroupSync();

	// The counters keep counting on overflow, so clamp to what was actually stored
	const uint NumCulled = GroupIndex == 0 ? TileNumDistanceFields0 : TileNumDistanceFields1;
	return min(NumCulled, (uint)MAX_INTERSECTING_DISTANCE_FIELDS);
}
/**
 * Cone traces a shadow ray through the signed distance field of each mesh distance field caster and combines the per-caster results.
 *
 * @param TranslatedWorldRayStart - start of the ray in translated world space
 * @param UnitRayDirection - ray direction; replaced per caster through GetLightDirectionData when LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
 * @param LightAngle - cone angle whose tangent scales the cone radius along the ray; also replaced per caster in LIGHT_SOURCE_FROM_CAPSULE mode
 * @param MaxOcclusionDistance - base trace distance, extended per caster by half of its bounding sphere radius
 * @param CulledDataParameter - selects which half of IntersectingMeshDistanceFieldIndices to read when bUseCulling is true
 * @param NumIntersectingCasters - number of casters to iterate over
 * @param bUseCulling - true to fetch caster indices from the groupshared culled list, false to use the loop index directly
 * @param NumTraceSteps - incremented once for every distance field sample taken in the stepping loop
 * @return weighted geometric mean of the per-caster visibilities, or 1 when the accumulated weight is 0
 */
float ShadowConeTraceAgainstCulledDistanceFieldCasters(
float3 TranslatedWorldRayStart,
float3 UnitRayDirection,
float LightAngle,
float MaxOcclusionDistance,
uint CulledDataParameter,
uint NumIntersectingCasters,
uniform bool bUseCulling,
inout uint NumTraceSteps)
{
float TanConeAngle = tan(LightAngle);
// Accumulators for sum(Weight * log2(Visibility)) and sum(Weight)
float GeometricMeanNumerator = 0;
float TotalWeight = 0;
LOOP
for (uint TileCulledObjectIndex = 0; TileCulledObjectIndex < NumIntersectingCasters; TileCulledObjectIndex++)
{
uint ListObjectIndex = TileCulledObjectIndex;
if (bUseCulling)
{
// Indirect through the groupshared list; each group owns a MAX_INTERSECTING_DISTANCE_FIELDS sized slice
uint GroupIndex = CulledDataParameter;
ListObjectIndex = IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * GroupIndex + TileCulledObjectIndex];
}
// Stays 0 unless GetLightDirectionData writes it below
float MinVisibility = 0.0f;
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
// Direction, angle and minimum visibility are fetched per caster in this mode, so the cone tangent has to be recomputed
GetLightDirectionData(ListObjectIndex, true, UnitRayDirection, LightAngle, MinVisibility);
TanConeAngle = tan(LightAngle);
#endif
uint ObjectIndex = MeshDistanceFieldCasterIndices[ListObjectIndex];
FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
float4x4 TranslatedWorldToVolume = DFFastToTranslatedWorld(DFObjectData.WorldToVolume, PrimaryView.PreViewTranslation);
// Increase max occlusion distance based on object size for distance field casters
// This improves the solidness of the shadows, since the fadeout distance causes internal structure of objects to become visible
float EffectiveMaxOcclusionDistance = MaxOcclusionDistance + .5f * DFObjectBounds.SphereRadius;
float MaxSphereRadius = TanConeAngle * EffectiveMaxOcclusionDistance;
// Transform both ray end points into the caster's volume space and rebuild a normalized direction there
float3 TranslatedWorldRayEnd = TranslatedWorldRayStart + UnitRayDirection * EffectiveMaxOcclusionDistance;
float3 VolumeRayStart = mul(float4(TranslatedWorldRayStart, 1), TranslatedWorldToVolume).xyz;
float3 VolumeRayEnd = mul(float4(TranslatedWorldRayEnd, 1), TranslatedWorldToVolume).xyz;
float3 VolumeRayDirection = VolumeRayEnd - VolumeRayStart;
float VolumeRayLength = length(VolumeRayDirection);
VolumeRayDirection /= VolumeRayLength;
float VolumeMaxSphereRadius = MaxSphereRadius / DFObjectData.VolumeScale;
{
FDFAssetData DFAssetData = LoadDFAssetDataHighestResolution(DFObjectData.AssetIndex);
const float MaxEncodedDistance = DFAssetData.DistanceFieldToVolumeScaleBias.x + DFAssetData.DistanceFieldToVolumeScaleBias.y;
// Prevent incorrect shadowing when sampling invalid bricks by limiting VolumeMaxSphereRadius to MaxEncodedDistance
VolumeMaxSphereRadius = min(VolumeMaxSphereRadius, MaxEncodedDistance);
// Lowest visibility found along the ray for this caster
float MinTraceVisibility = 1;
//@todo - derive from texel size
float StartOffset = .02f;
uint MaxSteps = 32;
float MinStepSize = 1.0f / (4 * MaxSteps);
// How much to artificially slow down the stepping proportional to cone occlusion
// Reduces artifacts when steps are large (far from the surface) yet heavily occluded because the cone angle is large
float FullVisibilityMaxStepFraction = .1f;
float OcclusionExponent = .8f;
float SampleRayTime = StartOffset;
uint StepIndex = 0;
LOOP
for (; StepIndex < MaxSteps; StepIndex++)
{
float3 SampleVolumePosition = VolumeRayStart + VolumeRayDirection * SampleRayTime;
// Sample at the nearest point inside the volume extent and add back the distance from that point to the real sample position
float3 ClampedSamplePosition = clamp(SampleVolumePosition, -DFObjectData.VolumePositionExtent, DFObjectData.VolumePositionExtent);
float DistanceToClamped = length(ClampedSamplePosition - SampleVolumePosition);
float VolumeDistanceToOccluder = SampleSparseMeshSignedDistanceField(ClampedSamplePosition, DFAssetData) + DistanceToClamped;
// Cone radius at this point of the ray, capped at VolumeMaxSphereRadius
float SphereRadius = clamp(TanConeAngle * SampleRayTime, 0, VolumeMaxSphereRadius);
float StepVisibility = pow(saturate(VolumeDistanceToOccluder / SphereRadius), OcclusionExponent);
float OccluderDistanceFraction = (SampleRayTime + VolumeDistanceToOccluder) * DFObjectData.VolumeScale / EffectiveMaxOcclusionDistance;
// Fade out occlusion based on distance to occluder to avoid a discontinuity at the max AO distance
//@todo - this introduces banding artifacts because we may be taking large steps through the fade region
StepVisibility = max(StepVisibility, saturate(OccluderDistanceFraction));
MinTraceVisibility = min(MinTraceVisibility, StepVisibility);
// Step by the distance to the occluder, limited by the current visibility, but never by less than MinStepSize
float StepDistance = min(VolumeDistanceToOccluder, StepVisibility * FullVisibilityMaxStepFraction * VolumeRayLength);
StepDistance = max(StepDistance, MinStepSize);
SampleRayTime += StepDistance;
NumTraceSteps++;
// Terminate the trace if we reached a negative area or went past the end of the ray
if (VolumeDistanceToOccluder <= 0
|| SampleRayTime > VolumeRayLength)
{
break;
}
}
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_PUNCTUAL
// Attempt to match the effect of MinVisibility on capsule shadows, which combine with multiply so a MinVisibility of .1 does not actually achieve values >= .1
MinVisibility *= MinVisibility;
// Apply to indirect shadows only
MinTraceVisibility = lerp(MinVisibility, 1, MinTraceVisibility);
#endif
// Weight each caster by how close the ray start is to its surface: 1 at the surface, 0 at EffectiveMaxOcclusionDistance or beyond
float WeightDistance = EffectiveMaxOcclusionDistance;
{
float3 SampleVolumePosition = VolumeRayStart;
float3 ClampedSamplePosition = clamp(SampleVolumePosition, -DFObjectData.VolumePositionExtent, DFObjectData.VolumePositionExtent);
float DistanceToClamped = length(ClampedSamplePosition - SampleVolumePosition);
float VolumeDistanceToOccluder = SampleSparseMeshSignedDistanceField(ClampedSamplePosition, DFAssetData) + DistanceToClamped;
WeightDistance = VolumeDistanceToOccluder * DFObjectData.VolumeScale;
//WeightDistance = length(SphereCenterAndRadius.xyz - WorldRayStart);
}
float Weight = 1 - saturate(WeightDistance / EffectiveMaxOcclusionDistance);
Weight = pow(Weight, 4);
// Weighted geometric mean to combine shadows from multiple casters without over-darkening like a simple multiply would do
// NOTE(review): in the LIGHT_SOURCE_PUNCTUAL path nothing keeps MinTraceVisibility above 0, so log2 may be evaluated at 0 - confirm the resulting value is acceptable when Weight is also 0
GeometricMeanNumerator += Weight * log2(MinTraceVisibility);
TotalWeight += Weight;
}
}
// No contributing caster leaves the pixel fully visible
float ConeVisibility = 1;
if (TotalWeight > 0)
{
ConeVisibility = exp2(GeometricMeanNumerator / TotalWeight);
}
return ConeVisibility;
}
/**
 * Blends Visibility towards 1 by View.IndirectCapsuleSelfShadowingIntensity on pixels whose material reports a dynamic indirect shadow caster representation.
 * Does nothing when the view intensity is not below 1.
 *
 * @param ScreenUV - buffer UV used to fetch the GBuffer
 * @param Visibility - shadow visibility, modified in place
 * @param SVPos - pixel position, consumed by the Substrate and mobile GBuffer fetch paths only
 */
void ApplySelfShadowingIntensityForDeferred(float2 ScreenUV, inout float Visibility, float2 SVPos)
{
	BRANCH
	if (View.IndirectCapsuleSelfShadowingIntensity < 1)
	{
#if SUBTRATE_GBUFFER_FORMAT==1
		// Substrate path: the flag is read from the per-pixel Substrate header
		FSubstrateAddressing PixelAddressing = GetSubstratePixelDataByteOffset(SVPos.xy, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
		FSubstratePixelHeader PixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, PixelAddressing, Substrate.TopLayerTexture);
		const bool bIsDynamicIndirectCaster = PixelHeader.HasDynamicIndirectShadowCasterRepresentation();
#else
		// Legacy GBuffer path: the flag is derived from the decoded GBuffer
	#if SHADING_PATH_MOBILE
		FGBufferData PixelGBuffer = MobileFetchAndDecodeGBuffer(ScreenUV, SVPos);
	#else
		FGBufferData PixelGBuffer = GetGBufferData(ScreenUV);
	#endif
		const bool bIsDynamicIndirectCaster = HasDynamicIndirectShadowCasterRepresentation(PixelGBuffer);
#endif
		// A factor of 1 keeps the incoming visibility, lower values pull it towards fully visible
		const float SelfShadowBlend = bIsDynamicIndirectCaster ? View.IndirectCapsuleSelfShadowingIntensity : 1;
		Visibility = lerp(1, Visibility, SelfShadowBlend);
	}
}
#if APPLY_TO_BENT_NORMAL
// Bent normal input loaded per thread by CapsuleShadowingCS
Texture2D ReceiverBentNormalTexture;
// Output of CapsuleShadowingCS in this mode: bent normal scaled by visibility in xyz, scene depth in w
RWTexture2D<float4> RWBentNormalTexture;
#endif
// Passed to CalcSceneDepth on the mobile path and to UpsampleShadowFactors; also written to the render target array index under MOBILE_MULTI_VIEW_FALLBACK
uint EyeIndex;
// NOTE(review): the code visible here reads View.IndirectCapsuleSelfShadowingIntensity rather than this parameter - confirm whether it is still used
float IndirectCapsuleSelfShadowingIntensity;
// Scales the dispatch thread id to buffer pixels when CapsuleShadowingCS derives ScreenUV
uint DownsampleFactor;
// Output of CapsuleShadowingCS when not applying to bent normals: visibility in x, scene depth in y
RWTexture2D<float2> RWShadowFactors;
// Base occlusion distance forwarded to the tile culling and cone tracing functions
float MaxOcclusionDistance;
// Tile counts in x and y; used to bounds check the group id and as the row stride of the tile count buffers
uint2 TileDimensions;
// Per tile number of intersecting shapes, written by the first thread of each group in CapsuleShadowingCS
RWStructuredBuffer<uint> RWTileIntersectionCounts;
/**
 * Compute shader entry point: one thread per output texel, one thread group per tile.
 * Reconstructs the translated world position of the texel, culls capsule shapes and mesh distance field casters against the tile,
 * cone traces against the survivors and writes the resulting visibility together with the scene depth.
 * Writes RWBentNormalTexture when APPLY_TO_BENT_NORMAL is set, otherwise RWShadowFactors plus the per tile count in RWTileIntersectionCounts.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void CapsuleShadowingCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
// Flattened index of this thread within its group
uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;
// Map the output texel to the buffer pixel it covers, offset by the scissor rect origin
float2 ScreenUV = float2((DispatchThreadId.xy * DownsampleFactor + ScissorRectMinAndSize.xy + .5f) * View.BufferSizeAndInvSize.zw);
float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
// Mobile does not support bent normals
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_RECEIVER && !SHADING_PATH_MOBILE
// In this mode the receiver texture supplies both the bent normal (xyz) and the scene depth (w)
float4 ReceiverTextureValue = ReceiverBentNormalTexture.Load(DispatchThreadId.xyz);
float3 ReceiverBentNormal = ReceiverTextureValue.xyz;
float SceneDepth = ReceiverTextureValue.w;
#else
#if SHADING_PATH_MOBILE
float SceneDepth = CalcSceneDepth(ScreenUV, EyeIndex);
#else
float SceneDepth = CalcSceneDepth(ScreenUV);
#endif
#endif
const float3 OpaqueTranslatedWorldPosition = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, SceneDepth), SceneDepth, 1), PrimaryView.ScreenToTranslatedWorld).xyz;
uint CulledDataParameter = 0;
bool bTileShouldComputeShadowing = true;
FTileCullingData TileCullingData0;
FTileCullingData TileCullingData1;
// Counters; the per pixel and per step ones only feed the commented out debug outputs further down
uint NumPixelIntersectingShapes = 0;
uint NumTileIntersectingShapes = 0;
uint NumDistanceFieldSteps = 0;
// So we can skip skybox pixels / tiles without having to check the GBuffer for shading model
float MaxDepth = 20000;
#define USE_CULLING 1
#if USE_CULLING
SetupTileCullingData(SceneDepth, MaxDepth, ThreadIndex, GroupId.xy, TileCullingData0, TileCullingData1, bTileShouldComputeShadowing, CulledDataParameter);
#endif // USE_CULLING
// Fully visible unless the tile traces against at least one caster
float Visibility = 1;
BRANCH
if (bTileShouldComputeShadowing)
{
// World space offset along the start of the ray to avoid incorrect self-shadowing
float RayStartOffset = 0;
float LightVectorLength = 0;
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
#if POINT_LIGHT
// Point light: trace towards the light position with a cone angle derived from the source radius and the distance
float3 LightVector = LightTranslatedPositionAndInvRadius.xyz - OpaqueTranslatedWorldPosition;
LightVectorLength = length(LightVector);
float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightVector / LightVectorLength * RayStartOffset;
float3 UnitRayDirection = (LightTranslatedPositionAndInvRadius.xyz - OpaqueTranslatedWorldPosition) / LightVectorLength;
float LightAngle = atanFastPos(LightSourceRadius / LightVectorLength);
#else
// Not a point light: fixed direction and angle taken from the light parameters
float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightDirection * RayStartOffset;
float3 UnitRayDirection = LightDirection;
float LightAngle = LightAngleAndNormalThreshold.x;
#endif
#elif LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_RECEIVER && !SHADING_PATH_MOBILE
// Trace along the receiver's bent normal; the cone angle grows with the bent normal length and is at least PI / 8
float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition;
float BentNormalLength = length(ReceiverBentNormal);
float3 UnitRayDirection = ReceiverBentNormal / max(BentNormalLength, .00001f);
float LightAngle = max(BentNormalLength * .5f * PI, PI / 8);
#else
// Direction and angle are left at 0 here; the distance field trace replaces them per caster in LIGHT_SOURCE_FROM_CAPSULE mode
float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition;
float3 UnitRayDirection = 0;
float LightAngle = 0;
#endif
// Without culling every caster is traced
uint NumIntersectingCapsules = NumShadowCapsules;
uint NumIntersectingDistanceFieldCasters = NumMeshDistanceFieldCasters;
#if USE_CULLING
#if SUPPORT_CAPSULE_SHAPES
NumIntersectingCapsules = CullCapsuleShapesToTile(
ThreadIndex,
CulledDataParameter,
MaxOcclusionDistance,
TileCullingData0,
TileCullingData1);
NumTileIntersectingShapes += TileNumCapsules0 + TileNumCapsules1;
#endif
#if SUPPORT_MESH_DISTANCE_FIELDS
NumIntersectingDistanceFieldCasters = CullDistanceFieldCastersToTile(
ThreadIndex,
CulledDataParameter,
MaxOcclusionDistance,
TileCullingData0,
TileCullingData1);
NumTileIntersectingShapes += TileNumDistanceFields0 + TileNumDistanceFields1;
#endif
#else
NumTileIntersectingShapes = NumShadowCapsules + NumMeshDistanceFieldCasters;
#endif
NumPixelIntersectingShapes += NumIntersectingCapsules + NumIntersectingDistanceFieldCasters;
// Capsule and distance field results are combined by multiplication
#if SUPPORT_CAPSULE_SHAPES
Visibility *= ShadowConeTraceAgainstCulledCapsuleShapes(
TranslatedWorldRayStart,
UnitRayDirection,
LightVectorLength,
LightAngle,
1.0f / MaxOcclusionDistance,
CulledDataParameter,
NumIntersectingCapsules,
USE_CULLING ? true : false);
#endif
#if SUPPORT_MESH_DISTANCE_FIELDS
Visibility *= ShadowConeTraceAgainstCulledDistanceFieldCasters(
TranslatedWorldRayStart,
UnitRayDirection,
LightAngle,
MaxOcclusionDistance,
CulledDataParameter,
NumIntersectingDistanceFieldCasters,
USE_CULLING ? true : false,
NumDistanceFieldSteps);
#endif
#if !APPLY_TO_BENT_NORMAL
// The first thread of each in-range group publishes the tile's count
// NOTE(review): this write sits inside the bTileShouldComputeShadowing branch, so tiles that skip shadowing never store a count here - confirm the buffer is cleared before it is read
if (all(GroupThreadId.xy == 0) && all(GroupId.xy < TileDimensions))
{
RWTileIntersectionCounts[GroupId.y * TileDimensions.x + GroupId.x] = NumTileIntersectingShapes;
}
#endif
}
//Visibility = NumDistanceFieldSteps / 20.0f;
//Visibility = NumPixelIntersectingShapes / 20.0f;
//Visibility = bTileShouldComputeShadowing ? 1 : 0;
#if APPLY_TO_BENT_NORMAL
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
// The receiver mode already loaded the bent normal at the top of the function
float3 ReceiverBentNormal = ReceiverBentNormalTexture.Load(DispatchThreadId.xyz).xyz;
#endif
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE && !FORWARD_SHADING
// The third param of this function is used only by mobile paths which do not support bent normals.
ApplySelfShadowingIntensityForDeferred(ScreenUV, Visibility, float2(0,0));
#endif
#if METAL_ES3_1_PROFILE
// clamp max depth to avoid #inf
SceneDepth = min(SceneDepth, 65500.0f);
#endif
RWBentNormalTexture[DispatchThreadId.xy] = float4(ReceiverBentNormal * Visibility, SceneDepth);
#else
RWShadowFactors[DispatchThreadId.xy] = float2(Visibility, SceneDepth);
#endif
}
// Per tile intersecting shape counts, indexed by flattened tile index; CapsuleShadowingUpsampleVS culls tiles whose count is 0
StructuredBuffer<uint> TileIntersectionCounts;
// Size of a tile in NDC
// NOTE(review): CapsuleShadowingUpsampleVS adds ScissorRectMinAndSize.xy to the scaled value and then multiplies by View.BufferSizeAndInvSize.zw, which suggests buffer pixels rather than NDC - confirm the unit
float2 TileSize;
// Number of tile quads (4 vertices each) packed into one instance of the tile vertex buffer
#ifndef TILES_PER_INSTANCE
#define TILES_PER_INSTANCE 1
#endif
/**
 * Vertex shader of the capsule shadow upsample pass. Places one quad per tile and collapses the quads of tiles that no shape intersects.
 *
 * @param TexCoord - corner of the quad within its tile, added to the tile coordinate before scaling by TileSize
 * @param VertexId - vertex index within the instance; every 4 vertices form one tile quad
 * @param InstanceId - instance index; each instance carries TILES_PER_INSTANCE tile quads
 * @param OutPosition - resulting clip space position
 * @param LayerIndex - (MOBILE_MULTI_VIEW_FALLBACK only) render target array slice, set to EyeIndex
 */
void CapsuleShadowingUpsampleVS(
	float2 TexCoord : ATTRIBUTE0,
	uint VertexId : SV_VertexID,
	uint InstanceId : SV_InstanceID,
	out float4 OutPosition : SV_POSITION
	// This is a hack to target a different slice of the RT target array in mobile multi view fallback
	// because the D3D RHI does not allow to set slice 1 of a texture2darray as a render target
	// and the capsule shadows shaders are run view-by-view.
	// Mobile multi view on vulkan does not need this hack.
#if MOBILE_MULTI_VIEW_FALLBACK
	, out uint LayerIndex : SV_RenderTargetArrayIndex
#endif
	)
{
#if MOBILE_MULTI_VIEW_FALLBACK
	LayerIndex = EyeIndex;
#endif

	// Compute the actual instance id for when multiple tiles are packed into the vertex buffer
	uint EffectiveInstanceId = InstanceId * TILES_PER_INSTANCE + VertexId / 4;
	uint NumCapsulesAffectingTile = TileIntersectionCounts[EffectiveInstanceId];

	// Both the row and the column have to be derived from the flattened tile index.
	// Using the raw InstanceId for the row is only equivalent when TILES_PER_INSTANCE is 1;
	// with more tiles per instance it yields the wrong row and a column past the end of the row.
	uint TileY = EffectiveInstanceId / TileDimensions.x;
	uint2 TileCoordinate = uint2(EffectiveInstanceId - TileY * TileDimensions.x, TileY);

	float2 ScreenUV = ((TileCoordinate + TexCoord) * TileSize + ScissorRectMinAndSize.xy) * View.BufferSizeAndInvSize.zw;
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	OutPosition = float4(ScreenPosition, 0, 1);

	// Cull the tile if no affecting capsules, shadow will not be visible
	// All 4 vertices of the tile read the same count, so the whole quad collapses onto a single point
	if (NumCapsulesAffectingTile == 0)
	{
		OutPosition.xy = 0;
	}
}
#ifdef UPSAMPLE_PASS
Texture2D ShadowFactorsTexture;
SamplerState ShadowFactorsSampler;
float2 ShadowFactorsUVBilinearMax;
float OutputtingToLightAttenuation;

/**
 * Pixel shader of the capsule shadow upsample pass.
 * Upsamples the shadow factors through UpsampleShadowFactors and writes them either encoded as light attenuation
 * (OutputtingToLightAttenuation > 0) or as a plain visibility value with the self shadowing intensity applied on deferred paths.
 * With APPLY_TO_SSAO the same final value is also written to the second render target.
 */
void CapsuleShadowingUpsamplePS(
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0
#if APPLY_TO_SSAO
	,out float4 OutAmbientOcclusion : SV_Target1
#endif
	)
{
	float UpsampledVisibility;
	float UpsampledSceneDepth;
	UpsampleShadowFactors(
		SVPos,
		ScissorRectMinAndSize,
		1.0f / DOWNSAMPLE_FACTOR,
		0,
		POSITIVE_INFINITY,
		ShadowFactorsTexture,
		ShadowFactorsSampler,
		ShadowFactorsUVBilinearMax,
		UpsampledVisibility,
		UpsampledSceneDepth,
		EyeIndex);

	const bool bWriteLightAttenuation = OutputtingToLightAttenuation > 0;

	if (!bWriteLightAttenuation)
	{
#if !FORWARD_SHADING
		// Self-shadowing determination is binary so apply at full res where possible
		const float2 BufferUV = SvPositionToBufferUV(SVPos);
		ApplySelfShadowingIntensityForDeferred(BufferUV, UpsampledVisibility, SVPos.xy);
#endif
		OutColor = UpsampledVisibility;
	}
	else
	{
		OutColor = EncodeLightAttenuation(UpsampledVisibility).xxxx;
	}

#if APPLY_TO_SSAO
	// Receives the visibility after any self shadowing adjustment made above
	OutAmbientOcclusion = UpsampledVisibility;
#endif
}
#endif // UPSAMPLE_PASS