// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	CapsuleShadowShaders.usf: Tiled deferred culling and shadowing from capsule shapes
=============================================================================*/

#include "Common.ush"
#include "DeferredShadingCommon.ush"
#include "FastMath.ush"
#include "DistanceFieldLightingShared.ush"
#include "SHCommon.ush"
#include "VolumetricLightmapShared.ush"
#include "ReflectionEnvironmentShared.ush"
#include "IntersectionUtils.ush"
#include "Substrate/Substrate.ush"

#ifdef UPSAMPLE_PASS
#	include "ShadowFactorsUpsampleCommon.ush"
#endif

#ifndef THREADGROUP_SIZEX
#	define THREADGROUP_SIZEX 1
#endif

#ifndef THREADGROUP_SIZEY
#	define THREADGROUP_SIZEY 1
#endif

#ifndef LIGHT_SOURCE_MODE
#define LIGHT_SOURCE_MODE 0
#endif

// must match CapsuleShadowRendering.cpp
#define LIGHT_SOURCE_PUNCTUAL 0
#define LIGHT_SOURCE_FROM_CAPSULE 1
#define LIGHT_SOURCE_FROM_RECEIVER 2

/** A capsule occluder: a segment of the given Length along Orientation, centered at TranslatedCenter, swept by Radius. */
struct FCapsuleShape
{
	float3 TranslatedCenter;
	float Radius;
	float3 Orientation;
	float Length;
};

// SUPPORT_CAPSULE_SHAPES

/** Number of capsules affecting the tile, after culling. */
groupshared uint TileNumCapsules0;
groupshared uint TileNumCapsules1;

#define MAX_INTERSECTING_SHAPES 512
/** Two culled index lists (one per tile depth group), each MAX_INTERSECTING_SHAPES entries long. */
groupshared uint IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * 2];

uint NumShadowCapsules;
StructuredBuffer<FCapsuleShape> ShadowCapsuleShapes;

// SUPPORT_MESH_DISTANCE_FIELDS

/** Number of distance fields affecting the tile, after culling. */
groupshared uint TileNumDistanceFields0;
groupshared uint TileNumDistanceFields1;

#define MAX_INTERSECTING_DISTANCE_FIELDS 64
/** Two culled index lists (one per tile depth group), each MAX_INTERSECTING_DISTANCE_FIELDS entries long. */
groupshared uint IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * 2];

uint NumMeshDistanceFieldCasters;
StructuredBuffer<uint> MeshDistanceFieldCasterIndices;

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE

StructuredBuffer<float4> LightDirectionData;

/**
 * Unpacks the two half-precision values stored in the bit pattern of LightDirectionW.
 * The low 16 bits hold LightAngle, the high 16 bits hold MinVisibility.
 */
void DecodeLightDirectionW(float LightDirectionW, out float LightAngle, out float MinVisibility)
{
	uint WInt = asuint(LightDirectionW);
	// f16tof32 only reads the low 16 bits of its argument
	LightAngle = f16tof32(WInt);
	MinVisibility = f16tof32(WInt >> 16);
}

/** Inverse of DecodeLightDirectionW: packs LightAngle (low half) and MinVisibility (high half) into one float's bits. */
float EncodeLightDirectionW(float LightAngle, float MinVisibility)
{
	return asfloat(f32tof16(LightAngle) | f32tof16(MinVisibility) << 16);
}

/**
 * Loads the per-caster light direction, cone angle and minimum visibility.
 * ShapeIndex is relative to the capsule list, or to the distance field caster list when bDistanceFieldCaster is set.
 */
void GetLightDirectionData(uint ShapeIndex, bool bDistanceFieldCaster, out float3 LightDirection, out float LightAngle, out float MinVisibility)
{
	// Light data for distance field casters is placed after light data for capsules in SetupIndirectCapsuleShadows
	uint BaseLightDataIndex = bDistanceFieldCaster ? NumShadowCapsules : 0;
	float4 VectorValue = LightDirectionData[ShapeIndex + BaseLightDataIndex];
	LightDirection = VectorValue.xyz;
	DecodeLightDirectionW(VectorValue.w, LightAngle, MinVisibility);
}

uint SkyLightMode;
float CapsuleIndirectConeAngle;
float CapsuleSkyAngleScale;
float CapsuleMinSkyAngle;
uint NumLightDirectionData;
RWStructuredBuffer<float4> RWComputedLightDirectionData;

/**
 * One thread per entry of LightDirectionData.
 * On input, xyz is read as an object position and w carries the packed MinVisibility (the packed angle is ignored).
 * On output, RWComputedLightDirectionData receives the chosen light direction in xyz and the re-packed angle / MinVisibility in w.
 * The direction and angle source depend on SkyLightMode:
 *   2    - dominant direction of SkyIrradianceEnvironmentMap, CapsuleIndirectConeAngle
 *   1    - volumetric lightmap sky bent normal, cone angle derived from its length
 *   else - dominant direction of the volumetric lightmap SH, CapsuleIndirectConeAngle
 */
[numthreads(THREADGROUP_SIZEX, 1, 1)]
void ComputeLightDirectionFromVolumetricLightmapCS(
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (DispatchThreadId.x < NumLightDirectionData)
	{
		float4 LightData = LightDirectionData[DispatchThreadId.x];
		float3 ObjectPosition = LightData.xyz;
		float3 BrickTextureUVs = ComputeVolumetricLightmapBrickTextureUVs(ObjectPosition);

		float3 LightDirection;
		float LightAngle;

		if (SkyLightMode == 2)
		{
			FTwoBandSHVectorRGB SkyIrradianceSH;
			// See ComputeSkyEnvMapDiffuseIrradianceCS for the coefficient and sign adaptation. No need to rescale the coefficients, direction is preserved.
			SkyIrradianceSH.R.V = SkyIrradianceEnvironmentMap[0].wyzx * float4(1.0f, -1.0f, 1.0f, -1.0f);
			SkyIrradianceSH.G.V = SkyIrradianceEnvironmentMap[1].wyzx * float4(1.0f, -1.0f, 1.0f, -1.0f);
			SkyIrradianceSH.B.V = SkyIrradianceEnvironmentMap[2].wyzx * float4(1.0f, -1.0f, 1.0f, -1.0f);

			LightDirection = GetMaximumDirection(GetLuminance(SkyIrradianceSH));
			LightAngle = CapsuleIndirectConeAngle;
		}
		else if (SkyLightMode == 1)
		{
			// Stationary sky light shadowing
			float3 SkyBentNormal = GetVolumetricLightmapSkyBentNormal(BrickTextureUVs);
			float SkyBentNormalLength = length(SkyBentNormal);
			float ConeAngle = max(SkyBentNormalLength * CapsuleSkyAngleScale * .5f * PI, CapsuleMinSkyAngle * PI / 180.0f);

			LightDirection = SkyBentNormal / max(SkyBentNormalLength, .0001f);
			LightAngle = ConeAngle;
		}
		else
		{
			FTwoBandSHVectorRGB IrradianceSH = GetVolumetricLightmapSH2(BrickTextureUVs);

			LightDirection = GetMaximumDirection(GetLuminance(IrradianceSH));
			LightAngle = CapsuleIndirectConeAngle;
		}

		// Fall back to straight up when no usable direction was produced
		if (dot(LightDirection, LightDirection) < .1f)
		{
			LightDirection = float3(0, 0, 1);
		}

		// Carry the incoming MinVisibility through unchanged, only the angle is replaced
		float Unused;
		float MinVisibility;
		DecodeLightDirectionW(LightData.w, Unused, MinVisibility);

		float4 PackedLightDirection = float4(LightDirection, EncodeLightDirectionW(LightAngle, MinVisibility));
		RWComputedLightDirectionData[DispatchThreadId.x] = PackedLightDirection;
	}
}

#endif

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
/** From point being shaded toward light, for directional lights. */
float3 LightDirection;
float4 LightTranslatedPositionAndInvRadius;
float LightSourceRadius;
float RayStartOffsetDepthScale;
float3 LightAngleAndNormalThreshold;
#endif

uint4 ScissorRectMinAndSize;
float2 NumGroups;

/** Min and Max depth for this tile. */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;

/** Inner Min and Max depth for this tile. */
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;

/** Bounds of one depth group of a tile, plus the light cone used for culling with punctual lights. */
struct FTileCullingData
{
	float4 TranslatedBoundingSphere;
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	float3 ConeAxis;
	float ConeAngleCos;
	float ConeAngleSin;
#endif
};

/**
 * Builds culling bounds for the thread group's tile. Must be called by every thread of the group (contains group syncs).
 *
 * The tile's depth range is split at its midpoint into two groups:
 *   group 0 spans [MinTileZ, MaxTileZ2] and group 1 spans [MinTileZ2, MaxTileZ].
 * Each group gets a bounding sphere in translated world space (and a light cone for punctual lights).
 *
 * @param SceneDepth					Depth of this thread's pixel
 * @param MaxDepth						Pixels at or beyond this depth do not contribute to the tile depth bounds
 * @param ThreadIndex					Flat index of the thread in the group; thread 0 resets the shared state
 * @param GroupId						Tile coordinate
 * @param TileCullingData0				Bounds of depth group 0
 * @param TileCullingData1				Bounds of depth group 1
 * @param bTileShouldComputeShadowing	False when no pixel contributed depth, or (point lights) both groups are outside the light's radius
 * @param GroupIndex					1 when SceneDepth is beyond group 0's far depth, otherwise 0
 */
void SetupTileCullingData(
	float SceneDepth,
	float MaxDepth,
	uint ThreadIndex,
	uint2 GroupId,
	out FTileCullingData TileCullingData0,
	out FTileCullingData TileCullingData1,
	out bool bTileShouldComputeShadowing,
	out uint GroupIndex)
{
	// Initialize per-tile variables
	if (ThreadIndex == 0)
	{
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		IntegerTileMinZ2 = 0x7F7FFFFF;
		IntegerTileMaxZ2 = 0;
		TileNumCapsules0 = 0;
		TileNumCapsules1 = 0;
		TileNumDistanceFields0 = 0;
		TileNumDistanceFields1 = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Use shared memory atomics to build the depth bounds for this tile
	// Each thread is assigned to a pixel at this point
	//@todo - move depth range computation to a central point where it can be reused by all the frame's tiled deferred passes!
	if (SceneDepth < MaxDepth)
	{
		InterlockedMin(IntegerTileMinZ, asuint(SceneDepth));
		InterlockedMax(IntegerTileMaxZ, asuint(SceneDepth));
	}

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ = asfloat(IntegerTileMinZ);
	float MaxTileZ = asfloat(IntegerTileMaxZ);
	float HalfZ = .5f * (MinTileZ + MaxTileZ);

	if (SceneDepth < MaxDepth)
	{
		// Compute a second min and max Z, clipped by HalfZ, so that we get two depth bounds per tile
		// This results in more conservative tile depth bounds and fewer intersections
		if (SceneDepth >= HalfZ)
		{
			InterlockedMin(IntegerTileMinZ2, asuint(SceneDepth));
		}

		if (SceneDepth <= HalfZ)
		{
			InterlockedMax(IntegerTileMaxZ2, asuint(SceneDepth));
		}
	}

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ2 = asfloat(IntegerTileMinZ2);
	float MaxTileZ2 = asfloat(IntegerTileMaxZ2);

	bTileShouldComputeShadowing = true;

	// Depth bounds still at their initial values: no pixel passed the MaxDepth test
	if (IntegerTileMinZ == 0x7F7FFFFF && IntegerTileMaxZ == 0)
	{
		bTileShouldComputeShadowing = false;
	}

	float3 ViewTileMin;
	float3 ViewTileMax;

	float3 ViewTileMin2;
	float3 ViewTileMax2;

	bool bCenteredProjection = abs(View.ViewToClip[2][0]) < .00001f && abs(View.ViewToClip[2][1]) < .00001f;

	BRANCH
	// Off center projection path uses 37 more asm instructions
	if (bCenteredProjection)
	{
		float2 TanViewFOV = GetTanHalfFieldOfView();
		// tan(FOV) = HalfUnitPlaneWidth / 1, so TanViewFOV * 2 is the size of the whole unit view plane
		// We are operating on a subset of that defined by ScissorRectMinAndSize
		float2 TileSize = TanViewFOV * 2 * ScissorRectMinAndSize.zw / ((float2)View.ViewSizeAndInvSize.xy * NumGroups);
		float2 UnitPlaneMin = -TanViewFOV + TanViewFOV * 2 * (ScissorRectMinAndSize.xy - View.ViewRectMin.xy) * View.ViewSizeAndInvSize.zw;

		float2 UnitPlaneTileMin = (GroupId.xy * TileSize + UnitPlaneMin) * float2(1, -1);
		float2 UnitPlaneTileMax = ((GroupId.xy + 1) * TileSize + UnitPlaneMin) * float2(1, -1);

		ViewTileMin.xy = min(MinTileZ * UnitPlaneTileMin, MaxTileZ2 * UnitPlaneTileMin);
		ViewTileMax.xy = max(MinTileZ * UnitPlaneTileMax, MaxTileZ2 * UnitPlaneTileMax);
		ViewTileMin.z = MinTileZ;
		ViewTileMax.z = MaxTileZ2;

		ViewTileMin2.xy = min(MinTileZ2 * UnitPlaneTileMin, MaxTileZ * UnitPlaneTileMin);
		ViewTileMax2.xy = max(MinTileZ2 * UnitPlaneTileMax, MaxTileZ * UnitPlaneTileMax);
		ViewTileMin2.z = MinTileZ2;
		ViewTileMax2.z = MaxTileZ;
	}
	else
	{
		float2 TileSize = 2 * ScissorRectMinAndSize.zw / ((float2)View.ViewSizeAndInvSize.xy * NumGroups);
		float2 UnitPlaneMin = -1 + 2 * (ScissorRectMinAndSize.xy - View.ViewRectMin.xy) * View.ViewSizeAndInvSize.zw;

		float2 UnitPlaneTileMin = (GroupId.xy * TileSize + UnitPlaneMin) * float2(1, -1);
		float2 UnitPlaneTileMax = ((GroupId.xy + 1) * TileSize + UnitPlaneMin) * float2(1, -1);

		// Depth group 0: unproject the tile corners at MinTileZ and MaxTileZ2
		{
			float MinTileDeviceZ = ConvertToDeviceZ(MinTileZ);
			float4 MinDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MinTileDeviceZ, 1), View.ClipToView);
			float4 MinDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MinTileDeviceZ, 1), View.ClipToView);

			float MaxTileDeviceZ = ConvertToDeviceZ(MaxTileZ2);
			float4 MaxDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MaxTileDeviceZ, 1), View.ClipToView);
			float4 MaxDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MaxTileDeviceZ, 1), View.ClipToView);

			ViewTileMin.xy = min(MinDepthMinCorner.xy / MinDepthMinCorner.w, MaxDepthMinCorner.xy / MaxDepthMinCorner.w);
			ViewTileMax.xy = max(MinDepthMaxCorner.xy / MinDepthMaxCorner.w, MaxDepthMaxCorner.xy / MaxDepthMaxCorner.w);
			ViewTileMin.z = MinTileZ;
			ViewTileMax.z = MaxTileZ2;
		}

		// Depth group 1: unproject the tile corners at MinTileZ2 and MaxTileZ
		{
			float MinTileDeviceZ = ConvertToDeviceZ(MinTileZ2);
			float4 MinDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MinTileDeviceZ, 1), View.ClipToView);
			float4 MinDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MinTileDeviceZ, 1), View.ClipToView);

			float MaxTileDeviceZ = ConvertToDeviceZ(MaxTileZ);
			float4 MaxDepthMinCorner = mul(float4(UnitPlaneTileMin.x, UnitPlaneTileMin.y, MaxTileDeviceZ, 1), View.ClipToView);
			float4 MaxDepthMaxCorner = mul(float4(UnitPlaneTileMax.x, UnitPlaneTileMax.y, MaxTileDeviceZ, 1), View.ClipToView);

			ViewTileMin2.xy = min(MinDepthMinCorner.xy / MinDepthMinCorner.w, MaxDepthMinCorner.xy / MaxDepthMinCorner.w);
			ViewTileMax2.xy = max(MinDepthMaxCorner.xy / MinDepthMaxCorner.w, MaxDepthMaxCorner.xy / MaxDepthMaxCorner.w);
			ViewTileMin2.z = MinTileZ2;
			ViewTileMax2.z = MaxTileZ;
		}
	}

	// Each bounding sphere is centered on its own view space box, with the box's half diagonal as radius
	float3 ViewGroup0Center = (ViewTileMax + ViewTileMin) / 2;
	TileCullingData0.TranslatedBoundingSphere.xyz = mul(float4(ViewGroup0Center, 1), View.ViewToTranslatedWorld).xyz;
	TileCullingData0.TranslatedBoundingSphere.w = length(ViewGroup0Center - ViewTileMax);

	float3 ViewGroup1Center = (ViewTileMax2 + ViewTileMin2) / 2;
	TileCullingData1.TranslatedBoundingSphere.xyz = mul(float4(ViewGroup1Center, 1), View.ViewToTranslatedWorld).xyz;
	// Measure against the second box's own max corner, not group 0's
	TileCullingData1.TranslatedBoundingSphere.w = length(ViewGroup1Center - ViewTileMax2);

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	#if POINT_LIGHT
		float3 LightVector0 = LightTranslatedPositionAndInvRadius.xyz - TileCullingData0.TranslatedBoundingSphere.xyz;
		float LightVector0Length = length(LightVector0);
		float3 LightVector1 = LightTranslatedPositionAndInvRadius.xyz - TileCullingData1.TranslatedBoundingSphere.xyz;
		float LightVector1Length = length(LightVector1);
		TileCullingData0.ConeAxis = LightVector0 / LightVector0Length;
		TileCullingData1.ConeAxis = LightVector1 / LightVector1Length;
		float TanLightAngle0 = LightSourceRadius / LightVector0Length;
		float TanLightAngle1 = LightSourceRadius / LightVector1Length;
		// cos = 1 / sqrt(1 + tan^2), sin = cos * tan
		TileCullingData0.ConeAngleCos = 1.0f / sqrt(1 + TanLightAngle0 * TanLightAngle0);
		TileCullingData0.ConeAngleSin = TileCullingData0.ConeAngleCos * TanLightAngle0;
		TileCullingData1.ConeAngleCos = 1.0f / sqrt(1 + TanLightAngle1 * TanLightAngle1);
		TileCullingData1.ConeAngleSin = TileCullingData1.ConeAngleCos * TanLightAngle1;

		// Don't operate on tiles completely outside of the light's influence
		bool bTileInLightInfluenceBounds = LightVector0Length < 1.0f / LightTranslatedPositionAndInvRadius.w + TileCullingData0.TranslatedBoundingSphere.w
			|| LightVector1Length < 1.0f / LightTranslatedPositionAndInvRadius.w + TileCullingData1.TranslatedBoundingSphere.w;

		bTileShouldComputeShadowing = bTileShouldComputeShadowing && bTileInLightInfluenceBounds;
	#else
		TileCullingData0.ConeAxis = TileCullingData1.ConeAxis = LightDirection;
		TileCullingData0.ConeAngleCos = TileCullingData1.ConeAngleCos = cos(LightAngleAndNormalThreshold.x);
		TileCullingData0.ConeAngleSin = TileCullingData1.ConeAngleSin = sin(LightAngleAndNormalThreshold.x);
	#endif
#endif

	GroupIndex = SceneDepth > MaxTileZ2 ? 1 : 0;
}

// Scaled sphere intersection allows capsule shadows to blend together better when penumbras are large, so use for indirect.
// Otherwise an occluder sphere will be extracted from the capsule and used for shadowing.
// This maintains shadow silhouette shapes better but has a discontinuity when the capsule direction is nearly parallel to the light direction.
#define USE_SCALED_SPHERE_INTERSECTION (LIGHT_SOURCE_MODE != LIGHT_SOURCE_PUNCTUAL)

/**
 * Culls ShadowCapsuleShapes against both depth groups of the tile, with the group's threads striding over the capsules.
 * Intersecting capsule indices are appended to the matching half of IntersectingShapeIndices.
 * Must be called by every thread of the group (ends with a group sync).
 *
 * @return Number of culled capsules for the depth group selected by GroupIndex, clamped to MAX_INTERSECTING_SHAPES.
 */
uint CullCapsuleShapesToTile(
	uint ThreadIndex,
	uint GroupIndex,
	float MaxOcclusionDistance,
	FTileCullingData TileCullingData0,
	FTileCullingData TileCullingData1)
{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	const float3 ConeAxis0 = TileCullingData0.ConeAxis;
	const float ConeAngleCos0 = TileCullingData0.ConeAngleCos;
	const float ConeAngleSin0 = TileCullingData0.ConeAngleSin;
	const float3 ConeAxis1 = TileCullingData1.ConeAxis;
	const float ConeAngleCos1 = TileCullingData1.ConeAngleCos;
	const float ConeAngleSin1 = TileCullingData1.ConeAngleSin;
#endif

	LOOP
	for (uint ShapeIndex = ThreadIndex; ShapeIndex < NumShadowCapsules; ShapeIndex += THREADGROUP_SIZEX * THREADGROUP_SIZEY)
	{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
		// The light cone is per capsule in this mode, and shared by both depth groups
		float3 ConeAxis0;
		float LightAngle;
		float Unused;
		GetLightDirectionData(ShapeIndex, false, ConeAxis0, LightAngle, Unused);
		float ConeAngleCos0 = cos(LightAngle);
		float ConeAngleSin0 = sin(LightAngle);
		float3 ConeAxis1 = ConeAxis0;
		float ConeAngleCos1 = ConeAngleCos0;
		float ConeAngleSin1 = ConeAngleSin0;
#endif

		FCapsuleShape CapsuleShape = ShadowCapsuleShapes[ShapeIndex];

		float3 TransformedSphereTranslatedCenter = CapsuleShape.TranslatedCenter;
		float TransformedSphereRadius = CapsuleShape.Radius;
		float3 TransformedTileTranslatedBoundingSphereCenter0 = TileCullingData0.TranslatedBoundingSphere.xyz;
		float3 TransformedTileTranslatedBoundingSphereCenter1 = TileCullingData1.TranslatedBoundingSphere.xyz;
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		float3 TransformedConeAxis0 = ConeAxis0;
		float3 TransformedConeAxis1 = ConeAxis1;
#endif

#if USE_SCALED_SPHERE_INTERSECTION
		float3 CapsuleSpaceX;
		float3 CapsuleSpaceY;
		float3 CapsuleSpaceZ = CapsuleShape.Orientation;
		GenerateCoordinateSystem(CapsuleSpaceZ, CapsuleSpaceX, CapsuleSpaceY);

		// Scale required along the capsule's axis to turn it into a sphere (assuming it was originally a scaled sphere instead of a capsule)
		float CapsuleZScale = CapsuleShape.Radius / (.5f * CapsuleShape.Length + CapsuleShape.Radius);
		CapsuleSpaceZ *= CapsuleZScale;

		// The capsule is centered at 0 in the scaled sphere space
		TransformedSphereTranslatedCenter = 0;
		// After scaling along the capsule axis it will become a sphere with the original radius
		TransformedSphereRadius = CapsuleShape.Radius;

		// Transform the sphere center and cone axis into the scaled sphere space
		float3 CapsuleCenterToTileCenter0 = TileCullingData0.TranslatedBoundingSphere.xyz - CapsuleShape.TranslatedCenter;
		TransformedTileTranslatedBoundingSphereCenter0 = float3(dot(CapsuleCenterToTileCenter0, CapsuleSpaceX), dot(CapsuleCenterToTileCenter0, CapsuleSpaceY), dot(CapsuleCenterToTileCenter0, CapsuleSpaceZ));

		float3 CapsuleCenterToTileCenter1 = TileCullingData1.TranslatedBoundingSphere.xyz - CapsuleShape.TranslatedCenter;
		TransformedTileTranslatedBoundingSphereCenter1 = float3(dot(CapsuleCenterToTileCenter1, CapsuleSpaceX), dot(CapsuleCenterToTileCenter1, CapsuleSpaceY), dot(CapsuleCenterToTileCenter1, CapsuleSpaceZ));

	#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		// Renormalize the cone axis as it went through a non-uniformly scaled transform
		TransformedConeAxis0 = normalize(float3(dot(ConeAxis0, CapsuleSpaceX), dot(ConeAxis0, CapsuleSpaceY), dot(ConeAxis0, CapsuleSpaceZ)));
		TransformedConeAxis1 = normalize(float3(dot(ConeAxis1, CapsuleSpaceX), dot(ConeAxis1, CapsuleSpaceY), dot(ConeAxis1, CapsuleSpaceZ)));
	#endif
#else
		// Add half capsule length to bounding sphere
		TransformedSphereRadius = CapsuleShape.Radius + .5f * CapsuleShape.Length;
#endif

		BRANCH
		if (SphereIntersectSphere(float4(TransformedSphereTranslatedCenter, TransformedSphereRadius + MaxOcclusionDistance), float4(TransformedTileTranslatedBoundingSphereCenter0, TileCullingData0.TranslatedBoundingSphere.w))
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
			&& SphereIntersectConeWithMaxDistance(float4(TransformedSphereTranslatedCenter, TransformedSphereRadius + TileCullingData0.TranslatedBoundingSphere.w), TransformedTileTranslatedBoundingSphereCenter0, TransformedConeAxis0, ConeAngleCos0, ConeAngleSin0, MaxOcclusionDistance)
#endif
			)
		{
			uint ListIndex;
			InterlockedAdd(TileNumCapsules0, 1U, ListIndex);
			// Don't overwrite on overflow
			ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_SHAPES - 1));
			IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * 0 + ListIndex] = ShapeIndex;
		}

		BRANCH
		if (SphereIntersectSphere(float4(TransformedSphereTranslatedCenter, TransformedSphereRadius + MaxOcclusionDistance), float4(TransformedTileTranslatedBoundingSphereCenter1, TileCullingData1.TranslatedBoundingSphere.w))
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
			&& SphereIntersectConeWithMaxDistance(float4(TransformedSphereTranslatedCenter, TransformedSphereRadius + TileCullingData1.TranslatedBoundingSphere.w), TransformedTileTranslatedBoundingSphereCenter1, TransformedConeAxis1, ConeAngleCos1, ConeAngleSin1, MaxOcclusionDistance)
#endif
			)
		{
			uint ListIndex;
			InterlockedAdd(TileNumCapsules1, 1U, ListIndex);
			// Don't write out of bounds on overflow
			ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_SHAPES - 1));
			IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * 1 + ListIndex] = ShapeIndex;
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// The counters keep incrementing past the list capacity, so clamp what the caller iterates over
	return min(GroupIndex == 0 ? TileNumCapsules0 : TileNumCapsules1, (uint)MAX_INTERSECTING_SHAPES);
}

// Approximate the area of intersection of two spherical caps, from 'Ambient Aperture Lighting'
// fRadius0 : First caps radius (arc length in radians)
// fRadius1 : Second caps radius (in radians)
// fDist : Distance between caps (radians between centers of caps)
float SphericalCapIntersectionAreaFast(float fRadius0, float fRadius1, float fDist)
{
	float fArea;

	if ( fDist <= max(fRadius0, fRadius1) - min(fRadius0, fRadius1) )
	{
		// One cap is completely inside the other
		fArea = 6.283185308f - 6.283185308f * cos( min(fRadius0,fRadius1) );
	}
	else if ( fDist >= fRadius0 + fRadius1 )
	{
		// No intersection exists
		fArea = 0;
	}
	else
	{
		// Partial overlap: blend smoothly between the two cases above
		float fDiff = abs(fRadius0 - fRadius1);
		fArea = smoothstep(0.0f, 1.0f, 1.0f - saturate((fDist-fDiff)/(fRadius0+fRadius1-fDiff)));
		fArea *= 6.283185308f - 6.283185308f * cos( min(fRadius0,fRadius1) );
	}

	return fArea;
}

// CosFadeStartAngle in x, 1 / (1 - CosFadeStartAngle) in y
float2 CosFadeStartAngle;

/** Scales CapsuleRadius by a fade factor derived from LightDirection.z. Only has an effect in LIGHT_SOURCE_FROM_CAPSULE mode. */
void ApplyFadeToCapsuleRadius(inout float CapsuleRadius, float3 LightDirection)
{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
	// Fade out when nearly vertical up due to self shadowing artifacts
	float ShapeFadeAlpha = 1 - saturate(2 * (-LightDirection.z - CosFadeStartAngle.x) * CosFadeStartAngle.y);
	CapsuleRadius *= ShapeFadeAlpha;
#endif
}

/**
 * Accumulates the visibility of a light cone against capsule occluders by multiplying per-capsule visibilities.
 *
 * @param TranslatedWorldRayStart	Cone origin
 * @param UnitRayDirection			Cone axis; replaced per capsule in LIGHT_SOURCE_FROM_CAPSULE mode
 * @param LightVectorLength			Distance to the light, only used with POINT_LIGHT
 * @param LightAngle				Cone half angle in radians; replaced per capsule in LIGHT_SOURCE_FROM_CAPSULE mode
 * @param InvMaxOcclusionDistance	Reciprocal of the distance at which occlusion has fully faded out
 * @param CulledDataParameter		Depth group index selecting the culled list, when bUseCulling is set
 * @param NumIntersectingCapsules	Number of capsules to iterate over
 * @param bUseCulling				Read indices from IntersectingShapeIndices instead of iterating capsules directly
 * @return Visibility in [0, 1], 1 being unoccluded.
 */
float ShadowConeTraceAgainstCulledCapsuleShapes(
	float3 TranslatedWorldRayStart,
	float3 UnitRayDirection,
	float LightVectorLength,
	float LightAngle,
	float InvMaxOcclusionDistance,
	uint CulledDataParameter,
	uint NumIntersectingCapsules,
	uniform bool bUseCulling)
{
	float ConeVisibility = 1;
	// Area of the spherical cap subtended by the light: 2 * PI * (1 - cos(angle))
	float AreaOfLight = 6.283185308f - 6.283185308f * cos(LightAngle);

	LOOP
	for (uint TileCulledObjectIndex = 0; TileCulledObjectIndex < NumIntersectingCapsules; TileCulledObjectIndex++)
	{
		uint ObjectIndex;

		if (bUseCulling)
		{
			uint GroupIndex = CulledDataParameter;
			ObjectIndex = IntersectingShapeIndices[MAX_INTERSECTING_SHAPES * GroupIndex + TileCulledObjectIndex];
		}
		else
		{
			ObjectIndex = TileCulledObjectIndex;
		}

		float MinVisibility = 0.0f;

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
		GetLightDirectionData(ObjectIndex, false, UnitRayDirection, LightAngle, MinVisibility);
		AreaOfLight = 6.283185308f - 6.283185308f * cos(LightAngle);
#endif

#define OVERRIDE_LIGHT_DEBUG 0
#if OVERRIDE_LIGHT_DEBUG
		//UnitRayDirection = normalize(float3(.2f, .2f, .8f));
		UnitRayDirection = float3(0, 0, 1);
		LightAngle = .3f;
#endif

		FCapsuleShape CapsuleShape = ShadowCapsuleShapes[ObjectIndex];
		ApplyFadeToCapsuleRadius(CapsuleShape.Radius, UnitRayDirection);

		float DistanceToShadowSphere;
		float3 UnitVectorToShadowSphere;
		float3 UnitRayDirectionInCorrectSpace = UnitRayDirection;

		BRANCH
		if (CapsuleShape.Length > 0)
		{
#if USE_SCALED_SPHERE_INTERSECTION
			// Work in a space scaled along the capsule axis, where the capsule is treated as a sphere at the origin
			float3 CapsuleSpaceX;
			float3 CapsuleSpaceY;
			float3 CapsuleSpaceZ = CapsuleShape.Orientation;
			GenerateCoordinateSystem(CapsuleSpaceZ, CapsuleSpaceX, CapsuleSpaceY);

			float CapsuleZScale = CapsuleShape.Radius / (.5f * CapsuleShape.Length + CapsuleShape.Radius);
			CapsuleSpaceZ *= CapsuleZScale;

			float3 CapsuleCenterToRayStart = TranslatedWorldRayStart - CapsuleShape.TranslatedCenter;
			float3 CapsuleSpaceRayStart = float3(dot(CapsuleCenterToRayStart, CapsuleSpaceX), dot(CapsuleCenterToRayStart, CapsuleSpaceY), dot(CapsuleCenterToRayStart, CapsuleSpaceZ));
			float3 CapsuleSpaceRayDirection = float3(dot(UnitRayDirection, CapsuleSpaceX), dot(UnitRayDirection, CapsuleSpaceY), dot(UnitRayDirection, CapsuleSpaceZ));

			DistanceToShadowSphere = length(CapsuleSpaceRayStart);
			UnitVectorToShadowSphere = -CapsuleSpaceRayStart / DistanceToShadowSphere;
			UnitRayDirectionInCorrectSpace = normalize(CapsuleSpaceRayDirection);
#else
			float3 VectorToCapsuleCenter = CapsuleShape.TranslatedCenter - TranslatedWorldRayStart;
			// Closest point on line segment to ray
			float3 L01 = CapsuleShape.Orientation * CapsuleShape.Length;
			float3 L0 = VectorToCapsuleCenter - 0.5 * L01;
			float3 L1 = VectorToCapsuleCenter + 0.5 * L01;

			// The below is computing the shortest distance between capsule line segment and ray
			float CapsuleOrientationProjectedOntoRay = dot(UnitRayDirection, L01);
			// Vector that spans L01 perpendicular to the ray
			float3 PerpendicularSpanningVector = CapsuleOrientationProjectedOntoRay * UnitRayDirection - L01;
			// Length of PerpendicularSpanningVector using the right triangle formed by L01 and UnitRayDirection * CapsuleOrientationProjectedOntoRay
			float PerpendicularDistance = Square(CapsuleShape.Length) - CapsuleOrientationProjectedOntoRay * CapsuleOrientationProjectedOntoRay;
			// Project the vector to a capsule endpoint onto the perpendicular spanning vector, normalized
			float t = saturate(dot(L0, PerpendicularSpanningVector) / PerpendicularDistance);
			// Compute the vector to the shadow sphere which best approximates the capsule's shadowing
			float3 VectorToShadowSphere = L0 + t * L01;

			DistanceToShadowSphere = length(VectorToShadowSphere);
			UnitVectorToShadowSphere = VectorToShadowSphere / DistanceToShadowSphere;

			// The above 'best shadow sphere' calculation doesn't take into account the projected solid angle of the potential shadow spheres
			// As a result, there's a discontinuity when the capsule and the ray point in nearly the same direction, where the far end of the capsule gets chosen
			// Here we mitigate the effect by overriding the distance to shadow sphere if one of the capsule end points was closer
			DistanceToShadowSphere = min(DistanceToShadowSphere, length(L0));
			DistanceToShadowSphere = min(DistanceToShadowSphere, length(L1));
#endif
		}
		else
		{
			// Zero length capsule: plain sphere
			DistanceToShadowSphere = length(CapsuleShape.TranslatedCenter - TranslatedWorldRayStart);
			UnitVectorToShadowSphere = (CapsuleShape.TranslatedCenter - TranslatedWorldRayStart) / DistanceToShadowSphere;
		}

		float AngleBetween = acosFast(dot(UnitVectorToShadowSphere, UnitRayDirectionInCorrectSpace));
		float IntersectionArea = SphericalCapIntersectionAreaFast(LightAngle, atanFastPos(CapsuleShape.Radius / DistanceToShadowSphere), AngleBetween);

#if POINT_LIGHT
		// SphericalCapIntersectionAreaFast does not take the relative distance of the two sphere caps into account, which can cause shadows to be cast on the opposite direction
		// Here we compare DistanceToShadowSphere and LightVectorLength to determine whether a shadow should be cast
		// To prevent discontinuity, we use the ratio of the distance difference to the capsule's radius as a smooth factor
		IntersectionArea = lerp(IntersectionArea, 0, saturate((DistanceToShadowSphere - LightVectorLength + CapsuleShape.Radius) / CapsuleShape.Radius));
#endif

		float ConeConeIntersection = 1 - saturate(IntersectionArea / AreaOfLight);

		// Fade occlusion out over the last third of the max occlusion distance
		float DistanceFadeAlpha = saturate(DistanceToShadowSphere * InvMaxOcclusionDistance * 3 - 2);
		ConeConeIntersection = lerp(ConeConeIntersection, 1, DistanceFadeAlpha);

#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_PUNCTUAL
		// Apply to indirect shadows only
		ConeConeIntersection = lerp(MinVisibility, 1, ConeConeIntersection);
#endif

		ConeVisibility *= ConeConeIntersection;
	}

	return ConeVisibility;
}

// Whether to actually sample the distance field while doing tile culling
#define USE_DISTANCE_FIELD_FOR_TILE_CULLING 1

/**
 * Tests whether a mesh distance field caster can affect a tile bounding sphere.
 * A sphere-sphere test against the object bounds (grown by MaxOcclusionDistance) runs first; when
 * USE_DISTANCE_FIELD_FOR_TILE_CULLING is set, the distance field is then sampled at the tile sphere center.
 * The TileCone* parameters are unused while the cone test below is disabled.
 */
bool TileBoundsIntersectDistanceFieldCaster(
	uint ObjectIndex,
	FDFObjectBounds DFObjectBounds,
	float4 TileTranslatedBoundingSphere,
	float3 TileConeAxis,
	float TileConeAngleCos,
	float TileConeAngleSin,
	float MaxOcclusionDistance)
{
	const float3 TranslatedBoundsCenter = DFFastToTranslatedWorld(DFObjectBounds.Center, PrimaryView.PreViewTranslation);

	BRANCH
	if (SphereIntersectSphere(float4(TranslatedBoundsCenter, DFObjectBounds.SphereRadius + MaxOcclusionDistance), TileTranslatedBoundingSphere)
#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
		// This 'can object cast on tile bounds' test has to be disabled because the weight used to combine occlusion from multiple objects is computed based on distance to occluder only
		//&& SphereIntersectConeWithMaxDistance(float4(SphereCenterAndRadius.xyz, SphereCenterAndRadius.w + TileBoundingSphere.w), TileBoundingSphere.xyz, TileConeAxis, TileConeAngleCos, TileConeAngleSin, MaxOcclusionDistance)
#endif
		)
	{
#if USE_DISTANCE_FIELD_FOR_TILE_CULLING
		FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
		float4x4 TranslatedWorldToVolume = DFFastToTranslatedWorld(DFObjectData.WorldToVolume, PrimaryView.PreViewTranslation);

		// Sample at the closest point inside the volume and add back the distance to reach it
		float3 VolumeSamplePosition = mul(float4(TileTranslatedBoundingSphere.xyz, 1), TranslatedWorldToVolume).xyz;
		float3 ClampedSamplePosition = clamp(VolumeSamplePosition, -DFObjectData.VolumePositionExtent, DFObjectData.VolumePositionExtent);
		float DistanceToClamped = length(ClampedSamplePosition - VolumeSamplePosition);
		float WorldDistanceToOccluder = (DistanceToMeshSurfaceStandalone(ClampedSamplePosition, DFObjectData) + DistanceToClamped) * DFObjectData.VolumeScale;

		float ErrorTolerance = 1.1f;
		// The tile can only be affected by the object's shadow if the closest part of the object is less than MaxOcclusionDistance from the tile bounds
		return WorldDistanceToOccluder - TileTranslatedBoundingSphere.w < MaxOcclusionDistance * ErrorTolerance;
#else
		return true;
#endif
	}

	return false;
}

/**
 * Culls the mesh distance field casters against both depth groups of the tile, with the group's threads striding over the casters.
 * The position in MeshDistanceFieldCasterIndices (not the object index) is appended to the matching half of
 * IntersectingMeshDistanceFieldIndices. Must be called by every thread of the group (ends with a group sync).
 *
 * @return Number of culled casters for the depth group selected by GroupIndex, clamped to MAX_INTERSECTING_DISTANCE_FIELDS.
 */
uint CullDistanceFieldCastersToTile(
	uint ThreadIndex,
	uint GroupIndex,
	float MaxOcclusionDistance,
	FTileCullingData TileCullingData0,
	FTileCullingData TileCullingData1)
{
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	float3 ConeAxis0 = TileCullingData0.ConeAxis;
	float ConeAngleCos0 = TileCullingData0.ConeAngleCos;
	float ConeAngleSin0 = TileCullingData0.ConeAngleSin;
	float3 ConeAxis1 = TileCullingData1.ConeAxis;
	float ConeAngleCos1 = TileCullingData1.ConeAngleCos;
	float ConeAngleSin1 = TileCullingData1.ConeAngleSin;
#else
	float3 ConeAxis0 = 0;
	float ConeAngleCos0 = 0;
	float ConeAngleSin0 = 0;
	float3 ConeAxis1 = 0;
	float ConeAngleCos1 = 0;
	float ConeAngleSin1 = 0;
#endif

	LOOP
	for (uint ListObjectIndex = ThreadIndex; ListObjectIndex < NumMeshDistanceFieldCasters; ListObjectIndex += THREADGROUP_SIZEX * THREADGROUP_SIZEY)
	{
		uint ObjectIndex = MeshDistanceFieldCasterIndices[ListObjectIndex];

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
		float LightAngle;
		float Unused;
		GetLightDirectionData(ListObjectIndex, true, ConeAxis0, LightAngle, Unused);
		ConeAngleCos0 = cos(LightAngle);
		ConeAngleSin0 = sin(LightAngle);
		ConeAxis1 = ConeAxis0;
		ConeAngleCos1 = ConeAngleCos0;
		ConeAngleSin1 = ConeAngleSin0;
#endif

		FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);

		// Same per-object extension of the occlusion distance as used when tracing
		float EffectiveMaxOcclusionDistance = MaxOcclusionDistance + .5f * DFObjectBounds.SphereRadius;

		BRANCH
		if (TileBoundsIntersectDistanceFieldCaster(ObjectIndex, DFObjectBounds, TileCullingData0.TranslatedBoundingSphere, ConeAxis0, ConeAngleCos0, ConeAngleSin0, EffectiveMaxOcclusionDistance))
		{
			uint ListIndex;
			InterlockedAdd(TileNumDistanceFields0, 1U, ListIndex);
			// Don't overwrite on overflow
			ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_DISTANCE_FIELDS - 1));
			IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * 0 + ListIndex] = ListObjectIndex;
		}

		BRANCH
		if (TileBoundsIntersectDistanceFieldCaster(ObjectIndex, DFObjectBounds, TileCullingData1.TranslatedBoundingSphere, ConeAxis1, ConeAngleCos1, ConeAngleSin1, EffectiveMaxOcclusionDistance))
		{
			uint ListIndex;
			InterlockedAdd(TileNumDistanceFields1, 1U, ListIndex);
			// Don't write out of bounds on overflow
			ListIndex = min(ListIndex, (uint)(MAX_INTERSECTING_DISTANCE_FIELDS - 1));
			IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * 1 + ListIndex] = ListObjectIndex;
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// The counters keep incrementing past the list capacity, so clamp what the caller iterates over
	return min(GroupIndex == 0 ? TileNumDistanceFields0 : TileNumDistanceFields1, (uint)MAX_INTERSECTING_DISTANCE_FIELDS);
}

/**
 * Cone traces through the distance fields of the given casters and combines the per-caster visibilities
 * with a weighted geometric mean.
 *
 * @param TranslatedWorldRayStart	Cone origin
 * @param UnitRayDirection			Cone axis; replaced per caster in LIGHT_SOURCE_FROM_CAPSULE mode
 * @param LightAngle				Cone half angle in radians; replaced per caster in LIGHT_SOURCE_FROM_CAPSULE mode
 * @param MaxOcclusionDistance		Base trace distance, extended per caster by half its bounding sphere radius
 * @param CulledDataParameter		Depth group index selecting the culled list, when bUseCulling is set
 * @param NumIntersectingCasters	Number of casters to iterate over
 * @param bUseCulling				Read list indices from IntersectingMeshDistanceFieldIndices instead of iterating casters directly
 * @param NumTraceSteps				Incremented once per distance field sample taken while stepping
 * @return Visibility in [0, 1], 1 being unoccluded (also returned when no caster had a non-zero weight).
 */
float ShadowConeTraceAgainstCulledDistanceFieldCasters(
	float3 TranslatedWorldRayStart,
	float3 UnitRayDirection,
	float LightAngle,
	float MaxOcclusionDistance,
	uint CulledDataParameter,
	uint NumIntersectingCasters,
	uniform bool bUseCulling,
	inout uint NumTraceSteps)
{
	float TanConeAngle = tan(LightAngle);

	float GeometricMeanNumerator = 0;
	float TotalWeight = 0;

	LOOP
	for (uint TileCulledObjectIndex = 0; TileCulledObjectIndex < NumIntersectingCasters; TileCulledObjectIndex++)
	{
		uint ListObjectIndex = TileCulledObjectIndex;

		if (bUseCulling)
		{
			uint GroupIndex = CulledDataParameter;
			ListObjectIndex = IntersectingMeshDistanceFieldIndices[MAX_INTERSECTING_DISTANCE_FIELDS * GroupIndex + TileCulledObjectIndex];
		}

		float MinVisibility = 0.0f;

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE
		GetLightDirectionData(ListObjectIndex, true, UnitRayDirection, LightAngle, MinVisibility);
		TanConeAngle = tan(LightAngle);
#endif

		uint ObjectIndex = MeshDistanceFieldCasterIndices[ListObjectIndex];

		FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
		FDFObjectData DFObjectData = LoadDFObjectData(ObjectIndex);
		float4x4 TranslatedWorldToVolume = DFFastToTranslatedWorld(DFObjectData.WorldToVolume, PrimaryView.PreViewTranslation);

		// Increase max occlusion distance based on object size for distance field casters
		// This improves the solidness of the shadows, since the fadeout distance causes internal structure of objects to become visible
		float EffectiveMaxOcclusionDistance = MaxOcclusionDistance + .5f * DFObjectBounds.SphereRadius;
		float MaxSphereRadius = TanConeAngle * EffectiveMaxOcclusionDistance;

		float3 TranslatedWorldRayEnd = TranslatedWorldRayStart + UnitRayDirection * EffectiveMaxOcclusionDistance;
		float3 VolumeRayStart = mul(float4(TranslatedWorldRayStart, 1), TranslatedWorldToVolume).xyz;
		float3 VolumeRayEnd = mul(float4(TranslatedWorldRayEnd, 1), TranslatedWorldToVolume).xyz;
		float3 VolumeRayDirection = VolumeRayEnd - VolumeRayStart;
		float VolumeRayLength = length(VolumeRayDirection);
		VolumeRayDirection /= VolumeRayLength;
		float VolumeMaxSphereRadius = MaxSphereRadius / DFObjectData.VolumeScale;

		{
			FDFAssetData DFAssetData = LoadDFAssetDataHighestResolution(DFObjectData.AssetIndex);

			const float MaxEncodedDistance = DFAssetData.DistanceFieldToVolumeScaleBias.x + DFAssetData.DistanceFieldToVolumeScaleBias.y;

			// Prevent incorrect shadowing when sampling invalid bricks by limiting VolumeMaxSphereRadius to MaxEncodedDistance
			VolumeMaxSphereRadius = min(VolumeMaxSphereRadius, MaxEncodedDistance);

			float MinTraceVisibility = 1;

			//@todo - derive from texel size
			float StartOffset = .02f;
			uint MaxSteps = 32;
			float MinStepSize = 1.0f / (4 * MaxSteps);

			// How much to artificially slow down the stepping proportional to cone occlusion
			// Reduces artifacts when steps are large (far from the surface) yet heavily occluded because the cone angle is large
			float FullVisibilityMaxStepFraction = .1f;
			float OcclusionExponent = .8f;

			float SampleRayTime = StartOffset;

			uint StepIndex = 0;

			LOOP
			for (; StepIndex < MaxSteps; StepIndex++)
			{
				float3 SampleVolumePosition = VolumeRayStart + VolumeRayDirection * SampleRayTime;
				float3 ClampedSamplePosition = clamp(SampleVolumePosition, -DFObjectData.VolumePositionExtent, DFObjectData.VolumePositionExtent);
				float DistanceToClamped = length(ClampedSamplePosition - SampleVolumePosition);
				float VolumeDistanceToOccluder = SampleSparseMeshSignedDistanceField(ClampedSamplePosition, DFAssetData) + DistanceToClamped;

				// Cone radius at this point along the ray, limited to the valid encoded range
				float SphereRadius = clamp(TanConeAngle * SampleRayTime, 0, VolumeMaxSphereRadius);
				float StepVisibility = pow(saturate(VolumeDistanceToOccluder / SphereRadius), OcclusionExponent);

				float OccluderDistanceFraction = (SampleRayTime + VolumeDistanceToOccluder) * DFObjectData.VolumeScale / EffectiveMaxOcclusionDistance;

				// Fade out occlusion based on distance to occluder to avoid a discontinuity at the max AO distance
				//@todo - this introduces banding artifacts because we may be taking large steps through the fade region
				StepVisibility = max(StepVisibility, saturate(OccluderDistanceFraction));

				MinTraceVisibility = min(MinTraceVisibility, StepVisibility);

				float StepDistance = min(VolumeDistanceToOccluder, StepVisibility * FullVisibilityMaxStepFraction * VolumeRayLength);
				StepDistance = max(StepDistance, MinStepSize);
				SampleRayTime += StepDistance;
				NumTraceSteps++;

				// Terminate the trace if we reached a negative area or went past the end of the ray
				if (VolumeDistanceToOccluder <= 0 || SampleRayTime > VolumeRayLength)
				{
					break;
				}
			}

#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_PUNCTUAL
			// Attempt to match the effect of MinVisibility on capsule shadows, which combine with multiply so a MinVisibility of .1 does not actually achieve values >= .1
			MinVisibility *= MinVisibility;
			// Apply to indirect shadows only
			MinTraceVisibility = lerp(MinVisibility, 1, MinTraceVisibility);
#endif

			float WeightDistance = EffectiveMaxOcclusionDistance;

			// Weight this caster by how close its surface is to the ray start
			{
				float3 SampleVolumePosition = VolumeRayStart;
				float3 ClampedSamplePosition = clamp(SampleVolumePosition, -DFObjectData.VolumePositionExtent, DFObjectData.VolumePositionExtent);
				float DistanceToClamped = length(ClampedSamplePosition - SampleVolumePosition);
				float VolumeDistanceToOccluder = SampleSparseMeshSignedDistanceField(ClampedSamplePosition, DFAssetData) + DistanceToClamped;
				WeightDistance = VolumeDistanceToOccluder * DFObjectData.VolumeScale;
				//WeightDistance = length(SphereCenterAndRadius.xyz - WorldRayStart);
			}

			float Weight = 1 - saturate(WeightDistance / EffectiveMaxOcclusionDistance);
			Weight = pow(Weight, 4);

			// Weighted geometric mean to combine shadows from multiple casters without over-darkening like a simple multiply would do
			GeometricMeanNumerator += Weight * log2(MinTraceVisibility);
			TotalWeight += Weight;
		}
	}

	float ConeVisibility = 1;

	if (TotalWeight > 0)
	{
		ConeVisibility = exp2(GeometricMeanNumerator / TotalWeight);
	}

	return ConeVisibility;
}
/**
 * Reduces shadowing on pixels flagged as having a dynamic indirect shadow caster representation.
 * Visibility is blended towards 1 using View.IndirectCapsuleSelfShadowingIntensity; other pixels are left unchanged.
 * Nothing is fetched when the intensity is >= 1.
 *
 * ScreenUV   - buffer UV used to fetch the GBuffer
 * Visibility - shadow factor to adjust in place
 * SVPos      - pixel position, only read by the Substrate and mobile GBuffer paths
 */
void ApplySelfShadowingIntensityForDeferred(float2 ScreenUV, inout float Visibility, float2 SVPos)
{
	BRANCH
	if (View.IndirectCapsuleSelfShadowingIntensity < 1)
	{
		// NOTE(review): the macro below is spelled SUBTRATE_ (no 'S'); left untouched, confirm it matches the spelling used where it is defined
#if SUBTRATE_GBUFFER_FORMAT==1
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(SVPos.xy, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
		FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
		const bool bHasDynamicIndirectShadowCasterRepresentation = SubstratePixelHeader.HasDynamicIndirectShadowCasterRepresentation();
#else
	#if SHADING_PATH_MOBILE
		FGBufferData GBufferData = MobileFetchAndDecodeGBuffer(ScreenUV, SVPos);
	#else
		FGBufferData GBufferData = GetGBufferData(ScreenUV);
	#endif
		const bool bHasDynamicIndirectShadowCasterRepresentation = HasDynamicIndirectShadowCasterRepresentation(GBufferData);
#endif

		// Reduce self shadowing intensity
		Visibility = lerp(1, Visibility, bHasDynamicIndirectShadowCasterRepresentation ? View.IndirectCapsuleSelfShadowingIntensity : 1);
	}
}

#if APPLY_TO_BENT_NORMAL
Texture2D ReceiverBentNormalTexture;
RWTexture2D RWBentNormalTexture;
#endif

uint EyeIndex;
float IndirectCapsuleSelfShadowingIntensity;
uint DownsampleFactor;
RWTexture2D RWShadowFactors;
float MaxOcclusionDistance;
uint2 TileDimensions;
RWStructuredBuffer RWTileIntersectionCounts;

/**
 * Computes capsule and mesh distance field shadowing, one thread per downsampled pixel.
 * Writes either (Visibility, SceneDepth) to RWShadowFactors, or the receiver bent normal scaled by Visibility
 * to RWBentNormalTexture when APPLY_TO_BENT_NORMAL is set.
 * When not applying to the bent normal, thread (0,0) of each in-range group also stores the tile's
 * intersecting shape count at index GroupId.y * TileDimensions.x + GroupId.x.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void CapsuleShadowingCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;

	float2 ScreenUV = float2((DispatchThreadId.xy * DownsampleFactor + ScissorRectMinAndSize.xy + .5f) * View.BufferSizeAndInvSize.zw);
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;

	// Mobile does not support bent normals
#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_RECEIVER && !SHADING_PATH_MOBILE
	// Bent normal in xyz, scene depth in w
	float4 ReceiverTextureValue = ReceiverBentNormalTexture.Load(DispatchThreadId.xyz);
	float3 ReceiverBentNormal = ReceiverTextureValue.xyz;
	float SceneDepth = ReceiverTextureValue.w;
#else
	#if SHADING_PATH_MOBILE
	float SceneDepth = CalcSceneDepth(ScreenUV, EyeIndex);
	#else
	float SceneDepth = CalcSceneDepth(ScreenUV);
	#endif
#endif

	const float3 OpaqueTranslatedWorldPosition = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, SceneDepth), SceneDepth, 1), PrimaryView.ScreenToTranslatedWorld).xyz;

	uint CulledDataParameter = 0;
	bool bTileShouldComputeShadowing = true;
	FTileCullingData TileCullingData0;
	FTileCullingData TileCullingData1;
	uint NumPixelIntersectingShapes = 0;
	uint NumTileIntersectingShapes = 0;
	uint NumDistanceFieldSteps = 0;

	// So we can skip skybox pixels / tiles without having to check the GBuffer for shading model
	float MaxDepth = 20000;

#define USE_CULLING 1

#if USE_CULLING
	SetupTileCullingData(SceneDepth, MaxDepth, ThreadIndex, GroupId.xy, TileCullingData0, TileCullingData1, bTileShouldComputeShadowing, CulledDataParameter);
#endif // USE_CULLING

	float Visibility = 1;

	BRANCH
	if (bTileShouldComputeShadowing)
	{
		// World space offset along the start of the ray to avoid incorrect self-shadowing
		float RayStartOffset = 0;
		float LightVectorLength = 0;

#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_PUNCTUAL
	#if POINT_LIGHT
		float3 LightVector = LightTranslatedPositionAndInvRadius.xyz - OpaqueTranslatedWorldPosition;
		LightVectorLength = length(LightVector);
		float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightVector / LightVectorLength * RayStartOffset;
		float3 UnitRayDirection = (LightTranslatedPositionAndInvRadius.xyz - OpaqueTranslatedWorldPosition) / LightVectorLength;
		float LightAngle = atanFastPos(LightSourceRadius / LightVectorLength);
	#else
		float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition + LightDirection * RayStartOffset;
		float3 UnitRayDirection = LightDirection;
		float LightAngle = LightAngleAndNormalThreshold.x;
	#endif
#elif LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_RECEIVER && !SHADING_PATH_MOBILE
		// Trace along the bent normal; its length sets the cone angle, with a floor of PI / 8
		float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition;
		float BentNormalLength = length(ReceiverBentNormal);
		float3 UnitRayDirection = ReceiverBentNormal / max(BentNormalLength, .00001f);
		float LightAngle = max(BentNormalLength * .5f * PI, PI / 8);
#else
		float3 TranslatedWorldRayStart = OpaqueTranslatedWorldPosition;
		float3 UnitRayDirection = 0;
		float LightAngle = 0;
#endif

		uint NumIntersectingCapsules = NumShadowCapsules;
		uint NumIntersectingDistanceFieldCasters = NumMeshDistanceFieldCasters;

#if USE_CULLING
	#if SUPPORT_CAPSULE_SHAPES
		NumIntersectingCapsules = CullCapsuleShapesToTile(
			ThreadIndex,
			CulledDataParameter,
			MaxOcclusionDistance,
			TileCullingData0,
			TileCullingData1);

		NumTileIntersectingShapes += TileNumCapsules0 + TileNumCapsules1;
	#endif

	#if SUPPORT_MESH_DISTANCE_FIELDS
		NumIntersectingDistanceFieldCasters = CullDistanceFieldCastersToTile(
			ThreadIndex,
			CulledDataParameter,
			MaxOcclusionDistance,
			TileCullingData0,
			TileCullingData1);

		NumTileIntersectingShapes += TileNumDistanceFields0 + TileNumDistanceFields1;
	#endif
#else
		NumTileIntersectingShapes = NumShadowCapsules + NumMeshDistanceFieldCasters;
#endif

		NumPixelIntersectingShapes += NumIntersectingCapsules + NumIntersectingDistanceFieldCasters;

#if SUPPORT_CAPSULE_SHAPES
		Visibility *= ShadowConeTraceAgainstCulledCapsuleShapes(
			TranslatedWorldRayStart,
			UnitRayDirection,
			LightVectorLength,
			LightAngle,
			1.0f / MaxOcclusionDistance,
			CulledDataParameter,
			NumIntersectingCapsules,
			USE_CULLING ? true : false);
#endif

#if SUPPORT_MESH_DISTANCE_FIELDS
		Visibility *= ShadowConeTraceAgainstCulledDistanceFieldCasters(
			TranslatedWorldRayStart,
			UnitRayDirection,
			LightAngle,
			MaxOcclusionDistance,
			CulledDataParameter,
			NumIntersectingDistanceFieldCasters,
			USE_CULLING ? true : false,
			NumDistanceFieldSteps);
#endif

#if !APPLY_TO_BENT_NORMAL
		// One thread per tile records the tile's shape count, indexed row-major by tile coordinate
		if (all(GroupThreadId.xy == 0) && all(GroupId.xy < TileDimensions))
		{
			RWTileIntersectionCounts[GroupId.y * TileDimensions.x + GroupId.x] = NumTileIntersectingShapes;
		}
#endif
	}

	//Visibility = NumDistanceFieldSteps / 20.0f;
	//Visibility = NumPixelIntersectingShapes / 20.0f;
	//Visibility = bTileShouldComputeShadowing ? 1 : 0;

#if APPLY_TO_BENT_NORMAL
	#if LIGHT_SOURCE_MODE != LIGHT_SOURCE_FROM_RECEIVER
	float3 ReceiverBentNormal = ReceiverBentNormalTexture.Load(DispatchThreadId.xyz).xyz;
	#endif

	#if LIGHT_SOURCE_MODE == LIGHT_SOURCE_FROM_CAPSULE && !FORWARD_SHADING
	// The third param of this function is used only by mobile paths which do not support bent normals.
	ApplySelfShadowingIntensityForDeferred(ScreenUV, Visibility, float2(0,0));
	#endif

	#if METAL_ES3_1_PROFILE
	// clamp max depth to avoid #inf
	SceneDepth = min(SceneDepth, 65500.0f);
	#endif

	RWBentNormalTexture[DispatchThreadId.xy] = float4(ReceiverBentNormal * Visibility, SceneDepth);
#else
	RWShadowFactors[DispatchThreadId.xy] = float2(Visibility, SceneDepth);
#endif
}

StructuredBuffer TileIntersectionCounts;
// Size of a tile in NDC
float2 TileSize;

#ifndef TILES_PER_INSTANCE
#define TILES_PER_INSTANCE 1
#endif

/**
 * Positions one vertex of a tile quad for the upsample pass.
 * The tile is looked up in TileIntersectionCounts; tiles with a count of zero have their position collapsed so nothing is drawn.
 *
 * TexCoord    - corner offset within the tile, added to the tile coordinate
 * VertexId    - VertexId / 4 selects the tile within an instance (assumes 4 vertices per packed tile - confirm against the vertex buffer setup)
 * InstanceId  - instance index, each instance covers TILES_PER_INSTANCE tiles
 * OutPosition - resulting vertex position
 */
void CapsuleShadowingUpsampleVS(
	float2 TexCoord : ATTRIBUTE0,
	uint VertexId : SV_VertexID,
	uint InstanceId : SV_InstanceID,
	out float4 OutPosition : SV_POSITION
	// This is a hack to target a different slice of the RT target array in mobile multi view fallback
	// because the D3D RHI does not allow to set slice 1 of a texture2darray as a render target
	// and the capsule shadows shaders are run view-by-view.
	// Mobile multi view on vulkan does not need this hack.
#if MOBILE_MULTI_VIEW_FALLBACK
	, out uint LayerIndex : SV_RenderTargetArrayIndex
#endif
	)
{
#if MOBILE_MULTI_VIEW_FALLBACK
	LayerIndex = EyeIndex;
#endif

	// Compute the actual instance id for when multiple tiles are packed into the vertex buffer
	uint EffectiveInstanceId = InstanceId * TILES_PER_INSTANCE + VertexId / 4;
	uint NumCapsulesAffectingTile = TileIntersectionCounts[EffectiveInstanceId];

	// Both coordinates must come from the effective id, which is the row-major tile index used for the count lookup above.
	// Using the raw InstanceId for the row would place the quad on the wrong tile whenever TILES_PER_INSTANCE > 1.
	uint TileY = EffectiveInstanceId / TileDimensions.x;
	uint2 TileCoordinate = uint2(EffectiveInstanceId - TileY * TileDimensions.x, TileY);

	float2 ScreenUV = ((TileCoordinate + TexCoord) * TileSize + ScissorRectMinAndSize.xy) * View.BufferSizeAndInvSize.zw;
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	OutPosition = float4(ScreenPosition, 0, 1);

	// Cull the tile if no affecting capsules, shadow will not be visible
	if (NumCapsulesAffectingTile == 0)
	{
		OutPosition.xy = 0;
	}
}

#ifdef UPSAMPLE_PASS

Texture2D ShadowFactorsTexture;
SamplerState ShadowFactorsSampler;
float2 ShadowFactorsUVBilinearMax;
float OutputtingToLightAttenuation;

/**
 * Upsamples the downsampled shadow factors for the current pixel.
 * When OutputtingToLightAttenuation > 0 the result is encoded with EncodeLightAttenuation; otherwise
 * self-shadowing intensity is applied (deferred only) and the raw factor is written.
 * With APPLY_TO_SSAO the final factor is also written to the second render target.
 */
void CapsuleShadowingUpsamplePS(
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0
#if APPLY_TO_SSAO
	,out float4 OutAmbientOcclusion : SV_Target1
#endif
	)
{
	const float2 ScreenUV = SvPositionToBufferUV(SVPos);

	float Output;
	float SceneDepth;
	UpsampleShadowFactors(SVPos, ScissorRectMinAndSize, 1.0f / DOWNSAMPLE_FACTOR, 0, POSITIVE_INFINITY, ShadowFactorsTexture, ShadowFactorsSampler, ShadowFactorsUVBilinearMax, Output, SceneDepth, EyeIndex);

	if (OutputtingToLightAttenuation > 0)
	{
		OutColor = EncodeLightAttenuation(Output).xxxx;
	}
	else
	{
#if !FORWARD_SHADING
		// Self-shadowing determination is binary so apply at full res where possible
		ApplySelfShadowingIntensityForDeferred(ScreenUV, Output, SVPos.xy);
#endif
		OutColor = Output;
	}

#if APPLY_TO_SSAO
	OutAmbientOcclusion = Output;
#endif
}

#endif // UPSAMPLE_PASS