// Copyright Epic Games, Inc. All Rights Reserved.

#include "../Common.ush"
#include "LumenMaterial.ush"
#include "../SceneTextureParameters.ush"
#include "../BRDF.ush"
#include "../Random.ush"
#include "LumenReflectionCommon.ush"
#include "../ClearCoatCommon.ush"
#include "../FastMath.ush"
#include "LumenRadianceCacheCommon.ush"

// Fallback values so the file still preprocesses when a permutation does not define these.
#ifndef THREADGROUP_SIZE
#define THREADGROUP_SIZE 1
#endif

#ifndef FRONT_LAYER_TRANSLUCENCY
#define FRONT_LAYER_TRANSLUCENCY 0
#endif

// NOTE(review): none of the resource declarations in this file show an element type template argument
// (e.g. RWBuffer<...>, RWTexture2DArray<...>). It may have been lost before this text reached review -
// confirm against the original file before compiling.

// Indirect dispatch arguments. Element 0 is used as a tile counter, elements 1 and 2 are reset to 1
// by ReflectionTileClassificationMarkCS.
RWBuffer RWReflectionClearTileIndirectArgs;
RWBuffer RWReflectionResolveTileIndirectArgs;
RWBuffer RWReflectionTracingTileIndirectArgs;
// Downsampled scene depth per tracing coord. Negative / zero values are written for coords without a valid ray.
RWTexture2DArray RWDownsampledDepth;
RWTexture2D RWDownsampledClosureIndex;

// Not referenced by any code visible in this file.
groupshared uint SharedTileNeedsResolve[4][4];
groupshared uint SharedTileNeedsTracing;

// Must match cpp GReflectionResolveTileSize
#define RESOLVE_TILE_SIZE 8

#ifndef PERMUTATION_OVERFLOW_TILE
#define PERMUTATION_OVERFLOW_TILE 0
#endif

// One entry per (resolve tile, closure index): 1 if any pixel of the tile needs ray traced reflections, else 0.
RWTexture2DArray RWResolveTileUsed;

// Per-group flag: non-zero once any thread of the group found a pixel that needs ray traced reflections.
groupshared uint SharedTileClassification;

/**
 * Classifies one resolve tile (one RESOLVE_TILE_SIZE x RESOLVE_TILE_SIZE thread group).
 *
 * - Resets the three indirect args buffers to (0, 1, 1) from the first three threads of the dispatch
 *   (only when PERMUTATION_OVERFLOW_TILE == 0).
 * - Writes to RWResolveTileUsed[GroupId, ClosureIndex] whether any pixel of the tile needs ray traced reflections.
 * - For tiles where no pixel was marked, writes -1 into RWDownsampledDepth (and a packed closure index of 0
 *   into RWDownsampledClosureIndex when that permutation is enabled).
 *
 * @param GroupId			Thread group index, used as the resolve tile coordinate.
 * @param DispatchThreadId	xy is the pixel offset inside the view rect, z is passed to GetLumenMaterialCoord.
 * @param GroupThreadId		Thread index inside the group; thread (0,0) initializes and publishes the tile flag.
 */
[numthreads(RESOLVE_TILE_SIZE, RESOLVE_TILE_SIZE, 1)]
void ReflectionTileClassificationMarkCS(
	uint2 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	if (DispatchThreadId.x < 3 && DispatchThreadId.y == 0 && DispatchThreadId.z == 0 && PERMUTATION_OVERFLOW_TILE == 0)
	{
		// Clear indirect args for future pass
		RWReflectionClearTileIndirectArgs[DispatchThreadId.x] = (DispatchThreadId.x == 0) ? 0 : 1;
		RWReflectionResolveTileIndirectArgs[DispatchThreadId.x] = (DispatchThreadId.x == 0) ? 0 : 1;
		RWReflectionTracingTileIndirectArgs[DispatchThreadId.x] = (DispatchThreadId.x == 0) ? 0 : 1;
	}

	if (all(GroupThreadId == 0))
	{
		SharedTileClassification = 0;
	}

	// Make the zeroed flag visible to the whole group before any thread accumulates into it
	GroupMemoryBarrierWithGroupSync();

	bool bIsValid = all(DispatchThreadId.xy < View.ViewRectMinAndSize.zw);
	// Snapshot of the view rect test, taken before the Substrate closure count check below can clear bIsValid
	bool bIsAnyValid = bIsValid;
	const FLumenMaterialCoord Coord = GetLumenMaterialCoord(DispatchThreadId.xy + View.ViewRectMinAndSize.xy, DispatchThreadId.z);
	const uint3 FlattenTileCoord = uint3(GroupId, Coord.ClosureIndex);

	const uint2 DownSampleDepthCoord = DispatchThreadId.xy / ReflectionDownsampleFactorXY + ReflectionTracingViewMin;
	const bool bIsDownSampleDepthCoordValid = all(DownSampleDepthCoord < ReflectionTracingViewMin + ReflectionTracingViewSize);

#if SUBTRATE_GBUFFER_FORMAT==1 && SUBSTRATE_MATERIAL_CLOSURE_COUNT > 1
	// For closure layers above the first, the pixel is only valid if it actually has that many closures
	if (bIsValid && Coord.ClosureIndex > 0)
	{
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(Coord.SvPosition, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
		FSubstratePixelHeader SubstratePixelHeader= UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
		bIsValid = Coord.ClosureIndex < SubstratePixelHeader.ClosureCount;
	}
#endif

	if (bIsValid)
	{
		const FLumenMaterialData Material = ApplySmoothBias(ReadMaterialData(Coord, MaxRoughnessToTrace), false /*bTopLayerRoughness*/);

		if (NeedRayTracedReflections(Material.Roughness, Material))
		{
			// Metal compiler issue: it requires `+=` instead of `=` to record the tile as of UE 5.0
			// The result is only ever compared against zero below, so it is used as a flag rather than as an exact count
			SharedTileClassification += 1;
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// A single thread publishes the tile flag, and only if its own pixel passed the view rect test
	if (all(GroupThreadId == 0) && bIsAnyValid)
	{
		RWResolveTileUsed[FlattenTileCoord] = SharedTileClassification > 0 ? 1u : 0;
	}

	// Clear tiles that the generate shader won't run on
	if (SharedTileClassification == 0 && bIsDownSampleDepthCoordValid)
	{
		RWDownsampledDepth[uint3(DownSampleDepthCoord, Coord.ClosureIndex)] = -1.0f;
#if SUBTRATE_GBUFFER_FORMAT==1 && SUBSTRATE_STOCHASTIC_LIGHTING_ALLOWED && !FRONT_LAYER_TRANSLUCENCY
		RWDownsampledClosureIndex[DownSampleDepthCoord] = SubstratePackClosureIndex(0);
#endif
	}
}

// Output lists of packed tile data: tiles that were not marked as used go to the clear list, used tiles to the tile list.
RWBuffer RWReflectionClearTileData;
RWBuffer RWReflectionTileIndirectArgs;
RWBuffer RWReflectionTileData;

// Read-only view of the per-tile flags; written through RWResolveTileUsed in ReflectionTileClassificationMarkCS.
Texture2DArray ResolveTileUsed;
uint2 TileViewportDimensions;
uint2 ResolveTileViewportDimensions;

// Group-local compaction state. SharedTileData holds used tiles from the front and clear tiles from the back.
groupshared uint SharedNumTiles;
groupshared uint SharedNumClearTiles;
groupshared uint SharedTileData[THREADGROUP_SIZE * THREADGROUP_SIZE];
groupshared uint SharedTileUsed[THREADGROUP_SIZE * THREADGROUP_SIZE];
groupshared uint SharedGlobalTileOffset;
groupshared uint SharedGlobalClearTileOffset;

/**
 * Compacts the per-tile used flags into tile lists.
 *
 * Thread 0 of each group serially walks the group's THREADGROUP_SIZE^2 tiles, packs used tiles into the
 * front of SharedTileData and (only when SUPPORT_DOWNSAMPLE_FACTOR is off) unused tiles into its back.
 * The group then reserves space in the global lists with InterlockedAdd on element 0 of the indirect args
 * buffers, and all threads copy the packed tiles out.
 *
 * @param GroupId			Group index. xy selects the block of tiles; z is used as the closure index in the
 *							multi-closure Substrate permutation; x alone indexes Substrate.ClosureTileBuffer in
 *							the overflow tile permutation.
 * @param GroupThreadId		Thread index inside the group, one thread per tile.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ReflectionTileClassificationBuildListsCS(
	uint3 GroupId : SV_GroupID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	const uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	// When generating downsampled trace tiles we need to downsample ResolveTileUsed to shared memory first
#if SUPPORT_DOWNSAMPLE_FACTOR
	SharedTileUsed[ThreadIndex] = 0;

	GroupMemoryBarrierWithGroupSync();

	uint TileUsed = 0;

	uint2 TileCoordinate = GroupId.xy * THREADGROUP_SIZE + GroupThreadId;
	uint ClosureIndex = SUBTRATE_GBUFFER_FORMAT == 1 && SUBSTRATE_MATERIAL_CLOSURE_COUNT > 1 ? GroupId.z : 0;

	// Gather whether any of the resolve tiles corresponding to this tracing tile were used
	for (uint Y = 0; Y < ReflectionDownsampleFactorXY.y; Y++)
	{
		for (uint X = 0; X < ReflectionDownsampleFactorXY.x; X++)
		{
			uint2 ResolveTileCoordinate = TileCoordinate * ReflectionDownsampleFactorXY + uint2(X, Y);

			if (all(ResolveTileCoordinate < ResolveTileViewportDimensions))
			{
				TileUsed = TileUsed || ResolveTileUsed[uint3(ResolveTileCoordinate, ClosureIndex)];
			}
		}
	}

	SharedTileUsed[ThreadIndex] = TileUsed;

	// Thread 0 reads every thread's SharedTileUsed entry below
	GroupMemoryBarrierWithGroupSync();
#endif

	//@todo - parallel version
	if (ThreadIndex == 0)
	{
		SharedNumTiles = 0;
		SharedNumClearTiles = 0;

		for (uint x = 0; x < THREADGROUP_SIZE * THREADGROUP_SIZE; x++)
		{
			const uint2 ThreadOffset = ZOrder2D(x, log2(THREADGROUP_SIZE));

		#if PERMUTATION_OVERFLOW_TILE && SUBTRATE_GBUFFER_FORMAT==1
			// Overflow tiles come from the Substrate closure tile list rather than from the group's screen position
			const uint LinearIndex = GroupId.x * THREADGROUP_SIZE * THREADGROUP_SIZE + x;
			const bool bIsTileValid = LinearIndex < Substrate.ClosureTileCountBuffer[0];
			FReflectionTileData TileData = (FReflectionTileData)0;
			if (bIsTileValid)
			{
				const FSubstrateClosureTile Tile = UnpackClosureTile(Substrate.ClosureTileBuffer[LinearIndex]);
				TileData.Coord = Tile.TileCoord;
				TileData.ClosureIndex = Tile.ClosureIndex;
			}
			const bool bIsValid = bIsTileValid && all(TileData.Coord < TileViewportDimensions);
		#else
			// ZOrder tiles to maximize screen locality after converting to 1d for compaction
			// The tile locality ultimately affects trace coherency, since trace compaction pulls from neighboring tiles
			FReflectionTileData TileData;
			TileData.Coord = GroupId.xy * THREADGROUP_SIZE + ThreadOffset;
			TileData.ClosureIndex = SUBTRATE_GBUFFER_FORMAT == 1 && SUBSTRATE_MATERIAL_CLOSURE_COUNT > 1 ? GroupId.z : 0;
			const bool bIsValid = all(TileData.Coord < TileViewportDimensions);
		#endif

			if (bIsValid)
			{
				bool bTileUsed;

			#if SUPPORT_DOWNSAMPLE_FACTOR
				bTileUsed = SharedTileUsed[ThreadOffset.y * THREADGROUP_SIZE + ThreadOffset.x];
			#else
				const uint3 TileCoordFlatten = uint3(TileData.Coord, TileData.ClosureIndex);
				bTileUsed = ResolveTileUsed[TileCoordFlatten];
			#endif

				if (bTileUsed)
				{
					uint TileOffset = SharedNumTiles;
					// Note: Must match encoding in WaterTileCatergorisationBuildListsCS
					SharedTileData[TileOffset] = PackTileData(TileData);
					SharedNumTiles = TileOffset + 1;
				}
				else
				{
				#if !SUPPORT_DOWNSAMPLE_FACTOR
					{
						// Pack clear tiles from the other end
						uint TileOffset = SharedNumClearTiles;
						SharedTileData[THREADGROUP_SIZE * THREADGROUP_SIZE - 1 - TileOffset] = PackTileData(TileData);
						SharedNumClearTiles = TileOffset + 1;
					}
				#endif
				}
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Reserve a contiguous range in the global tile list; the previous counter value is this group's base offset
	if (ThreadIndex == 0 && SharedNumTiles > 0)
	{
		InterlockedAdd(RWReflectionTileIndirectArgs[0], SharedNumTiles, SharedGlobalTileOffset);
	}

#if !SUPPORT_DOWNSAMPLE_FACTOR
	{
		if (ThreadIndex == 0 && SharedNumClearTiles > 0)
		{
			InterlockedAdd(RWReflectionClearTileIndirectArgs[0], SharedNumClearTiles, SharedGlobalClearTileOffset);
		}
	}
#endif

	// Publish the reserved offsets to the whole group before the copy-out
	GroupMemoryBarrierWithGroupSync();

	// The first SharedNumTiles threads copy used tiles; the following SharedNumClearTiles threads copy clear tiles
	if (ThreadIndex < SharedNumTiles)
	{
		RWReflectionTileData[SharedGlobalTileOffset + ThreadIndex] = SharedTileData[ThreadIndex];
	}
	else
	{
#if !SUPPORT_DOWNSAMPLE_FACTOR
		uint LocalThreadIndex = ThreadIndex - SharedNumTiles;

		if (LocalThreadIndex < SharedNumClearTiles)
		{
			RWReflectionClearTileData[SharedGlobalClearTileOffset + LocalThreadIndex] = SharedTileData[THREADGROUP_SIZE * THREADGROUP_SIZE - 1 - LocalThreadIndex];
		}
#endif
	}
}

// Scales the second random number by (1 - GGXSamplingBias) in GenerateRandom
float GGXSamplingBias;
float MaxTraceDistance;
// Roughness and trace distance ranges used to shorten rays when the radiance cache covers the sample
float RadianceCacheMinRoughness;
float RadianceCacheMaxRoughness;
float RadianceCacheMinTraceDistance;
float RadianceCacheMaxTraceDistance;
float RadianceCacheRoughnessFadeLength;

RWTexture2DArray RWRayTraceDistance;
// xyz = ray direction, w = cone angle with the sign carrying the first person flag (see ReflectionGenerateRaysCS)
RWTexture2DArray RWRayBuffer;

/**
 * Returns the 2D random sample used to pick a reflection ray direction for a tracing coordinate.
 * The y component is scaled by (1 - GGXSamplingBias) before returning.
 *
 * @param InReflectionTracingCoord	Tracing coordinate used to index / seed the random sequence.
 */
float2 GenerateRandom(uint2 InReflectionTracingCoord)
{
	// Hardcoded switch between the blue noise path and the PCG + Hammersley path
#define BLUE_NOISE_LUT 1
#if BLUE_NOISE_LUT
	float2 E = BlueNoiseVec2(InReflectionTracingCoord, ReflectionsRayDirectionFrameIndex);
#else
	uint2 RandomSeed = Rand3DPCG16(int3(InReflectionTracingCoord, ReflectionsStateFrameIndexMod8)).xy;
	float2 E = Hammersley16(0, 1, RandomSeed);
#endif

	E.y *= 1 - GGXSamplingBias;

	return E;
}

/**
 * Linear remap of X from [Edge0, Edge1] to [0, 1], saturated.
 * NOTE(review): there is no guard for Edge1 == Edge0 - confirm callers never pass equal edges.
 */
float LinearStep(float Edge0, float Edge1, float X)
{
	return saturate((X - Edge0) / (Edge1 - Edge0));
}

/**
 * Generates one reflection ray per tracing coordinate.
 *
 * For valid coords that need ray traced reflections, writes the ray direction and packed cone angle to
 * RWRayBuffer and the packed trace distance to RWRayTraceDistance. RWDownsampledDepth receives the scene
 * depth, negated when no ray was generated for the coord.
 *
 * @param GroupId			Linear group index, forwarded to GetReflectionTracingScreenCoord.
 * @param GroupThreadId		Linear thread index inside the group, forwarded to GetReflectionTracingScreenCoord.
 */
[numthreads(REFLECTION_THREADGROUP_SIZE_1D, 1, 1)]
void ReflectionGenerateRaysCS(
	uint GroupId : SV_GroupID,
	uint GroupThreadId : SV_GroupThreadID)
{
	FReflectionTileData TileData;
	uint3 ReflectionTracingCoord = GetReflectionTracingScreenCoord(GroupId, GroupThreadId, TileData);

	bool bIsValid = all(ReflectionTracingCoord.xy < ReflectionTracingViewMin + ReflectionTracingViewSize);

	// SvPositionForMaterialCoord is the SvPosition for primary sample, but is PixelCoord for overflow sample
	// The jittered full resolution position is clamped to the last pixel of the view rect
	float2 ScreenJitter = GetScreenTileJitter(ReflectionTracingCoord.xy);
	uint2 SvPositionForMaterialCoord = min(ReflectionTracingCoord.xy * ReflectionDownsampleFactorXY + uint2(ScreenJitter + .5f), View.ViewRectMinAndSize.xy + View.ViewRectMinAndSize.zw - 1);

	if (bIsValid)
	{
		const FLumenMaterialCoord Coord = GetLumenMaterialCoord_Reflection(SvPositionForMaterialCoord, TileData);
		const FLumenMaterialData Material = ApplySmoothBias(ReadMaterialData(Coord, MaxRoughnessToTrace), true /*bTopLayerRoughness*/);

		float DownsampledDepth = Material.SceneDepth;

		if (NeedRayTracedReflections(Material.TopLayerRoughness, Material))
		{
			float2 ScreenUV = (Coord.SvPosition + .5f) * View.BufferSizeAndInvSize.zw;
			float3 TranslatedWorldPosition = GetTranslatedWorldPositionFromScreenUV(ScreenUV, Material.SceneDepth);
			float3 CameraVector = GetCameraVectorFromTranslatedWorldPosition(TranslatedWorldPosition);

			float3 RayDirection;
			float ConeAngle = 0.0f;
			bool bMirrorReflectionDebug = false;
			float3 V = -CameraVector;

			// Use Substrate sampling routine only for opaque surface.
			// When FrontLayerTranslucency is enabled, LumenMaterial is built from custom packed data (i.e., non-Substrate)
#if SUBTRATE_GBUFFER_FORMAT==1 && !FRONT_LAYER_TRANSLUCENCY
			if (!Substrate.bStochasticLighting)
			{
				FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(Coord.SvPosition, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
				const FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
				const FSubstrateIntegrationSettings Settings = InitSubstrateIntegrationSettings(false /*bForceFullyRough*/, Substrate.bRoughDiffuse, Substrate.PeelLayersAboveDepth, Substrate.bRoughnessTracking);

			#if SUBSTRATE_MATERIAL_CLOSURE_COUNT > 1
				// Seek to the requested closure's data before unpacking the BSDF
				if (Coord.ClosureIndex > 0)
				{
					const uint OffsetAddress = UnpackClosureOffsetAtIndex(Substrate.ClosureOffsetTexture[Coord.SvPosition], Coord.ClosureIndex, SubstratePixelHeader.ClosureCount);
					SubstrateSeekClosure(SubstrateAddressing, OffsetAddress);
				}
			#endif

				FSubstrateBSDF BSDF = UnpackSubstrateBSDF(Substrate.MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader);

				// We set slab BSDFs as having a single specular lobe without haziness.
				// This is to ensure the pdf is computed from a single lobe in order to be able to compute a matching cone angle.
				BSDF.SubstrateSetBSDFRoughness(Material.TopLayerRoughness);
				if (SubstrateGetBSDFType(BSDF) == SUBSTRATE_BSDF_TYPE_SLAB)
				{
					BSDF_SETHASHAZINESS(BSDF, 0);
				}

				const FSubstrateBSDFContext Context = SubstrateCreateBSDFContext(SubstratePixelHeader, BSDF, SubstrateAddressing, V);

				const float2 E = GenerateRandom(ReflectionTracingCoord.xy);

				const FBxDFSample Sample = SubstrateImportanceSampleBSDF(Context, E, SHADING_TERM_SPECULAR, Settings);
				RayDirection = normalize(Sample.L);
				// Cone angle is the reciprocal of the sample PDF, with the PDF floored to avoid a division by zero
				ConeAngle = 1.0f / max(Sample.PDF, 0.0001f);
			}
			else
#endif
			// Note: in the Substrate permutation this `if` is the `else` branch of the block above
			if (Material.TopLayerRoughness < 0.001f || bMirrorReflectionDebug)
			{
				// Near-mirror surface: reflect about the shading normal, ConeAngle stays 0 until the clamp below
				RayDirection = reflect(CameraVector, Material.WorldNormal);
			}
			else
			{
				const float2 E = GenerateRandom(ReflectionTracingCoord.xy);

				float3x3 TangentBasis = GetTangentBasis(Material);
				float3 TangentV = mul(TangentBasis, V);

				float2 Alpha = Pow2(Material.TopLayerRoughness).xx;
				if (HasAnisotropy(Material))
				{
					GetAnisotropicRoughness(Alpha.x, Material.Anisotropy, Alpha.x, Alpha.y);
				}

				float4 GGXSample = ImportanceSampleVisibleGGX(E, Alpha, TangentV);
				float3 WorldH = mul(GGXSample.xyz, TangentBasis);
				RayDirection = reflect(CameraVector, WorldH);
				// GGXSample.w is used as the PDF here, mirroring Sample.PDF in the Substrate path
				ConeAngle = 1.0f / max(GGXSample.w, 0.0001f);
			}

			ConeAngle = max(ConeAngle, MinReflectionConeAngle);

			// Encode first person-ness in the sign bit of ConeAngle/RayBuffer.w
			float PackedConeAngle = Material.bIsFirstPerson ? -ConeAngle : ConeAngle;

			RWRayBuffer[ReflectionTracingCoord] = float4(RayDirection, PackedConeAngle);

			float TraceDistance = MaxTraceDistance;
			bool bUseRadianceCache = false;

			#if RADIANCE_CACHE
			{
				// Noise is added to the roughness so the radiance cache threshold is crossed stochastically
				// over a range of RadianceCacheRoughnessFadeLength
				const float RadianceCacheRoughness = Material.TopLayerRoughness + BlueNoiseScalar(ReflectionTracingCoord.xy, ReflectionsStateFrameIndex) * RadianceCacheRoughnessFadeLength;
				if (RadianceCacheRoughness > RadianceCacheMinRoughness)
				{
					float3 WorldPosition = TranslatedWorldPosition - DFHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
					FRadianceCacheCoverage Coverage = GetRadianceCacheCoverageWithUncertainCoverage(WorldPosition, RayDirection, InterleavedGradientNoise(ReflectionTracingCoord.xy, ReflectionsStateFrameIndexMod8));
					bUseRadianceCache = Coverage.bValid;
					if (Coverage.bValid)
					{
						// Rougher samples get shorter rays: alpha 0 -> RadianceCacheMaxTraceDistance, alpha 1 -> RadianceCacheMinTraceDistance
						float RadianceCacheAlpha = LinearStep(RadianceCacheMinRoughness, RadianceCacheMaxRoughness, RadianceCacheRoughness);
						TraceDistance = lerp(RadianceCacheMaxTraceDistance, RadianceCacheMinTraceDistance, RadianceCacheAlpha);
						TraceDistance = clamp(TraceDistance, Coverage.MinTraceDistanceBeforeInterpolation, MaxTraceDistance);
					}
				}
			}
			#endif

			RWRayTraceDistance[ReflectionTracingCoord] = PackRayTraceDistance(TraceDistance, bUseRadianceCache);
		}
		else
		{
			// Store invalid ray in sign bit
			DownsampledDepth *= -1.0f;
		}

		RWDownsampledDepth[ReflectionTracingCoord] = DownsampledDepth;
#if SUBTRATE_GBUFFER_FORMAT==1 && SUBSTRATE_STOCHASTIC_LIGHTING_ALLOWED && !FRONT_LAYER_TRANSLUCENCY
		RWDownsampledClosureIndex[ReflectionTracingCoord.xy] = SubstratePackClosureIndex(Material.ClosureIndex);
#endif
	}
#if SUBTRATE_GBUFFER_FORMAT==1 && SUBSTRATE_MATERIAL_CLOSURE_COUNT > 1
	// Check at the tile level if there are several BSDFs
	// Coords outside the tracing view on closure layers above the first get a depth of 0
	else if (TileData.ClosureIndex > 0)
	{
		RWDownsampledDepth[ReflectionTracingCoord] = 0;
	}
#endif
}

// Only compiled when the entry point name is defined as a macro
#ifdef ReflectionClearNeighborTileCS

RWTexture2DArray RWSpecularAndSecondMoment;
RWTexture2DArray RWSpecularIndirect;
// Radius, in tiles, of the neighborhood searched for used tiles
uint KernelRadiusInTiles;

// Per-group flag: 1 if any tile in the neighborhood is marked as used
groupshared uint SharedbIsTileUsed;

#include "LumenReflectionDenoiserCommon.ush"

/**
 * Clears the outputs of an unused tile when a tile in its neighborhood is used.
 *
 * One thread group per tile and per layer in [1..N]. If the tile itself is unused but any tile within
 * KernelRadiusInTiles of it is used, every pixel of the tile is written with INVALID_LIGHTING in
 * RWSpecularAndSecondMoment and RWSpecularIndirect.
 *
 * @param GroupId			xy is the tile coordinate, z + 1 is the layer index.
 * @param GroupThreadId		Thread index inside the tile; also used to pick which neighbor tile a thread tests.
 * @param GroupThread1D		Flattened thread index (unused by the body).
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ReflectionClearNeighborTileCS(uint3 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID, uint GroupThread1D : SV_GroupIndex)
{
	const uint2 TileCoord = GroupId.xy;
	const uint2 PixelCoord = GroupId.xy * THREADGROUP_SIZE + GroupThreadId;
	const uint LayerIndex = GroupId.z + 1; // Layer0 is correctly cleared by previous passes. This pass only processes layers [1..N]

	// If the tile is used, no need to clear it.
	// The test depends only on GroupId, so every thread of the group takes the same branch ahead of the barriers below
	const bool bCurrentTileUsed = ResolveTileUsed[uint3(TileCoord, LayerIndex)] > 0;
	if (bCurrentTileUsed)
	{
		return;
	}

	if (all(GroupThreadId == 0))
	{
		SharedbIsTileUsed = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// If the current tile is used by any tile in a neighborhood of 'KernelRadiusInTiles' radius, mark the tile to be cleared
	// NOTE(review): one thread tests one neighbor, so the whole neighborhood is only covered when
	// THREADGROUP_SIZE >= KernelRadiusInTiles*2 + 1 - confirm on the dispatch side
	const uint NeighborTileCount1D = KernelRadiusInTiles*2 + 1;
	if (all(GroupThreadId < NeighborTileCount1D))
	{
		const int2 Offset = int2(GroupThreadId) - KernelRadiusInTiles;
		// Coord is unsigned: a neighbor left of / above tile 0 presumably wraps to a large value and fails the bounds test - confirm
		const uint2 Coord = int2(TileCoord) + Offset;
		if (all(Coord < ResolveTileViewportDimensions))
		{
			const uint bUsed = ResolveTileUsed[uint3(Coord, LayerIndex)] > 0 ? 1u : 0u;
			InterlockedMax(SharedbIsTileUsed, bUsed);
		}
	}
	GroupMemoryBarrierWithGroupSync();

	// Clear all the tile's output pixels
	if (SharedbIsTileUsed)
	{
		RWSpecularAndSecondMoment[uint3(PixelCoord, LayerIndex)] = float4(INVALID_LIGHTING, 0);
		RWSpecularIndirect[uint3(PixelCoord, LayerIndex)] = INVALID_LIGHTING;
	}
}

#endif // ReflectionClearNeighborTileCS