// Copyright Epic Games, Inc. All Rights Reserved.

// Compute shaders that compact the still-pending light sample rays into a dense list,
// build indirect dispatch arguments from that list, trace the listed rays
// (screen space or global distance field), and print per-pass trace counts.
// Each entry point is compiled only when a macro with the entry point's name is defined.

#include "../Common.ush"
#define DISTANCE_FIELD_IN_VIEW_UB 1
#include "../BlueNoise.ush"
#include "../DistanceField/GlobalDistanceFieldShared.ush"
#include "../DistanceField/GlobalDistanceFieldUtils.ush"
#define HZB_TRACE_INCLUDE_FULL_RES_DEPTH 1
#define HZB_TRACE_USE_BILINEAR_SAMPLED_DEPTH 1
#include "../Lumen/LumenScreenTracing.ush"
#include "../SceneTexturesCommon.ush"
#include "MegaLights.ush"
#include "MegaLightsRayTracing.ush"
#if HAIR_VOXEL_TRACES
#include "../HairStrands/HairStrandsRaytracing.ush"
#endif

// NOTE(review): the resource declarations in this file carry no element type
// (plain `Buffer`, `RWBuffer`, `Texture2D`, `RWTexture2D`). Usage such as
// InterlockedAdd on RWCompactedTraceTexelAllocator[0] suggests typed (integer)
// resources were intended and the template arguments were lost - confirm against
// the original file before compiling.

// Element [0] is read everywhere below as the number of compacted trace texels.
Buffer CompactedTraceTexelAllocator;

#ifdef InitCompactedTraceTexelIndirectArgsCS
RWBuffer RWIndirectArgs;

/**
 * Writes three indirect dispatch argument entries derived from CompactedTraceTexelAllocator[0].
 * Only the thread with DispatchThreadId == (0,0,0) does any work.
 * The arguments after the slot index are presumably the X/Y/Z group counts - confirm
 * against WriteDispatchIndirectArgs.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void InitCompactedTraceTexelIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (all(DispatchThreadId == 0))
	{
		// NumTracesDiv64
		// Slot 0: trace count divided by 64, rounded up.
		uint NumGroups = (CompactedTraceTexelAllocator[0] + 63) / 64;
		WriteDispatchIndirectArgs(RWIndirectArgs, 0, NumGroups, 1, 1);

		// NumTracesDiv32
		// Slot 1: trace count divided by 32, rounded up.
		NumGroups = (CompactedTraceTexelAllocator[0] + 31) / 32;
		WriteDispatchIndirectArgs(RWIndirectArgs, 1, NumGroups, 1, 1);

		// NumTraces
		// Slot 2: the raw trace count.
		NumGroups = CompactedTraceTexelAllocator[0];
		WriteDispatchIndirectArgs(RWIndirectArgs, 2, NumGroups, 1, 1);
	}
}
#endif

// Sample-space rectangle processed by the compaction pass: [SampleViewMin, SampleViewMin + SampleViewSize).
uint2 SampleViewMin;
uint2 SampleViewSize;
// Compared against DEBUG_MODE_VISUALIZE_TRACING to enable debug line drawing.
uint DebugMode;
// The next four values are forwarded to TraceScreen by the screen space tracing pass.
float MaxHierarchicalScreenTraceIterations;
float MaxTraceDistance;
float RelativeDepthThickness;
// Not referenced by any code visible in this file.
float HistoryDepthTestRelativeThickness;
uint MinimumTracingThreadOccupancy;
Texture2D LightSamples;
Texture2D LightSampleRays;
// Not referenced by any code visible in this file.
Texture2D RWTraceTile;
// Output list of packed sample coordinates and its element counter (element [0]).
RWBuffer RWCompactedTraceTexelData;
RWBuffer RWCompactedTraceTexelAllocator;

#ifdef CompactLightSampleTracesCS
// When non-zero, only rays with bSupportScreenTrace set are compacted.
uint CompactForScreenSpaceTraces;
// Start offset of this group's range in RWCompactedTraceTexelData, written by thread 0.
groupshared uint SharedGlobalTraceTexelStartOffset;
#if WAVE_OPS
// Running total of valid traces in the group, accumulated one wave at a time.
groupshared uint SharedGroupSum;
#else
//@todo - ordered compaction for non-wave ops path
groupshared uint SharedTraceTexelAllocator;
groupshared uint SharedTraceTexels[THREADGROUP_SIZE * THREADGROUP_SIZE];
#endif

/**
 * Appends the packed coordinate of every not-yet-completed light sample ray inside the
 * sample view rectangle to RWCompactedTraceTexelData.
 *
 * Each group counts its valid traces in group shared memory, thread 0 reserves a range
 * with a single atomic add on RWCompactedTraceTexelAllocator[0], and the group then
 * writes its entries into that range.
 *
 * WAVE_OPS path: per-thread offsets come from a wave prefix count plus a per-wave base
 * obtained from SharedGroupSum.
 * Fallback path: entries are staged in SharedTraceTexels through an LDS atomic counter.
 */
#if COMPILER_SUPPORTS_WAVE_SIZE && WAVE_OPS
WAVESIZE(32)
#endif
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void CompactLightSampleTracesCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint LinearThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	// Thread 0 resets the group counter. The preprocessor picks which braced body
	// belongs to this `if`.
	if (LinearThreadIndex == 0)
#if WAVE_OPS
	{
		SharedGroupSum = 0;
	}
#else
	{
		SharedTraceTexelAllocator = 0;
	}

	// Fallback path only: the reset must be visible before the LDS atomics below.
	// The wave path does not touch SharedGroupSum until after the unconditional barrier further down.
	GroupMemoryBarrierWithGroupSync();
#endif

	uint2 SampleCoord = DispatchThreadId.xy + SampleViewMin;
	bool bTraceValid = false;
	uint TraceTexelForThisThread = 0;

	// Threads outside the sample view rectangle contribute nothing.
	if (all(SampleCoord < SampleViewMin + SampleViewSize))
	{
		// #ml_todo: try to preserve z-order here for improved tracing coherency
		FLightSampleRay LightSampleRay = UnpackLightSampleRay(LightSampleRays[SampleCoord]);

		// Keep rays that are still pending and, when compacting for screen traces, support them.
		if (!LightSampleRay.bCompleted && (CompactForScreenSpaceTraces == 0 || LightSampleRay.bSupportScreenTrace))
		{
#if WAVE_OPS
			{
				// Only record the entry here; its offset is computed after the barrier.
				bTraceValid = true;
				TraceTexelForThisThread = PackTraceTexel(SampleCoord);
			}
#else
			{
				// Stage the entry in group shared memory at an atomically allocated slot.
				uint SharedTexelOffset;
				InterlockedAdd(SharedTraceTexelAllocator, 1, SharedTexelOffset);
				SharedTraceTexels[SharedTexelOffset] = PackTraceTexel(SampleCoord);
			}
#endif
		}
	}

	GroupMemoryBarrierWithGroupSync();

#if WAVE_OPS
	{
		const uint LastLaneIndex = WaveGetLaneCount() - 1;
		const uint LaneIndex = WaveGetLaneIndex();
		// Number of valid traces in lanes below this one.
		const uint OffsetInWave = WavePrefixCountBits(bTraceValid);
		uint OffsetInGroup = 0;

		// The last lane's prefix count plus its own bit is the wave total. It reserves
		// that many slots in the group counter and receives the wave's base offset.
		// NOTE(review): relies on lane LastLaneIndex being active in every wave - confirm
		// the group size is always a multiple of the wave size.
		if (LaneIndex == LastLaneIndex)
		{
			const uint ThisWaveSum = OffsetInWave + (bTraceValid ? 1 : 0);
			InterlockedAdd(SharedGroupSum, ThisWaveSum, OffsetInGroup);
		}
		// Broadcast the wave base from the last lane and add this lane's offset within the wave.
		OffsetInGroup = WaveReadLaneAt(OffsetInGroup, LastLaneIndex) + OffsetInWave;

		// All waves must have added to SharedGroupSum before thread 0 reads it.
		GroupMemoryBarrierWithGroupSync();

		// Allocate this group's compacted traces from the global allocator
		if (LinearThreadIndex == 0)
		{
			InterlockedAdd(RWCompactedTraceTexelAllocator[0], SharedGroupSum, SharedGlobalTraceTexelStartOffset);
		}

		// Make the group's global start offset visible to every thread.
		GroupMemoryBarrierWithGroupSync();

		if (bTraceValid)
		{
			RWCompactedTraceTexelData[SharedGlobalTraceTexelStartOffset + OffsetInGroup] = TraceTexelForThisThread;
		}
	}
#else
	{
		// Reserve a range in the global list for all entries staged by this group.
		if (LinearThreadIndex == 0)
		{
			InterlockedAdd(RWCompactedTraceTexelAllocator[0], SharedTraceTexelAllocator, SharedGlobalTraceTexelStartOffset);
		}

		// Make the group's global start offset visible to every thread.
		GroupMemoryBarrierWithGroupSync();

		// The first SharedTraceTexelAllocator threads each copy one staged entry out.
		if (LinearThreadIndex < SharedTraceTexelAllocator)
		{
			RWCompactedTraceTexelData[SharedGlobalTraceTexelStartOffset + LinearThreadIndex] = SharedTraceTexels[LinearThreadIndex];
		}
	}
#endif
}
#endif

// Light sample state that the tracing passes below read and update in place.
RWTexture2D RWLightSamples;
RWTexture2D RWLightSampleRays;
// Compacted list of packed sample coordinates consumed by the tracing passes.
Buffer CompactedTraceTexelData;
Texture2D DownsampledSceneDepth;
Texture2D DownsampledSceneWorldNormal;

#ifdef ScreenSpaceRayTraceLightSamplesCS
/**
 * Traces one compacted light sample ray per thread with TraceScreen.
 *
 * A ray is completed when it hits, or when the traced distance reaches 99.9% of the
 * light sample trace distance. Completed rays have their visibility written.
 * Other rays store the traced distance in RayDistance for the next pass.
 * Rays rejected by the backface test are left untouched.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ScreenSpaceRayTraceLightSamplesCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint TraceTexelIndex = DispatchThreadId.x;

	// Threads past the end of the compacted list do nothing.
	if (TraceTexelIndex < CompactedTraceTexelAllocator[0])
	{
		uint2 SampleCoord = UnpackTraceTexel(CompactedTraceTexelData[TraceTexelIndex]);
		uint2 ScreenCoord = SampleCoordToScreenCoord(SampleCoord);
		uint2 DownsampledScreenCoord = SampleCoordToDownsampledScreenCoord(SampleCoord);
		FShaderPrintContext DebugContext = InitDebugContext(DownsampledScreenCoord, /*bDownsampled*/ true, float2(0.05, 0.9));

		// Reconstruct the ray origin from the pixel centre and the downsampled depth.
		float2 ScreenUV = (ScreenCoord + 0.5f) * View.BufferSizeAndInvSize.zw;
		float SceneDepth = DownsampledSceneDepth[DownsampledScreenCoord];
		float3 SceneWorldNormal = normalize(DecodeNormal(DownsampledSceneWorldNormal[DownsampledScreenCoord]));
		float3 TranslatedWorldPosition = GetTranslatedWorldPositionFromScreenUV(ScreenUV, SceneDepth);

		FLightSample LightSample = UnpackLightSample(RWLightSamples[SampleCoord]);
		FLightSampleRay LightSampleRay = UnpackLightSampleRay(RWLightSampleRays[SampleCoord]);
		FLightSampleTrace LightSampleTrace = GetLightSampleTrace(TranslatedWorldPosition, LightSample.LocalLightIndex, LightSampleRay.UV);

		// Don't trace screen space ray if we want to find how much light enters from the other side of the surface
		const bool bValidRay = !(LightSampleRay.bBackfaceDiffuse && dot(LightSampleTrace.Direction, SceneWorldNormal) < 0.0f);
		if (bValidRay)
		{
			bool bHit;
			bool bUncertain;
			float3 HitUVz;
			float3 LastVisibleHitUVz;
			float HitTileZ;
			// The last five arguments are results written by TraceScreen.
			TraceScreen(
				TranslatedWorldPosition,
				LightSampleTrace.Direction,
				min(LightSampleTrace.Distance, MaxTraceDistance),
				HZBUvFactorAndInvFactor,
				MaxHierarchicalScreenTraceIterations,
				RelativeDepthThickness,
				0,
				MinimumTracingThreadOccupancy,
				bHit,
				bUncertain,
				HitUVz,
				LastVisibleHitUVz,
				HitTileZ);

			// Uncertain hits are not treated as occlusion.
			bHit = bHit && !bUncertain;

#if MEGA_LIGHTS_LIGHTING_CHANNELS
			// Discard the hit when the surface at the hit pixel shares no lighting channel with the light.
			const FForwardLightData ForwardLightData = GetForwardLightData(LightSample.LocalLightIndex, 0);
			uint LightingChannelMask = UnpackLightingChannelMask(ForwardLightData);
			float2 HitScreenCoord = HitUVz.xy * View.BufferSizeAndInvSize.xy;
			const uint PrimLightingChannelMask = GetSceneLightingChannel(HitScreenCoord);
			if((PrimLightingChannelMask & LightingChannelMask) == 0)
			{
				bHit = false;
			}
#endif

			const float RayT = sqrt(ComputeRayHitSqrDistance(TranslatedWorldPosition, HitUVz));

			// Completed on a hit, or when the trace covered 99.9% of the light sample trace distance.
			const bool bCompleted = bHit || (RayT >= LightSampleTrace.Distance * 0.999f);
			if (bCompleted)
			{
				// Mark this ray as completed
				LightSampleRay.bCompleted = true;
				LightSample.bVisible = !bHit;
				RWLightSampleRays[SampleCoord] = PackLightSampleRay(LightSampleRay);
				RWLightSamples[SampleCoord] = PackLightSample(LightSample);
			}
			else
			{
				// Write new ray distance for ray continuation in next pass
				LightSampleRay.RayDistance = RayT;
				RWLightSampleRays[SampleCoord] = PackLightSampleRay(LightSampleRay);
			}

			if (DebugContext.bIsActive && DebugMode == DEBUG_MODE_VISUALIZE_TRACING)
			{
				const FForwardLightData ForwardLightData = GetForwardLightData(LightSample.LocalLightIndex, 0);
				const FDeferredLightData LightData = ConvertToDeferredLight(ForwardLightData);
				float3 RayStart = TranslatedWorldPosition;
				float3 RayEnd = TranslatedWorldPosition + LightSampleTrace.Direction * min(LightSampleTrace.Distance, RayT);

				// Light colour divided by its luminance; hits are drawn black.
				// NOTE(review): no guard against Luminance(...) == 0 is visible here - confirm black lights cannot reach this.
				float4 RayColor = float4(LightData.Color.xyz / Luminance(LightData.Color.xyz), 1.0f);
				if (bHit)
				{
					RayColor.xyz = 0.0f;
				}
				AddLineTWS(DebugContext, RayStart, RayEnd, RayColor, RayColor);
				// Rays that will continue in a later pass get a red end marker.
				AddCrossTWS(DebugContext, RayEnd, 2.0f, bCompleted ? RayColor : ColorRed);
			}
		}
	}
}
#endif

#ifdef SoftwareRayTraceLightSamplesCS
/**
 * Traces one compacted light sample ray per thread against the global distance field,
 * starting at the stored LightSampleRay.RayDistance, with optional hair voxel traversal.
 *
 * On a hit the sample is written back as not visible. Nothing is written on a miss,
 * and this pass never writes the ray state.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SoftwareRayTraceLightSamplesCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint TraceTexelIndex = DispatchThreadId.x;

	// Threads past the end of the compacted list do nothing.
	if (TraceTexelIndex < CompactedTraceTexelAllocator[0])
	{
		uint2 SampleCoord = UnpackTraceTexel(CompactedTraceTexelData[TraceTexelIndex]);
		uint2 ScreenCoord = SampleCoordToScreenCoord(SampleCoord);
		uint2 DownsampledScreenCoord = SampleCoordToDownsampledScreenCoord(SampleCoord);
		FShaderPrintContext DebugContext = InitDebugContext(DownsampledScreenCoord, /*bDownsampled*/ true, float2(0.55, 0.45));

		// Reconstruct the ray origin from the pixel centre and the downsampled depth.
		float2 ScreenUV = (ScreenCoord + 0.5f) * View.BufferSizeAndInvSize.zw;
		float SceneDepth = DownsampledSceneDepth[DownsampledScreenCoord];
		float3 TranslatedWorldPosition = GetTranslatedWorldPositionFromScreenUV(ScreenUV, SceneDepth);

		FLightSample LightSample = UnpackLightSample(RWLightSamples[SampleCoord]);
		// The ray state is read through the read-only LightSampleRays binding in this pass.
		FLightSampleRay LightSampleRay = UnpackLightSampleRay(LightSampleRays[SampleCoord]);
		FLightSampleTrace LightSampleTrace = GetLightSampleTrace(TranslatedWorldPosition, LightSample.LocalLightIndex, LightSampleRay.UV);

		// Trace from the stored ray distance up to the light sample trace distance.
		FGlobalSDFTraceInput TraceInput = SetupGlobalSDFTraceInput(TranslatedWorldPosition, LightSampleTrace.Direction, LightSampleRay.RayDistance, LightSampleTrace.Distance, 1.0f, 1.0f);
		// #ml_todo: setup bias
		TraceInput.VoxelSizeRelativeBias = 0.5f;
		TraceInput.VoxelSizeRelativeRayEndBias = 0.5f;
		TraceInput.DitherScreenCoord = SampleCoord;
		FGlobalSDFTraceResult SDFResult = RayTraceGlobalDistanceField(TraceInput);

#if HAIR_VOXEL_TRACES
		{
			// #ml_todo: replace with spatiotemporal blue noise
			RandomSequence RandSequence;
			RandomSequence_Initialize(RandSequence, SampleCoord, 0, MegaLightsStateFrameIndex, 1);
			// The hair traversal result replaces the distance field hit time.
			SDFResult.HitTime = TraverseHair(ScreenCoord, RandSequence, TraceInput.TranslatedWorldRayStart, TraceInput.WorldRayDirection, SDFResult.HitTime, VirtualVoxel.Raytracing_ShadowOcclusionThreshold);
		}
#endif

		const bool bHit = GlobalSDFTraceResultIsHit(SDFResult);

		// Only occluded samples are written back.
		if (bHit)
		{
			LightSample.bVisible = false;
			RWLightSamples[SampleCoord] = PackLightSample(LightSample);
		}

		if (DebugContext.bIsActive && DebugMode == DEBUG_MODE_VISUALIZE_TRACING)
		{
			const FForwardLightData ForwardLightData = GetForwardLightData(LightSample.LocalLightIndex, 0);
			const FDeferredLightData LightData = ConvertToDeferredLight(ForwardLightData);
			float3 RayStart = TraceInput.TranslatedWorldRayStart + TraceInput.WorldRayDirection * TraceInput.MinTraceDistance;
			float3 RayEnd = TraceInput.TranslatedWorldRayStart + TraceInput.WorldRayDirection * (bHit ? SDFResult.HitTime : TraceInput.MaxTraceDistance);

			// Light colour divided by its luminance; hits are drawn black.
			// NOTE(review): no guard against Luminance(...) == 0 is visible here - confirm black lights cannot reach this.
			float4 RayColor = float4(LightData.Color.xyz / Luminance(LightData.Color.xyz), 1.0f);
			if (bHit)
			{
				RayColor.xyz = 0.0f;
			}
			AddLineTWS(DebugContext, RayStart, RayEnd, RayColor);
			AddCrossTWS(DebugContext, RayEnd, 2.0f, RayColor);
		}
	}
}
#endif

#ifdef PrintTraceStatsCS
// One indirect-args buffer per tracing pass whose count is printed below.
Buffer VSMIndirectArgs;
Buffer ScreenIndirectArgs;
Buffer WorldIndirectArgs;
Buffer WorldMaterialRetraceIndirectArgs;
Buffer VolumeIndirectArgs;
Buffer TranslucencyVolume0IndirectArgs;
Buffer TranslucencyVolume1IndirectArgs;

/**
 * Prints one labelled value per tracing pass with ShaderPrint, from a single thread.
 *
 * Every value is read at index DISPATCH_INDIRECT_UINT_COUNT * 2 + 0 of the pass's buffer.
 * This is presumably the first component of args slot 2, which
 * InitCompactedTraceTexelIndirectArgsCS fills with the raw trace count - confirm
 * against the WriteDispatchIndirectArgs layout.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void PrintTraceStatsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (all(DispatchThreadId == 0))
	{
		FShaderPrintContext Context = InitShaderPrintContext(true, float2(0.8, 0.3));

		Print(Context, TEXT("Trace Stats: "), FontTitle);
		Newline(Context);

		Print(Context, TEXT("VSM: "));
		Print(Context, VSMIndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);

		Print(Context, TEXT("Screen: "));
		Print(Context, ScreenIndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);

		Print(Context, TEXT("World: "));
		Print(Context, WorldIndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);

		Print(Context, TEXT("World (material retrace): "));
		Print(Context, WorldMaterialRetraceIndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);

		Print(Context, TEXT("Volume: "));
		Print(Context, VolumeIndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);

		Print(Context, TEXT("Translucency Volume Cascade 0: "));
		Print(Context, TranslucencyVolume0IndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);

		Print(Context, TEXT("Translucency Volume Cascade 1: "));
		Print(Context, TranslucencyVolume1IndirectArgs[DISPATCH_INDIRECT_UINT_COUNT * 2 + 0]);
		Newline(Context);
	}
}
#endif // PrintTraceStatsCS