// Copyright Epic Games, Inc. All Rights Reserved.

#include "../Common.ush"
#include "../ShaderPrint.ush"
#include "LumenRadianceCacheCommon.ush"
#include "LumenRadianceCacheMarkCommon.ush"
#include "LumenRadianceCacheUpdate.ush"
#include "LumenRadianceCacheTracingCommon.ush"

// NOTE(review): the resource declarations below carry no element type in this view
// (e.g. RWStructuredBuffer<...>). They are reproduced as found; confirm the element
// types against the authoritative copy of this file.

float FirstClipmapWorldExtentRcp;
uint FrameNumber;

// Per-bucket accumulated probe trace cost, indexed by priority bucket.
RWStructuredBuffer RWPriorityHistogram;
StructuredBuffer PriorityHistogram;

// [0] = last priority bucket which is allowed to be updated this frame.
RWStructuredBuffer RWMaxUpdateBucket;
StructuredBuffer MaxUpdateBucket;

// [0] = trace cost which may still be spent on probes from the max update bucket.
RWStructuredBuffer RWMaxTracesFromMaxUpdateBucket;
StructuredBuffer MaxTracesFromMaxUpdateBucket;

// [0] = total trace cost of all probes selected for tracing,
// [1] = trace cost requested by probes from the max update bucket.
RWStructuredBuffer RWProbesToUpdateTraceCost;
StructuredBuffer ProbesToUpdateTraceCost;

RWStructuredBuffer RWMinNewProbeTraceCost;

RWStructuredBuffer RWProbeLastTracedFrame;
RWBuffer RWProbeLastUsedFrame;
Buffer ProbeLastUsedFrame;

uint MaxNumProbes;

RWStructuredBuffer RWProbeWorldOffset;

RWBuffer RWProbeFreeListAllocator;
RWBuffer RWProbeFreeList;
RWBuffer RWProbeAllocator;
Buffer ProbeFreeList;

RWBuffer RWProbeTraceAllocator;
Buffer ProbeTraceAllocator;
RWBuffer RWProbeTraceData;

#ifdef ClearRadianceCacheUpdateResourcesCS

/**
 * Batch clear all resources required for the subsequent radiance cache probe update pass.
 * Each thread clears the element at its own index in every buffer which is large enough to contain it.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearRadianceCacheUpdateResourcesCS(
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint ElementIndex = DispatchThreadId.x;

	// Single element counters
	if (ElementIndex < 1)
	{
		RWProbeTraceAllocator[ElementIndex] = 0;
		RWMaxUpdateBucket[ElementIndex] = 0;
		RWMaxTracesFromMaxUpdateBucket[ElementIndex] = 0;
		RWMinNewProbeTraceCost[ElementIndex] = 0;
	}

	if (ElementIndex < PROBES_TO_UPDATE_TRACE_COST_STRIDE)
	{
		RWProbesToUpdateTraceCost[ElementIndex] = 0;
	}

	if (ElementIndex < PRIORITY_HISTOGRAM_SIZE)
	{
		RWPriorityHistogram[ElementIndex] = 0;
	}
}

#endif

/**
 * Map a probe to its update priority histogram bucket. Lower bucket index means higher update priority.
 *
 * @param LastTracedFrameIndex	Frame on which the probe was last traced, or PROBE_FRAME_INDEX_NEVER_TRACED
 * @param LastUsedFrameIndex	Frame on which the probe was last used
 * @param ClipmapIndex			Clipmap of the probe, importance is divided by (ClipmapIndex + 1)
 * @return 0 for never traced probes, otherwise a bucket in [1; PRIORITY_HISTOGRAM_SIZE - 1]
 */
uint GetPriorityBucketIndex(uint LastTracedFrameIndex, uint LastUsedFrameIndex, uint ClipmapIndex)
{
	uint BucketIndex = 0;

	if (LastTracedFrameIndex == PROBE_FRAME_INDEX_NEVER_TRACED)
	{
		// Special case for probes which were created this frame. Places those in the most important bucket 0.
		BucketIndex = 0;
	}
	else
	{
		// [1;N]
		uint FramesBetweenTracedAndUsed = LastUsedFrameIndex > LastTracedFrameIndex ? LastUsedFrameIndex - LastTracedFrameIndex : 1;
		float UpdateImportance = FramesBetweenTracedAndUsed / (ClipmapIndex + 1.0f);

		// Remap from [1;N] to log2 histogram buckets, but don't write anything to special bucket 0 used for new probes without any data
		BucketIndex = PRIORITY_HISTOGRAM_SIZE - 1 - clamp(log2(UpdateImportance), 0, PRIORITY_HISTOGRAM_SIZE - 2);
	}

	return BucketIndex;
}

#ifdef AllocateUsedProbesCS

/**
 * Allocate used probes and build their update priority histogram.
 * One thread per indirection texture cell; clipmaps are laid out next to each other along X.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateUsedProbesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
	uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.y, DispatchThreadId.z);

	if (ClipmapIndex < NumRadianceProbeClipmaps && all(ProbeCoord < RadianceProbeClipmapResolution))
	{
		uint ProbeUsedMarker = RWRadianceProbeIndirectionTexture[DispatchThreadId];

		if (ProbeUsedMarker != INVALID_PROBE_INDEX)
		{
			uint ProbeIndex = INVALID_PROBE_INDEX;
			uint LastTracedFrameIndex = 0;
			uint LastUsedFrameIndex = 0;

			if (ProbeUsedMarker == USED_PROBE_INDEX)
			{
				// Allocate new probe
				#if PERSISTENT_CACHE
					// Try to pop from the free list first and fall back to the linear allocator when it's empty
					int NumFree;
					InterlockedAdd(RWProbeFreeListAllocator[0], -1, NumFree);

					if (NumFree > 0)
					{
						ProbeIndex = ProbeFreeList[NumFree - 1];
					}
					else
					{
						InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
					}
				#else
					InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
				#endif

				if (ProbeIndex < MaxNumProbes)
				{
					RWProbeLastTracedFrame[ProbeIndex] = PROBE_FRAME_INDEX_NEVER_TRACED;
					RWProbeLastUsedFrame[ProbeIndex] = FrameNumber;

					LastTracedFrameIndex = PROBE_FRAME_INDEX_NEVER_TRACED;
					LastUsedFrameIndex = FrameNumber;
				}
			}
			else
			{
				// Re-trace existing probe
				ProbeIndex = ProbeUsedMarker;
				LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
				LastUsedFrameIndex = RWProbeLastUsedFrame[ProbeIndex];
			}

			if (ProbeIndex < MaxNumProbes)
			{
				// Update histogram
				const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ClipmapIndex);

				const float3 ProbeWorldPosition = GetProbeTranslatedWorldPositionNoOffset(ProbeCoord, ClipmapIndex) - DFHackToFloat(PrimaryView.PreViewTranslation);
				uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);

				InterlockedAdd(RWPriorityHistogram[PriorityBucketIndex], ProbeTraceCost);

				if (PriorityBucketIndex == 0)
				{
					// Cost of updating all probes in the first bucket if they would be all force downsampled
					InterlockedAdd(RWMinNewProbeTraceCost[0], PROBE_TRACE_COST_DOWNSAMPLED);
				}

				RWRadianceProbeIndirectionTexture[DispatchThreadId] = ProbeIndex;
			}
			else
			{
				// Allocation failed (out of probes), so this cell has no probe this frame
				RWRadianceProbeIndirectionTexture[DispatchThreadId] = INVALID_PROBE_INDEX;
			}
		}
	}
}

#endif

#ifdef SelectMaxPriorityBucketCS

/**
 * Compute max bucket histogram to update and how many tiles should be updated in that last bucket.
 * Only the first thread of the dispatch does any work.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SelectMaxPriorityBucketCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (all(DispatchThreadId == 0))
	{
		uint NumTracesBudget = GetProbeTraceCostBudget();

		uint TraceSum = 0;
		uint PriorityBucketIndex = 0;
		uint NumTracesFromPriorityBucket = NumTracesBudget;

		// Accumulate buckets in priority order until the next one would reach the budget.
		// If the budget is never reached, PriorityBucketIndex ends at PRIORITY_HISTOGRAM_SIZE.
		for (; PriorityBucketIndex < PRIORITY_HISTOGRAM_SIZE; ++PriorityBucketIndex)
		{
			uint TilesPerBucket = PriorityHistogram[PriorityBucketIndex];

			if (TraceSum + TilesPerBucket >= NumTracesBudget)
			{
				// Remaining budget which can be spent inside of this bucket
				NumTracesFromPriorityBucket = NumTracesBudget - TraceSum;
				break;
			}

			TraceSum += TilesPerBucket;
		}

		RWMaxUpdateBucket[0] = PriorityBucketIndex;
		RWMaxTracesFromMaxUpdateBucket[0] = NumTracesFromPriorityBucket;
	}
}

#endif

#ifdef AllocateProbeTracesCS

/**
 * Iterate again over all probes and update them based on the histogram priority max update bucket.
 * Selected probes get a slot in RWProbeTraceData and are stamped as traced on the current frame.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateProbeTracesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
	uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.y, DispatchThreadId.z);

	if (ClipmapIndex < NumRadianceProbeClipmaps && all(ProbeCoord < RadianceProbeClipmapResolution))
	{
		const uint ProbeIndex = RadianceProbeIndirectionTexture[DispatchThreadId];
		float3 ProbeWorldPosition = GetProbeTranslatedWorldPositionNoOffset(ProbeCoord, ClipmapIndex) - DFHackToFloat(PrimaryView.PreViewTranslation);

		if (ProbeIndex < MaxNumProbes)
		{
			const uint MaxUpdateBucketIndex = MaxUpdateBucket[0];
			const uint LocalMaxTracesFromMaxUpdateBucket = MaxTracesFromMaxUpdateBucket[0];

			const uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);
			const uint LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
			const uint LastUsedFrameIndex = ProbeLastUsedFrame[ProbeIndex];

			// Update everything up to the max selected priority bucket
			const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ClipmapIndex);
			bool bTraceProbe = PriorityBucketIndex <= MaxUpdateBucketIndex;
			bool bForceDownsampleProbe = false;

			// Can't trace more than MaxTracesFromMaxUpdateBucket from the last bucket
			if (bTraceProbe)
			{
				if (PriorityBucketIndex == MaxUpdateBucketIndex && MaxUpdateBucketIndex > 0)
				{
					// Accumulate allocated probe trace cost for the last bucket
					uint ProbeTraceAllocatedFromMaxUpdateBucket = 0;
					InterlockedAdd(RWProbesToUpdateTraceCost[1], ProbeTraceCost, ProbeTraceAllocatedFromMaxUpdateBucket);

					if (ProbeTraceAllocatedFromMaxUpdateBucket + ProbeTraceCost > LocalMaxTracesFromMaxUpdateBucket)
					{
						bTraceProbe = false;
					}
				}
				else if (PriorityBucketIndex == 0)
				{
					// Special case for the first bucket.
					// It contains probes without valid data so all those probes need to be updated.
					uint PrevMinNewProbeTraceCost = 0;
					InterlockedAdd(RWMinNewProbeTraceCost[0], ProbeTraceCost - PROBE_TRACE_COST_DOWNSAMPLED, PrevMinNewProbeTraceCost);

					// Downsample anything exceeding allowed budget in order to keep performance reasonable
					if (PrevMinNewProbeTraceCost + ProbeTraceCost - PROBE_TRACE_COST_DOWNSAMPLED > GetProbeTraceCostBudget())
					{
						bForceDownsampleProbe = true;
					}
				}
			}

			if (bTraceProbe)
			{
				InterlockedAdd(RWProbesToUpdateTraceCost[0], ProbeTraceCost);

				uint TraceIndex;
				InterlockedAdd(RWProbeTraceAllocator[0], 1, TraceIndex);

				RWProbeTraceData[TraceIndex] = PackProbeTraceData(ProbeWorldPosition, ClipmapIndex, ProbeIndex, bForceDownsampleProbe);
				RWProbeLastTracedFrame[ProbeIndex] = FrameNumber;
			}
		}
	}
}

#endif

#ifdef ClearProbeFreeListCS

/**
 * Reset the probe free list allocator and zero the per probe free list, last used frame and world offset entries.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearProbeFreeListCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = DispatchThreadId.x;

	if (ThreadIndex == 0)
	{
		RWProbeFreeListAllocator[0] = 0;
	}

	if (ThreadIndex < MaxNumProbes)
	{
		RWProbeFreeList[ThreadIndex] = 0;
		RWProbeLastUsedFrame[ThreadIndex] = 0;
		RWProbeWorldOffset[ThreadIndex] = 0;
	}
}

#endif

Buffer ProbeAllocator;
Texture3D LastFrameRadianceProbeIndirectionTexture;

// xyz = last frame clipmap corner in translated world space, w = last frame clipmap cell size
float4 LastFrameClipmapCornerTWSAndCellSize[RADIANCE_PROBE_MAX_CLIPMAPS];
uint NumFramesToKeepCachedProbes;

/**
 * Translated world space position of the center of a probe cell, using last frame's clipmap placement.
 *
 * @param LastFrameProbeCoord	Probe cell coordinate inside of the last frame clipmap
 * @param ClipmapIndex			Index into LastFrameClipmapCornerTWSAndCellSize
 */
float3 GetLastFrameProbeTranslatedWorldPosition(uint3 LastFrameProbeCoord, uint ClipmapIndex)
{
	float3 CornerTranslatedWorldSpace = LastFrameClipmapCornerTWSAndCellSize[ClipmapIndex].xyz;
	float CellSize = LastFrameClipmapCornerTWSAndCellSize[ClipmapIndex].w;

	// + 0.5 to address the cell center
	const float3 CornerToProbe = (LastFrameProbeCoord + 0.5) * CellSize;
	return CornerTranslatedWorldSpace + CornerToProbe;
}

#ifdef UpdateCacheForUsedProbesCS

/**
 * Carry last frame probes over into the current frame indirection texture.
 * A probe is reused if its cell is still inside of the current clipmap and it's either marked as used
 * this frame or was used recently enough. Every other probe is pushed onto the free list.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void UpdateCacheForUsedProbesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
	uint3 LastFrameProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.yz);

	if (ClipmapIndex < NumRadianceProbeClipmaps && all(LastFrameProbeCoord < RadianceProbeClipmapResolution))
	{
		uint3 LastFrameProbeIndirectionTextureCoord = uint3(LastFrameProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, LastFrameProbeCoord.yz);
		uint LastFrameProbeIndex = LastFrameRadianceProbeIndirectionTexture.Load(uint4(LastFrameProbeIndirectionTextureCoord, 0));

		if (LastFrameProbeIndex != INVALID_PROBE_INDEX)
		{
			// Reproject last frame probe into the current frame clipmap
			float3 ProbeTranslatedWorldPosition = GetLastFrameProbeTranslatedWorldPosition(LastFrameProbeCoord, ClipmapIndex);
			int3 ProbeCoord = GetRadianceProbeCoord(ProbeTranslatedWorldPosition - DFHackToFloat(PrimaryView.PrevPreViewTranslation), ClipmapIndex);

			bool bReused = false;

			if (all(ProbeCoord >= 0) && all(ProbeCoord < (int3)RadianceProbeClipmapResolution))
			{
				uint3 ProbeIndirectionTextureCoord = uint3(ProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, ProbeCoord.yz);
				uint ProbeUsedMarker = RWRadianceProbeIndirectionTexture[ProbeIndirectionTextureCoord];
				uint LastUsedFrameNumber = RWProbeLastUsedFrame[LastFrameProbeIndex];

				uint NumFramesToKeepCachedProbesHeuristic = NumFramesToKeepCachedProbes;

				// Make sure that it's possible to allocate at least NumProbesToTraceBudget probes this frame
				const uint NumFreeProbes = MaxNumProbes - ProbeAllocator[0] + RWProbeFreeListAllocator[0];
				if (NumFreeProbes < NumProbesToTraceBudget)
				{
					NumFramesToKeepCachedProbesHeuristic = min(2, NumFramesToKeepCachedProbes);
				}

				// Compare against the heuristic value, so that unused probes are released sooner when running out of free probes
				if (ProbeUsedMarker == USED_PROBE_INDEX
					|| FrameNumber - LastUsedFrameNumber < NumFramesToKeepCachedProbesHeuristic)
				{
					bReused = true;

					if (ProbeUsedMarker == USED_PROBE_INDEX)
					{
						RWProbeLastUsedFrame[LastFrameProbeIndex] = FrameNumber;
					}

					RWRadianceProbeIndirectionTexture[ProbeIndirectionTextureCoord] = LastFrameProbeIndex;
				}
			}

			if (!bReused)
			{
				// Return probe to the free list
				int FreeIndex;
				InterlockedAdd(RWProbeFreeListAllocator[0], 1, FreeIndex);
				RWProbeFreeList[FreeIndex] = LastFrameProbeIndex;
			}
		}
	}
}

#endif