396 lines
13 KiB
HLSL
396 lines
13 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "../Common.ush"
|
|
#include "../ShaderPrint.ush"
|
|
#include "LumenRadianceCacheCommon.ush"
|
|
#include "LumenRadianceCacheMarkCommon.ush"
|
|
#include "LumenRadianceCacheUpdate.ush"
|
|
#include "LumenRadianceCacheTracingCommon.ush"
|
|
|
|
// ---------------------------------------------------------------------------
// Loose parameters
// ---------------------------------------------------------------------------
float FirstClipmapWorldExtentRcp;
// Current frame index, stamped into the per-probe last traced / last used frame buffers
uint FrameNumber;

// ---------------------------------------------------------------------------
// Update prioritization
// RW* declarations are the writable views used by the pass producing the data, the
// non-RW declarations are read-only views (presumably of the same resources) for consumers.
// ---------------------------------------------------------------------------
// Probe trace cost accumulated per priority bucket
RWStructuredBuffer<uint> RWPriorityHistogram;
StructuredBuffer<uint> PriorityHistogram;
// [0] Last priority bucket which gets updated this frame
RWStructuredBuffer<uint> RWMaxUpdateBucket;
StructuredBuffer<uint> MaxUpdateBucket;
// [0] Trace cost which can still be spent inside of the last updated bucket
RWStructuredBuffer<uint> RWMaxTracesFromMaxUpdateBucket;
StructuredBuffer<uint> MaxTracesFromMaxUpdateBucket;
// [0] Trace cost of all probes selected for update, [1] trace cost requested from the last updated bucket
RWStructuredBuffer<uint> RWProbesToUpdateTraceCost;
StructuredBuffer<uint> ProbesToUpdateTraceCost;
// [0] Running trace cost of the new probes placed into bucket 0
RWStructuredBuffer<uint> RWMinNewProbeTraceCost;

// ---------------------------------------------------------------------------
// Per probe frame stamps, indexed by probe index
// ---------------------------------------------------------------------------
RWStructuredBuffer<uint> RWProbeLastTracedFrame;
RWBuffer<uint> RWProbeLastUsedFrame;
Buffer<uint> ProbeLastUsedFrame;

// ---------------------------------------------------------------------------
// Probe allocation
// ---------------------------------------------------------------------------
// Probe indices >= MaxNumProbes are treated as failed allocations
uint MaxNumProbes;
RWStructuredBuffer<float4> RWProbeWorldOffset;
// [0] Number of entries inside of the free list. Signed, as it is decremented before checking whether an entry was available
RWBuffer<int> RWProbeFreeListAllocator;
RWBuffer<uint> RWProbeFreeList;
// [0] Next never used probe index
RWBuffer<uint> RWProbeAllocator;
Buffer<uint> ProbeFreeList;

// ---------------------------------------------------------------------------
// Probe trace list
// ---------------------------------------------------------------------------
// [0] Number of probes queued for tracing
RWBuffer<uint> RWProbeTraceAllocator;
Buffer<uint> ProbeTraceAllocator;
// One packed entry per queued probe, see PackProbeTraceData
RWBuffer<float4> RWProbeTraceData;
|
|
|
|
#ifdef ClearRadianceCacheUpdateResourcesCS
|
|
|
|
/**
 * Zeroes every counter and accumulator consumed by the radiance cache probe update passes.
 * All buffers are cleared by a single 1D dispatch: each thread resets the elements matching
 * its own index, as long as this index is inside of the given buffer.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearRadianceCacheUpdateResourcesCS(
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint ElementIndex = DispatchThreadId.x;

	// Per bucket trace cost histogram
	if (ElementIndex < PRIORITY_HISTOGRAM_SIZE)
	{
		RWPriorityHistogram[ElementIndex] = 0;
	}

	// Trace cost accumulators
	if (ElementIndex < PROBES_TO_UPDATE_TRACE_COST_STRIDE)
	{
		RWProbesToUpdateTraceCost[ElementIndex] = 0;
	}

	// Single element counters are reset by the very first thread only
	if (ElementIndex == 0)
	{
		RWProbeTraceAllocator[0] = 0;
		RWMaxUpdateBucket[0] = 0;
		RWMaxTracesFromMaxUpdateBucket[0] = 0;
		RWMinNewProbeTraceCost[0] = 0;
	}
}
|
|
|
|
#endif
|
|
|
|
/**
 * Maps the trace/use history of a probe onto an update priority histogram bucket.
 * Bucket 0 is the most important one and buckets are processed in ascending order.
 *
 * @param LastTracedFrameIndex	Frame when the probe was traced for the last time, or PROBE_FRAME_INDEX_NEVER_TRACED
 * @param LastUsedFrameIndex	Frame when the probe was used for the last time
 * @param ClipmapIndex			Clipmap of the probe, larger indices lower the resulting importance
 * @return Bucket index in [0; PRIORITY_HISTOGRAM_SIZE - 1]
 */
uint GetPriorityBucketIndex(uint LastTracedFrameIndex, uint LastUsedFrameIndex, uint ClipmapIndex)
{
	// Probes without any traced data are always placed into the special bucket 0
	if (LastTracedFrameIndex == PROBE_FRAME_INDEX_NEVER_TRACED)
	{
		return 0;
	}

	// Distance in frames between the last trace and the last use, in [1;N]
	uint FrameDistance = 1;
	if (LastUsedFrameIndex > LastTracedFrameIndex)
	{
		FrameDistance = LastUsedFrameIndex - LastTracedFrameIndex;
	}

	const float Importance = FrameDistance / (ClipmapIndex + 1.0f);

	// Log2 remap of [1;N] onto buckets [1; PRIORITY_HISTOGRAM_SIZE - 1], bucket 0 stays reserved for new probes.
	// The subtraction is evaluated in float and only the final result is truncated to uint.
	const uint BucketIndex = PRIORITY_HISTOGRAM_SIZE - 1 - clamp(log2(Importance), 0, PRIORITY_HISTOGRAM_SIZE - 2);
	return BucketIndex;
}
|
|
|
|
#ifdef AllocateUsedProbesCS
|
|
|
|
/**
 * Assigns a probe index to every indirection texture cell which was marked as used
 * and accumulates the trace cost of those probes into the update priority histogram.
 * One thread handles one cell. Cells without a valid probe after this pass are set to INVALID_PROBE_INDEX.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateUsedProbesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// Clipmaps are placed next to each other along X inside of the indirection texture
	const uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
	const uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.yz);

	if (ClipmapIndex >= NumRadianceProbeClipmaps || any(ProbeCoord >= RadianceProbeClipmapResolution))
	{
		return;
	}

	const uint ProbeUsedMarker = RWRadianceProbeIndirectionTexture[DispatchThreadId];

	// Nothing to do for cells which aren't used this frame
	if (ProbeUsedMarker == INVALID_PROBE_INDEX)
	{
		return;
	}

	uint ProbeIndex = INVALID_PROBE_INDEX;
	uint LastTracedFrameIndex = 0;
	uint LastUsedFrameIndex = 0;

	if (ProbeUsedMarker != USED_PROBE_INDEX)
	{
		// Cell already points to an existing probe, fetch its history
		ProbeIndex = ProbeUsedMarker;
		LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
		LastUsedFrameIndex = RWProbeLastUsedFrame[ProbeIndex];
	}
	else
	{
		// Cell is used, but has no probe yet. Allocate a new one
#if PERSISTENT_CACHE
		// Try to pop from the free list first and fall back to a never used probe index
		int PrevNumFree;
		InterlockedAdd(RWProbeFreeListAllocator[0], -1, PrevNumFree);

		if (PrevNumFree > 0)
		{
			ProbeIndex = ProbeFreeList[PrevNumFree - 1];
		}
		else
		{
			InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
		}
#else
		InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
#endif

		// Allocation fails when running out of probes
		if (ProbeIndex < MaxNumProbes)
		{
			LastTracedFrameIndex = PROBE_FRAME_INDEX_NEVER_TRACED;
			LastUsedFrameIndex = FrameNumber;

			RWProbeLastTracedFrame[ProbeIndex] = LastTracedFrameIndex;
			RWProbeLastUsedFrame[ProbeIndex] = LastUsedFrameIndex;
		}
	}

	if (ProbeIndex >= MaxNumProbes)
	{
		// No valid probe for this cell
		RWRadianceProbeIndirectionTexture[DispatchThreadId] = INVALID_PROBE_INDEX;
		return;
	}

	// Add probe trace cost to its priority bucket
	const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ClipmapIndex);
	const float3 ProbeWorldPosition = GetProbeTranslatedWorldPositionNoOffset(ProbeCoord, ClipmapIndex) - DFHackToFloat(PrimaryView.PreViewTranslation);
	const uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);
	InterlockedAdd(RWPriorityHistogram[PriorityBucketIndex], ProbeTraceCost);

	if (PriorityBucketIndex == 0)
	{
		// Track how much it would cost to update the entire first bucket with all of its probes force downsampled
		InterlockedAdd(RWMinNewProbeTraceCost[0], PROBE_TRACE_COST_DOWNSAMPLED);
	}

	RWRadianceProbeIndirectionTexture[DispatchThreadId] = ProbeIndex;
}
|
|
|
|
#endif
|
|
|
|
#ifdef SelectMaxPriorityBucketCS
|
|
|
|
/**
 * Walks the priority histogram from the most important bucket and finds the last bucket
 * which can be updated within the trace cost budget, together with the trace cost which
 * is left for that last bucket. Only a single thread of the dispatch does the work.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SelectMaxPriorityBucketCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	if (any(DispatchThreadId != 0))
	{
		return;
	}

	const uint TraceCostBudget = GetProbeTraceCostBudget();

	uint BucketIndex = 0;
	uint AccumulatedTraceCost = 0;
	// Stays at the full budget if the entire histogram fits into the budget
	uint TraceCostFromLastBucket = TraceCostBudget;
	bool bBudgetReached = false;

	while (!bBudgetReached && BucketIndex < PRIORITY_HISTOGRAM_SIZE)
	{
		const uint BucketTraceCost = PriorityHistogram[BucketIndex];

		if (AccumulatedTraceCost + BucketTraceCost >= TraceCostBudget)
		{
			// This bucket exhausts the budget, it can only use what is left from the previous buckets
			TraceCostFromLastBucket = TraceCostBudget - AccumulatedTraceCost;
			bBudgetReached = true;
		}
		else
		{
			AccumulatedTraceCost += BucketTraceCost;
			++BucketIndex;
		}
	}

	// BucketIndex is PRIORITY_HISTOGRAM_SIZE when the budget was never reached
	RWMaxUpdateBucket[0] = BucketIndex;
	RWMaxTracesFromMaxUpdateBucket[0] = TraceCostFromLastBucket;
}
|
|
|
|
#endif
|
|
|
|
#ifdef AllocateProbeTracesCS
|
|
|
|
/**
 * Second pass over all indirection texture cells. Queues a trace for every probe whose
 * priority bucket is within the max update bucket selected from the histogram.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateProbeTracesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// Clipmaps are placed next to each other along X inside of the indirection texture
	const uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
	const uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.yz);

	if (ClipmapIndex >= NumRadianceProbeClipmaps || any(ProbeCoord >= RadianceProbeClipmapResolution))
	{
		return;
	}

	const uint ProbeIndex = RadianceProbeIndirectionTexture[DispatchThreadId];

	// Skip cells without a valid probe
	if (ProbeIndex >= MaxNumProbes)
	{
		return;
	}

	const float3 ProbeWorldPosition = GetProbeTranslatedWorldPositionNoOffset(ProbeCoord, ClipmapIndex) - DFHackToFloat(PrimaryView.PreViewTranslation);
	const uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);

	const uint PriorityBucketIndex = GetPriorityBucketIndex(RWProbeLastTracedFrame[ProbeIndex], ProbeLastUsedFrame[ProbeIndex], ClipmapIndex);
	const uint MaxUpdateBucketIndex = MaxUpdateBucket[0];

	// Only buckets up to the selected max priority bucket are updated
	if (PriorityBucketIndex > MaxUpdateBucketIndex)
	{
		return;
	}

	bool bForceDownsampleProbe = false;

	if (PriorityBucketIndex == MaxUpdateBucketIndex && MaxUpdateBucketIndex > 0)
	{
		// Last updated bucket is limited to MaxTracesFromMaxUpdateBucket.
		// Reserve the cost of this probe and drop it when the reservation doesn't fit anymore.
		uint PrevLastBucketTraceCost = 0;
		InterlockedAdd(RWProbesToUpdateTraceCost[1], ProbeTraceCost, PrevLastBucketTraceCost);

		if (PrevLastBucketTraceCost + ProbeTraceCost > MaxTracesFromMaxUpdateBucket[0])
		{
			return;
		}
	}
	else if (PriorityBucketIndex == 0)
	{
		// First bucket holds probes without any valid data, so all of them have to be traced.
		// Replace the downsampled cost accumulated for this probe with its real cost...
		uint PrevNewProbeTraceCost = 0;
		InterlockedAdd(RWMinNewProbeTraceCost[0], ProbeTraceCost - PROBE_TRACE_COST_DOWNSAMPLED, PrevNewProbeTraceCost);

		// ...and fall back to a downsampled probe once this exceeds the budget in order to keep performance reasonable
		bForceDownsampleProbe = PrevNewProbeTraceCost + ProbeTraceCost - PROBE_TRACE_COST_DOWNSAMPLED > GetProbeTraceCostBudget();
	}

	// Queue the probe for tracing
	InterlockedAdd(RWProbesToUpdateTraceCost[0], ProbeTraceCost);

	uint TraceIndex;
	InterlockedAdd(RWProbeTraceAllocator[0], 1, TraceIndex);

	RWProbeTraceData[TraceIndex] = PackProbeTraceData(ProbeWorldPosition, ClipmapIndex, ProbeIndex, bForceDownsampleProbe);
	RWProbeLastTracedFrame[ProbeIndex] = FrameNumber;
}
|
|
|
|
#endif
|
|
|
|
#ifdef ClearProbeFreeListCS
|
|
|
|
/**
 * Resets the probe free list together with the per probe last used frame and world offset.
 * One thread per probe, the first thread additionally resets the free list allocator.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearProbeFreeListCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint ProbeIndex = DispatchThreadId.x;

	// Per probe data
	if (ProbeIndex < MaxNumProbes)
	{
		RWProbeFreeList[ProbeIndex] = 0;
		RWProbeLastUsedFrame[ProbeIndex] = 0;
		RWProbeWorldOffset[ProbeIndex] = float4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	// Free list starts empty
	if (ProbeIndex == 0)
	{
		RWProbeFreeListAllocator[0] = 0;
	}
}
|
|
|
|
#endif
|
|
|
|
// ---------------------------------------------------------------------------
// Inputs for carrying probes over from the last frame
// ---------------------------------------------------------------------------
// [0] Read-only view of the probe allocator counter
Buffer<uint> ProbeAllocator;
// Probe index per clipmap cell, as it was at the end of the last frame
Texture3D<uint> LastFrameRadianceProbeIndirectionTexture;
// Per clipmap: xyz - last frame clipmap corner in translated world space, w - last frame probe cell size
float4 LastFrameClipmapCornerTWSAndCellSize[RADIANCE_PROBE_MAX_CLIPMAPS];
// Number of frames an unused probe is kept alive before it's returned to the free list
uint NumFramesToKeepCachedProbes;
|
|
|
|
/**
 * Returns the center of a last frame clipmap cell.
 *
 * @param LastFrameProbeCoord	Cell coordinate inside of the last frame clipmap
 * @param ClipmapIndex			Index into LastFrameClipmapCornerTWSAndCellSize
 * @return Clipmap corner (xyz) offset by (coord + 0.5) cells of size (w)
 */
float3 GetLastFrameProbeTranslatedWorldPosition(uint3 LastFrameProbeCoord, uint ClipmapIndex)
{
	const float4 CornerTWSAndCellSize = LastFrameClipmapCornerTWSAndCellSize[ClipmapIndex];

	// + 0.5 moves from the cell corner to the cell center
	return CornerTWSAndCellSize.xyz + (LastFrameProbeCoord + 0.5) * CornerTWSAndCellSize.w;
}
|
|
|
|
|
|
#ifdef UpdateCacheForUsedProbesCS
|
|
|
|
/**
 * Carries probes of the last frame over to the current frame.
 * One thread handles one cell of the last frame indirection texture. A probe found in that cell is
 * reprojected into the current clipmap and is either
 *  - reused: its index is written into the matching cell of the current indirection texture, or
 *  - released: its index is pushed onto the probe free list.
 * A probe is reused when its current cell is marked as used this frame, or when it was used recently enough.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void UpdateCacheForUsedProbesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// Clipmaps are placed next to each other along X inside of the indirection texture
	uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
	uint3 LastFrameProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.yz);

	if (ClipmapIndex < NumRadianceProbeClipmaps && all(LastFrameProbeCoord < RadianceProbeClipmapResolution))
	{
		uint3 LastFrameProbeIndirectionTextureCoord = uint3(LastFrameProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, LastFrameProbeCoord.yz);
		uint LastFrameProbeIndex = LastFrameRadianceProbeIndirectionTexture.Load(uint4(LastFrameProbeIndirectionTextureCoord, 0));

		if (LastFrameProbeIndex != INVALID_PROBE_INDEX)
		{
			// Find the cell of the current clipmap containing the last frame probe
			float3 ProbeTranslatedWorldPosition = GetLastFrameProbeTranslatedWorldPosition(LastFrameProbeCoord, ClipmapIndex);
			int3 ProbeCoord = GetRadianceProbeCoord(ProbeTranslatedWorldPosition - DFHackToFloat(PrimaryView.PrevPreViewTranslation), ClipmapIndex);

			bool bReused = false;

			// Probes which moved out of the clipmap can't be reused
			if (all(ProbeCoord >= 0) && all(ProbeCoord < (int3)RadianceProbeClipmapResolution))
			{
				uint3 ProbeIndirectionTextureCoord = uint3(ProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, ProbeCoord.yz);
				uint ProbeUsedMarker = RWRadianceProbeIndirectionTexture[ProbeIndirectionTextureCoord];
				uint LastUsedFrameNumber = RWProbeLastUsedFrame[LastFrameProbeIndex];

				uint NumFramesToKeepCachedProbesHeuristic = NumFramesToKeepCachedProbes;

				// Make sure that it's possible to allocate at least NumProbesToTraceBudget probes this frame.
				// The free list counter is read while other threads of this dispatch are still pushing
				// onto the free list, so NumFreeProbes is only an estimate.
				const uint NumFreeProbes = MaxNumProbes - ProbeAllocator[0] + RWProbeFreeListAllocator[0];
				if (NumFreeProbes < NumProbesToTraceBudget)
				{
					// Running low on probes, release unused probes earlier
					NumFramesToKeepCachedProbesHeuristic = min(2u, NumFramesToKeepCachedProbes);
				}

				// Unused probes are kept based on the heuristic, so that the shortened
				// keep duration computed above actually takes effect
				if (ProbeUsedMarker == USED_PROBE_INDEX
					|| FrameNumber - LastUsedFrameNumber < NumFramesToKeepCachedProbesHeuristic)
				{
					bReused = true;

					// Only refresh the last used frame when the probe is really used this frame,
					// otherwise cached probes would never expire
					if (ProbeUsedMarker == USED_PROBE_INDEX)
					{
						RWProbeLastUsedFrame[LastFrameProbeIndex] = FrameNumber;
					}
					RWRadianceProbeIndirectionTexture[ProbeIndirectionTextureCoord] = LastFrameProbeIndex;
				}
			}

			if (!bReused)
			{
				// Return probe index to the free list
				int FreeIndex;
				InterlockedAdd(RWProbeFreeListAllocator[0], 1, FreeIndex);
				RWProbeFreeList[FreeIndex] = LastFrameProbeIndex;
			}
		}
	}
}
|
|
|
|
#endif
|