// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "../Common.ush"
|
|
#include "../MonteCarlo.ush"
|
|
#include "../FastMath.ush"
|
|
#include "../SHCommon.ush"
|
|
#include "../DeferredShadingCommon.ush"
|
|
#include "LumenCardCommon.ush"
|
|
#include "LumenTracingCommon.ush"
|
|
#include "LumenSoftwareRayTracing.ush"
|
|
#include "LumenRadianceCacheCommon.ush"
|
|
#include "LumenRadianceCacheMarkCommon.ush"
|
|
#include "LumenRadianceCacheTracingCommon.ush"
|
|
#include "LumenScreenProbeCommon.ush"
|
|
#include "LumenScreenProbeImportanceSamplingShared.ush"
|
|
#include "LumenRadianceCacheUpdate.ush"
|
|
|
|
#ifndef THREADGROUP_SIZE
|
|
#define THREADGROUP_SIZE 1
|
|
#endif
|
|
|
|
#ifdef ClearProbeIndirectionCS
|
|
|
|
// Resets the probe indirection texture: every thread writes INVALID_PROBE_INDEX
// to the texel addressed by its dispatch thread id.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void ClearProbeIndirectionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint3 IndirectionTexelCoord = DispatchThreadId;
	RWRadianceProbeIndirectionTexture[IndirectionTexelCoord] = INVALID_PROBE_INDEX;
}
|
|
|
|
#endif
|
|
|
|
RWStructuredBuffer<float4> RWProbeWorldOffset;
|
|
|
|
|
|
RWBuffer<int> RWProbeFreeListAllocator;
|
|
RWBuffer<uint> RWProbeFreeList;
|
|
RWBuffer<uint> RWProbeLastUsedFrame;
|
|
uint MaxNumProbes;
|
|
|
|
float MinTraceDistance;
|
|
float MaxTraceDistance;
|
|
|
|
#ifdef MarkRadianceProbesUsedByVisualizeCS
|
|
|
|
// Visualize mode helper: marks the camera position as used in the indirection texture,
// provided a valid clipmap is found for it.
[numthreads(1, 1, 1)]
void MarkRadianceProbesUsedByVisualizeCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// For visualize mode, generate probes around the camera position
	const float3 CameraWorldPosition = DFHackToFloat(PrimaryView.WorldCameraOrigin);
	const uint CameraClipmapIndex = GetRadianceProbeClipmapForMark(CameraWorldPosition, .5f);

	// Nothing to mark when no clipmap is valid for the camera position
	if (!IsValidRadianceCacheClipmapForMark(CameraClipmapIndex))
	{
		return;
	}

	MarkPositionUsedInIndirectionTexture(CameraWorldPosition, CameraClipmapIndex);
}
|
|
|
|
#endif
|
|
|
|
// Must match C++
|
|
#define NUM_RADIANCE_PROBE_PDF_COEFFICIENTS (NUM_PDF_SH_COEFFICIENTS + 1)
|
|
|
|
|
|
RWBuffer<uint> RWProbeAllocator;
|
|
RWBuffer<uint> RWClearProbePDFsIndirectArgs;
|
|
RWBuffer<uint> RWGenerateProbeTraceTilesIndirectArgs;
|
|
RWBuffer<uint> RWProbeTraceTileAllocator;
|
|
RWBuffer<uint> RWFilterProbesIndirectArgs;
|
|
RWBuffer<uint> RWPrepareProbeOcclusionIndirectArgs;
|
|
RWBuffer<uint> RWFixupProbeBordersIndirectArgs;
|
|
Buffer<uint> ProbeTraceAllocator;
|
|
uint ClearProbePDFGroupSize;
|
|
uint TraceFromProbesGroupSizeXY;
|
|
uint FilterProbesGroupSizeXY;
|
|
|
|
#ifdef SetupProbeIndirectArgsCS
|
|
|
|
// Single-thread setup pass: clamps the probe allocators into [0, MaxNumProbes], resets the trace tile
// allocator and writes the indirect dispatch arguments for the passes that run per traced probe.
[numthreads(1, 1, 1)]
void SetupProbeIndirectArgsCS()
{
	// Clamp the free list allocator into [0, MaxNumProbes]
	if (RWProbeFreeListAllocator[0] < 0)
	{
		RWProbeFreeListAllocator[0] = 0;
	}

	if (RWProbeFreeListAllocator[0] > (int)MaxNumProbes)
	{
		RWProbeFreeListAllocator[0] = (int)MaxNumProbes;
	}

	// Clamp the probe allocator to MaxNumProbes
	if (RWProbeAllocator[0] > MaxNumProbes)
	{
		RWProbeAllocator[0] = MaxNumProbes;
	}

	const uint NumProbesToTrace = ProbeTraceAllocator[0];

	// Clear PDFs: 1d dispatch, one thread per (traced probe, PDF coefficient) pair, rounded up to whole groups
	const uint NumClearPDFGroups = (NumProbesToTrace * NUM_RADIANCE_PROBE_PDF_COEFFICIENTS + ClearProbePDFGroupSize - 1) / ClearProbePDFGroupSize;
	WriteDispatchIndirectArgs(RWClearProbePDFsIndirectArgs, 0, NumClearPDFGroups, 1, 1);

	// Generate trace tiles: one group per traced probe along Z
	WriteDispatchIndirectArgs(RWGenerateProbeTraceTilesIndirectArgs, 0, 1, 1, NumProbesToTrace);

	RWProbeTraceTileAllocator[0] = 0;

	// Filter probes: XY covers RadianceProbeResolution, Z is the traced probe
	const uint NumFilterGroupsXY = (RadianceProbeResolution + FilterProbesGroupSizeXY - 1) / FilterProbesGroupSizeXY;
	WriteDispatchIndirectArgs(RWFilterProbesIndirectArgs, 0, NumFilterGroupsXY, NumFilterGroupsXY, NumProbesToTrace);

	// Prepare occlusion: occlusion resolution padded by 2 * (1 << FinalRadianceAtlasMaxMip) texels
	const uint FinalOcclusionProbeResolution = OcclusionProbeResolution + 2 * (1u << FinalRadianceAtlasMaxMip);
	const uint NumOcclusionGroupsXY = (FinalOcclusionProbeResolution + TraceFromProbesGroupSizeXY - 1) / TraceFromProbesGroupSizeXY;
	WriteDispatchIndirectArgs(RWPrepareProbeOcclusionIndirectArgs, 0, NumOcclusionGroupsXY, NumOcclusionGroupsXY, NumProbesToTrace);

	// Fixup borders: XY covers FinalProbeResolution, Z is the traced probe
	const uint NumFixupGroupsXY = (FinalProbeResolution + TraceFromProbesGroupSizeXY - 1) / TraceFromProbesGroupSizeXY;
	WriteDispatchIndirectArgs(RWFixupProbeBordersIndirectArgs, 0, NumFixupGroupsXY, NumFixupGroupsXY, NumProbesToTrace);
}
|
|
|
|
#endif
|
|
|
|
#ifdef ComputeProbeWorldOffsetsCS
|
|
|
|
Buffer<float4> ProbeTraceData;
|
|
|
|
groupshared float4 SharedBestOffset[THREADGROUP_SIZE];
|
|
groupshared float4 SharedBestOffset2[THREADGROUP_SIZE];
|
|
|
|
// Computes a world space offset for a probe that sits too close to a surface.
// Each thread evaluates one candidate offset on a 4x4x4 grid around the probe; the group then reduces
// to the candidate with the largest distance to the nearest surface. The result is written as
// float4(Offset, 1), or as 0 when the probe is not too close or no candidate clears the threshold.
// NOTE(review): the reduction reads 64 shared entries, so this presumably requires THREADGROUP_SIZE == 64 - confirm against C++.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeProbeWorldOffsetsCS(
	uint3 GroupId : SV_GroupID,
	uint GroupThreadId : SV_GroupThreadID)
{
	// GroupId.z selects the probe trace entry, so every branch below is uniform across the group
	const uint ProbeTraceIndex = GroupId.z;
	FProbeTraceData TraceData = GetProbeTraceDataNoOffset(ProbeTraceData[ProbeTraceIndex]);

	const float3 ProbeTranslatedWorldCenter = TraceData.ProbeWorldCenter + DFHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO

	const float ClipmapCellSize = GetRadianceProbeClipmapCellSize(TraceData.ClipmapIndex);
	const float TooCloseThreshold = .05f * ClipmapCellSize;
	const float DistanceToSurface = GetDistanceToNearestSurfaceGlobal(ProbeTranslatedWorldCenter);

	if (DistanceToSurface < TooCloseThreshold)
	{
		// Candidate offsets span [-MaxVoxelOffset, MaxVoxelOffset] cells on each axis
		const float MaxVoxelOffset = .25f;

		// Decode the thread index into a 4x4x4 grid coordinate
		const uint GridX = GroupThreadId % 4;
		const uint GridY = (GroupThreadId % 16) / 4;
		const uint GridZ = GroupThreadId / 16;

		{
			const float3 CandidateOffset = (float3(GridX, GridY, GridZ) * 2.0f / 3.0f - 1.0f) * MaxVoxelOffset * ClipmapCellSize;
			const float CandidateDistanceToSurface = GetDistanceToNearestSurfaceGlobal(ProbeTranslatedWorldCenter + CandidateOffset);
			SharedBestOffset[GroupThreadId] = float4(CandidateOffset, CandidateDistanceToSurface);
		}

		GroupMemoryBarrierWithGroupSync();

		// First reduction step: 8 threads each pick the best of 8 consecutive candidates
		if (GroupThreadId < 8)
		{
			const uint FirstCandidateIndex = GroupThreadId * 8;
			float4 BestCandidate = SharedBestOffset[FirstCandidateIndex];

			for (uint CandidateIndex = 1; CandidateIndex < 8; CandidateIndex++)
			{
				const float4 Candidate = SharedBestOffset[FirstCandidateIndex + CandidateIndex];

				if (Candidate.w > BestCandidate.w)
				{
					BestCandidate = Candidate;
				}
			}

			SharedBestOffset2[GroupThreadId] = BestCandidate;
		}

		GroupMemoryBarrierWithGroupSync();

		// Second reduction step: thread 0 picks the best of the 8 partial results and writes the probe offset
		if (GroupThreadId == 0)
		{
			float4 BestCandidate = SharedBestOffset2[0];

			for (uint PartialIndex = 1; PartialIndex < 8; PartialIndex++)
			{
				const float4 Candidate = SharedBestOffset2[PartialIndex];

				if (Candidate.w > BestCandidate.w)
				{
					BestCandidate = Candidate;
				}
			}

			// Only accept the offset if it actually moves the probe far enough from the surface
			const bool bOffsetIsFarEnough = BestCandidate.w >= TooCloseThreshold;
			RWProbeWorldOffset[TraceData.ProbeIndex] = bOffsetIsFarEnough ? float4(BestCandidate.xyz, 1) : float4(0, 0, 0, 0);
		}
	}
	else
	{
		// Probe is far enough from any surface, no offset needed
		RWProbeWorldOffset[TraceData.ProbeIndex] = 0;
	}
}
|
|
|
|
#endif
|
|
|
|
RWBuffer<int> RWRadianceProbeSH_PDF;
|
|
|
|
#ifdef ClearProbePDFs
|
|
|
|
Buffer<float4> ProbeTraceData;
|
|
|
|
// Zeroes the accumulated PDF coefficients of the probes listed in ProbeTraceData.
// One thread per (probe trace entry, coefficient) pair.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearProbePDFs(uint DispatchThreadId : SV_DispatchThreadID)
{
	// Split the linear thread index into the probe trace entry and the coefficient within it
	const uint ProbeTraceIndex = DispatchThreadId / NUM_RADIANCE_PROBE_PDF_COEFFICIENTS;
	const uint CoefficientIndex = DispatchThreadId % NUM_RADIANCE_PROBE_PDF_COEFFICIENTS;

	const uint ProbeIndex = GetProbeIndexFromProbeTraceData(ProbeTraceData[ProbeTraceIndex]);
	const uint PDFCoefficientAddress = ProbeIndex * NUM_RADIANCE_PROBE_PDF_COEFFICIENTS + CoefficientIndex;

	RWRadianceProbeSH_PDF[PDFCoefficientAddress] = 0;
}
|
|
|
|
#endif
|
|
|
|
#ifdef ScatterScreenProbeBRDFToRadianceProbesCS
|
|
|
|
// Accumulates each screen probe's BRDF PDF (as quantized SH coefficients) into the radiance probes
// surrounding it, using integer atomics on RWRadianceProbeSH_PDF.
//  - One thread group per screen probe (GroupId.xy is the screen probe atlas coordinate)
//  - The low 3 bits of GroupThreadId.x select which of the 8 corner radiance probes to scatter into
//  - GroupThreadId.y strides over the NUM_RADIANCE_PROBE_PDF_COEFFICIENTS slots
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ScatterScreenProbeBRDFToRadianceProbesCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// PDF pass: Accumulate BRDF SH from screen probes
	// Scatter: Scatter BRDF SH into 8 radiance probes using atomics to buffer
	// Threadgroup per probe

	uint2 ScreenProbeAtlasCoord = GroupId.xy;

	uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;
	uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);

	if (ScreenProbeIndex < GetNumScreenProbes() && ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x)
	{
		float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);
		float SceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);

		// Screen probes with non-positive depth are skipped
		if (SceneDepth > 0)
		{
			float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, SceneDepth);
			uint ClipmapIndex = GetRadianceProbeClipmap(WorldPosition, 0);

			if (ClipmapIndex < NumRadianceProbeClipmaps)
			{
				int3 BottomCornerProbeCoord = GetRadianceProbeBottomCornerCoord(WorldPosition, ClipmapIndex);
				int3 IndirectionTextureCoord = BottomCornerProbeCoord + int3(ClipmapIndex * RadianceProbeClipmapResolution, 0, 0);

				// Bits 0, 1, 2 of GroupThreadId.x are the X, Y, Z offsets of the corner probe handled by this thread
				int3 CornerOffset = int3(GroupThreadId.x & 0x1, (GroupThreadId.x & 0x2) >> 1, (GroupThreadId.x & 0x4) >> 2);
				uint ProbeIndex = RadianceProbeIndirectionTexture[IndirectionTextureCoord + CornerOffset];
				bool bTwoSidedFoliage = GetScreenProbeIsTwoSidedFoliage(ScreenProbeAtlasCoord);

				//@todo - skip probes which are cached this frame
				if (ProbeIndex != INVALID_PROBE_INDEX)
				{
					// The screen probe's SH coefficients are laid out contiguously by linear screen probe index
					uint SHBaseIndex = ScreenProbeIndex * NUM_PDF_SH_COEFFICIENTS;
					uint ProbeSHBaseCoord = ProbeIndex * NUM_RADIANCE_PROBE_PDF_COEFFICIENTS;

					for (uint CoefficientIndex = GroupThreadId.y; CoefficientIndex < NUM_RADIANCE_PROBE_PDF_COEFFICIENTS; CoefficientIndex += THREADGROUP_SIZE)
					{
						// Defaults apply to the extra slot past the SH coefficients: it accumulates 1 per contributing screen probe
						float Coefficient = 1.0f;
						float MaxValuePerThread = 1.0f;

						if (CoefficientIndex < NUM_PDF_SH_COEFFICIENTS)
						{
							// The Radiance Cache over-samples with high depth complexity caused by foliage, attempt to offset that by keeping at the lowest trace resolution
							Coefficient = bTwoSidedFoliage ? 0.0f : BRDFProbabilityDensityFunctionSH[SHBaseIndex + CoefficientIndex];
							// Quantization scale for the integer atomics; GetBRDF_PDF applies the inverse scale when decoding
							MaxValuePerThread = (float)0xFFFFFFFF / 100000.0f;
						}

						int QuantizedCoefficient = Coefficient * MaxValuePerThread;
						InterlockedAdd(RWRadianceProbeSH_PDF[ProbeSHBaseCoord + CoefficientIndex], QuantizedCoefficient);
					}
				}
			}
		}
	}
}
|
|
|
|
#endif
|
|
|
|
// Returns true when the BRDF PDF, evaluated in the direction of the trace tile's center,
// is greater than LevelPDFThreshold. Negative PDF values are clamped to 0 before the comparison.
bool ShouldRefineTraceTile(uint2 TraceTileCoord, uint TraceTileResolution, float LevelPDFThreshold, FThreeBandSHVector BRDF)
{
	// Direction through the center of the tile
	const float2 TileCenterUV = (TraceTileCoord + float2(.5f, .5f)) / float(TraceTileResolution);
	const float3 TileCenterDirection = EquiAreaSphericalMapping(TileCenterUV);

	// Evaluate the SH-projected PDF in that direction
	const FThreeBandSHVector TileDirectionSH = SHBasisFunction3(TileCenterDirection);
	const float TilePDF = max(DotSH3(BRDF, TileDirectionSH), 0);

	return TilePDF > LevelPDFThreshold;
}
|
|
|
|
RWBuffer<uint2> RWProbeTraceTileData;
|
|
Buffer<int> RadianceProbeSH_PDF;
|
|
float SupersampleTileBRDFThreshold;
|
|
|
|
RWTexture2D<float> RWDebugBRDFProbabilityDensityFunction;
|
|
uint DebugProbeBRDFOctahedronResolution;
|
|
|
|
#ifdef GenerateProbeTraceTilesCS
|
|
|
|
groupshared uint SharedNumPendingTraceTiles;
|
|
groupshared uint2 PendingTraceTileList[THREADGROUP_SIZE * THREADGROUP_SIZE * 4];
|
|
|
|
groupshared uint SharedNumCompletedTraceTiles;
|
|
groupshared uint2 CompletedTraceTileList[THREADGROUP_SIZE * THREADGROUP_SIZE * 4];
|
|
|
|
groupshared uint GlobalTraceOffset;
|
|
|
|
// Decodes the accumulated, quantized BRDF PDF of a radiance probe from RadianceProbeSH_PDF.
// Each probe owns NUM_RADIANCE_PROBE_PDF_COEFFICIENTS consecutive ints: the SH coefficients followed by
// one slot holding the accumulated weight. The coefficients are dequantized and divided by that weight;
// a probe with no accumulated weight decodes to an all-zero SH vector.
FThreeBandSHVector GetBRDF_PDF(uint ProbeIndex)
{
	uint SHBaseIndex = ProbeIndex * NUM_RADIANCE_PROBE_PDF_COEFFICIENTS;
	// Inverse of the quantization scale used by the scatter pass
	float DequantizeScale = 100000.0f / (float)0xFFFFFFFF;

	// The weight lives in the slot right after the SH coefficients, which is where the scatter pass accumulates it
	float TotalWeight = RadianceProbeSH_PDF[SHBaseIndex + NUM_PDF_SH_COEFFICIENTS];
	float DecodeScale = 0.0f;

	// Guard against dividing by zero when nothing was scattered into this probe
	if (TotalWeight > 0.0f)
	{
		DecodeScale = DequantizeScale / TotalWeight;
	}

	FThreeBandSHVector BRDF;
	BRDF.V0.x = RadianceProbeSH_PDF[SHBaseIndex + 0] * DecodeScale;
	BRDF.V0.y = RadianceProbeSH_PDF[SHBaseIndex + 1] * DecodeScale;
	BRDF.V0.z = RadianceProbeSH_PDF[SHBaseIndex + 2] * DecodeScale;
	BRDF.V0.w = RadianceProbeSH_PDF[SHBaseIndex + 3] * DecodeScale;
	BRDF.V1.x = RadianceProbeSH_PDF[SHBaseIndex + 4] * DecodeScale;
	BRDF.V1.y = RadianceProbeSH_PDF[SHBaseIndex + 5] * DecodeScale;
	BRDF.V1.z = RadianceProbeSH_PDF[SHBaseIndex + 6] * DecodeScale;
	BRDF.V1.w = RadianceProbeSH_PDF[SHBaseIndex + 7] * DecodeScale;
	BRDF.V2.x = RadianceProbeSH_PDF[SHBaseIndex + 8] * DecodeScale;
	return BRDF;
}
|
|
|
|
// Processes the pending trace tiles of one subdivision level.
// Every pending tile in [PendingTraceListStartIndex, SharedNumPendingTraceTiles) is either split into
// 4 child tiles queued for the next level, or appended to the completed list at the current level.
// Ends with a group sync and advances PendingTraceListStartIndex past the tiles handled by this call,
// so it must be called from uniform control flow.
void SubdivideTraceTileTreeOneStep(
	uint ThreadIndex,
	uint BaseTraceTileResolution,
	uint Level,
	uint NumLevels,
	uint ProbeTraceIndex,
	FThreeBandSHVector BRDF,
	inout uint PendingTraceListStartIndex)
{
	const uint LevelTileResolution = BaseTraceTileResolution << Level;
	// Snapshot the list size before any thread appends children for the next level
	const uint PendingListEnd = SharedNumPendingTraceTiles;
	const uint NumGroupThreads = THREADGROUP_SIZE * THREADGROUP_SIZE;

	for (uint PendingIndex = PendingTraceListStartIndex + ThreadIndex; PendingIndex < PendingListEnd; PendingIndex += NumGroupThreads)
	{
		const uint2 TileCoord = UnpackTraceTileInfo(PendingTraceTileList[PendingIndex]);

		// Tiles on the last level are never refined
		if (Level < (NumLevels - 1) && ShouldRefineTraceTile(TileCoord, LevelTileResolution, SupersampleTileBRDFThreshold, BRDF))
		{
			// Reserve 4 consecutive slots and queue the 2x2 children at the next level
			uint ChildBaseIndex;
			InterlockedAdd(SharedNumPendingTraceTiles, 4, ChildBaseIndex);

			const uint2 ChildBaseCoord = TileCoord * 2;
			const uint ChildLevel = Level + 1;

			PendingTraceTileList[ChildBaseIndex + 0] = PackTraceTileInfo(ChildBaseCoord + uint2(0, 0), ChildLevel, ProbeTraceIndex);
			PendingTraceTileList[ChildBaseIndex + 1] = PackTraceTileInfo(ChildBaseCoord + uint2(1, 0), ChildLevel, ProbeTraceIndex);
			PendingTraceTileList[ChildBaseIndex + 2] = PackTraceTileInfo(ChildBaseCoord + uint2(0, 1), ChildLevel, ProbeTraceIndex);
			PendingTraceTileList[ChildBaseIndex + 3] = PackTraceTileInfo(ChildBaseCoord + uint2(1, 1), ChildLevel, ProbeTraceIndex);
		}
		else
		{
			// Tile is final at this level
			uint CompletedIndex;
			InterlockedAdd(SharedNumCompletedTraceTiles, 1, CompletedIndex);
			CompletedTraceTileList[CompletedIndex] = PackTraceTileInfo(TileCoord, Level, ProbeTraceIndex);
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// The next step starts where this step's snapshot ended
	PendingTraceListStartIndex = PendingListEnd;
}
|
|
|
|
// Runs the subdivision steps for levels 1 and above: two steps for a 3 level tree, one step for a
// 2 level tree, none otherwise. Level 0 is expected to have been queued by the caller.
void SubdivideTraceTileTree(
	uint ThreadIndex,
	uint BaseTraceTileResolution,
	uint NumLevels,
	uint ProbeTraceIndex,
	FThreeBandSHVector BRDF)
{
	uint NextPendingTileIndex = 0;

	// Each step contains a group sync, so the level count passed down must be a literal and the steps are
	// unrolled by hand. Looping over a runtime NumLevels makes the shader compiler report a spurious
	// error X3663: thread sync operation found in varying flow control, consider reformulating your algorithm so all threads will hit the sync simultaneously
	if (NumLevels == 3)
	{
		SubdivideTraceTileTreeOneStep(ThreadIndex, BaseTraceTileResolution, 1, 3, ProbeTraceIndex, BRDF, NextPendingTileIndex);
		SubdivideTraceTileTreeOneStep(ThreadIndex, BaseTraceTileResolution, 2, 3, ProbeTraceIndex, BRDF, NextPendingTileIndex);
	}
	else if (NumLevels == 2)
	{
		SubdivideTraceTileTreeOneStep(ThreadIndex, BaseTraceTileResolution, 1, 2, ProbeTraceIndex, BRDF, NextPendingTileIndex);
	}
}
|
|
|
|
StructuredBuffer<uint> ProbesToUpdateTraceCost;
|
|
Buffer<float4> ProbeTraceData;
|
|
int ForcedUniformLevel;
|
|
|
|
// Note: should match GetProbeTraceCost as closely as possible
|
|
// Generates the list of trace tiles for one probe (one thread group per probe trace entry, GroupId.z).
// Two compile-time paths:
//  - FORCE_UNIFORM_TRACES / DEBUG_UNIFORM_TRACES: every tile of the probe is emitted at a single level
//  - otherwise: tiles are adaptively subdivided based on the probe's accumulated BRDF PDF
// In both paths the group reserves a contiguous range in RWProbeTraceTileData through
// RWProbeTraceTileAllocator and writes its packed tiles there.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void GenerateProbeTraceTilesCS(
	uint3 GroupId : SV_GroupID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	uint ProbeTraceIndex = GroupId.z;

	FProbeTraceData TraceData = GetProbeTraceData(ProbeTraceData[ProbeTraceIndex]);

	float DistanceFromCameraSq = GetDistanceToCameraFromViewVectorSqr(DFHackToFloat(PrimaryView.WorldCameraOrigin) - TraceData.ProbeWorldCenter);

#define DEBUG_UNIFORM_TRACES 0
#if FORCE_UNIFORM_TRACES || DEBUG_UNIFORM_TRACES

	uint UniformLevel = DEBUG_UNIFORM_TRACES ? 1 : ForcedUniformLevel;

	#if !DEBUG_UNIFORM_TRACES
		// Distance based level selection: far probes use level 0, near probes use level 2
		if (DistanceFromCameraSq >= DownsampleDistanceFromCameraSq)
		{
			UniformLevel = 0;
		}
		else if (DistanceFromCameraSq < SupersampleDistanceFromCameraSq)
		{
			UniformLevel = 2;
		}

		if (TraceData.bForceDownsample)
		{
			UniformLevel = 0;
		}
	#endif

	uint TraceTileResolution = (RadianceProbeResolution / THREADGROUP_SIZE / 2) << UniformLevel;

	// Always emit at least one tile per probe
	if (TraceTileResolution == 0)
	{
		TraceTileResolution = 1;
		UniformLevel = 1;
	}

	uint NumTraceTiles = TraceTileResolution * TraceTileResolution;
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	// A single thread reserves the output range for the whole group
	if (ThreadIndex == 0)
	{
		InterlockedAdd(RWProbeTraceTileAllocator[0], NumTraceTiles, GlobalTraceOffset);
	}

	GroupMemoryBarrierWithGroupSync();

	for (uint TraceTileIndex = ThreadIndex; TraceTileIndex < NumTraceTiles; TraceTileIndex += THREADGROUP_SIZE * THREADGROUP_SIZE)
	{
		uint2 TraceTileCoord = uint2(TraceTileIndex % TraceTileResolution, TraceTileIndex / TraceTileResolution);
		RWProbeTraceTileData[GlobalTraceOffset + TraceTileIndex] = PackTraceTileInfo(TraceTileCoord, UniformLevel, ProbeTraceIndex);
	}

#else

	// Ray gen pass:
	// Clear trace tile list
	// For each level [0 - 2] test PDF at tile center, issue trace tile if below threshold, otherwise subdivide and queue for next level
	// Write out all trace tiles for indirect dispatch

	if (all(GroupThreadId.xy == 0))
	{
		SharedNumCompletedTraceTiles = 0;
		SharedNumPendingTraceTiles = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	uint NumLevels = 1;

	// Calculate subdivision level for the probe
	// Level 0 is half of RadianceProbeResolution
	if (DistanceFromCameraSq < DownsampleDistanceFromCameraSq)
	{
		NumLevels = DistanceFromCameraSq < SupersampleDistanceFromCameraSq ? 3 : 2;
	}

	if (TraceData.bForceDownsample)
	{
		NumLevels = 1;
	}

	FThreeBandSHVector BRDF = GetBRDF_PDF(TraceData.ProbeIndex);

	uint BaseTraceTileResolution = RadianceProbeResolution / THREADGROUP_SIZE / 2;

	// Queue trace tiles for level 0
	if (all(GroupThreadId.xy < BaseTraceTileResolution))
	{
		uint2 TraceTileCoord = GroupThreadId.xy;
		uint Level = 0;

		// Level 0 uses a PDF threshold of 0: any tile with a positive PDF is refined when more levels are available
		if (Level < (NumLevels - 1) && ShouldRefineTraceTile(TraceTileCoord, BaseTraceTileResolution, 0.0f, BRDF))
		{
			uint TileBaseIndex;
			InterlockedAdd(SharedNumPendingTraceTiles, 4, TileBaseIndex);

			PendingTraceTileList[TileBaseIndex + 0] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 0), 1, ProbeTraceIndex);
			PendingTraceTileList[TileBaseIndex + 1] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 0), 1, ProbeTraceIndex);
			PendingTraceTileList[TileBaseIndex + 2] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 1), 1, ProbeTraceIndex);
			PendingTraceTileList[TileBaseIndex + 3] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 1), 1, ProbeTraceIndex);
		}
		else
		{
			uint TileIndex;
			InterlockedAdd(SharedNumCompletedTraceTiles, 1, TileIndex);
			CompletedTraceTileList[TileIndex] = PackTraceTileInfo(TraceTileCoord, 0, ProbeTraceIndex);
		}
	}

	GroupMemoryBarrierWithGroupSync();

	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	// Queue trace tiles for remaining levels
	SubdivideTraceTileTree(ThreadIndex, BaseTraceTileResolution, NumLevels, ProbeTraceIndex, BRDF);

	// A single thread reserves the output range for all completed tiles of this group
	if (ThreadIndex == 0)
	{
		InterlockedAdd(RWProbeTraceTileAllocator[0], SharedNumCompletedTraceTiles, GlobalTraceOffset);
	}

	GroupMemoryBarrierWithGroupSync();

	for (uint TraceTileIndex = ThreadIndex; TraceTileIndex < SharedNumCompletedTraceTiles; TraceTileIndex += THREADGROUP_SIZE * THREADGROUP_SIZE)
	{
		RWProbeTraceTileData[GlobalTraceOffset + TraceTileIndex] = CompletedTraceTileList[TraceTileIndex];
	}

	// 'vis Lumen.RadianceCache.DebugBRDFProbabilityDensityFunction uv1'
	#define VISUALIZE_BRDF_PDF_SPHERICAL_HARMONIC 0
	#if VISUALIZE_BRDF_PDF_SPHERICAL_HARMONIC
		uint2 TexelCoord = GroupThreadId.xy;

		if (all(TexelCoord < DebugProbeBRDFOctahedronResolution))
		{
			float2 ProbeTexelCenter = float2(0.5, 0.5);
			float2 ProbeUV = (TexelCoord + ProbeTexelCenter) / (float)DebugProbeBRDFOctahedronResolution;
			float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);

			FThreeBandSHVector DirectionSH = SHBasisFunction3(WorldConeDirection);
			float PDF = max(DotSH3(BRDF, DirectionSH), 0) * .001f;

			// The probe index comes from the trace data; there is no standalone ProbeIndex in this scope
			uint2 ProbeAtlasBaseCoord = DebugProbeBRDFOctahedronResolution * uint2(TraceData.ProbeIndex & ProbeAtlasResolutionModuloMask, TraceData.ProbeIndex >> ProbeAtlasResolutionDivideShift);
			RWDebugBRDFProbabilityDensityFunction[ProbeAtlasBaseCoord + TexelCoord] = PDF;
		}
	#endif
#endif
}
|
|
|
|
#endif
|
|
|
|
#ifdef SetupTraceFromProbesCS
|
|
|
|
Buffer<uint> ProbeTraceTileAllocator;
|
|
RWBuffer<uint> RWTraceProbesIndirectArgs;
|
|
RWBuffer<uint> RWSortProbeTraceTilesIndirectArgs;
|
|
RWBuffer<uint> RWRadianceCacheHardwareRayTracingIndirectArgs;
|
|
RWBuffer<uint> RWHardwareRayTracingRayAllocatorBuffer;
|
|
uint SortTraceTilesGroupSize;
|
|
|
|
// Single-thread setup pass: converts the number of allocated trace tiles into the indirect dispatch
// arguments of the trace, sort and hardware ray tracing passes, and seeds the hardware ray tracing ray allocator.
[numthreads(1, 1, 1)]
void SetupTraceFromProbesCS()
{
	const uint NumProbeTraceTiles = ProbeTraceTileAllocator[0];

	// Decompose the dispatch group layout into 2d to work around hitting D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION (65k) with a 1d layout,
	// which manifests as flickering during Force Full Update
	const uint NumTraceGroupsY = (NumProbeTraceTiles + TRACE_TILE_GROUP_STRIDE - 1) / TRACE_TILE_GROUP_STRIDE;
	WriteDispatchIndirectArgs(RWTraceProbesIndirectArgs, 0, TRACE_TILE_GROUP_STRIDE, NumTraceGroupsY, 1);

	// Sort: 1d dispatch, one thread per trace tile rounded up to whole groups
	const uint NumSortGroups = (NumProbeTraceTiles + SortTraceTilesGroupSize - 1) / SortTraceTilesGroupSize;
	WriteDispatchIndirectArgs(RWSortProbeTraceTilesIndirectArgs, 0, NumSortGroups, 1, 1);

	// Hardware ray tracing: X covers the texels of a tile, Y covers the tiles
	WriteDispatchIndirectArgs(RWRadianceCacheHardwareRayTracingIndirectArgs, 0, RADIANCE_CACHE_TRACE_TILE_SIZE_1D, NumProbeTraceTiles, 1);

	// Total number of rays: one per texel of every trace tile
	RWHardwareRayTracingRayAllocatorBuffer[0] = NumProbeTraceTiles * RADIANCE_CACHE_TRACE_TILE_SIZE_1D;
}
|
|
|
|
#endif
|
|
|
|
#ifndef SORT_TILES_THREADGROUP_SIZE
|
|
#define SORT_TILES_THREADGROUP_SIZE 1
|
|
#endif
|
|
|
|
#define NUM_DIRECTION_BINS_2D 8
|
|
#define NUM_DIRECTION_BINS_1D (NUM_DIRECTION_BINS_2D * NUM_DIRECTION_BINS_2D)
|
|
|
|
#ifdef SortProbeTraceTilesCS
|
|
|
|
Buffer<uint> ProbeTraceTileAllocator;
|
|
Buffer<uint2> ProbeTraceTileData;
|
|
|
|
groupshared uint SharedNumTraceTileBins[NUM_DIRECTION_BINS_1D];
|
|
groupshared uint SharedTraceTileBinOffset[NUM_DIRECTION_BINS_1D];
|
|
|
|
// Sorts the trace tiles handled by one thread group by direction bin.
// Each thread owns one trace tile. The group builds a histogram of tiles per direction bin in shared memory,
// then every tile is written to the group's output range at (tiles in lower bins) + (its slot within its own bin).
// Tiles only move within their own group's range of SORT_TILES_THREADGROUP_SIZE entries.
[numthreads(SORT_TILES_THREADGROUP_SIZE, 1, 1)]
void SortProbeTraceTilesCS(
	uint GroupId : SV_GroupID,
	uint GroupThreadId : SV_GroupThreadID)
{
	// Clear bins to 0
	for (uint ClearBinIndex = GroupThreadId; ClearBinIndex < NUM_DIRECTION_BINS_1D; ClearBinIndex += SORT_TILES_THREADGROUP_SIZE)
	{
		SharedNumTraceTileBins[ClearBinIndex] = 0;
		SharedTraceTileBinOffset[ClearBinIndex] = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	const uint GroupBaseTileIndex = GroupId * SORT_TILES_THREADGROUP_SIZE;
	const uint TraceTileIndex = GroupBaseTileIndex + GroupThreadId;
	// Threads past the end of the tile list still take part in the barriers but do no work
	const bool bValidTraceTile = TraceTileIndex < ProbeTraceTileAllocator[0];

	// Count how many trace tiles in each direction bin
	if (bValidTraceTile)
	{
		uint2 TileCoord;
		uint TileLevel;
		uint TileProbeTraceIndex;
		UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TileCoord, TileLevel, TileProbeTraceIndex);

		const uint LevelTraceResolution = (RadianceProbeResolution / 2) << TileLevel;
		const uint2 TileOriginTexel = TileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D;
		const uint2 DirectionBin2D = TileOriginTexel * NUM_DIRECTION_BINS_2D / LevelTraceResolution;

		//@todo - also bin by Morton encoded position
		const uint DirectionBin1D = DirectionBin2D.y * NUM_DIRECTION_BINS_2D + DirectionBin2D.x;

		InterlockedAdd(SharedNumTraceTileBins[DirectionBin1D], 1);
	}

	GroupMemoryBarrierWithGroupSync();

	if (bValidTraceTile)
	{
		const uint2 PackedTraceTile = ProbeTraceTileData[TraceTileIndex];

		uint2 TileCoord;
		uint TileLevel;
		uint TileProbeTraceIndex;
		UnpackTraceTileInfo(PackedTraceTile, TileCoord, TileLevel, TileProbeTraceIndex);

		// Recompute the same bin as in the counting phase
		const uint LevelTraceResolution = (RadianceProbeResolution / 2) << TileLevel;
		const uint2 TileOriginTexel = TileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D;
		const uint2 DirectionBin2D = TileOriginTexel * NUM_DIRECTION_BINS_2D / LevelTraceResolution;
		const uint DirectionBin1D = DirectionBin2D.y * NUM_DIRECTION_BINS_2D + DirectionBin2D.x;

		uint SortedOffsetInGroup;

		// Calculate our sorted offset by adding up all the bins before us
		{
			// Slot of this tile within its own bin
			InterlockedAdd(SharedTraceTileBinOffset[DirectionBin1D], 1, SortedOffsetInGroup);

			for (uint LowerBinIndex = 0; LowerBinIndex < DirectionBin1D; LowerBinIndex++)
			{
				SortedOffsetInGroup += SharedNumTraceTileBins[LowerBinIndex];
			}
		}

		// Write out to the sorted position
		RWProbeTraceTileData[GroupBaseTileIndex + SortedOffsetInGroup] = PackedTraceTile;
	}
}
|
|
|
|
#endif
|
|
|
|
float StepFactor;
|
|
float MinSampleRadius;
|
|
float MaxMeshSDFTraceDistance;
|
|
float CachedLightingPreExposure;
|
|
|
|
// Traces a single probe texel cone against the Lumen scene voxels and then applies the skylight.
// The result starts out as a miss: no lighting, full transparency, hit distance at the max trace distance.
FConeTraceResult TraceForProbeTexel(FConeTraceInput TraceInput)
{
	// Rays that start inside geometry must not contribute radiance
	TraceInput.bZeroRadianceIfRayStartsInsideGeometry = true;

	FConeTraceResult ProbeTexelResult = (FConeTraceResult)0;
	ProbeTexelResult.Lighting = 0.0;
	ProbeTexelResult.Transparency = 1.0;
	ProbeTexelResult.OpaqueHitDistance = TraceInput.MaxTraceDistance;

	ConeTraceLumenSceneVoxels(TraceInput, ProbeTexelResult);
	ApplySkylightToTraceResult(TraceInput.ConeDirection, ProbeTexelResult);

	return ProbeTexelResult;
}
|
|
|
|
#ifdef TraceFromProbesCS
|
|
|
|
Buffer<float4> ProbeTraceData;
|
|
Buffer<uint2> ProbeTraceTileData;
|
|
Buffer<uint> ProbeTraceTileAllocator;
|
|
|
|
RWTexture2D<float3> RWRadianceProbeAtlasTexture;
|
|
#if RADIANCE_CACHE_SKY_VISIBILITY
|
|
RWTexture2D<float> RWSkyVisibilityProbeAtlasTexture;
|
|
#endif
|
|
RWTexture2D<float> RWDepthProbeAtlasTexture;
|
|
|
|
groupshared float3 SharedTraceRadiance[RADIANCE_CACHE_TRACE_TILE_SIZE_2D][RADIANCE_CACHE_TRACE_TILE_SIZE_2D];
|
|
groupshared float SharedTraceSkyVisibility[RADIANCE_CACHE_TRACE_TILE_SIZE_2D][RADIANCE_CACHE_TRACE_TILE_SIZE_2D];
|
|
groupshared float SharedTraceHitDistance[RADIANCE_CACHE_TRACE_TILE_SIZE_2D][RADIANCE_CACHE_TRACE_TILE_SIZE_2D];
|
|
|
|
// Traces one trace tile per thread group, one probe texel per thread.
// Trace results go through groupshared memory and are then written to the probe atlases, resampled from the
// tile's trace resolution to RadianceProbeResolution:
//  - lower trace resolution: every traced texel is replicated over an UpsampleFactor^2 block
//  - equal or higher trace resolution: DownsampleFactor^2 traced texels are averaged (radiance, sky visibility)
//    or min-reduced (hit distance) into one atlas texel
[numthreads(RADIANCE_CACHE_TRACE_TILE_SIZE_2D, RADIANCE_CACHE_TRACE_TILE_SIZE_2D, 1)]
void TraceFromProbesCS(
	uint3 GroupId : SV_GroupID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	// Recover the linear trace tile index from the 2d group layout
	const uint TraceTileIndex = GroupId.y * TRACE_TILE_GROUP_STRIDE + GroupId.x;

	// Uniform across the group, so the barrier inside is reached by all threads or none
	if (TraceTileIndex < ProbeTraceTileAllocator[0])
	{
		uint2 TraceTileCoord;
		uint TraceTileLevel;
		uint ProbeTraceIndex;
		UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TraceTileCoord, TraceTileLevel, ProbeTraceIndex);

		const uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
		const uint2 ProbeTexelCoord = TraceTileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + GroupThreadId.xy;

		FProbeTraceData TraceData = GetProbeTraceData(ProbeTraceData[ProbeTraceIndex]);

		const float3 ProbeTranslatedWorldCenter = TraceData.ProbeWorldCenter + DFHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO

		if (all(ProbeTexelCoord < TraceResolution))
		{
			float2 ProbeTexelCenter = float2(0.5, 0.5);

			// No temporal accumulation, so just reads as dirty lighting
			#define JITTER_TRACE_DIRECTION 0
			#if JITTER_TRACE_DIRECTION
				uint2 RandomSeed = Rand3DPCG16(int3(floor(TraceData.ProbeWorldCenter / GetRadianceProbeClipmapCellSize(0)))).xy;
				ProbeTexelCenter = Hammersley16(0, 1, RandomSeed);
			#endif

			const float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / float(TraceResolution);
			const float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);

			const float FinalMinTraceDistance = max(MinTraceDistance, GetRadianceProbeTMin(TraceData.ClipmapIndex));
			const float FinalMaxTraceDistance = MaxTraceDistance;
			const float EffectiveStepFactor = StepFactor;

			// Evenly distributing the sphere solid angle among all cones instead of based on Octahedron distortion
			const float ConeHalfAngle = acosFast(1.0f - 1.0f / (float)(TraceResolution * TraceResolution));

			FConeTraceInput TraceInput;
			TraceInput.Setup(
				TraceData.ProbeWorldCenter,
				ProbeTranslatedWorldCenter,
				WorldConeDirection,
				ConeHalfAngle,
				MinSampleRadius,
				FinalMinTraceDistance,
				FinalMaxTraceDistance,
				EffectiveStepFactor);

			// Dither per atlas texel: probe origin in the atlas plus the texel within the probe
			TraceInput.bDitheredTransparency = true;
			TraceInput.DitherScreenCoord = RadianceProbeResolution * uint2(TraceData.ProbeIndex & ProbeAtlasResolutionModuloMask, TraceData.ProbeIndex >> ProbeAtlasResolutionDivideShift) + ProbeTexelCoord;

			bool bContinueCardTracing = false;

			TraceInput.VoxelTraceStartDistance = CalculateVoxelTraceStartDistance(FinalMinTraceDistance, FinalMaxTraceDistance, MaxMeshSDFTraceDistance, bContinueCardTracing);

			FConeTraceResult TraceResult = TraceForProbeTexel(TraceInput);

			#define DEBUG_VISUALIZE_SAMPLING_RESOLUTION 0
			#if DEBUG_VISUALIZE_SAMPLING_RESOLUTION
				// Set r.Lumen.RadianceCache.SpatialFilterProbes 0 for raw output
				TraceResult.Lighting = TraceTileLevel == 0 ? float3(0, 1, 0) : (TraceTileLevel == 1 ? float3(1, 0, 0) : float3(1, 0, 1));
			#endif

			// Publish this texel's result to the rest of the group
			SharedTraceRadiance[GroupThreadId.y][GroupThreadId.x] = TraceResult.Lighting * CachedLightingPreExposure;
			#if RADIANCE_CACHE_SKY_VISIBILITY
				SharedTraceSkyVisibility[GroupThreadId.y][GroupThreadId.x] = TraceResult.Transparency;
			#endif
			SharedTraceHitDistance[GroupThreadId.y][GroupThreadId.x] = TraceResult.OpaqueHitDistance;
		}

		GroupMemoryBarrierWithGroupSync();

		// Top left texel of this probe in the atlas
		const uint2 ProbeAtlasBaseCoord = RadianceProbeResolution * uint2(TraceData.ProbeIndex & ProbeAtlasResolutionModuloMask, TraceData.ProbeIndex >> ProbeAtlasResolutionDivideShift);

		if (TraceResolution < RadianceProbeResolution)
		{
			// Traced at lower resolution than the atlas: replicate each traced texel over a block
			const uint UpsampleFactor = RadianceProbeResolution / TraceResolution;
			const uint2 BlockBaseCoord = ProbeAtlasBaseCoord + ProbeTexelCoord * UpsampleFactor;

			const float3 Lighting = SharedTraceRadiance[GroupThreadId.y][GroupThreadId.x];
			#if RADIANCE_CACHE_SKY_VISIBILITY
				const float SkyVisibility = SharedTraceSkyVisibility[GroupThreadId.y][GroupThreadId.x];
			#endif
			// Clamp the hit distance to MaxHalfFloat before storing it in the depth atlas
			const float HitDistance = min(SharedTraceHitDistance[GroupThreadId.y][GroupThreadId.x], MaxHalfFloat);

			for (uint BlockY = 0; BlockY < UpsampleFactor; BlockY++)
			{
				for (uint BlockX = 0; BlockX < UpsampleFactor; BlockX++)
				{
					const uint2 AtlasCoord = BlockBaseCoord + uint2(BlockX, BlockY);

					RWRadianceProbeAtlasTexture[AtlasCoord] = Lighting;
					#if RADIANCE_CACHE_SKY_VISIBILITY
						RWSkyVisibilityProbeAtlasTexture[AtlasCoord] = SkyVisibility;
					#endif
					RWDepthProbeAtlasTexture[AtlasCoord] = HitDistance;
				}
			}
		}
		else
		{
			// Traced at equal or higher resolution than the atlas: reduce blocks of traced texels
			const uint DownsampleFactor = TraceResolution / RadianceProbeResolution;
			const uint WriteTileSize = RADIANCE_CACHE_TRACE_TILE_SIZE_2D / DownsampleFactor;

			// Only the threads that map to an output texel of the tile write
			if (all(GroupThreadId.xy < WriteTileSize))
			{
				float3 Lighting = 0;
				float SkyVisibility = 0.0f;
				float HitDistance = MaxHalfFloat;

				for (uint BlockY = 0; BlockY < DownsampleFactor; BlockY++)
				{
					for (uint BlockX = 0; BlockX < DownsampleFactor; BlockX++)
					{
						const uint SourceY = GroupThreadId.y * DownsampleFactor + BlockY;
						const uint SourceX = GroupThreadId.x * DownsampleFactor + BlockX;

						Lighting += SharedTraceRadiance[SourceY][SourceX];
						#if RADIANCE_CACHE_SKY_VISIBILITY
							SkyVisibility += SharedTraceSkyVisibility[SourceY][SourceX];
						#endif
						// Keep the closest hit of the block
						HitDistance = min(HitDistance, SharedTraceHitDistance[SourceY][SourceX]);
					}
				}

				const uint2 AtlasCoord = ProbeAtlasBaseCoord + WriteTileSize * TraceTileCoord + GroupThreadId.xy;

				RWRadianceProbeAtlasTexture[AtlasCoord] = Lighting / (float)(DownsampleFactor * DownsampleFactor);
				#if RADIANCE_CACHE_SKY_VISIBILITY
					RWSkyVisibilityProbeAtlasTexture[AtlasCoord] = SkyVisibility / (float)(DownsampleFactor * DownsampleFactor);
				#endif
				RWDepthProbeAtlasTexture[AtlasCoord] = HitDistance;
			}
		}
	}
}
|
|
|
|
#endif
|
|
|
|
// Radiance ceiling used to derive the fixed-point scale in the (currently compiled-out)
// LOCAL_SCATTER_FILTER path of FilterProbeRadianceWithGatherCS.
#define MAX_RAY_INTENSITY 10000.0f

// Read-only views of the probe atlases. Each probe occupies a RadianceProbeResolution^2 tile
// whose base texel is derived from the probe index by the kernels below.
Texture2D<float3> RadianceProbeAtlasTexture;
Texture2D<float> SkyVisibilityProbeAtlasTexture;
Texture2D<float> DepthProbeAtlasTexture;
|
|
|
|
#ifdef FilterProbeRadianceWithGatherCS

// Filtered outputs. Sky visibility is only written when RADIANCE_CACHE_SKY_VISIBILITY is enabled.
RWTexture2D<float3> RWRadianceProbeAtlasTexture;
#if RADIANCE_CACHE_SKY_VISIBILITY
RWTexture2D<float> RWSkyVisibilityProbeAtlasTexture;
#endif
// One entry per probe being updated; decoded with GetProbeTraceData.
Buffer<float4> ProbeTraceData;
// Angle at which a neighbor's reprojected hit stops contributing (AngleWeight reaches 0).
float SpatialFilterMaxRadianceHitAngle;

// Fixed-point accumulators (RGB + weight) used only by the LOCAL_SCATTER_FILTER path.
groupshared uint SharedRadiance[4][THREADGROUP_SIZE][THREADGROUP_SIZE];

/**
 * Spatially filters one probe's radiance (and optionally sky visibility) using the six
 * axis-aligned neighbor probes of the same clipmap.
 *
 * GroupId.z selects the entry in ProbeTraceData, DispatchThreadId.xy selects the probe texel.
 * The result is normalized by the accumulated weight and written to the RW atlases at the
 * probe's own tile.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void FilterProbeRadianceWithGatherCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint ProbeTraceIndex = GroupId.z;

	FProbeTraceData TraceData = GetProbeTraceData(ProbeTraceData[ProbeTraceIndex]);

	// Top-left texel of this probe's tile in the atlases.
	uint2 ProbeAtlasBaseCoord = RadianceProbeResolution * uint2(TraceData.ProbeIndex & ProbeAtlasResolutionModuloMask, TraceData.ProbeIndex >> ProbeAtlasResolutionDivideShift);

#define LOCAL_SCATTER_FILTER 0
#if LOCAL_SCATTER_FILTER

	// Alternative scatter formulation, compiled out while LOCAL_SCATTER_FILTER is 0.
	//
	// Load current radiance, store to groupshared
	// For each neighbor
	//		Load in matching tile, reproject, depth weight, accumulate with atomics
	//		Load in randomly sampled outlier regions, etc
	// Normalize and write out

	if (all(DispatchThreadId.xy < RadianceProbeResolution))
	{
		uint2 ProbeTexelCoord = DispatchThreadId.xy;
		float3 Lighting = RadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord].xyz;

		// Lighting is converted to fixed point so contributions can be summed with InterlockedAdd.
		float MaxValuePerThread = (float)0xFFFFFFFF / ((float)THREADGROUP_SIZE * THREADGROUP_SIZE);
		float LightingQuantizeScale = MaxValuePerThread / MAX_RAY_INTENSITY;
		uint3 QuantizedLighting = Lighting * LightingQuantizeScale;
		uint QuantizedWeight = 1.0f * MaxValuePerThread;
		SharedRadiance[0][GroupThreadId.y][GroupThreadId.x] = QuantizedLighting.x;
		SharedRadiance[1][GroupThreadId.y][GroupThreadId.x] = QuantizedLighting.y;
		SharedRadiance[2][GroupThreadId.y][GroupThreadId.x] = QuantizedLighting.z;
		SharedRadiance[3][GroupThreadId.y][GroupThreadId.x] = QuantizedWeight;
	}

	GroupMemoryBarrierWithGroupSync();

	int3 ProbeCoord = GetRadianceProbeCoord(TraceData.ProbeWorldCenter, TraceData.ClipmapIndex);
	// RandSeed / ThreadIndex are only referenced by the disabled randomized sampling further down.
	uint2 RandSeed = Rand3DPCG16(int3(TraceData.ProbeWorldCenter / 10.0f)).xy;
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	int3 Offsets[6];
	Offsets[0] = int3(-1, 0, 0);
	Offsets[1] = int3(1, 0, 0);
	Offsets[2] = int3(0, -1, 0);
	Offsets[3] = int3(0, 1, 0);
	Offsets[4] = int3(0, 0, -1);
	Offsets[5] = int3(0, 0, 1);

	for (uint OffsetIndex = 0; OffsetIndex < 6; OffsetIndex++)
	{
		int3 NeighborCoord = ProbeCoord + Offsets[OffsetIndex];

		if (all(NeighborCoord >= 0) && all(NeighborCoord < (int3)RadianceProbeClipmapResolution))
		{
			uint NeighborProbeIndex = GetProbeIndexFromIndirectionTexture(NeighborCoord, TraceData.ClipmapIndex);

			if (NeighborProbeIndex != INVALID_PROBE_INDEX)
			{
				uint2 NeighborProbeAtlasBaseCoord = RadianceProbeResolution * uint2(NeighborProbeIndex & ProbeAtlasResolutionModuloMask, NeighborProbeIndex >> ProbeAtlasResolutionDivideShift);
				float2 GroupCenter = (GroupId.xy + .5f) * (float)THREADGROUP_SIZE;
				float ExtraKernelTexels = 2.0f;
				//int2 SampleCoord = GroupCenter + (Hammersley16(ThreadIndex, THREADGROUP_SIZE * THREADGROUP_SIZE, RandSeed) - .5f) * (THREADGROUP_SIZE + 2.0f * ExtraKernelTexels);
				int2 SampleCoord = DispatchThreadId.xy;
				uint2 ProbeTexelCoord = (SampleCoord + RadianceProbeResolution) % RadianceProbeResolution;
				float NeighborRadianceDepth = DepthProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord];

				float2 ProbeTexelCenter = float2(0.5, 0.5);
				float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / (float)RadianceProbeResolution;
				float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);

				// Reproject the neighbor's hit point into this probe's texel space.
				// Uses TraceData members, matching the active gather path below.
				float3 NeighborWorldPosition = GetProbeWorldPosition(NeighborCoord, TraceData.ClipmapIndex, NeighborProbeIndex);
				float3 NeighborHitPosition = NeighborWorldPosition + WorldConeDirection * NeighborRadianceDepth;
				float3 ToNeighborHit = NeighborHitPosition - TraceData.ProbeWorldCenter;
				uint2 ProbeTexelCoordForNeighborHit = InverseEquiAreaSphericalMapping(ToNeighborHit) * RadianceProbeResolution;
				float ProbeDepthForNeighborHit = DepthProbeAtlasTexture[ProbeTexelCoordForNeighborHit + ProbeAtlasBaseCoord];
				float VoxelRadius = sqrt(3.0f) * GetRadianceProbeClipmapCellSize(TraceData.ClipmapIndex);

				float DistanceWeight = 1.0f;

				if (ProbeDepthForNeighborHit < 1.0f * (GetRadianceProbeClipmapCellSize(TraceData.ClipmapIndex) + VoxelRadius))
				{
					//@todo - need to trace through neighbor probe depths to see if the ray is occluded near the origin to stop leaking
					DistanceWeight = 0;
				}

				float Weight = DistanceWeight;
				float3 Lighting = RadianceProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord].xyz * Weight;
				int2 SharedMemoryCoord = ProbeTexelCoordForNeighborHit - GroupId.xy * THREADGROUP_SIZE;

				// Only scatter into texels owned by this thread group.
				if (Weight > 0.0f && all(and(SharedMemoryCoord >= 0, SharedMemoryCoord < THREADGROUP_SIZE)))
				{
					float MaxValuePerThread = (float)0xFFFFFFFF / ((float)THREADGROUP_SIZE * THREADGROUP_SIZE);
					float LightingQuantizeScale = MaxValuePerThread / MAX_RAY_INTENSITY;
					uint3 QuantizedLighting = Lighting * LightingQuantizeScale;
					uint QuantizedWeight = Weight * MaxValuePerThread;

					InterlockedAdd(SharedRadiance[0][SharedMemoryCoord.y][SharedMemoryCoord.x], QuantizedLighting.x);
					InterlockedAdd(SharedRadiance[1][SharedMemoryCoord.y][SharedMemoryCoord.x], QuantizedLighting.y);
					InterlockedAdd(SharedRadiance[2][SharedMemoryCoord.y][SharedMemoryCoord.x], QuantizedLighting.z);
					InterlockedAdd(SharedRadiance[3][SharedMemoryCoord.y][SharedMemoryCoord.x], QuantizedWeight);
				}
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	uint2 ProbeTexelCoord = DispatchThreadId.xy;

	if (all(ProbeTexelCoord < RadianceProbeResolution))
	{
		uint3 QuantizedLighting = uint3(
			SharedRadiance[0][GroupThreadId.y][GroupThreadId.x],
			SharedRadiance[1][GroupThreadId.y][GroupThreadId.x],
			SharedRadiance[2][GroupThreadId.y][GroupThreadId.x]);

		float MaxValuePerThread = (float)0xFFFFFFFF / ((float)THREADGROUP_SIZE * THREADGROUP_SIZE);
		float LightingDequantizeScale = MAX_RAY_INTENSITY / MaxValuePerThread;
		float3 Lighting = QuantizedLighting * LightingDequantizeScale;
		float TotalWeight = SharedRadiance[3][GroupThreadId.y][GroupThreadId.x] / MaxValuePerThread;
		RWRadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord] = Lighting / TotalWeight;
	}
#else

	uint2 ProbeTexelCoord = DispatchThreadId.xy;

	if (all(ProbeTexelCoord < RadianceProbeResolution))
	{
		// Seed the accumulation with this probe's own sample at weight 1, so TotalWeight is never zero.
		float3 Lighting = RadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord].xyz;
#if RADIANCE_CACHE_SKY_VISIBILITY
		float SkyVisibility = SkyVisibilityProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord].x;
#endif
		float HitDistance = DepthProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord];
		float TotalWeight = 1.0f;

		// Direction represented by this texel.
		float2 ProbeTexelCenter = float2(0.5, 0.5);
		float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / (float)RadianceProbeResolution;
		float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);

		int3 ProbeCoord = GetRadianceProbeCoord(TraceData.ProbeWorldCenter, TraceData.ClipmapIndex);

		// The six face-adjacent cells of the clipmap grid.
		int3 Offsets[6];
		Offsets[0] = int3(-1, 0, 0);
		Offsets[1] = int3(1, 0, 0);
		Offsets[2] = int3(0, -1, 0);
		Offsets[3] = int3(0, 1, 0);
		Offsets[4] = int3(0, 0, -1);
		Offsets[5] = int3(0, 0, 1);

		for (uint OffsetIndex = 0; OffsetIndex < 6; OffsetIndex++)
		{
			int3 NeighborCoord = ProbeCoord + Offsets[OffsetIndex];

			// Skip cells outside the clipmap grid.
			if (all(NeighborCoord >= 0) && all(NeighborCoord < (int3)RadianceProbeClipmapResolution))
			{
				uint NeighborProbeIndex = GetProbeIndexFromIndirectionTexture(NeighborCoord, TraceData.ClipmapIndex);

				if (NeighborProbeIndex != INVALID_PROBE_INDEX)
				{
					uint2 NeighborProbeAtlasBaseCoord = RadianceProbeResolution * uint2(NeighborProbeIndex & ProbeAtlasResolutionModuloMask, NeighborProbeIndex >> ProbeAtlasResolutionDivideShift);
					float NeighborRadianceDepth = DepthProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord];
					float3 NeighborWorldPosition = GetProbeWorldPosition(NeighborCoord, TraceData.ClipmapIndex, NeighborProbeIndex);

					float OcclusionWeight = 1.0f;

					// Test whether probe can see neighbor probe's ray starting point and if occluded then discard the neighbor radiance to reduce leaking.
					// Need to offset starting point as all probe traces start after GetRadianceProbeTMin and there's no depth information in the region where probe TMin spheres intersect.
					// That offset can't be also too large due to limited probe angular resolution making it pretty inaccurate at connecting paths at larger distances.
					// Also run this test in reverse by checking whether neighbor probe can see probe's ray starting point, which improves chances of finding a thin wall between two probes.
					float OcclusionTestOffset = 2.0f * GetRadianceProbeTMin(TraceData.ClipmapIndex);

					// NOTE(review): both lookups below pass an unnormalized vector to InverseEquiAreaSphericalMapping
					// and use the scaled result directly as a texel coordinate - confirm the helper accepts
					// non-unit input and never returns a UV of exactly 1.

					// Probe to NeighborProbe's ray
					{
						float3 NeighborOcclusionTestPosition = NeighborWorldPosition + OcclusionTestOffset * WorldConeDirection;
						float3 ToNeighborOcclusionPosition = NeighborOcclusionTestPosition - TraceData.ProbeWorldCenter;
						uint2 ProbeTexelCoordForNeighborOcclusionPosition = InverseEquiAreaSphericalMapping(ToNeighborOcclusionPosition) * RadianceProbeResolution;
						float ProbeDepthForNeighborOcclusionPosition = DepthProbeAtlasTexture[ProbeTexelCoordForNeighborOcclusionPosition + ProbeAtlasBaseCoord];

						// Compare squared distances: our stored depth is shorter than the distance to the test point, so it is hidden.
						if (ProbeDepthForNeighborOcclusionPosition * ProbeDepthForNeighborOcclusionPosition < dot(ToNeighborOcclusionPosition, ToNeighborOcclusionPosition))
						{
							OcclusionWeight = 0.0f;
						}
					}

					// NeighborProbe to Probe's ray
					{
						float3 OcclusionTestPosition = TraceData.ProbeWorldCenter + OcclusionTestOffset * WorldConeDirection;
						float3 ToOcclusionPosition = OcclusionTestPosition - NeighborWorldPosition;
						uint2 NeighborProbeTexelCoordForOcclusionPosition = InverseEquiAreaSphericalMapping(ToOcclusionPosition) * RadianceProbeResolution;
						float NeighborProbeDepthForNeighborOcclusionPosition = DepthProbeAtlasTexture[NeighborProbeTexelCoordForOcclusionPosition + NeighborProbeAtlasBaseCoord];

						if (NeighborProbeDepthForNeighborOcclusionPosition * NeighborProbeDepthForNeighborOcclusionPosition < dot(ToOcclusionPosition, ToOcclusionPosition))
						{
							OcclusionWeight = 0.0f;
						}
					}

					// Clamp neighbor's hit distance to our own. This helps preserve contact shadows, as a long neighbor hit distance will cause a small NeighborAngle and bias toward distant lighting.
					if (HitDistance >= 0)
					{
						NeighborRadianceDepth = min(NeighborRadianceDepth, HitDistance);
					}

					// Weight by how far the neighbor's hit point, seen from this probe, deviates from the texel direction.
					float3 NeighborHitPosition = NeighborWorldPosition + WorldConeDirection * NeighborRadianceDepth;
					float3 ToNeighborHit = NeighborHitPosition - TraceData.ProbeWorldCenter;
					float NeighborAngle = acosFast(dot(ToNeighborHit, WorldConeDirection) / length(ToNeighborHit));
					float AngleWeight = 1.0f - saturate(NeighborAngle / SpatialFilterMaxRadianceHitAngle);

					float Weight = AngleWeight * OcclusionWeight;
					Lighting += RadianceProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord].xyz * Weight;
#if RADIANCE_CACHE_SKY_VISIBILITY
					SkyVisibility += SkyVisibilityProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord].x * Weight;
#endif
					TotalWeight += Weight;
				}
			}
		}

		RWRadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord] = Lighting / TotalWeight;
#if RADIANCE_CACHE_SKY_VISIBILITY
		RWSkyVisibilityProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord] = SkyVisibility / TotalWeight;
#endif
	}
#endif
}

#endif
|
|
|
|
#ifdef CalculateProbeIrradianceCS

// Destination irradiance atlas; each probe owns an (IrradianceProbeResolution + 2)^2 tile including a 1 texel border.
RWTexture2D<float3> RWFinalIrradianceAtlas;
// One entry per probe being updated; decoded with GetProbeTraceData.
Buffer<float4> ProbeTraceData;

// Side length of the box-filtered radiance copy kept in groupshared memory.
#define DOWNSAMPLED_RADIANCE_SIZE 8
groupshared float3 SharedDownsampledProbeRadiance[DOWNSAMPLED_RADIANCE_SIZE][DOWNSAMPLED_RADIANCE_SIZE];

/**
 * Computes a cosine-weighted irradiance tile for one probe from its radiance tile.
 *
 * GroupId.z selects the entry in ProbeTraceData; the threads of the group stride across the
 * destination texels. With DOWNSAMPLE_TO_SHARED_MEMORY the radiance tile is first box-filtered
 * to DOWNSAMPLED_RADIANCE_SIZE^2 in groupshared memory and the integration reads from there.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void CalculateProbeIrradianceCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ProbeTraceIndex = GroupId.z;

	FProbeTraceData TraceData = GetProbeTraceData(ProbeTraceData[ProbeTraceIndex]);

	uint DownsampleFactor = RadianceProbeResolution / DOWNSAMPLED_RADIANCE_SIZE;
	// Top-left texel of this probe's tile in the radiance atlas.
	uint2 AtlasBaseCoord = RadianceProbeResolution * uint2(TraceData.ProbeIndex & ProbeAtlasResolutionModuloMask, TraceData.ProbeIndex >> ProbeAtlasResolutionDivideShift);

#define DOWNSAMPLE_TO_SHARED_MEMORY 1
#if DOWNSAMPLE_TO_SHARED_MEMORY
	// Extra scope: the loops further down reuse the DestX / DestY names.
	{
		for (uint DestY = GroupThreadId.y; DestY < DOWNSAMPLED_RADIANCE_SIZE; DestY += THREADGROUP_SIZE)
		{
			for (uint DestX = GroupThreadId.x; DestX < DOWNSAMPLED_RADIANCE_SIZE; DestX += THREADGROUP_SIZE)
			{
				// Box filter a DownsampleFactor x DownsampleFactor footprint of source texels.
				float3 Radiance = 0;

				for (uint YOffset = 0; YOffset < DownsampleFactor; YOffset++)
				{
					for (uint XOffset = 0; XOffset < DownsampleFactor; XOffset++)
					{
						uint2 SourceTexelCoord = uint2(DestX * DownsampleFactor + XOffset, DestY * DownsampleFactor + YOffset);
						Radiance += RadianceProbeAtlasTexture.Load(uint3(AtlasBaseCoord + SourceTexelCoord, 0)).xyz;
					}
				}

				SharedDownsampledProbeRadiance[DestY][DestX] = Radiance / DownsampleFactor / DownsampleFactor;
			}
		}
	}

	// All downsampled texels must be visible before any thread integrates over them.
	GroupMemoryBarrierWithGroupSync();
#endif

	uint IrradianceBorderSize = 1;
	uint OutputIrradianceProbeResolution = IrradianceProbeResolution + 2 * IrradianceBorderSize;

	for (uint DestY = GroupThreadId.y; DestY < OutputIrradianceProbeResolution; DestY += THREADGROUP_SIZE)
	{
		for (uint DestX = GroupThreadId.x; DestX < OutputIrradianceProbeResolution; DestX += THREADGROUP_SIZE)
		{
			// Border texels are remapped by OctahedralMapWrapBorder before the direction is derived.
			uint2 IrradianceProbeTexelCoord = OctahedralMapWrapBorder(uint2(DestX, DestY), OutputIrradianceProbeResolution, IrradianceBorderSize);
			float2 IrradianceProbeUV = (IrradianceProbeTexelCoord + float2(0.5f, 0.5f)) / (float)IrradianceProbeResolution;
			float3 IrradianceDirection = EquiAreaSphericalMapping(IrradianceProbeUV);
			float3 Irradiance = 0;
			float TotalWeight = 0;

			uint SourceProbeResolution = DOWNSAMPLE_TO_SHARED_MEMORY ? DOWNSAMPLED_RADIANCE_SIZE : RadianceProbeResolution;

			// Accumulate every source direction in the positive hemisphere, weighted by NdotL.
			for (uint Y = 0; Y < SourceProbeResolution; Y++)
			{
				for (uint X = 0; X < SourceProbeResolution; X++)
				{
					float2 RadianceProbeUV = (float2(X, Y) + float2(0.5, 0.5)) / (float)SourceProbeResolution;
					float3 RadianceDirection = EquiAreaSphericalMapping(RadianceProbeUV);

					float NdotL = dot(IrradianceDirection, RadianceDirection);

					if (NdotL > 0)
					{
						float SampleWeight = NdotL;
#if DOWNSAMPLE_TO_SHARED_MEMORY
						float3 Radiance = SharedDownsampledProbeRadiance[Y][X];
#else
						float3 Radiance = RadianceProbeAtlasTexture.Load(uint3(AtlasBaseCoord + uint2(X, Y), 0)).xyz;
#endif

						Irradiance += Radiance * SampleWeight;
						TotalWeight += SampleWeight;
					}
				}
			}

			// Normalize by the summed weights.
			Irradiance *= 1.0f / TotalWeight;

			uint2 IrradianceAtlasCoord = uint2(DestX, DestY) + OutputIrradianceProbeResolution * uint2(TraceData.ProbeIndex & ProbeAtlasResolutionModuloMask, TraceData.ProbeIndex >> ProbeAtlasResolutionDivideShift);

			RWFinalIrradianceAtlas[IrradianceAtlasCoord] = Irradiance;
		}
	}
}

#endif
|
|
|
|
#ifdef PrepareProbeOcclusionCS

// Destination atlas storing (mean depth, mean squared depth) per occlusion texel.
RWTexture2D<float2> RWRadianceCacheProbeOcclusionAtlas;
// One entry per probe being updated.
Buffer<float4> ProbeTraceData;

/**
 * Builds the bordered occlusion tile of one probe by averaging depth and squared depth
 * over a block of texels from the probe's depth tile.
 *
 * DispatchThreadId.z selects the entry in ProbeTraceData, DispatchThreadId.xy the destination
 * texel inside the bordered tile.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void PrepareProbeOcclusionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint ProbeIndex = GetProbeIndexFromProbeTraceData(ProbeTraceData[DispatchThreadId.z]);

	// The border is as wide as one texel of the coarsest radiance mip.
	const uint BorderSize = 1u << FinalRadianceAtlasMaxMip;
	const uint BorderedResolution = OcclusionProbeResolution + 2 * BorderSize;
	const uint2 DestTexel = DispatchThreadId.xy;

	// Threads that fall outside the bordered tile have nothing to write.
	if (any(DestTexel >= BorderedResolution))
	{
		return;
	}

	const uint2 WrappedTexel = OctahedralMapWrapBorder(DestTexel, BorderedResolution, BorderSize);
	const uint2 ProbeTileCoord = uint2(ProbeIndex & ProbeAtlasResolutionModuloMask, ProbeIndex >> ProbeAtlasResolutionDivideShift);

	// Each occlusion texel covers KernelSize x KernelSize depth texels.
	const uint KernelSize = RadianceProbeResolution / OcclusionProbeResolution;

	// x accumulates depth, y accumulates depth squared.
	float2 Moments = 0;

	for (uint KernelY = 0; KernelY < KernelSize; KernelY++)
	{
		for (uint KernelX = 0; KernelX < KernelSize; KernelX++)
		{
			// Wrap so the footprint never leaves this probe's depth tile.
			const uint2 DepthTexel = (WrappedTexel * KernelSize + uint2(KernelX, KernelY)) % RadianceProbeResolution;
			const float Depth = DepthProbeAtlasTexture.Load(uint3(DepthTexel + RadianceProbeResolution * ProbeTileCoord, 0));

			Moments += float2(Depth, Depth * Depth);
		}
	}

	// The max() keeps the reciprocal finite when the kernel is empty.
	const float InvSampleCount = 1.0f / max(KernelSize * KernelSize, 1);

	RWRadianceCacheProbeOcclusionAtlas[DestTexel + BorderedResolution * ProbeTileCoord] = Moments * InvSampleCount;
}

#endif
|
|
|
|
#ifdef FixupBordersAndGenerateMipsCS

// Destination mips of the final (bordered) radiance atlas.
RWTexture2D<float3> RWFinalRadianceAtlasMip0;
RWTexture2D<float3> RWFinalRadianceAtlasMip1;
RWTexture2D<float3> RWFinalRadianceAtlasMip2;
#if RADIANCE_CACHE_SKY_VISIBILITY
RWTexture2D<float> RWFinalSkyVisibilityAtlasMip0;
#endif
// One entry per probe being updated.
Buffer<float4> ProbeTraceData;

#if GENERATE_MIPS
// Per-group copies of mip 0 and mip 1 so the coarser mips can be reduced without re-reading the atlas.
groupshared float3 SharedLightingMip0[THREADGROUP_SIZE][THREADGROUP_SIZE];
groupshared float3 SharedLightingMip1[THREADGROUP_SIZE / 2][THREADGROUP_SIZE / 2];
#endif

#if GENERATE_MIPS && THREADGROUP_SIZE != 8
#error THREADGROUP_SIZE wrong size
#endif

/**
 * Copies one probe's radiance (and optionally sky visibility) tile into the final atlas,
 * remapping border texels through OctahedralMapWrapBorder, and optionally generates up to
 * two further radiance mips with 2x2 box filters.
 *
 * DispatchThreadId.z selects the entry in ProbeTraceData, DispatchThreadId.xy the texel inside
 * the bordered FinalProbeResolution tile.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void FixupBordersAndGenerateMipsCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ProbeTraceIndex = DispatchThreadId.z;
	uint ProbeIndex = GetProbeIndexFromProbeTraceData(ProbeTraceData[ProbeTraceIndex]);
	// Tile position of the probe, shared by the source atlas and every destination mip.
	uint2 ProbeCoord = uint2(ProbeIndex & ProbeAtlasResolutionModuloMask, ProbeIndex >> ProbeAtlasResolutionDivideShift);

	uint2 FinalProbeTexelCoord = DispatchThreadId.xy;

	if (all(FinalProbeTexelCoord < FinalProbeResolution))
	{
		// The border is as wide as one texel of the coarsest mip.
		uint2 ProbeTexelCoord = OctahedralMapWrapBorder(FinalProbeTexelCoord, FinalProbeResolution, 1u << FinalRadianceAtlasMaxMip);
		uint2 AtlasCoord = ProbeTexelCoord + RadianceProbeResolution * ProbeCoord;

		uint2 FinalAtlasCoord = FinalProbeTexelCoord + FinalProbeResolution * ProbeCoord;

		float3 Lighting = RadianceProbeAtlasTexture.Load(uint3(AtlasCoord, 0)).xyz;
		RWFinalRadianceAtlasMip0[FinalAtlasCoord] = Lighting;

#if RADIANCE_CACHE_SKY_VISIBILITY
		float SkyVisibility = SkyVisibilityProbeAtlasTexture.Load(uint3(AtlasCoord, 0)).x;
		RWFinalSkyVisibilityAtlasMip0[FinalAtlasCoord] = SkyVisibility;
#endif

#if GENERATE_MIPS
		SharedLightingMip0[GroupThreadId.y][GroupThreadId.x] = Lighting;
#endif
	}

#if GENERATE_MIPS
	if (FinalRadianceAtlasMaxMip > 0)
	{
		// Mip 0 texels of the whole group must be in shared memory before reducing.
		GroupMemoryBarrierWithGroupSync();

		uint MipLevel = 1;
		uint MipSize = FinalProbeResolution >> MipLevel;
		uint ThreadgroupSizeForMip = (uint)THREADGROUP_SIZE >> MipLevel;
		uint2 MipProbeTexelCoord = GroupThreadId.xy + ThreadgroupSizeForMip * GroupId.xy;

		// Only the first quarter of the group's threads produce a mip 1 texel.
		if (all(and(GroupThreadId.xy < ThreadgroupSizeForMip, MipProbeTexelCoord < MipSize)))
		{
			float3 Lighting = 0;
			Lighting += SharedLightingMip0[GroupThreadId.y * 2 + 0][GroupThreadId.x * 2 + 0];
			Lighting += SharedLightingMip0[GroupThreadId.y * 2 + 0][GroupThreadId.x * 2 + 1];
			Lighting += SharedLightingMip0[GroupThreadId.y * 2 + 1][GroupThreadId.x * 2 + 0];
			Lighting += SharedLightingMip0[GroupThreadId.y * 2 + 1][GroupThreadId.x * 2 + 1];

			Lighting /= 4;

			// Keep the result around for the mip 2 reduction.
			SharedLightingMip1[GroupThreadId.y][GroupThreadId.x] = Lighting;
			uint2 FinalAtlasCoord = MipProbeTexelCoord + MipSize * ProbeCoord;
			RWFinalRadianceAtlasMip1[FinalAtlasCoord] = Lighting;
		}
	}

	if (FinalRadianceAtlasMaxMip > 1)
	{
		// Mip 1 texels must be in shared memory before reducing again.
		GroupMemoryBarrierWithGroupSync();

		uint MipLevel = 2;
		uint MipSize = FinalProbeResolution >> MipLevel;
		uint ThreadgroupSizeForMip = (uint)THREADGROUP_SIZE >> MipLevel;
		uint2 MipProbeTexelCoord = GroupThreadId.xy + ThreadgroupSizeForMip * GroupId.xy;

		if (all(and(GroupThreadId.xy < ThreadgroupSizeForMip, MipProbeTexelCoord < MipSize)))
		{
			float3 Lighting = 0;
			Lighting += SharedLightingMip1[GroupThreadId.y * 2 + 0][GroupThreadId.x * 2 + 0];
			Lighting += SharedLightingMip1[GroupThreadId.y * 2 + 0][GroupThreadId.x * 2 + 1];
			Lighting += SharedLightingMip1[GroupThreadId.y * 2 + 1][GroupThreadId.x * 2 + 0];
			Lighting += SharedLightingMip1[GroupThreadId.y * 2 + 1][GroupThreadId.x * 2 + 1];

			uint2 FinalAtlasCoord = MipProbeTexelCoord + MipSize * ProbeCoord;
			RWFinalRadianceAtlasMip2[FinalAtlasCoord] = Lighting / 4;
		}
	}
#endif
}

#endif
|