Files
UnrealEngine/Engine/Shaders/Private/Lumen/LumenScreenProbeImportanceSampling.usf
2025-05-18 13:04:45 +08:00

747 lines
27 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "../Common.ush"
#include "LumenMaterial.ush"
#include "../DeferredShadingCommon.ush"
#include "LumenRadianceCacheCommon.ush"
#include "LumenScreenProbeCommon.ush"
#include "LumenScreenProbeTracingCommon.ush"
#include "../SHCommon.ush"
#include "LumenScreenProbeImportanceSamplingShared.ush"
// Fallback thread group sizes, used only when the including compilation has not already defined them.
#ifndef THREADGROUP_SIZE
#define THREADGROUP_SIZE 1
#endif
#ifndef LIGHTING_PDF_THREADGROUP_SIZE
#define LIGHTING_PDF_THREADGROUP_SIZE 1
#endif
#ifndef GENERATE_RAYS_THREADGROUP_SIZE
#define GENERATE_RAYS_THREADGROUP_SIZE 1
#endif
// Per-texel BRDF PDF atlas. Written by ScreenProbeComputeBRDFProbabilityDensityFunctionCS in its non-SH path
// and in its optional SH visualization path.
RWTexture2D<float> RWBRDFProbabilityDensityFunction;
// BRDF PDF stored as spherical harmonics: NUM_PDF_SH_COEFFICIENTS consecutive floats per screen probe,
// addressed as ProbeIndex * NUM_PDF_SH_COEFFICIENTS + CoefficientIndex.
RWBuffer<float> RWBRDFProbabilityDensityFunctionSH;
// One plane per thread (xyz = world normal, w = dot(normal, world position)); only used by the non-SH BRDF PDF path.
groupshared float4 PixelPlanes[PROBE_THREADGROUP_SIZE_2D][PROBE_THREADGROUP_SIZE_2D];
// Rows of SH coefficients. The first NumSphericalHarmonics rows hold the per-pixel SH appended by the BRDF PDF pass,
// the rows after them receive the partial sums of each reduction pass (hence the 2x row count).
groupshared float PDF_SphericalHarmonic[PROBE_THREADGROUP_SIZE_2D * PROBE_THREADGROUP_SIZE_2D * 2][NUM_PDF_SH_COEFFICIENTS];
// Number of rows appended to PDF_SphericalHarmonic by the current thread group (incremented with InterlockedAdd).
groupshared uint NumSphericalHarmonics;
// Rebuilds the SH vector stored in row ThreadIndex of the PDF_SphericalHarmonic groupshared array.
// Coefficients 0-3 map to V0, 4-7 to V1 and coefficient 8 to V2.
FThreeBandSHVector GetGroupSharedSH(uint ThreadIndex)
{
	FThreeBandSHVector Result;

	Result.V0 = float4(
		PDF_SphericalHarmonic[ThreadIndex][0],
		PDF_SphericalHarmonic[ThreadIndex][1],
		PDF_SphericalHarmonic[ThreadIndex][2],
		PDF_SphericalHarmonic[ThreadIndex][3]);

	Result.V1 = float4(
		PDF_SphericalHarmonic[ThreadIndex][4],
		PDF_SphericalHarmonic[ThreadIndex][5],
		PDF_SphericalHarmonic[ThreadIndex][6],
		PDF_SphericalHarmonic[ThreadIndex][7]);

	Result.V2.x = PDF_SphericalHarmonic[ThreadIndex][8];

	return Result;
}
// Stores the nine coefficients of SH into row ThreadIndex of the PDF_SphericalHarmonic groupshared array.
// Layout mirrors GetGroupSharedSH: V0 -> [0..3], V1 -> [4..7], V2 -> [8].
void WriteGroupSharedSH(FThreeBandSHVector SH, uint ThreadIndex)
{
	const float4 FirstFour = SH.V0;
	const float4 NextFour = SH.V1;

	PDF_SphericalHarmonic[ThreadIndex][0] = FirstFour.x;
	PDF_SphericalHarmonic[ThreadIndex][1] = FirstFour.y;
	PDF_SphericalHarmonic[ThreadIndex][2] = FirstFour.z;
	PDF_SphericalHarmonic[ThreadIndex][3] = FirstFour.w;

	PDF_SphericalHarmonic[ThreadIndex][4] = NextFour.x;
	PDF_SphericalHarmonic[ThreadIndex][5] = NextFour.y;
	PDF_SphericalHarmonic[ThreadIndex][6] = NextFour.z;
	PDF_SphericalHarmonic[ThreadIndex][7] = NextFour.w;

	PDF_SphericalHarmonic[ThreadIndex][8] = SH.V2.x;
}
// Computes the BRDF probability density function of one screen probe per thread group (GroupId.xy is the probe atlas coordinate).
//
// SH path (BRDF_PDF_SPHERICAL_HARMONIC == 1, the path that is compiled):
//  1. Each thread reads the material of one pixel in a footprint of +-ScreenProbeDownsampleFactor pixels around the probe.
//  2. Pixels whose plane passes too far from the probe position are rejected; the others append one SH vector to groupshared memory.
//  3. The appended SH vectors are summed with a 4:1 tree reduction.
//  4. The sum, divided by the number of accepted pixels, is written to RWBRDFProbabilityDensityFunctionSH.
//
// Non-SH path (kept for reference): stores every pixel plane in groupshared memory, then writes for each BRDF octahedron texel
// the maximum clamped NdotL over all accepted planes to RWBRDFProbabilityDensityFunction.
//
// All GroupMemoryBarrierWithGroupSync calls sit under conditions that are derived from GroupId only.
[numthreads(PROBE_THREADGROUP_SIZE_2D, PROBE_THREADGROUP_SIZE_2D, 1)]
void ScreenProbeComputeBRDFProbabilityDensityFunctionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint2 ScreenProbeAtlasCoord = GroupId.xy;
	uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;
	uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);

	if (ScreenProbeIndex < GetNumScreenProbes() && ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x)
	{
		float ProbeSceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);

		if (ProbeSceneDepth > 0)
		{
			// Note: this define is also tested later in the file by ScreenProbeGenerateRaysCS
#define BRDF_PDF_SPHERICAL_HARMONIC 1
#if BRDF_PDF_SPHERICAL_HARMONIC
			uint ThreadIndex = GroupThreadId.y * PROBE_THREADGROUP_SIZE_2D + GroupThreadId.x;

			if (ThreadIndex == 0)
			{
				NumSphericalHarmonics = 0;
			}

			GroupMemoryBarrierWithGroupSync();

			float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);

			{
				// Spread the threads over [-1, 1] * ScreenProbeDownsampleFactor pixels around the probe
				float2 ThreadOffset = ((GroupThreadId.xy + 0.5f) / (float)PROBE_THREADGROUP_SIZE_2D * 2.0f - 1.0f) * ScreenProbeDownsampleFactor;
				bool bCenterSample = all(GroupThreadId.xy == PROBE_THREADGROUP_SIZE_2D / 2);

				if (bCenterSample)
				{
					// Make sure we have at least one pixel that won't be rejected by the depth weight
					ThreadOffset = 0;
				}

				float2 PixelScreenUV = ScreenUV + ThreadOffset * View.BufferSizeAndInvSize.zw;
				// Keep the sample inside the view rect
				PixelScreenUV = clamp(PixelScreenUV, View.ViewRectMin.xy * View.BufferSizeAndInvSize.zw, (View.ViewRectMin.xy + View.ViewSizeAndInvSize.xy - 1) * View.BufferSizeAndInvSize.zw);
				const uint2 PixelPos = PixelScreenUV * View.BufferSizeAndInvSize.xy;

				const FLumenMaterialData Material = ReadMaterialData(PixelPos, PixelScreenUV);
				float3 PixelPosition = GetWorldPositionFromScreenUV(PixelScreenUV, Material.SceneDepth);
				float4 PixelPlane = float4(Material.WorldNormal, dot(Material.WorldNormal, PixelPosition));

				// Distance from the probe position to the pixel's plane, relative to the probe depth
				float3 ProbeWorldPosition = GetWorldPositionFromScreenUV(ScreenUV, ProbeSceneDepth);
				float PlaneDistance = abs(dot(float4(ProbeWorldPosition, -1), PixelPlane));
				float RelativeDepthDifference = PlaneDistance / ProbeSceneDepth;
				float DepthWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference));

				if (DepthWeight > .1f || bCenterSample)
				{
					// Reserve the next free row of PDF_SphericalHarmonic
					uint Index;
					InterlockedAdd(NumSphericalHarmonics, 1, Index);

					FThreeBandSHVector BRDF;

					if (HasSphericalVisibility(Material))
					{
						// Avoid culling directions that the shading models will sample
						BRDF = (FThreeBandSHVector)0;
						BRDF.V0.x = 1.0f;
					}
					else
					{
						BRDF = CalcDiffuseTransferSH3(Material.WorldNormal, 1.0f);
					}

					WriteGroupSharedSH(BRDF, Index);
				}
			}

			GroupMemoryBarrierWithGroupSync();

			// 4:1 tree reduction. Each pass reads rows [Offset, Offset + NumSHToAccumulate) and appends its partial sums right after them,
			// so reads and writes of a pass never overlap.
			uint NumSHToAccumulate = NumSphericalHarmonics;
			uint Offset = 0;

			while (NumSHToAccumulate > 1)
			{
				uint ThreadBaseIndex = ThreadIndex * 4;

				if (ThreadBaseIndex < NumSHToAccumulate)
				{
					FThreeBandSHVector PDF = GetGroupSharedSH(ThreadBaseIndex + Offset);

					if (ThreadBaseIndex + 1 < NumSHToAccumulate)
					{
						PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 1 + Offset));
					}

					if (ThreadBaseIndex + 2 < NumSHToAccumulate)
					{
						PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 2 + Offset));
					}

					if (ThreadBaseIndex + 3 < NumSHToAccumulate)
					{
						PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 3 + Offset));
					}

					WriteGroupSharedSH(PDF, ThreadIndex + Offset + NumSHToAccumulate);
				}

				Offset += NumSHToAccumulate;
				NumSHToAccumulate = (NumSHToAccumulate + 3) / 4;
				GroupMemoryBarrierWithGroupSync();
			}

			// Row Offset now holds the sum; the first NUM_PDF_SH_COEFFICIENTS threads each write one averaged coefficient
			if (ThreadIndex < NUM_PDF_SH_COEFFICIENTS)
			{
				uint WriteIndex = (ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x) * NUM_PDF_SH_COEFFICIENTS + ThreadIndex;
				// The center sample is always accepted, so NumSphericalHarmonics is at least 1 here
				float NormalizeWeight = 1.0f / (float)(NumSphericalHarmonics);
				RWBRDFProbabilityDensityFunctionSH[WriteIndex] = PDF_SphericalHarmonic[Offset][ThreadIndex] * NormalizeWeight;
			}

			// 'vis Lumen.ScreenProbeGather.BRDFProbabilityDensityFunction uv1'
#define VISUALIZE_BRDF_PDF_SPHERICAL_HARMONIC 0
#if VISUALIZE_BRDF_PDF_SPHERICAL_HARMONIC
			uint2 TexelCoord = GroupThreadId.xy;

			if (all(TexelCoord < ScreenProbeBRDFOctahedronResolution))
			{
				FThreeBandSHVector BRDF;
				float NormalizeWeight = 1.0f / (float)(NumSphericalHarmonics);
				BRDF.V0.x = PDF_SphericalHarmonic[Offset][0] * NormalizeWeight;
				BRDF.V0.y = PDF_SphericalHarmonic[Offset][1] * NormalizeWeight;
				BRDF.V0.z = PDF_SphericalHarmonic[Offset][2] * NormalizeWeight;
				BRDF.V0.w = PDF_SphericalHarmonic[Offset][3] * NormalizeWeight;
				BRDF.V1.x = PDF_SphericalHarmonic[Offset][4] * NormalizeWeight;
				BRDF.V1.y = PDF_SphericalHarmonic[Offset][5] * NormalizeWeight;
				BRDF.V1.z = PDF_SphericalHarmonic[Offset][6] * NormalizeWeight;
				BRDF.V1.w = PDF_SphericalHarmonic[Offset][7] * NormalizeWeight;
				BRDF.V2.x = PDF_SphericalHarmonic[Offset][8] * NormalizeWeight;

				// Evaluate the averaged SH in the direction of this octahedron texel
				float2 ProbeTexelCenter = float2(0.5, 0.5);
				float2 ProbeUV = (TexelCoord + ProbeTexelCenter) / (float)ScreenProbeBRDFOctahedronResolution;
				float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);

				FThreeBandSHVector DirectionSH = SHBasisFunction3(WorldConeDirection);
				float PDF = max(DotSH3(BRDF, DirectionSH), 0);
				RWBRDFProbabilityDensityFunction[ScreenProbeAtlasCoord * ScreenProbeBRDFOctahedronResolution + TexelCoord] = PDF;
			}
#endif
#else
			float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);

			{
				float2 ThreadOffset = ((GroupThreadId.xy + 0.5f) / (float)PROBE_THREADGROUP_SIZE_2D * 2.0f - 1.0f) * ScreenProbeDownsampleFactor;

				if (all(GroupThreadId.xy == PROBE_THREADGROUP_SIZE_2D / 2))
				{
					// Make sure we have at least one pixel that won't be rejected by the depth weight
					ThreadOffset = 0;
				}

				float2 PixelScreenUV = ScreenUV + ThreadOffset * View.BufferSizeAndInvSize.zw;
				PixelScreenUV = clamp(PixelScreenUV, View.ViewRectMin.xy * View.BufferSizeAndInvSize.zw, (View.ViewRectMin.xy + View.ViewSizeAndInvSize.xy - 1) * View.BufferSizeAndInvSize.zw);
				const uint2 PixelPos = PixelScreenUV * View.BufferSizeAndInvSize.xy;

				const FLumenMaterialData Material = ReadMaterialData(PixelPos, PixelScreenUV);
				float3 PixelPosition = GetWorldPositionFromScreenUV(PixelScreenUV, Material.SceneDepth);
				float4 PixelPlane = float4(Material.WorldNormal, dot(Material.WorldNormal, PixelPosition));
				PixelPlanes[GroupThreadId.x][GroupThreadId.y] = PixelPlane;
			}

			GroupMemoryBarrierWithGroupSync();

			float3 ProbeWorldPosition = GetWorldPositionFromScreenUV(ScreenUV, ProbeSceneDepth);

			// Threads stride over the BRDF octahedron texels in steps of the group size
			for (uint TexelCoordY = GroupThreadId.y; TexelCoordY < ScreenProbeBRDFOctahedronResolution; TexelCoordY += PROBE_THREADGROUP_SIZE_2D)
			{
				for (uint TexelCoordX = GroupThreadId.x; TexelCoordX < ScreenProbeBRDFOctahedronResolution; TexelCoordX += PROBE_THREADGROUP_SIZE_2D)
				{
					float2 PDFTexelCoord = float2(TexelCoordX, TexelCoordY);
					float2 ProbeTexelCenter = float2(0.5, 0.5);
					float2 ProbeUV = (PDFTexelCoord + ProbeTexelCenter) / (float)ScreenProbeBRDFOctahedronResolution;
					float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);

					float PDF = 0.0f;

					for (uint Y = 0; Y < PROBE_THREADGROUP_SIZE_2D; Y++)
					{
						for (uint X = 0; X < PROBE_THREADGROUP_SIZE_2D; X++)
						{
							float4 PixelPlane = PixelPlanes[X][Y];
							float PlaneDistance = abs(dot(float4(ProbeWorldPosition, -1), PixelPlane));
							// Normalize by the probe depth, same as the SH path. This function has no 'SceneDepth' variable.
							float RelativeDepthDifference = PlaneDistance / ProbeSceneDepth;
							float DepthWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference));

							if (DepthWeight > .1f)
							{
								float Bias = 0.0f;
								float NdotL = max((1.0f - Bias) * dot(WorldConeDirection, PixelPlane.xyz) + Bias, 0.0f);
								//@todo - integrate over texel. BRDF is evaluated at lower resolution than final integral.
								PDF = max(PDF, NdotL);
							}
						}
					}

					RWBRDFProbabilityDensityFunction[ScreenProbeAtlasCoord * ScreenProbeBRDFOctahedronResolution + uint2(TexelCoordX, TexelCoordY)] = PDF;
				}
			}
#endif
		}
	}
}
// Output of ScreenProbeComputeLightingProbabilityDensityFunctionCS: per tracing-octahedron texel, the texel's lighting PDF
// divided by the sum over the probe.
RWTexture2D<float> RWLightingProbabilityDensityFunction;
// Scale (xy) and bias (wz) applied to the reprojected history screen position to obtain the history screen UV.
float4 ProbeHistoryScreenPositionScaleBias;
// Valid history UV range: xy = min, zw = max. History outside this range (plus half a texel) is rejected.
float4 ImportanceSamplingHistoryUVMinMax;
// NOTE(review): not referenced by the code visible in this file section - confirm whether it is still needed.
float ImportanceSamplingHistoryDistanceThreshold;
// Multiplied with View.PreExposure to bring last frame's radiance to the current exposure.
float PrevInvPreExposure;
// Last frame's screen probe data, read when PROBE_RADIANCE_HISTORY is enabled.
Texture2D<uint> HistoryScreenProbeSceneDepth;
Texture2D<float3> HistoryScreenProbeRadiance;
Texture2D<float3> HistoryScreenProbeTranslatedWorldPosition;
// Scratch for the lighting PDF sum: the per-texel values come first, each reduction pass appends its partial sums after them.
groupshared float SharedPDF[LIGHTING_PDF_THREADGROUP_SIZE * LIGHTING_PDF_THREADGROUP_SIZE * 4];
// Computes the normalized lighting probability density function of one screen probe per thread group
// (GroupId.xy is the probe atlas coordinate, GroupThreadId.xy the tracing octahedron texel).
//
//  1. Each texel estimates incoming lighting: from last frame's probe radiance when PROBE_RADIANCE_HISTORY is enabled,
//     then from the radiance cache when RADIANCE_CACHE is enabled and covers the probe, otherwise a constant 1.
//  2. The luminance of that estimate is the texel's unnormalized PDF.
//  3. The PDFs of the probe are summed with a 4:1 tree reduction in groupshared memory.
//  4. Each texel writes PDF / max(Sum, 0.0001) to RWLightingProbabilityDensityFunction.
//
// The group syncs sit under a condition that is derived from GroupId only.
[numthreads(LIGHTING_PDF_THREADGROUP_SIZE, LIGHTING_PDF_THREADGROUP_SIZE, 1)]
void ScreenProbeComputeLightingProbabilityDensityFunctionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint2 ScreenProbeAtlasCoord = GroupId.xy;
	uint2 ProbeTexelCoord = GroupThreadId.xy;
	uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;

	uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);
	float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);
	float SceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);

	if (ScreenProbeIndex < GetNumScreenProbes()
		&& ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x
		&& SceneDepth > 0)
	{
		float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, SceneDepth);
		float PDF = 0.0f;

		if (all(ProbeTexelCoord < ScreenProbeTracingOctahedronResolution))
		{
			float3 Lighting = 0;
			// Fraction of the estimate that still has to come from the fallback sources below
			float Transparency = 1;

#if PROBE_RADIANCE_HISTORY
			// Reproject into last frame's probe depth buffer
			// Interpolate from neighboring probes last frame lighting, with position error weight
			float2 ScreenPosition = (ScreenUV - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
			float3 HistoryScreenPosition = GetHistoryScreenPosition(ScreenPosition, ScreenUV, ConvertToDeviceZ(SceneDepth));
			float2 HistoryScreenUV = HistoryScreenPosition.xy * ProbeHistoryScreenPositionScaleBias.xy + ProbeHistoryScreenPositionScaleBias.wz;

			float EffectiveHistoryWeight = 1.0f;

			// Reject history that reprojects outside of last frame's valid UV range
			FLATTEN
			if (any(HistoryScreenUV > ImportanceSamplingHistoryUVMinMax.zw + .5f * View.BufferSizeAndInvSize.zw) || any(HistoryScreenUV < ImportanceSamplingHistoryUVMinMax.xy - .5f * View.BufferSizeAndInvSize.zw))
			{
				EffectiveHistoryWeight = 0.0f;
			}

			if (EffectiveHistoryWeight > 0.0f)
			{
				// Previous frame's index in the 8 frame cycle. Adding 7 instead of subtracting 1 keeps the left operand of % non-negative,
				// so frame 0 maps to 7 rather than to a negative value that would wrap when stored in a uint.
				uint HistoryTemporalIndex = (FixedJitterIndex < 0 ? ((int)View.StateFrameIndexMod8 + 7) % 8 : FixedJitterIndex);
				float2 UnclampedHistoryScreenProbeCoord = GetScreenTileCoordFromScreenUV(HistoryScreenUV, HistoryTemporalIndex);

				float3 SceneNormal = GetScreenProbeNormal(ScreenProbeAtlasCoord);
				float4 ScenePlane = float4(SceneNormal, dot(WorldPosition, SceneNormal));

				float3 HistoryRadiance = 0;
				float HistoryRadianceWeight = 0;

				// Visit the 2x2 history probes around the reprojected position
				for (float Y = 0; Y < 2; Y++)
				{
					for (float X = 0; X < 2; X++)
					{
						uint2 NeighborHistoryScreenProbeCoord = clamp(UnclampedHistoryScreenProbeCoord + float2(X, Y), float2(0, 0), float2(ScreenProbeViewSize - 1));
						float NeighborHistoryDepth = GetScreenProbeDepth(NeighborHistoryScreenProbeCoord, HistoryScreenProbeSceneDepth);

						if (NeighborHistoryDepth >= 0)
						{
							float3 NeighborWorldPosition = HistoryScreenProbeTranslatedWorldPosition[NeighborHistoryScreenProbeCoord] - DFHackToFloat(PrimaryView.PrevPreViewTranslation);

							// Binary weight: accept the neighbor only if it lies close to this probe's plane
							float PlaneDistance = abs(dot(float4(NeighborWorldPosition, -1), ScenePlane));
							float RelativeDepthDifference = PlaneDistance / SceneDepth;
							float PositionWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference)) > .1f ? 1.0f : 0.0f;
							float HistoryWeight = EffectiveHistoryWeight * PositionWeight;

							uint2 HistoryRadianceCoord = NeighborHistoryScreenProbeCoord * ScreenProbeGatherOctahedronResolution + ProbeTexelCoord;
							HistoryRadiance += HistoryScreenProbeRadiance.Load(int3(HistoryRadianceCoord, 0)).xyz * (PrevInvPreExposure * View.PreExposure * HistoryWeight);
							HistoryRadianceWeight += HistoryWeight;
						}
					}
				}

				if (HistoryRadianceWeight > 0)
				{
					Lighting = HistoryRadiance / HistoryRadianceWeight;
				}

				// All four neighbors accepted -> history fully covers this texel
				Transparency = 1.0f - saturate(HistoryRadianceWeight / 4.0f);
			}
#endif

			uint2 ScreenTileCoord = GetScreenTileCoord(ScreenProbeScreenPosition);
			float2 ProbeTexelCenter = GetProbeTexelCenter(ScreenTileCoord);
			float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / float(ScreenProbeTracingOctahedronResolution);

			if (Transparency > 0.0f)
			{
#if RADIANCE_CACHE
				float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);
				float ConeHalfAngle = acosFast(1.0f - 1.0f / (float)(ScreenProbeTracingOctahedronResolution * ScreenProbeTracingOctahedronResolution));

				FRadianceCacheCoverage Coverage = GetRadianceCacheCoverage(WorldPosition, WorldConeDirection, 0);

				if (Coverage.bValid)
				{
					Lighting += SampleRadianceCacheInterpolated(Coverage, WorldPosition, WorldConeDirection, ConeHalfAngle, /*RandomScalarForStochasticInterpolation*/ 0.5f).Radiance * Transparency;
				}
				else
#endif
				{
					//@todo - skylight
					Lighting = 1;
				}
			}

			PDF = Luminance(Lighting);
			SharedPDF[GroupThreadId.y * ScreenProbeTracingOctahedronResolution + GroupThreadId.x] = PDF;
		}

		GroupMemoryBarrierWithGroupSync();

		// 4:1 tree reduction. Each pass reads [Offset, Offset + NumValuesToAccumulate) and appends its partial sums right after that range.
		uint ThreadIndex = GroupThreadId.y * ScreenProbeTracingOctahedronResolution + GroupThreadId.x;
		uint NumValuesToAccumulate = ScreenProbeTracingOctahedronResolution * ScreenProbeTracingOctahedronResolution;
		uint Offset = 0;

		while (NumValuesToAccumulate > 1)
		{
			uint ThreadBaseIndex = ThreadIndex * 4;

			if (ThreadBaseIndex < NumValuesToAccumulate)
			{
				float LocalPDF = SharedPDF[ThreadBaseIndex + Offset];

				if (ThreadBaseIndex + 1 < NumValuesToAccumulate)
				{
					LocalPDF += SharedPDF[ThreadBaseIndex + 1 + Offset];
				}

				if (ThreadBaseIndex + 2 < NumValuesToAccumulate)
				{
					LocalPDF += SharedPDF[ThreadBaseIndex + 2 + Offset];
				}

				if (ThreadBaseIndex + 3 < NumValuesToAccumulate)
				{
					LocalPDF += SharedPDF[ThreadBaseIndex + 3 + Offset];
				}

				SharedPDF[ThreadIndex + Offset + NumValuesToAccumulate] = LocalPDF;
			}

			Offset += NumValuesToAccumulate;
			NumValuesToAccumulate = (NumValuesToAccumulate + 3) / 4;
			GroupMemoryBarrierWithGroupSync();
		}

		if (all(ProbeTexelCoord < ScreenProbeTracingOctahedronResolution))
		{
			// SharedPDF[Offset] holds the sum over the probe; clamp to avoid dividing by zero
			float PDFSum = SharedPDF[Offset];
			RWLightingProbabilityDensityFunction[ScreenProbeAtlasCoord * ScreenProbeTracingOctahedronResolution + ProbeTexelCoord] = PDF / max(PDFSum, 0.0001f);
		}
	}
}
// Stores a RayInfo indirection entry for each ray to trace of a Screen Probe, used by tracing passes
RWTexture2D<uint> RWStructuredImportanceSampledRayInfosForTracing;
// Per-texel BRDF PDF atlas, read by ScreenProbeGenerateRaysCS only when BRDF_PDF_SPHERICAL_HARMONIC is disabled.
Texture2D<float> BRDFProbabilityDensityFunction;
// Normalized lighting PDF per tracing-octahedron texel, read when IMPORTANCE_SAMPLE_LIGHTING is enabled.
Texture2D<float> LightingProbabilityDensityFunction;
// Rays whose PDF is below this value are candidates for being replaced by subdivisions of higher PDF rays.
float MinPDFToTrace;
#define GENERATE_RAYS_NUM_TOTAL_THREADS (GENERATE_RAYS_THREADGROUP_SIZE * GENERATE_RAYS_THREADGROUP_SIZE)
// Packed ray sort infos (see PackRaySortInfo). ScreenProbeGenerateRaysCS writes the unsorted rays at the start of the array
// and the sorted copy starting at index TracingResolution^2.
groupshared uint2 RaysToRefine[GENERATE_RAYS_NUM_TOTAL_THREADS * 2];
// Number of high PDF rays that have been chosen for subdivision by the current thread group.
groupshared uint NumRaysToSubdivide;
// Packs a ray for sorting.
// x: bits 0-7 = TexelCoord.x, bits 8-15 = TexelCoord.y, bits 16-23 = Level (each truncated to 8 bits).
// y: raw bit pattern of PDF.
uint2 PackRaySortInfo(uint2 TexelCoord, uint Level, float PDF)
{
	const uint PackedX = TexelCoord.x & 0xFF;
	const uint PackedY = (TexelCoord.y & 0xFF) << 8;
	const uint PackedLevel = (Level & 0xFF) << 16;

	uint2 RaySortInfo;
	RaySortInfo.x = PackedX | PackedY | PackedLevel;
	RaySortInfo.y = asuint(PDF);
	return RaySortInfo;
}
// Inverse of PackRaySortInfo: extracts the 8 bit texel coordinates and level from RaySortInfo.x
// and reinterprets RaySortInfo.y as the float PDF.
void UnpackRaySortInfo(uint2 RaySortInfo, out uint2 TexelCoord, out uint Level, out float PDF)
{
	const uint PackedCoordAndLevel = RaySortInfo.x;

	TexelCoord = uint2(PackedCoordAndLevel, PackedCoordAndLevel >> 8) & 0xFF;
	Level = (PackedCoordAndLevel >> 16) & 0xFF;
	PDF = asfloat(RaySortInfo.y);
}
// Flattens a 2d coordinate into a row-major linear index for a grid that is Size.x wide.
uint Compute1dIndex(uint2 Coord, uint2 Size)
{
	const uint RowStart = Coord.y * Size.x;
	return RowStart + Coord.x;
}
// Writes the packed (SourceTexelCoord, SourceLevel) ray info for ray TracingRayCoord of probe ScreenProbeCoord.
// Nothing is written when TracingRayCoord.x is INVALID_TRACING_COORD.
void WriteRay(uint2 ScreenProbeCoord, uint2 TracingRayCoord, uint2 SourceTexelCoord, uint SourceLevel)
{
	if (TracingRayCoord.x == INVALID_TRACING_COORD)
	{
		return;
	}

	const uint2 TraceBufferCoord = GetTraceBufferCoord(ScreenProbeCoord, TracingRayCoord);
	RWStructuredImportanceSampledRayInfosForTracing[TraceBufferCoord] = PackRayInfo(SourceTexelCoord, SourceLevel);
}
// Single threaded reference implementation of ray refinement, operating in place on the first
// TracingResolution^2 entries of RaysToRefine.
// Every pass picks the subdividable ray (level > 0) with the highest PDF and the first three rays whose PDF is below MinPDFToTrace,
// then replaces those four rays with the four children of the picked ray. Stops as soon as a pass cannot refine.
// Always returns 0, which is the offset of the refined rays inside RaysToRefine.
uint SubdivideRaysSerialReference(uint2 GroupThreadId)
{
	// Note: Set r.Lumen.ScreenProbeGather.ImportanceSample.NumLevels=3 when using reference to allow multi-level subdivision
	const uint RayCount = ScreenProbeTracingOctahedronResolution * ScreenProbeTracingOctahedronResolution;
	const uint LinearThreadIndex = GroupThreadId.y * ScreenProbeTracingOctahedronResolution + GroupThreadId.x;

	// Only the first thread does the work
	if (LinearThreadIndex != 0)
	{
		return 0;
	}

	bool bRefinedLastPass = true;

	for (uint PassIndex = 0; PassIndex < RayCount && bRefinedLastPass; PassIndex++)
	{
		// RayCount is used as the 'not found' marker for all indices below
		float BestPDF = 0.0f;
		uint BestRayIndex = RayCount;
		uint CullableRayIndices[3] = { RayCount, RayCount, RayCount };
		uint NumCullableRays = 0;

		// Find the ray with the highest PDF, and 3 rays that can be culled
		for (uint RayIndex = 0; RayIndex < RayCount; RayIndex++)
		{
			uint2 CandidateTexelCoord;
			uint CandidateLevel;
			float CandidatePDF;
			UnpackRaySortInfo(RaysToRefine[RayIndex], CandidateTexelCoord, CandidateLevel, CandidatePDF);

			if (CandidatePDF > BestPDF && CandidatePDF >= MinPDFToTrace && CandidateLevel > 0)
			{
				BestPDF = CandidatePDF;
				BestRayIndex = RayIndex;
			}

			if (CandidatePDF < MinPDFToTrace && NumCullableRays < 3)
			{
				CullableRayIndices[NumCullableRays] = RayIndex;
				NumCullableRays++;
			}
		}

		bRefinedLastPass = NumCullableRays == 3 && BestPDF >= MinPDFToTrace;

		// Subdivide the highest PDF ray
		if (bRefinedLastPass)
		{
			uint2 ParentTexelCoord;
			uint ParentLevel;
			float ParentPDF;
			UnpackRaySortInfo(RaysToRefine[BestRayIndex], ParentTexelCoord, ParentLevel, ParentPDF);

			// Children share the parent's PDF, but never drop below the trace threshold
			const float ChildPDF = max(ParentPDF / 4.0f, MinPDFToTrace);
			const uint ChildLevel = ParentLevel - 1;
			const uint2 ChildBaseCoord = ParentTexelCoord * 2;

			RaysToRefine[BestRayIndex] = PackRaySortInfo(ChildBaseCoord + uint2(0, 0), ChildLevel, ChildPDF);
			RaysToRefine[CullableRayIndices[0]] = PackRaySortInfo(ChildBaseCoord + uint2(1, 0), ChildLevel, ChildPDF);
			RaysToRefine[CullableRayIndices[1]] = PackRaySortInfo(ChildBaseCoord + uint2(0, 1), ChildLevel, ChildPDF);
			RaysToRefine[CullableRayIndices[2]] = PackRaySortInfo(ChildBaseCoord + uint2(1, 1), ChildLevel, ChildPDF);
		}
	}

	return 0;
}
// Generates the importance sampled rays of one screen probe per thread group (GroupId.xy is the probe atlas coordinate).
//  1. Each texel of the tracing octahedron computes a PDF (BRDF PDF, optionally scaled by the lighting PDF) and stores it as a packed ray.
//  2. The rays are sorted by ascending PDF into the second half of RaysToRefine.
//  3. Every packet of three lowest PDF rays that are all below MinPDFToTrace is replaced, together with one of the highest PDF rays,
//     by the four children of that high PDF ray (one level finer).
//  4. The resulting rays are written out through WriteRay.
// The group syncs sit under a condition that is derived from GroupId only.
[numthreads(GENERATE_RAYS_THREADGROUP_SIZE, GENERATE_RAYS_THREADGROUP_SIZE, 1)]
void ScreenProbeGenerateRaysCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
// Level assigned to the unrefined rays: index of the highest set bit of (max importance sampling resolution / tracing resolution)
uint UniformLevel = firstbithigh(MaxImportanceSamplingOctahedronResolution / ScreenProbeTracingOctahedronResolution);
uint2 ScreenProbeAtlasCoord = GroupId.xy;
uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;
float SceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);
if (ScreenProbeIndex < GetNumScreenProbes()
&& ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x
&& SceneDepth > 0)
{
// Debug path: skip importance sampling and write one ray per texel at UniformLevel
#define UNIFORM_RAY_DISTRIBUTION_DEBUG 0
#if UNIFORM_RAY_DISTRIBUTION_DEBUG
uint MipSize = MaxImportanceSamplingOctahedronResolution >> UniformLevel;
uint2 TexelCoord = GroupThreadId.xy;
if (all(TexelCoord < MipSize))
{
WriteRay(ScreenProbeAtlasCoord, TexelCoord, TexelCoord, UniformLevel);
}
#else
{
uint2 TexelCoord = GroupThreadId.xy;
uint MipSize = ScreenProbeTracingOctahedronResolution;
if (all(TexelCoord < MipSize))
{
// BRDF_PDF_SPHERICAL_HARMONIC is defined earlier in this file, inside ScreenProbeComputeBRDFProbabilityDensityFunctionCS
#if BRDF_PDF_SPHERICAL_HARMONIC
// Load the probe's SH BRDF PDF and evaluate it in this texel's direction
uint SHBaseIndex = (ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x) * NUM_PDF_SH_COEFFICIENTS;
FThreeBandSHVector BRDF;
BRDF.V0.x = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 0];
BRDF.V0.y = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 1];
BRDF.V0.z = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 2];
BRDF.V0.w = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 3];
BRDF.V1.x = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 4];
BRDF.V1.y = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 5];
BRDF.V1.z = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 6];
BRDF.V1.w = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 7];
BRDF.V2.x = BRDFProbabilityDensityFunctionSH[SHBaseIndex + 8];
float2 ProbeUV = (TexelCoord + float2(.5f, .5f)) / float(MipSize);
float3 WorldConeDirection = EquiAreaSphericalMapping(ProbeUV);
FThreeBandSHVector DirectionSH = SHBasisFunction3(WorldConeDirection);
float PDF = max(DotSH3(BRDF, DirectionSH), 0);
#else
// Point sample the lower resolution BRDF PDF texture
uint PDFUpsampleFactor = MipSize / ScreenProbeBRDFOctahedronResolution;
float PDF = BRDFProbabilityDensityFunction.Load(int3(ScreenProbeAtlasCoord * ScreenProbeBRDFOctahedronResolution + TexelCoord / PDFUpsampleFactor, 0));
#endif
#if IMPORTANCE_SAMPLE_LIGHTING
float LightingPDF = LightingProbabilityDensityFunction.Load(int3(ScreenProbeAtlasCoord * MipSize + TexelCoord, 0));
bool bNotCulledByBRDF = PDF >= MinPDFToTrace;
// Multiply by the texel count so that a lighting PDF equal to 1 / texel count gives a scale of 1
float LightingPDFScale = LightingPDF * (MipSize * MipSize);
PDF *= LightingPDFScale;
// If this ray was not culled by the BRDF, make sure it doesn't get culled by the lack of incoming lighting
if (bNotCulledByBRDF)
{
PDF = max(PDF, MinPDFToTrace);
}
#endif
RaysToRefine[TexelCoord.y * MipSize + TexelCoord.x] = PackRaySortInfo(TexelCoord, UniformLevel, PDF);
}
}
GroupMemoryBarrierWithGroupSync();
uint ThreadIndex = GroupThreadId.y * ScreenProbeTracingOctahedronResolution + GroupThreadId.x;
uint NumRaysToTrace = ScreenProbeTracingOctahedronResolution * ScreenProbeTracingOctahedronResolution;
#define SERIAL_REFERENCE_DEBUG 0
#if SERIAL_REFERENCE_DEBUG
uint SortedOffset = SubdivideRaysSerialReference(GroupThreadId.xy);
#else
// The sorted rays are written after the unsorted ones, so the sort can read the unsorted rays without synchronization
uint SortedOffset = ScreenProbeTracingOctahedronResolution * ScreenProbeTracingOctahedronResolution;
uint RayIndex = ThreadIndex;
// Sort the rays by ascending PDF
// O(N^2) simple parallel sort
if (RayIndex < NumRaysToTrace)
{
uint2 RayTexelCoord;
uint RayLevel;
float SortKey;
UnpackRaySortInfo(RaysToRefine[RayIndex], RayTexelCoord, RayLevel, SortKey);
uint NumSmaller = 0;
// Count how many items have a smaller key, so we can insert ourselves into the correct position, without requiring interaction between threads
for (uint OtherRayIndex = 0; OtherRayIndex < NumRaysToTrace; OtherRayIndex++)
{
uint2 OtherRayTexelCoord;
uint OtherRayLevel;
float OtherSortKey;
UnpackRaySortInfo(RaysToRefine[OtherRayIndex], OtherRayTexelCoord, OtherRayLevel, OtherSortKey);
if (OtherSortKey < SortKey
// Provide sort stability and resolve collisions based on unsorted array index
|| (OtherSortKey == SortKey && OtherRayIndex > RayIndex))
{
NumSmaller++;
}
}
// Move this entry into its sorted position
RaysToRefine[NumSmaller + SortedOffset] = RaysToRefine[RayIndex];
}
// Reset the counter before the sync below; it is only incremented after that sync
NumRaysToSubdivide = 0;
GroupMemoryBarrierWithGroupSync();
// Rays at level 0 cannot be subdivided any further
if (UniformLevel > 0)
{
// For each 3 rays with PDF < threshold, refine matching high PDF ray
// Threads are grouped in packets of three: packet MergeIndex owns sorted rays [MergeIndex * 3, MergeIndex * 3 + 2]
// and is matched with the MergeIndex-th ray counted from the end of the sorted list
uint MergeThreadIndex = ThreadIndex % 3;
uint MergeIndex = ThreadIndex / 3;
uint RayIndexToRefine = max((int)NumRaysToTrace - (int)MergeIndex - 1, 0);
uint RayIndexToMerge2 = MergeIndex * 3 + 2;
// Only continue for threads which have a valid matching ray to refine
if (RayIndexToMerge2 < RayIndexToRefine)
{
uint2 RayTexelCoord2;
uint RayLevel2;
float PDF2;
// The list is sorted ascending, so testing the last ray of the packet covers all three
UnpackRaySortInfo(RaysToRefine[SortedOffset + RayIndexToMerge2], RayTexelCoord2, RayLevel2, PDF2);
// Only continue if our thread is part of a packet of 3 which are all below the threshold to be traced
if (PDF2 < MinPDFToTrace)
{
// Fetch the properties of the ray we will subdivide
uint2 OriginalRayTexelCoord;
uint OriginalRayLevel;
float OriginalPDF;
UnpackRaySortInfo(RaysToRefine[SortedOffset + RayIndexToRefine], OriginalRayTexelCoord, OriginalRayLevel, OriginalPDF);
// Reassign this ray to the new subdivided texel
// MergeThreadIndex 0, 1, 2 take child offsets (1,0), (0,1), (1,1); child (0,0) is written after the sync below
RaysToRefine[SortedOffset + ThreadIndex] = PackRaySortInfo(OriginalRayTexelCoord * 2 + uint2((MergeThreadIndex + 1) % 2, (MergeThreadIndex + 1) / 2), OriginalRayLevel - 1, 0.0f);
if (MergeThreadIndex == 0)
{
// Queue overwriting the ray we chose to subdivide after a group sync
InterlockedAdd(NumRaysToSubdivide, 1);
}
}
}
GroupMemoryBarrierWithGroupSync();
// The subdivided rays are the last NumRaysToSubdivide entries of the sorted list; turn each into its own child (0,0)
if (ThreadIndex < NumRaysToSubdivide)
{
uint RayIndexToSubdivide = NumRaysToTrace - ThreadIndex - 1;
uint2 OriginalRayTexelCoord;
uint OriginalRayLevel;
float OriginalPDF;
UnpackRaySortInfo(RaysToRefine[SortedOffset + RayIndexToSubdivide], OriginalRayTexelCoord, OriginalRayLevel, OriginalPDF);
RaysToRefine[SortedOffset + RayIndexToSubdivide] = PackRaySortInfo(OriginalRayTexelCoord * 2, OriginalRayLevel - 1, 0.0f);
}
}
#endif
GroupMemoryBarrierWithGroupSync();
// Write the final ray of each slot; the slot index is unflattened into a 2d ray coordinate
if (ThreadIndex < NumRaysToTrace)
{
uint2 RayTexelCoord;
uint RayLevel;
float RayPDF;
UnpackRaySortInfo(RaysToRefine[SortedOffset + ThreadIndex], RayTexelCoord, RayLevel, RayPDF);
uint2 RayCoord = uint2(ThreadIndex % ScreenProbeTracingOctahedronResolution, ThreadIndex / ScreenProbeTracingOctahedronResolution);
WriteRay(ScreenProbeAtlasCoord, RayCoord, RayTexelCoord, RayLevel);
}
#endif
}
}