1309 lines
48 KiB
HLSL
1309 lines
48 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "SSDDefinitions.ush"
|
|
|
|
|
|
//------------------------------------------------------- ENUM VALUES
|
|
|
|
/** Different possible stages for spatial accumulation. NOTE(review): presumably must match the stage values used by the code that sets DIM_STAGE - confirm. */
|
|
#define STAGE_RECONSTRUCTION 0
|
|
#define STAGE_PRE_CONVOLUTION 1
|
|
#define STAGE_REJECTION_PRE_CONVOLUTION 2
|
|
#define STAGE_POST_FILTERING 3
|
|
#define STAGE_FINAL_OUTPUT 4
|
|
|
|
/** Policy to use to change the size of kernel. */
|
|
#define SAMPLE_COUNT_POLICY_DISABLED 0
|
|
#define SAMPLE_COUNT_POLICY_SAMPLE_ACCUMULATION_BASED 4
|
|
|
|
/** Which signal should be output. */
|
|
// Only output the sum of the signal 0.
|
|
#define OUTPUT_MODE_SUM 0
|
|
// Only output the sum of the moments 1 & 2 of the signal 0.
|
|
#define OUTPUT_MODE_2MOMMENT_SUM 1
|
|
|
|
// Output the result of descending ring bucketing.
|
|
#define OUTPUT_MODE_DRB 2
|
|
|
|
|
|
//------------------------------------------------------- CONFIGS
|
|
|
|
#define TILE_PIXEL_SIZE 8
|
|
|
|
#define CONFIG_SIGNAL_PROCESSING DIM_SIGNAL_PROCESSING
|
|
#define CONFIG_UPSCALE DIM_UPSCALE
|
|
#define CONFIG_SIGNAL_BATCH_SIZE DIM_SIGNAL_BATCH_SIZE
|
|
|
|
|
|
// Configures every pass for each individual signal.
|
|
#if CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_SHADOW_VISIBILITY_MASK
|
|
#define MAX_SIGNAL_BATCH_SIZE CONFIG_SIGNAL_BATCH_SIZE
|
|
#define SIGNAL_ARRAY_SIZE CONFIG_SIGNAL_BATCH_SIZE
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_MONOCHROMATIC_PENUMBRA
|
|
#define CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN 1
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_INJESTION_NSPP
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_HISTORY
|
|
|
|
#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_UINT2
|
|
#define CONFIG_SIGNAL_OUTPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
#define CONFIG_INPUT_TEXTURE_COUNT ((CONFIG_SIGNAL_BATCH_SIZE + 1) / 2)
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_PRECOMPUTED_BLURING_RADIUS
|
|
#define CONFIG_MAX_WITH_REF_DISTANCE 1
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_DRB
|
|
|
|
#if DIM_SIGNAL_BATCH_SIZE > 1
|
|
#define CONFIG_CLAMP_UV_PER_SIGNAL 1
|
|
#endif
|
|
|
|
#elif DIM_STAGE == STAGE_PRE_CONVOLUTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_HISTORY
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_HISTORY
|
|
|
|
#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
#define CONFIG_SIGNAL_OUTPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_HEXAWEB
|
|
#define CONFIG_CUSTOM_SPREAD_FACTOR 1
|
|
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_PRECOMPUTED_BLURING_RADIUS
|
|
#define CONFIG_MAX_WITH_REF_DISTANCE 1
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_DRB
|
|
|
|
#elif DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION
|
|
#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
#define CONFIG_SIGNAL_OUTPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
|
|
#elif DIM_STAGE == STAGE_POST_FILTERING
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_HISTORY
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_HISTORY
|
|
|
|
#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
#define CONFIG_SIGNAL_OUTPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
//#define CONFIG_SAMPLE_SUBSET 1
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_PRECOMPUTED_BLURING_RADIUS
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_DRB
|
|
|
|
#elif DIM_STAGE == STAGE_FINAL_OUTPUT
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_PENUMBRA_HISTORY
|
|
|
|
#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
#define CONFIG_SIGNAL_OUTPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT CONFIG_SIGNAL_BATCH_SIZE
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_1X1
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
// Compress the DRB accumulator to have lower VGPR footprint.
|
|
#if defined(CONFIG_OUTPUT_MODE) && CONFIG_OUTPUT_MODE == OUTPUT_MODE_DRB
|
|
// Looks like shader compilers completely give up.
|
|
// #define CONFIG_ACCUMULATOR_VGPR_COMPRESSION ACCUMULATOR_COMPRESSION_PENUMBRA_DRB
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_POLYCHROMATIC_PENUMBRA_HARMONIC
|
|
// Denoise diffuse and specular harmonics at the same time.
|
|
#define MAX_SIGNAL_BATCH_SIZE 2
|
|
#define SIGNAL_ARRAY_SIZE 2
|
|
#undef CONFIG_SIGNAL_BATCH_SIZE
|
|
#define CONFIG_SIGNAL_BATCH_SIZE 2
|
|
|
|
// Each harmonic requires input and output RGB.
|
|
#define COMPILE_SIGNAL_COLOR_ARRAY 2
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_POLYCHROMATIC_PENUMBRA
|
|
#define CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN 1
|
|
|
|
// Any world distance depends on the harmonic being processed.
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_HARMONIC
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_POLYCHROMATIC_PENUMBRA_HARMONIC_INPUT
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_POLYCHROMATIC_PENUMBRA_HARMONIC_RECONSTRUCTION
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 4
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 4
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
//#define CONFIG_MAX_WITH_REF_DISTANCE 1
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_REFLECTIONS
|
|
// Denoise only specular.
// Reflections are processed as a single, non-batched color signal.
#define MAX_SIGNAL_BATCH_SIZE 1
#define SIGNAL_ARRAY_SIZE 1

#define COMPILE_SIGNAL_COLOR 1
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_REFLECTIONS

// Only the reconstruction and pre-convolution stages are configured for reflections;
// any other DIM_STAGE is rejected at compile time.
#if DIM_STAGE == STAGE_RECONSTRUCTION || DIM_STAGE == STAGE_PRE_CONVOLUTION
	// Input and output layout.
	// Both stages read two textures and write two textures; only the input layout differs.
	#define CONFIG_INPUT_TEXTURE_COUNT 2
	#define CONFIG_OUTPUT_TEXTURE_COUNT 2

	#if DIM_STAGE == STAGE_RECONSTRUCTION
		#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_REFLECTIONS_INPUT
	#else
		#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_REFLECTIONS_HISTORY
	#endif
	#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_REFLECTIONS_HISTORY

	#define CONFIG_SAMPLE_SET SAMPLE_SET_DIRECTIONAL_ELLIPSE

	// Do color accumulation with Karis weighting so that flickering specular highlights
	// do not reveal the kernel pattern.
	// TODO(Denoiser): This is a bit aggressive.
	#define CONFIG_ACCUMULATION_COLOR_SPACE (COLOR_SPACE_RGB | COLOR_SPACE_KARIS_WEIGHTING)

#else
	#error Unexpected stage.
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_AO
|
|
// Denoise only AO.
|
|
#define MAX_SIGNAL_BATCH_SIZE 1
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_INPUT
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_HISTORY
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 2
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 1
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_DRB
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_AO
|
|
|
|
#elif DIM_STAGE == STAGE_PRE_CONVOLUTION
|
|
#define SIGNAL_ARRAY_SIZE 1 // NOTE(review): single signal in this stage; the previous "first and second moment" wording looks copied from the rejection pre-convolution stage (array size 2) - confirm.
|
|
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_HISTORY
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_HISTORY
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 1
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 1
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_HEXAWEB
|
|
#define CONFIG_CUSTOM_SPREAD_FACTOR 1
|
|
|
|
//#define CONFIG_MAX_WITH_REF_DISTANCE 1
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_DRB
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_AO
|
|
|
|
#elif DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION
|
|
#define SIGNAL_ARRAY_SIZE 2 // first and second momment to measure variance in temporal accumulation.
|
|
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_HISTORY
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_REJECTION
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 1
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 1
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_AO_HISTORY
|
|
|
|
#elif DIM_STAGE == STAGE_POST_FILTERING
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_HISTORY
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_AO_HISTORY
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 1
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 1
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_HEXAWEB
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_DRB
|
|
#define CONFIG_CUSTOM_SPREAD_FACTOR 1
|
|
|
|
#define CONFIG_SAMPLE_COUNT_POLICY SAMPLE_COUNT_POLICY_SAMPLE_ACCUMULATION_BASED
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_AO
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_INDIRECT_AND_AO
|
|
// Denoise diffuse and AO, but AO is FSSDSignalSample::MissCount.
|
|
#define MAX_SIGNAL_BATCH_SIZE 1
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
|
|
#define COMPILE_SIGNAL_COLOR 1
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_DIFFUSE
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 2
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 2
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_AND_AO_INPUT_NSPP
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_AND_AO_RECONSTRUCTION
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_HIT_DISTANCE
|
|
|
|
#elif DIM_STAGE == STAGE_PRE_CONVOLUTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_AND_AO_RECONSTRUCTION
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_AND_AO_RECONSTRUCTION
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_HEXAWEB
|
|
#define CONFIG_CUSTOM_SPREAD_FACTOR 1
|
|
//#define CONFIG_MAX_WITH_REF_DISTANCE 1
|
|
|
|
#elif DIM_STAGE == STAGE_POST_FILTERING
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_AND_AO_HISTORY
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_AND_AO_HISTORY
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#define CONFIG_SAMPLE_SUBSET 1
|
|
|
|
#define CONFIG_SAMPLE_COUNT_POLICY SAMPLE_COUNT_POLICY_SAMPLE_ACCUMULATION_BASED
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_SPHERICAL_HARMONIC && 0
|
|
#define MAX_SIGNAL_BATCH_SIZE 1
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
|
|
#define COMPILE_SIGNAL_COLOR_SH 1
|
|
|
|
// Given it's a spherical harmonic that store directionality, only need position based rejection.
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_SPHERICAL_HARMONIC
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 4
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 4
|
|
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_MIN_METADATA
|
|
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_HARMONIC
|
|
#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_UINT2
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_INDIRECT_HARMONIC
|
|
#define CONFIG_SIGNAL_OUTPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_UINT2
|
|
|
|
// Spherical harmonics are a lot of data, need to shrink VGPR pressure to improve latency hiding when fetching the buffer.
|
|
// TODO(Denoiser): some shader compiler completely falls apart with the current implementation of
|
|
// CONFIG_SIGNAL_VGPR_COMPRESSION and actually drops in occupancy.
|
|
#define CONFIG_SIGNAL_VGPR_COMPRESSION SIGNAL_COMPRESSION_DIFFUSE_INDIRECT_HARMONIC
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
|
|
#elif DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_SPHERICAL_HARMONIC && 1
|
|
#define MAX_SIGNAL_BATCH_SIZE 1
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
|
|
#define COMPILE_SIGNAL_COLOR_SH 1
|
|
|
|
// Given it's a spherical harmonic that store directionality, only need position based rejection.
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_MIN_METADATA
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_SPHERICAL_HARMONIC
|
|
//#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_DISABLED
|
|
#define CONFIG_BILATERAL_DISTANCE_MULTIPLIER 6.0
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 2
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 2
|
|
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_LUMEN_DIFFUSE_INPUT
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_LUMEN_DIFFUSE_HISTORY
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
//#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_RAW_EXPERIMENTAL_KERNEL
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_SSGI
|
|
// Denoise diffuse and AO, but AO is FSSDSignalSample::MissCount.
|
|
#define MAX_SIGNAL_BATCH_SIZE 1
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
|
|
#define COMPILE_SIGNAL_COLOR 1
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_DIFFUSE
|
|
//#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_DISABLED
|
|
|
|
// SSGI doesn't have any bilateral distance computed from hitT, so allow to blur spatially by about the size of the kernel.
|
|
#define CONFIG_BILATERAL_DISTANCE_MULTIPLIER 3.0
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 2
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 2
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_SSGI_INPUT
|
|
//#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_UINT2
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_SSGI_HISTORY_R11G11B10
|
|
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
//#define CONFIG_SAMPLE_SET SAMPLE_SET_1X1
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#elif CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_PROBE_HIERARCHY
|
|
// Denoise diffuse and AO, but AO is FSSDSignalSample::MissCount.
|
|
#define MAX_SIGNAL_BATCH_SIZE 1
|
|
#define SIGNAL_ARRAY_SIZE 1
|
|
#define CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN 1
|
|
|
|
#define COMPILE_SIGNAL_COLOR_ARRAY 2
|
|
|
|
#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_PROBE_HIERARCHY
|
|
//#define CONFIG_BILATERAL_PRESET BILATERAL_PRESET_DISABLED
|
|
|
|
// SSGI doesn't have any bilateral distance computed from hitT, so allow to blur spatially by about the size of the kernel.
|
|
#define CONFIG_BILATERAL_DISTANCE_MULTIPLIER 3.0
|
|
|
|
#define CONFIG_INPUT_TEXTURE_COUNT 2
|
|
#define CONFIG_OUTPUT_TEXTURE_COUNT 2
|
|
|
|
#if DIM_STAGE == STAGE_RECONSTRUCTION
|
|
// Input and output layout.
|
|
#define CONFIG_SIGNAL_INPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_PROBE_HIERARCHY_INPUT
|
|
//#define CONFIG_SIGNAL_INPUT_TEXTURE_TYPE SIGNAL_TEXTURE_TYPE_UINT2
|
|
#define CONFIG_SIGNAL_OUTPUT_LAYOUT SIGNAL_BUFFER_LAYOUT_DIFFUSE_PROBE_HIERARCHY_HISTORY
|
|
|
|
//#define CONFIG_SAMPLE_SET SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_1X1
|
|
|
|
#else
|
|
#error Unexpected stage.
|
|
#endif
|
|
|
|
#else
|
|
#error Unknown signal processing.
|
|
#endif
|
|
|
|
// Configures pass regardless of the signals.
|
|
#if DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION
|
|
#define CONFIG_SAMPLE_SET SAMPLE_SET_3X3_PLUS
|
|
|
|
// Normalize the input, because we want to measure the spatial variance regardless of how many samples were used to reconstruct the signal.
|
|
#define CONFIG_NORMALIZE_INPUT 1
|
|
|
|
// Output both moments because history rejection is variance based, and may flicker with moment 2 loss since the pre
|
|
// convolution will reduce the variance of moment 1.
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_2MOMMENT_SUM
|
|
|
|
#endif
|
|
|
|
|
|
// No previous frame reprojection, save VGPR.
|
|
//#define CONFIG_NEIGHBOR_TO_REF_COMPUTATION NEIGHBOR_TO_REF_LOWEST_VGPR_PRESSURE
|
|
|
|
|
|
//------------------------------------------------------- CONFIG DISABLED DEFAULTS
|
|
|
|
/** Whether should clamp the UV individually per texture. */
|
|
#ifndef CONFIG_CLAMP_UV_PER_SIGNAL
|
|
#define CONFIG_CLAMP_UV_PER_SIGNAL 0
|
|
#endif
|
|
|
|
/** Changes the logic controlling the number of samples to take. */
|
|
#ifndef CONFIG_SAMPLE_COUNT_POLICY
|
|
#define CONFIG_SAMPLE_COUNT_POLICY SAMPLE_COUNT_POLICY_DISABLED
|
|
#endif
|
|
|
|
/** Selects a subset of the samples of a given CONFIG_SAMPLE_SET. */
|
|
#ifndef CONFIG_SAMPLE_SUBSET
|
|
#define CONFIG_SAMPLE_SUBSET 0
|
|
#endif
|
|
|
|
/** Whether the ray tracing input may need to be upscaled to the view's resolution. */
|
|
#ifndef CONFIG_UPSCALE
|
|
#define CONFIG_UPSCALE 0
|
|
#endif
|
|
|
|
/** Color space of the input signal. */
|
|
#ifndef CONFIG_INPUT_COLOR_SPACE
|
|
#define CONFIG_INPUT_COLOR_SPACE STANDARD_BUFFER_COLOR_SPACE
|
|
#endif
|
|
|
|
/** Color space to use for the accumulation. */
|
|
#ifndef CONFIG_ACCUMULATION_COLOR_SPACE
|
|
#define CONFIG_ACCUMULATION_COLOR_SPACE STANDARD_BUFFER_COLOR_SPACE
|
|
#endif
|
|
|
|
/** Color space to output in the signal. */
|
|
#ifndef CONFIG_OUTPUT_COLOR_SPACE
|
|
#define CONFIG_OUTPUT_COLOR_SPACE STANDARD_BUFFER_COLOR_SPACE
|
|
#endif
|
|
|
|
/** Removes the highest color. */
|
|
#ifndef CONFIG_REJECT_HIGHEST_COLOR
|
|
#define CONFIG_REJECT_HIGHEST_COLOR 0
|
|
#endif
|
|
|
|
/** Whether the input signal should be normalized. */
|
|
#ifndef CONFIG_NORMALIZE_INPUT
|
|
#define CONFIG_NORMALIZE_INPUT 0
|
|
#endif
|
|
|
|
/** The output mode that should be used. */
|
|
#ifndef CONFIG_OUTPUT_MODE
|
|
#define CONFIG_OUTPUT_MODE OUTPUT_MODE_SUM
|
|
#endif
|
|
|
|
/** The number of signals that should be processed per signal domain. */
|
|
#ifndef CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN
|
|
#define CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN SIGNAL_ARRAY_SIZE
|
|
#endif
|
|
|
|
/** Selects how the world distance should be computed for bilateral rejection. */
|
|
#ifndef CONFIG_BILATERAL_DISTANCE_COMPUTATION
|
|
#define CONFIG_BILATERAL_DISTANCE_COMPUTATION SIGNAL_WORLD_FREQUENCY_MIN_METADATA
|
|
#endif
|
|
|
|
/** Adds a multiplier on how the distance should be computed. */
|
|
#ifndef CONFIG_BILATERAL_DISTANCE_MULTIPLIER
|
|
#define CONFIG_BILATERAL_DISTANCE_MULTIPLIER 1.0
|
|
#endif
|
|
|
|
/** Whether neighbor bilateral distance should be maxed with reference one. */
|
|
#ifndef CONFIG_MAX_WITH_REF_DISTANCE
|
|
#define CONFIG_MAX_WITH_REF_DISTANCE 0
|
|
#endif
|
|
|
|
|
|
//------------------------------------------------------- COMPILATION CONFIGURATION
|
|
|
|
// Choose kernel to compile.
// Maps CONFIG_SAMPLE_SET to exactly one COMPILE_*_KERNEL switch. Any sample set that has no
// branch of its own below takes the #else path and compiles the box kernel (in this file that
// appears to cover SAMPLE_SET_1X1 and SAMPLE_SET_3X3_PLUS).
// NOTE(review): the COMPILE_* switches are presumably consumed by the SSD*.ush headers included
// further down - confirm.
|
|
#if CONFIG_SAMPLE_SET == SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#define COMPILE_STACKOWIAK_KERNEL 1
|
|
#elif CONFIG_SAMPLE_SET == SAMPLE_SET_HEXAWEB
|
|
#define COMPILE_DISK_KERNEL 1
|
|
// Both directional sample sets share the same kernel implementation switch.
#elif CONFIG_SAMPLE_SET == SAMPLE_SET_DIRECTIONAL_RECT || CONFIG_SAMPLE_SET == SAMPLE_SET_DIRECTIONAL_ELLIPSE
|
|
#define COMPILE_DIRECTIONAL_KERNEL 1
|
|
#elif CONFIG_SAMPLE_SET == SAMPLE_SET_RAW_EXPERIMENTAL_KERNEL
|
|
#define COMPILE_RAW_EXPERIMENTAL_KERNEL 1
|
|
// Fallback for every remaining sample set.
#else
|
|
#define COMPILE_BOX_KERNEL 1
|
|
#endif
|
|
|
|
// Choose accumulators to compile.
|
|
#if CONFIG_OUTPUT_MODE == OUTPUT_MODE_DRB
|
|
#define COMPILE_DRB_ACCUMULATOR 1
|
|
#define COMPILE_MIN_FREQUENCY_ACCUMULATOR 1
|
|
|
|
#elif CONFIG_OUTPUT_MODE == OUTPUT_MODE_2MOMMENT_SUM
|
|
#define COMPILE_MOMENT1_ACCUMULATOR 1
|
|
#define COMPILE_MOMENT2_ACCUMULATOR 1
|
|
|
|
#elif CONFIG_OUTPUT_MODE == OUTPUT_MODE_SUM
|
|
#define COMPILE_MOMENT1_ACCUMULATOR 1
|
|
#define COMPILE_MIN_FREQUENCY_ACCUMULATOR 1
|
|
|
|
#else
|
|
#error Unknown output mode.
|
|
#endif
|
|
|
|
//------------------------------------------------------- INCLUDES
|
|
|
|
#include "SSDSignalFramework.ush"
|
|
#include "SSDSignalArray.ush"
|
|
#include "SSDSpatialKernel.ush"
|
|
|
|
|
|
//------------------------------------------------------- LATE CONFIG DEFAULTS
|
|
|
|
/** Choose how the reference metadata should be compressed. */
|
|
#ifndef CONFIG_REF_METADATA_COMPRESSION
|
|
#define CONFIG_REF_METADATA_COMPRESSION CONFIG_METADATA_BUFFER_LAYOUT
|
|
#endif
|
|
|
|
|
|
//------------------------------------------------------- PARAMETERS
|
|
|
|
uint MaxSampleCount;
|
|
uint PreviousCumulativeMaxSampleCount;
|
|
uint UpscaleFactor;
|
|
|
|
#if !CONFIG_UPSCALE && CONFIG_CUSTOM_SPREAD_FACTOR
|
|
float KernelSpreadFactor;
|
|
#endif
|
|
|
|
float HarmonicPeriode;
|
|
|
|
float4 InputBufferUVMinMax[CONFIG_SIGNAL_BATCH_SIZE];
|
|
|
|
|
|
#if !defined(CONFIG_INPUT_TEXTURE_COUNT)
|
|
#error Missing CONFIG_INPUT_TEXTURE_COUNT
|
|
#endif
|
|
|
|
FSSDTexture2D SignalInput_Textures_0;
|
|
FSSDTexture2D SignalInputUint_Textures_0;
|
|
|
|
#if CONFIG_INPUT_TEXTURE_COUNT > 1
|
|
FSSDTexture2D SignalInput_Textures_1;
|
|
FSSDTexture2D SignalInputUint_Textures_1;
|
|
#else
|
|
#define SignalInput_Textures_1 SignalInput_Textures_0
|
|
#define SignalInputUint_Textures_1 SignalInputUint_Textures_0
|
|
#endif
|
|
|
|
#if CONFIG_INPUT_TEXTURE_COUNT > 2
|
|
FSSDTexture2D SignalInput_Textures_2;
|
|
FSSDTexture2D SignalInputUint_Textures_2;
|
|
#else
|
|
#define SignalInput_Textures_2 SignalInput_Textures_0
|
|
#define SignalInputUint_Textures_2 SignalInputUint_Textures_0
|
|
#endif
|
|
|
|
#if CONFIG_INPUT_TEXTURE_COUNT > 3
|
|
FSSDTexture2D SignalInput_Textures_3;
|
|
FSSDTexture2D SignalInputUint_Textures_3;
|
|
#else
|
|
#define SignalInput_Textures_3 SignalInput_Textures_0
|
|
#define SignalInputUint_Textures_3 SignalInputUint_Textures_0
|
|
#endif
|
|
|
|
|
|
#if !defined(CONFIG_OUTPUT_TEXTURE_COUNT)
|
|
#error Missing CONFIG_OUTPUT_TEXTURE_COUNT
|
|
#endif
|
|
|
|
FSSDRWTexture2D SignalOutput_UAVs_0;
|
|
|
|
#if CONFIG_OUTPUT_TEXTURE_COUNT > 1
|
|
FSSDRWTexture2D SignalOutput_UAVs_1;
|
|
#else
|
|
#define SignalOutput_UAVs_1 SignalOutput_UAVs_0
|
|
#endif
|
|
|
|
#if CONFIG_OUTPUT_TEXTURE_COUNT > 2
|
|
FSSDRWTexture2D SignalOutput_UAVs_2;
|
|
#else
|
|
#define SignalOutput_UAVs_2 SignalOutput_UAVs_0
|
|
#endif
|
|
|
|
#if CONFIG_OUTPUT_TEXTURE_COUNT > 3
|
|
FSSDRWTexture2D SignalOutput_UAVs_3;
|
|
#else
|
|
#define SignalOutput_UAVs_3 SignalOutput_UAVs_0
|
|
#endif
|
|
|
|
|
|
//------------------------------------------------------- FUNCTIONS
|
|
|
|
// TODO(Denoiser): duplicated with reflection code.
|
|
/**
 * Converts a dispatch thread id into a pixel coordinate when each thread covers an
 * UpscaleFactor x UpscaleFactor footprint. The sub-pixel picked inside that footprint
 * is derived from View.StateFrameIndex, so it changes from frame to frame.
 *
 * The bit masks below behave as a modulo only when UpscaleFactor is a power of two.
 *
 * @param DispatchThreadId  Thread id of the dispatch.
 * @return DispatchThreadId scaled by UpscaleFactor, plus the per-frame sub-pixel offset.
 */
uint2 GetPixelCoord(uint2 DispatchThreadId)
{
	// Number of pixels covered by one thread, and the mask that wraps an index into that range.
	const uint SubPixelCount = UpscaleFactor * UpscaleFactor;
	const uint SubPixelMask = SubPixelCount - 1;

	// TODO(Denoiser): find a way to not interfere with TAA's jittering.
	const uint SubPixelIndex = View.StateFrameIndex & SubPixelMask;

	// Unpack the linear sub-pixel index into a 2D offset: column from the low bits, row from the quotient.
	const uint2 SubPixelOffset = uint2(SubPixelIndex & (UpscaleFactor - 1), SubPixelIndex / UpscaleFactor);
	const uint2 FootprintOrigin = DispatchThreadId * UpscaleFactor;

	return FootprintOrigin + SubPixelOffset;
}
|
|
|
|
|
|
//------------------------------------------------------- ENTRY POINTS
|
|
|
|
[numthreads(TILE_PIXEL_SIZE, TILE_PIXEL_SIZE, 1)]
|
|
void MainCS(
|
|
uint2 DispatchThreadId : SV_DispatchThreadID,
|
|
uint2 GroupId : SV_GroupID,
|
|
uint2 GroupThreadId : SV_GroupThreadID,
|
|
uint GroupThreadIndex : SV_GroupIndex)
|
|
{
|
|
#if CONFIG_SIGNAL_INPUT_TEXTURE_TYPE == SIGNAL_TEXTURE_TYPE_FLOAT4
|
|
Texture2D Signal_Textures_0 = SignalInput_Textures_0;
|
|
Texture2D Signal_Textures_1 = SignalInput_Textures_1;
|
|
Texture2D Signal_Textures_2 = SignalInput_Textures_2;
|
|
Texture2D Signal_Textures_3 = SignalInput_Textures_3;
|
|
#else
|
|
FSSDTexture2D Signal_Textures_0 = SignalInput_Textures_0;
|
|
FSSDTexture2D Signal_Textures_1 = SignalInput_Textures_1;
|
|
FSSDTexture2D Signal_Textures_2 = SignalInput_Textures_2;
|
|
FSSDTexture2D Signal_Textures_3 = SignalInput_Textures_3;
|
|
#endif
|
|
|
|
// Find out scene buffer UV.
|
|
float2 SceneBufferUV = DispatchThreadId * ThreadIdToBufferUV.xy + ThreadIdToBufferUV.zw;
|
|
if (true)
|
|
{
|
|
SceneBufferUV = clamp(SceneBufferUV, DenoiserBufferBilinearUVMinMax.xy, DenoiserBufferBilinearUVMinMax.zw);
|
|
}
|
|
|
|
// Read reference meta data.
|
|
FSSDCompressedSceneInfos CompressedRefSceneMetadata;
|
|
FSSDSampleSceneInfos RefSceneMetadata;
|
|
{
|
|
CompressedRefSceneMetadata = SampleCompressedSceneMetadata(
|
|
/* bPrevFrame = */ false,
|
|
SceneBufferUV, BufferUVToBufferPixelCoord(SceneBufferUV));
|
|
|
|
float2 ScreenPosition = DenoiserBufferUVToScreenPosition(SceneBufferUV);
|
|
|
|
RefSceneMetadata = UncompressSampleSceneInfo(
|
|
CONFIG_METADATA_BUFFER_LAYOUT, /* bPrevFrame = */ false,
|
|
ScreenPosition, CompressedRefSceneMetadata);
|
|
}
|
|
|
|
// Sample the reference sample.
|
|
#if !CONFIG_UPSCALE || 1
|
|
FSSDSignalArray RefSamples;
|
|
FSSDSignalFrequencyArray RefFrequencies;
|
|
SampleMultiplexedSignals(
|
|
Signal_Textures_0,
|
|
Signal_Textures_1,
|
|
Signal_Textures_2,
|
|
Signal_Textures_3,
|
|
GlobalPointClampedSampler,
|
|
CONFIG_SIGNAL_INPUT_LAYOUT,
|
|
/* MultiplexedSampleId = */ 0,
|
|
/* bNormalizeSample = */ CONFIG_NORMALIZE_INPUT != 0,
|
|
SceneBufferUV,
|
|
/* out */ RefSamples,
|
|
/* out */ RefFrequencies);
|
|
|
|
#if CONFIG_NORMALIZE_INPUT
|
|
FSSDSignalArray NormalizedRefSamples = RefSamples;
|
|
#else
|
|
// TODO(Denoiser): Decode twice instead.
|
|
FSSDSignalArray NormalizedRefSamples = NormalizeToOneSampleArray(RefSamples);
|
|
#endif
|
|
#endif
|
|
|
|
//DebugOutput[DispatchThreadId] = float4(GetWorldNormal(RefSceneMetadata)* 0.5 + 0.5, GetWorldDepth(RefSceneMetadata));
|
|
|
|
/** Factor by which the kernel should be spread out. */
|
|
#if CONFIG_UPSCALE
|
|
float KernelSpreadFactor = UpscaleFactor;
|
|
#elif !CONFIG_CUSTOM_SPREAD_FACTOR
|
|
const float KernelSpreadFactor = 1;
|
|
#endif
|
|
|
|
/** Find out the number of samples that should be done. */
|
|
float RequestedSampleCount = 1024;
|
|
|
|
#if CONFIG_SAMPLE_SET == SAMPLE_SET_NONE
|
|
RequestedSampleCount = 1;
|
|
#elif CONFIG_SAMPLE_COUNT_POLICY == SAMPLE_COUNT_POLICY_DISABLED
|
|
// NOP
|
|
#elif CONFIG_SAMPLE_COUNT_POLICY == SAMPLE_COUNT_POLICY_SAMPLE_ACCUMULATION_BASED
|
|
{
|
|
#if CONFIG_SIGNAL_BATCH_SIZE != 1
|
|
#error Unable to support more than one signal.
|
|
#endif
|
|
RequestedSampleCount = clamp(TARGETED_SAMPLE_COUNT / RefSamples.Array[0].SampleCount, 1, MaxSampleCount);
|
|
}
|
|
#else
|
|
#error Unknown policy to control the number of samples.
|
|
#endif
|
|
|
|
// Register renaming of members of FSSDKernelConfig to survive until the output to UAV
|
|
#if (CONFIG_SAMPLE_SET == SAMPLE_SET_STACKOWIAK_4_SETS) && CONFIG_VGPR_OPTIMIZATION
|
|
float2 KernelBufferUV;
|
|
uint SampleTrackId;
|
|
#endif
|
|
|
|
// Accumulate spatially the input.
|
|
FSSDSignalAccumulatorArray SignalAccumulators;
|
|
{
|
|
FSSDKernelConfig KernelConfig = CreateKernelConfig();
|
|
|
|
#if DEBUG_OUTPUT
|
|
{
|
|
KernelConfig.DebugPixelPosition = DispatchThreadId;
|
|
KernelConfig.DebugEventCounter = 0;
|
|
}
|
|
#endif
|
|
|
|
// Compile time.
|
|
KernelConfig.SampleSet = CONFIG_SAMPLE_SET;
|
|
KernelConfig.SampleSubSetId = CONFIG_SAMPLE_SUBSET;
|
|
KernelConfig.BufferLayout = CONFIG_SIGNAL_INPUT_LAYOUT;
|
|
KernelConfig.MultiplexedSignalsPerSignalDomain = CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN;
|
|
KernelConfig.NeighborToRefComputation = NEIGHBOR_TO_REF_LOWEST_VGPR_PRESSURE;
|
|
KernelConfig.bUnroll = CONFIG_SAMPLE_SET != SAMPLE_SET_STACKOWIAK_4_SETS;
|
|
KernelConfig.bDescOrder = CONFIG_OUTPUT_MODE == OUTPUT_MODE_DRB;
|
|
KernelConfig.BilateralDistanceComputation = CONFIG_BILATERAL_DISTANCE_COMPUTATION;
|
|
KernelConfig.WorldBluringDistanceMultiplier = CONFIG_BILATERAL_DISTANCE_MULTIPLIER;
|
|
KernelConfig.bNormalizeSample = CONFIG_NORMALIZE_INPUT != 0;
|
|
KernelConfig.bSampleKernelCenter = CONFIG_UPSCALE;
|
|
KernelConfig.bForceKernelCenterAccumulation = true;
|
|
KernelConfig.bClampUVPerMultiplexedSignal = CONFIG_CLAMP_UV_PER_SIGNAL != 0;
|
|
|
|
// Reconstruct the spherical harmonic when reconstructing from 1spp.
|
|
KernelConfig.bComputeSampleColorSH = DIM_STAGE == STAGE_RECONSTRUCTION && DIM_MULTI_SPP == 0;
|
|
|
|
{
|
|
UNROLL_N(SIGNAL_ARRAY_SIZE)
|
|
for (uint MultiplexId = 0; MultiplexId < SIGNAL_ARRAY_SIZE; MultiplexId++)
|
|
{
|
|
KernelConfig.BufferColorSpace[MultiplexId] = CONFIG_INPUT_COLOR_SPACE;
|
|
KernelConfig.AccumulatorColorSpace[MultiplexId] = CONFIG_ACCUMULATION_COLOR_SPACE;
|
|
}
|
|
}
|
|
|
|
SetBilateralPreset(CONFIG_BILATERAL_PRESET, /* inout */ KernelConfig);
|
|
|
|
// SGPRs
|
|
KernelConfig.BufferSizeAndInvSize = DenoiserBufferSizeAndInvSize;
|
|
KernelConfig.BufferBilinearUVMinMax = DenoiserBufferBilinearUVMinMax;
|
|
KernelConfig.KernelSpreadFactor = KernelSpreadFactor;
|
|
KernelConfig.HarmonicPeriode = HarmonicPeriode;
|
|
|
|
#if CONFIG_CLAMP_UV_PER_SIGNAL
|
|
{
|
|
UNROLL_N(CONFIG_SIGNAL_BATCH_SIZE)
|
|
for (uint BatchedSignalId = 0; BatchedSignalId < CONFIG_SIGNAL_BATCH_SIZE; BatchedSignalId++)
|
|
{
|
|
uint MultiplexId = BatchedSignalId / CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN;
|
|
KernelConfig.PerSignalUVMinMax[MultiplexId] = InputBufferUVMinMax[MultiplexId];
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// VGPRs
|
|
KernelConfig.BufferUV = SceneBufferUV;
|
|
{
|
|
#if CONFIG_REF_METADATA_COMPRESSION == CONFIG_METADATA_BUFFER_LAYOUT
|
|
// Straight up plumb down the compress layout to save any VALU.
|
|
KernelConfig.CompressedRefSceneMetadata = CompressedRefSceneMetadata;
|
|
#else
|
|
// Recompress the reference scene metadata
|
|
KernelConfig.CompressedRefSceneMetadata = CompressSampleSceneInfo(CONFIG_REF_METADATA_COMPRESSION, RefSceneMetadata);
|
|
#endif
|
|
KernelConfig.RefBufferUV = SceneBufferUV;
|
|
KernelConfig.RefSceneMetadataLayout = CONFIG_REF_METADATA_COMPRESSION;
|
|
}
|
|
KernelConfig.HammersleySeed = Rand3DPCG16(int3(SceneBufferUV * BufferUVToOutputPixelPosition, View.StateFrameIndexMod8)).xy;
|
|
|
|
// Set up reference distance for all signals.
|
|
#if CONFIG_MAX_WITH_REF_DISTANCE
|
|
{
|
|
KernelConfig.bMaxWithRefBilateralDistance = true;
|
|
|
|
UNROLL_N(SIGNAL_ARRAY_SIZE)
|
|
for (uint MultiplexId = 0; MultiplexId < SIGNAL_ARRAY_SIZE; MultiplexId++)
|
|
{
|
|
if (KernelConfig.BilateralDistanceComputation == SIGNAL_WORLD_FREQUENCY_PRECOMPUTED_BLURING_RADIUS)
|
|
{
|
|
KernelConfig.RefBilateralDistance[MultiplexId] = RefFrequencies.Array[MultiplexId].WorldBluringRadius;
|
|
}
|
|
else if (KernelConfig.BilateralDistanceComputation == SIGNAL_WORLD_FREQUENCY_HIT_DISTANCE)
|
|
{
|
|
KernelConfig.RefBilateralDistance[MultiplexId] = RefFrequencies.Array[MultiplexId].ClosestHitDistance;
|
|
}
|
|
else
|
|
{
|
|
const uint BatchedSignalId = ComputeSignalBatchIdFromSignalMultiplexId(KernelConfig, MultiplexId);
|
|
FSSDSignalDomainKnowledge DomainKnowledge = GetSignalDomainKnowledge(BatchedSignalId);
|
|
|
|
KernelConfig.RefBilateralDistance[MultiplexId] = GetSignalWorldBluringRadius(RefFrequencies.Array[MultiplexId], RefSceneMetadata, DomainKnowledge);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// When doing history rejection pre-convolution, the ref sample may be invalid, in which case we need to force taking the neighborhood to have a clamping box.
|
|
#if DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION && CONFIG_UPSCALE
|
|
{
|
|
KernelConfig.bForceAllAccumulation = RefSamples.Array[0].SampleCount == 0;
|
|
KernelConfig.SampleSet = SAMPLE_SET_3X3_PLUS;
|
|
}
|
|
#endif
|
|
|
|
#if CONFIG_SAMPLE_SET == SAMPLE_SET_HEXAWEB
|
|
{
|
|
KernelConfig.RingCount = 1;
|
|
|
|
// TODO(Denoiser): could be improved.
|
|
//KernelConfig.bMinSamplePairInvFrequency = true;
|
|
|
|
float2 E = float2(
|
|
InterleavedGradientNoise(DispatchThreadId, 0),
|
|
InterleavedGradientNoise(DispatchThreadId, 1));
|
|
|
|
// Add a bit of jittering to hide low sample.
|
|
KernelConfig.bSampleKernelCenter = false;
|
|
KernelConfig.BufferUV += View.ViewSizeAndInvSize.zw * (E - 0.5) * (KernelConfig.KernelSpreadFactor);
|
|
}
|
|
#endif
|
|
|
|
FSSDSignalAccumulatorArray UncompressedAccumulators = CreateSignalAccumulatorArray();
|
|
|
|
// When not upscaling, manually force accumulate the sample of the kernel.
|
|
if (!KernelConfig.bSampleKernelCenter && !KernelConfig.bDescOrder)
|
|
{
|
|
UNROLL_N(SIGNAL_ARRAY_SIZE)
|
|
for (uint SignalMultiplexId = 0; SignalMultiplexId < SIGNAL_ARRAY_SIZE; SignalMultiplexId++)
|
|
{
|
|
const uint BatchedSignalId = ComputeSignalBatchIdFromSignalMultiplexId(KernelConfig, SignalMultiplexId);
|
|
FSSDSignalDomainKnowledge DomainKnowledge = GetSignalDomainKnowledge(BatchedSignalId);
|
|
|
|
uint2 RefPixelCoord = floor(KernelConfig.BufferUV * KernelConfig.BufferSizeAndInvSize.xy);
|
|
FSSDSignalSample CenterSample = TransformSignalSampleForAccumulation(
|
|
KernelConfig,
|
|
SignalMultiplexId,
|
|
RefSceneMetadata,
|
|
RefSamples.Array[SignalMultiplexId],
|
|
RefPixelCoord);
|
|
|
|
FSSDSampleAccumulationInfos SampleInfos;
|
|
SampleInfos.Sample = CenterSample;
|
|
SampleInfos.Frequency = RefFrequencies.Array[SignalMultiplexId];
|
|
SampleInfos.FinalWeight = 1.0;
|
|
SampleInfos.InvFrequency = GetSignalWorldBluringRadius(SampleInfos.Frequency, RefSceneMetadata, DomainKnowledge);
|
|
|
|
if (KernelConfig.BilateralDistanceComputation == SIGNAL_WORLD_FREQUENCY_PRECOMPUTED_BLURING_RADIUS)
|
|
{
|
|
SampleInfos.InvFrequency = SampleInfos.Frequency.WorldBluringRadius;
|
|
}
|
|
|
|
AccumulateSample(
|
|
/* inout */ UncompressedAccumulators.Array[SignalMultiplexId],
|
|
SampleInfos);
|
|
}
|
|
}
|
|
|
|
#if CONFIG_SAMPLE_SET == SAMPLE_SET_STACKOWIAK_4_SETS
|
|
{
|
|
KernelConfig.SampleCount = clamp(uint(RequestedSampleCount) / kStackowiakSampleSetCount, 1, MaxSampleCount);
|
|
|
|
#if CONFIG_UPSCALE
|
|
{
|
|
// TODO(Denoiser): could be optimised, but currently reusing the same piece of code as reflections for maintainability.
|
|
uint2 RayDispatchThreadId = (DispatchThreadId - UpscaleFactor / 2) / UpscaleFactor;
|
|
uint2 ClosestRayPixelCoord = GetPixelCoord(RayDispatchThreadId);
|
|
|
|
uint RaySubPixelId = View.StateFrameIndex & (UpscaleFactor * UpscaleFactor - 1);
|
|
|
|
KernelConfig.BufferUV = ((ViewportMin + ClosestRayPixelCoord + (0.5 * KernelSpreadFactor + 0.5))) * KernelConfig.BufferSizeAndInvSize.zw;
|
|
|
|
// Sample the center of the kernel by comparing it against the RefSceneMetadata, since it may not match.
|
|
KernelConfig.bSampleKernelCenter = true;
|
|
|
|
// Id of the pixel in the quad.
|
|
KernelConfig.SampleTrackId = ((DispatchThreadId.x & 1) | ((DispatchThreadId.y & 1) << 1)) ^ 0x3;
|
|
|
|
// To avoid precision problems when comparing potentially identical data, force the kernel center accumulation when this pixel's id in the quad equals RaySubPixelId.
|
|
KernelConfig.bForceKernelCenterAccumulation = RaySubPixelId == ((DispatchThreadId.x & 1) | ((DispatchThreadId.y & 1) << 1));
|
|
}
|
|
#else
|
|
{
|
|
// Put the kernel center at the center of the quad. Half pixel shift is done in the sample offsets.
|
|
KernelConfig.BufferUV = float2(DispatchThreadId | 1) * ThreadIdToBufferUV.xy + ThreadIdToBufferUV.zw;
|
|
|
|
// Id of the pixel in the quad. This is to match hard coded first samples of the sample set.
|
|
KernelConfig.SampleTrackId = ((DispatchThreadId.x & 1) | ((DispatchThreadId.y & 1) << 1));
|
|
}
|
|
#endif
|
|
|
|
#if CONFIG_VGPR_OPTIMIZATION
|
|
// Keep SampleTrackId & KernelConfig.BufferUV around for the computation of the output pixel coordinate.
|
|
// Should be VGPR free given it is currently being used in the accumulation as well, which is the highest VGPR pressure of the shader.
|
|
// TODO(Denoiser): could save 1 VGPR by using 2 SGPR instead of SampleTrackId.
|
|
SampleTrackId = KernelConfig.SampleTrackId;
|
|
KernelBufferUV = KernelConfig.BufferUV;
|
|
#endif
|
|
}
|
|
#elif CONFIG_SAMPLE_SET == SAMPLE_SET_DIRECTIONAL_RECT || CONFIG_SAMPLE_SET == SAMPLE_SET_DIRECTIONAL_ELLIPSE
|
|
{
|
|
#if CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_REFLECTIONS
|
|
{
|
|
const float TargetSamplePerPixel = 0.25;
|
|
const float MinimalPixelRadius = 0.5 * rsqrt(2.0);
|
|
|
|
// Project GGX lobe into screen space.
|
|
float2 NormalizedScreenMajorAxis;
|
|
float InifinityMajorViewportRadius;
|
|
float InifinityMinorViewportRadius;
|
|
ProjectSpecularLobeToScreenSpace(
|
|
RefSceneMetadata,
|
|
/* out */ NormalizedScreenMajorAxis,
|
|
/* out */ InifinityMajorViewportRadius,
|
|
/* out */ InifinityMinorViewportRadius);
|
|
|
|
float ConfusionFactor = saturate(RefFrequencies.Array[0].ConfusionFactor);
|
|
float AspectRatio = InifinityMinorViewportRadius / InifinityMajorViewportRadius;
|
|
|
|
float PreviousMaxPixelDiameter = sqrt(rcp(TargetSamplePerPixel) * PreviousCumulativeMaxSampleCount / AspectRatio);
|
|
float MaxPixelDiameter = sqrt(rcp(TargetSamplePerPixel) * MaxSampleCount * PreviousCumulativeMaxSampleCount / AspectRatio);
|
|
|
|
KernelConfig.MajorAxis = NormalizedScreenMajorAxis * float2(1, -1);
|
|
KernelConfig.MajorPixelRadius = InifinityMajorViewportRadius * ConfusionFactor * View.ViewSizeAndInvSize.x - PreviousMaxPixelDiameter;
|
|
|
|
float MaxPixelRadius = 0.5 * MaxPixelDiameter;
|
|
|
|
KernelConfig.MajorPixelRadius = clamp(KernelConfig.MajorPixelRadius, 0, MaxPixelRadius);
|
|
KernelConfig.MinorPixelRadius = AspectRatio * KernelConfig.MajorPixelRadius;
|
|
|
|
// *4 to multiply from radii area to diameters area.
|
|
float ConvolutionArea = 4.0 * max(KernelConfig.MajorPixelRadius, MinimalPixelRadius) * max(KernelConfig.MinorPixelRadius, MinimalPixelRadius);
|
|
|
|
KernelConfig.SampleCount = clamp(ConvolutionArea * TargetSamplePerPixel * rcp(PreviousCumulativeMaxSampleCount), 0, MaxSampleCount);
|
|
|
|
#if 0
|
|
{
|
|
DebugOutput[DispatchThreadId] = float4(
|
|
KernelConfig.SampleCount,
|
|
KernelConfig.MajorPixelRadius,
|
|
KernelConfig.MinorPixelRadius,
|
|
ConfusionFactor);
|
|
}
|
|
#elif 0
|
|
{
|
|
// DebugOutput[DispatchThreadId] = float4(
|
|
// KernelConfig.SampleCount,
|
|
// InifinityMajorViewportRadius,
|
|
// InifinityMinorViewportRadius,
|
|
// AspectRatio);
|
|
}
|
|
#elif 0
|
|
{
|
|
DebugOutput[DispatchThreadId] = float4(
|
|
//GetWorldNormal(RefSceneMetadata) * 0.5 + 0.5,
|
|
abs(GetTranslatedWorldPosition(RefSceneMetadata) * 0.001),
|
|
RefSceneMetadata.WorldDepth);
|
|
}
|
|
#endif
|
|
|
|
// DebugOutput[DispatchThreadId] = float4(
|
|
// KernelConfig.SampleCount,
|
|
// KernelConfig.MajorPixelRadius,
|
|
// KernelConfig.MinorPixelRadius,
|
|
// OutOfFocus);
|
|
}
|
|
#else
|
|
#error Directional rect sample set is not supported.
|
|
#endif
|
|
}
|
|
#endif // CONFIG_SAMPLE_SET == SAMPLE_SET_DIRECTIONAL_*
|
|
|
|
FSSDCompressedSignalAccumulatorArray CompressedAccumulators = CompressAccumulatorArray(UncompressedAccumulators, CONFIG_ACCUMULATOR_VGPR_COMPRESSION);
|
|
|
|
|
|
// Performance: skip pixels/regions where the center sample is invalid (SampleCount = 0) for virtual shadow maps
|
|
#if CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_VIRTUAL_SHADOW_MAP_MASK
|
|
bool bRefHasSamples = false;
|
|
for (uint SignalMultiplexId = 0; SignalMultiplexId < SIGNAL_ARRAY_SIZE; SignalMultiplexId++)
|
|
{
|
|
bRefHasSamples = bRefHasSamples || (RefSamples.Array[SignalMultiplexId].SampleCount > 0);
|
|
}
|
|
BRANCH
|
|
if (bRefHasSamples)
|
|
#endif
|
|
{
|
|
AccumulateKernel(
|
|
KernelConfig,
|
|
Signal_Textures_0,
|
|
Signal_Textures_1,
|
|
Signal_Textures_2,
|
|
Signal_Textures_3,
|
|
/* inout */ UncompressedAccumulators,
|
|
/* inout */ CompressedAccumulators);
|
|
}
|
|
|
|
// When doing history rejection pre convolution, could still have no information found with the 3x3 + kernel,
|
|
// therefore dynamically complete to form an entire 3x3 convolution.
|
|
#if DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION
|
|
{
|
|
BRANCH
|
|
if (KernelConfig.SampleSet == SAMPLE_SET_3X3_PLUS &&
|
|
KernelConfig.bForceAllAccumulation)
|
|
{
|
|
KernelConfig.SampleSet = SAMPLE_SET_3X3_CROSS;
|
|
KernelConfig.bSampleKernelCenter = false;
|
|
|
|
AccumulateKernel(
|
|
KernelConfig,
|
|
Signal_Textures_0,
|
|
Signal_Textures_1,
|
|
Signal_Textures_2,
|
|
Signal_Textures_3,
|
|
/* inout */ UncompressedAccumulators,
|
|
/* inout */ CompressedAccumulators);
|
|
}
|
|
}
|
|
#endif // DIM_STAGE == STAGE_REJECTION_PRE_CONVOLUTION
|
|
|
|
// Manually sample the center of the kernel after any accumulation when accumulating in descending order.
|
|
if (!KernelConfig.bSampleKernelCenter && KernelConfig.bDescOrder)
|
|
{
|
|
// Remove any jitter the kernel may have. Won't have any VGPR cost when there is no jittering, because KernelConfig.BufferUV == SceneBufferUV.
|
|
// TODO(Denoiser): This is costly for VGPR pressure if using KernelConfig.BufferUV was != SceneBufferUV.
|
|
KernelConfig.BufferUV = SceneBufferUV;
|
|
|
|
SampleAndAccumulateCenterSampleAsItsOwnCluster(
|
|
KernelConfig,
|
|
Signal_Textures_0,
|
|
Signal_Textures_1,
|
|
Signal_Textures_2,
|
|
Signal_Textures_3,
|
|
/* inout */ UncompressedAccumulators,
|
|
/* inout */ CompressedAccumulators);
|
|
}
|
|
|
|
#if CONFIG_ACCUMULATOR_VGPR_COMPRESSION == ACCUMULATOR_COMPRESSION_DISABLED
|
|
SignalAccumulators = UncompressedAccumulators;
|
|
#else
|
|
SignalAccumulators = UncompressAccumulatorArray(CompressedAccumulators, CONFIG_ACCUMULATOR_VGPR_COMPRESSION);
|
|
#endif
|
|
}
|
|
|
|
// Color processing of the signal to reduce highlight flickering.
|
|
#if CONFIG_ACCUMULATION_COLOR_SPACE != CONFIG_OUTPUT_COLOR_SPACE || CONFIG_REJECT_HIGHEST_COLOR
|
|
{
|
|
UNROLL_N(CONFIG_SIGNAL_BATCH_SIZE)
|
|
for (uint MultiplexId = 0; MultiplexId < CONFIG_SIGNAL_BATCH_SIZE; MultiplexId++)
|
|
{
|
|
UncompressSignalAccumulator(/* inout */ SignalAccumulators.Array[MultiplexId]);
|
|
|
|
#if CONFIG_REJECT_HIGHEST_COLOR
|
|
{
|
|
#if !COMPILE_SIGNAL_COLOR
|
|
#error Need to compile signal color.
|
|
#endif
|
|
if (Accumulator.Moment1[MultiplexId].SampleCount > 0)
|
|
{
|
|
const float MaxNeighborWeight = saturate(SignalAccumulators.Array[MultiplexId].Moment1.SampleCount * rcp(10) - 1);
|
|
|
|
SignalAccumulators.Array[MultiplexId].Moment1.SceneColor.rgb =
|
|
(SignalAccumulators.Array[MultiplexId].Moment1.SceneColor.rgb - MaxNeighbor.SceneColor.rgb * MaxNeighborWeight) *
|
|
(SignalAccumulators.Array[MultiplexId].Moment1.SampleCount / (SignalAccumulators.Array[MultiplexId].Moment1.SampleCount - MaxNeighborWeight));
|
|
}
|
|
}
|
|
#endif // CONFIG_REJECT_HIGHEST_COLOR
|
|
|
|
#if CONFIG_ACCUMULATION_COLOR_SPACE != CONFIG_OUTPUT_COLOR_SPACE
|
|
{
|
|
#if COMPILE_MOMENT1_ACCUMULATOR
|
|
SignalAccumulators.Array[MultiplexId].Moment1 = TransformSignal(
|
|
SignalAccumulators.Array[MultiplexId].Moment1,
|
|
/* SrcBasis = */ CONFIG_ACCUMULATION_COLOR_SPACE,
|
|
/* DestBasis = */ CONFIG_OUTPUT_COLOR_SPACE);
|
|
#endif
|
|
|
|
#if COMPILE_MOMENT2_ACCUMULATOR
|
|
SignalAccumulators.Array[MultiplexId].Moment2 = TransformSignal(
|
|
SignalAccumulators.Array[MultiplexId].Moment2,
|
|
/* SrcBasis = */ CONFIG_ACCUMULATION_COLOR_SPACE,
|
|
/* DestBasis = */ CONFIG_OUTPUT_COLOR_SPACE);
|
|
#endif
|
|
}
|
|
#endif // CONFIG_ACCUMULATION_COLOR_SPACE != CONFIG_OUTPUT_COLOR_SPACE
|
|
|
|
// TODO(Denoiser): it might be better to just uncompress before this for loop and remain uncompressed,
|
|
// so the color operation get done in practice during the output sample transcoding.
|
|
CompressSignalAccumulator(/* inout */ SignalAccumulators.Array[MultiplexId]);
|
|
}
|
|
}
|
|
#endif // CONFIG_ACCUMULATION_COLOR_SPACE != CONFIG_OUTPUT_COLOR_SPACE || CONFIG_REJECT_HIGHEST_COLOR
|
|
|
|
// Transcode the spatial accumulation into multiplexed signal according to different modes.
|
|
uint MultiplexCount = 1;
|
|
FSSDSignalArray OutputSamples = CreateSignalArrayFromScalarValue(0.0);
|
|
FSSDSignalFrequencyArray OutputFrequencies = CreateInvalidSignalFrequencyArray();
|
|
{
|
|
#if CONFIG_OUTPUT_MODE == OUTPUT_MODE_SUM
|
|
{
|
|
MultiplexCount = CONFIG_SIGNAL_BATCH_SIZE;
|
|
|
|
UNROLL_N(CONFIG_SIGNAL_BATCH_SIZE)
|
|
for (uint MultiplexId = 0; MultiplexId < CONFIG_SIGNAL_BATCH_SIZE; MultiplexId++)
|
|
{
|
|
UncompressSignalAccumulator(/* inout */ SignalAccumulators.Array[MultiplexId]);
|
|
|
|
OutputSamples.Array[MultiplexId] = SignalAccumulators.Array[MultiplexId].Moment1;
|
|
|
|
// Output the minimal inverse frequency as new world bluring radius for subsequent passes.
|
|
OutputFrequencies.Array[MultiplexId] = SignalAccumulators.Array[MultiplexId].MinFrequency;
|
|
}
|
|
}
|
|
#elif CONFIG_OUTPUT_MODE == OUTPUT_MODE_2MOMMENT_SUM
|
|
{
|
|
#if SIGNAL_ARRAY_SIZE != 2 * MAX_SIGNAL_BATCH_SIZE
|
|
#error Invalid signal array size.
|
|
#endif
|
|
|
|
MultiplexCount = 2 * CONFIG_SIGNAL_BATCH_SIZE;
|
|
|
|
UNROLL_N(CONFIG_SIGNAL_BATCH_SIZE)
|
|
for (uint BatchedSignalId = 0; BatchedSignalId < CONFIG_SIGNAL_BATCH_SIZE; BatchedSignalId++)
|
|
{
|
|
UncompressSignalAccumulator(/* inout */ SignalAccumulators.Array[BatchedSignalId * CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN + 0]);
|
|
|
|
OutputSamples.Array[BatchedSignalId * 2 + 0] = SignalAccumulators.Array[BatchedSignalId * CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN + 0].Moment1;
|
|
OutputSamples.Array[BatchedSignalId * 2 + 1] = SignalAccumulators.Array[BatchedSignalId * CONFIG_MULTIPLEXED_SIGNALS_PER_SIGNAL_DOMAIN + 0].Moment2;
|
|
}
|
|
}
|
|
#elif CONFIG_OUTPUT_MODE == OUTPUT_MODE_DRB
|
|
{
|
|
MultiplexCount = CONFIG_SIGNAL_BATCH_SIZE;
|
|
|
|
UNROLL_N(CONFIG_SIGNAL_BATCH_SIZE)
|
|
for (uint MultiplexId = 0; MultiplexId < CONFIG_SIGNAL_BATCH_SIZE; MultiplexId++)
|
|
{
|
|
UncompressSignalAccumulator(/* inout */ SignalAccumulators.Array[MultiplexId]);
|
|
|
|
OutputSamples.Array[MultiplexId] = SignalAccumulators.Array[MultiplexId].Previous;
|
|
|
|
// Output the minimal inverse frequency as new world bluring radius for subsequent passes.
|
|
OutputFrequencies.Array[MultiplexId] = SignalAccumulators.Array[MultiplexId].MinFrequency;
|
|
|
|
// No need to keep the VGPR pressure at this point for WorldBluringRadius, because no passes use it after.
|
|
if (DIM_STAGE == STAGE_POST_FILTERING && 0)
|
|
{
|
|
OutputFrequencies.Array[MultiplexId].WorldBluringRadius = 0;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
#error Unknown output mode.
|
|
#endif
|
|
}
|
|
|
|
// Clamp the number of samples recorded.
|
|
#if DIM_STAGE == STAGE_POST_FILTERING
|
|
{
|
|
UNROLL_N(CONFIG_SIGNAL_BATCH_SIZE)
|
|
for (uint MultiplexId = 0; MultiplexId < CONFIG_SIGNAL_BATCH_SIZE; MultiplexId++)
|
|
{
|
|
float CurrentSampleCount = RefSamples.Array[MultiplexId].SampleCount;
|
|
float NewSampleCount = min(CurrentSampleCount, TARGETED_SAMPLE_COUNT);
|
|
|
|
OutputSamples.Array[MultiplexId] = MulSignal(OutputSamples.Array[MultiplexId], CurrentSampleCount > 0 ? NewSampleCount / CurrentSampleCount : 0);
|
|
}
|
|
}
|
|
#endif // DIM_STAGE == STAGE_POST_FILTERING
|
|
|
|
#if CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_DIFFUSE_SPHERICAL_HARMONIC && 0
|
|
DebugOutput[DispatchThreadId] = float4(
|
|
OutputSamples.Array[0].ColorSH.R.V.x,
|
|
OutputSamples.Array[0].ColorSH.G.V.x,
|
|
OutputSamples.Array[0].ColorSH.B.V.x,
|
|
OutputSamples.Array[0].SampleCount);
|
|
#endif
|
|
|
|
// TODO(Denoiser): LeaveRayCount = (LeaveRayCount - 1) * 9 / (9 - 2) post processing to reject when for history rejection
|
|
uint2 OutputPixelPostion;
|
|
#if CONFIG_VGPR_OPTIMIZATION && !CONFIG_UPSCALE // TODO(Denoiser)
|
|
{
|
|
// No need to keep DispatchThreadId, can recompute the output pixel position based on information stored in VGPRs for spatial kernel.
|
|
#if CONFIG_SAMPLE_SET == SAMPLE_SET_STACKOWIAK_4_SETS
|
|
#if CONFIG_UPSCALE
|
|
SampleTrackId ^= 0x3;
|
|
#endif
|
|
OutputPixelPostion = (uint2(KernelBufferUV * BufferUVToOutputPixelPosition) & ~0x1) | (uint2(SampleTrackId, SampleTrackId >> 1) & 0x1);
|
|
#else
|
|
OutputPixelPostion = BufferUVToBufferPixelCoord(SceneBufferUV);
|
|
#endif
|
|
}
|
|
#else
|
|
OutputPixelPostion = ViewportMin + DispatchThreadId;
|
|
#endif
|
|
|
|
#if DEBUG_OUTPUT && CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_SHADOW_VISIBILITY_MASK && DIM_STAGE == STAGE_RECONSTRUCTION
|
|
DebugOutput[DispatchThreadId] = float4(OutputSamples.Array[0].SampleCount, 0, 0, 0);
|
|
#endif
|
|
|
|
BRANCH
|
|
if (all(OutputPixelPostion < ViewportMax))
|
|
{
|
|
// Output the multiplexed signal.
|
|
#if DIM_STAGE == STAGE_FINAL_OUTPUT && (CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_SHADOW_VISIBILITY_MASK || CONFIG_SIGNAL_PROCESSING == SIGNAL_PROCESSING_VIRTUAL_SHADOW_MAP_MASK)
|
|
{
|
|
UNROLL
|
|
for (uint MultiplexId = 0; MultiplexId < MultiplexCount; MultiplexId++)
|
|
{
|
|
float Shadow = GetSamplePenumbraSafe(OutputSamples.Array[MultiplexId]);
|
|
|
|
const float ShadowFadeFraction = 1;
|
|
float SSSTransmission = (OutputSamples.Array[MultiplexId].SampleCount > 0 ? OutputSamples.Array[MultiplexId].TransmissionDistance / OutputSamples.Array[MultiplexId].SampleCount : OutputSamples.Array[MultiplexId].TransmissionDistance);
|
|
|
|
// 0 is shadowed, 1 is unshadowed
|
|
// RETURN_COLOR not needed unless writing to SceneColor;
|
|
float FadedShadow = lerp(1.0f, Shadow, ShadowFadeFraction);
|
|
float FadedSSSShadow = lerp(1.0f, SSSTransmission, ShadowFadeFraction);
|
|
|
|
// the channel assignment is documented in ShadowRendering.cpp (look for Light Attenuation channel assignment)
|
|
float4 OutColor;
|
|
if (GET_SCALAR_ARRAY_ELEMENT(LightType, MultiplexId) == LIGHT_TYPE_DIRECTIONAL)
|
|
{
|
|
OutColor = EncodeLightAttenuation(half4(FadedShadow, FadedSSSShadow, 1.0, FadedSSSShadow));
|
|
}
|
|
else
|
|
{
|
|
OutColor = EncodeLightAttenuation(half4(FadedShadow, FadedSSSShadow, FadedShadow, FadedSSSShadow));
|
|
}
|
|
|
|
if (MultiplexId == 0)
|
|
SignalOutput_UAVs_0[OutputPixelPostion] = OutColor;
|
|
if (MultiplexId == 1)
|
|
SignalOutput_UAVs_1[OutputPixelPostion] = OutColor;
|
|
if (MultiplexId == 2)
|
|
SignalOutput_UAVs_2[OutputPixelPostion] = OutColor;
|
|
if (MultiplexId == 3)
|
|
SignalOutput_UAVs_3[OutputPixelPostion] = OutColor;
|
|
}
|
|
}
|
|
#else
|
|
{
|
|
OutputMultiplexedSignal(
|
|
SignalOutput_UAVs_0,
|
|
SignalOutput_UAVs_1,
|
|
SignalOutput_UAVs_2,
|
|
SignalOutput_UAVs_3,
|
|
CONFIG_SIGNAL_OUTPUT_LAYOUT,
|
|
MultiplexCount,
|
|
OutputPixelPostion,
|
|
OutputSamples,
|
|
OutputFrequencies);
|
|
}
|
|
#endif
|
|
}
|
|
} // MainCS
|