1149 lines
42 KiB
HLSL
1149 lines
42 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#define CONFIG_MAX_RANGE_SIZE DIM_BLUR_DIRECTIONS
|
|
|
|
|
|
#include "MotionBlurCommon.ush"
|
|
#include "../LensDistortion.ush"
|
|
#include "../Random.ush"
|
|
#include "/Engine/Public/WaveBroadcastIntrinsics.ush"
|
|
|
|
|
|
//------------------------------------------------------- DEBUG
|
|
|
|
// Debug the input scene color resolution.
|
|
#define DEBUG_OVERLAY_INPUT_RES 0
|
|
|
|
// Debug the gathered resolution.
|
|
#define DEBUG_OVERLAY_GATHER_RES 0
|
|
|
|
// Debug the tile classification
|
|
#define DEBUG_OVERLAY_TILE_CLASSIFICATION 0
|
|
|
|
// Debug whether the full res center is fetched or not.
|
|
#define DEBUG_OVERLAY_SKIP_CENTER 0
|
|
|
|
// Debug the number of samples
|
|
// 1: show number of samples
|
|
// 2: show the density of samples (lower at half res)
|
|
#define DEBUG_OVERLAY_SAMPLES 0
|
|
|
|
|
|
//------------------------------------------------------- CONFIG
|
|
|
|
#if DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_GATHER_HALF_RES
|
|
#define CONFIG_IS_HALF_RES 1
|
|
|
|
#elif DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_GATHER_FULL_RES
|
|
#define CONFIG_IS_HALF_RES 0
|
|
|
|
#elif DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_SCATTER_AS_GATHER_1_VELOCITY_HALF_RES
|
|
#define CONFIG_IS_HALF_RES 1
|
|
|
|
#elif DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_SCATTER_AS_GATHER_1_VELOCITY_FULL_RES
|
|
#define CONFIG_IS_HALF_RES 0
|
|
|
|
#elif DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_SCATTER_AS_GATHER_2_VELOCITY_FULL_RES
|
|
#define CONFIG_IS_HALF_RES 0
|
|
|
|
#else
|
|
#error unknown tile classification
|
|
#endif
|
|
|
|
// from the paper: We use SOFT Z EXTENT = 1mm to 10cm for our results
|
|
#define SOFT_Z_EXTENT 1
|
|
|
|
// Whether should interleave the half res motion blur with the full res to reduce the size of the grain on screen.
|
|
#define CONFIG_SHUFFLE_HALF_RES 0
|
|
|
|
// Whether should do post motion blur translucency.
|
|
#define CONFIG_POST_MOTIONBLUR_TRANSLUCENCY 1
|
|
|
|
// Save memory bandwidth by not fetching full res center if output is fully gathered.
|
|
#if COMPILER_SUPPORTS_WAVE_MINMAX
|
|
#define CONFIG_SKIP_CENTER 1
|
|
#else
|
|
#define CONFIG_SKIP_CENTER 0
|
|
#endif
|
|
|
|
#define CONFIG_SCENE_COLOR_ALPHA (DIM_ALPHA_CHANNEL)
|
|
|
|
|
|
//------------------------------------------------------- CONSTANTS
|
|
|
|
#if CONFIG_IS_HALF_RES
|
|
#define TILE_SIZE (VELOCITY_FILTER_TILE_SIZE / 2)
|
|
#else
|
|
#define TILE_SIZE (VELOCITY_FILTER_TILE_SIZE)
|
|
#endif
|
|
|
|
|
|
//------------------------------------------------------- PARAMETERS
|
|
|
|
SCREEN_PASS_TEXTURE_VIEWPORT(Color)
|
|
SCREEN_PASS_TEXTURE_VIEWPORT(Velocity)
|
|
SCREEN_PASS_TEXTURE_VIEWPORT(VelocityTile)
|
|
FScreenTransform ColorToVelocity;
|
|
FScreenTransform SeparateTranslucencyUVToViewportUV;
|
|
FScreenTransform ViewportUVToSeparateTranslucencyUV;
|
|
uint MaxSampleCount;
|
|
uint OutputMip1;
|
|
uint OutputMip2;
|
|
uint bLensDistortion;
|
|
|
|
uint TileListOffset;
|
|
StructuredBuffer<uint> TileListsBuffer;
|
|
StructuredBuffer<uint> TileListsSizeBuffer;
|
|
|
|
Texture2D<mb_half4> ColorTexture;
|
|
Texture2D<mb_half3> VelocityFlatTexture;
|
|
Texture2D VelocityTileTextures_Textures_0;
|
|
Texture2D VelocityTileTextures_Textures_1;
|
|
Texture2D<mb_half4> HalfResMotionBlurTexture;
|
|
|
|
SamplerState ColorSampler;
|
|
SamplerState VelocitySampler;
|
|
SamplerState VelocityTileSampler;
|
|
SamplerState VelocityFlatSampler;
|
|
SamplerState DepthSampler;
|
|
|
|
Texture2D<float2> UndistortingDisplacementTexture;
|
|
SamplerState UndistortingDisplacementSampler;
|
|
|
|
Texture2D TranslucencyTexture;
|
|
SamplerState TranslucencySampler;
|
|
FScreenTransform ColorToTranslucency;
|
|
float2 TranslucencyUVMin;
|
|
float2 TranslucencyUVMax;
|
|
float2 TranslucencyExtentInverse;
|
|
|
|
#if SUPPORTS_INDEPENDENT_SAMPLERS
|
|
#define SharedVelocitySampler VelocitySampler
|
|
#define SharedVelocityTileSampler VelocitySampler
|
|
#define SharedVelocityFlatSampler VelocitySampler
|
|
#define SharedDepthSampler VelocitySampler
|
|
#else
|
|
#define SharedVelocitySampler VelocitySampler
|
|
#define SharedVelocityTileSampler VelocityTileSampler
|
|
#define SharedVelocityFlatSampler VelocityFlatSampler
|
|
#define SharedDepthSampler DepthSampler
|
|
#endif
|
|
|
|
RWTexture2D<mb_half4> SceneColorOutputMip0;
|
|
RWTexture2D<mb_half4> SceneColorOutputMip1;
|
|
RWTexture2D<mb_half4> SceneColorOutputMip2;
|
|
RWTexture2D<float4> DebugOutput;
|
|
|
|
|
|
//------------------------------------------------------- LDS
|
|
|
|
#if !PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
|
|
groupshared mb_half4 SharedArray0[TILE_SIZE * TILE_SIZE];
|
|
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
groupshared mb_half SharedArray1[TILE_SIZE * TILE_SIZE];
|
|
#endif
|
|
|
|
#endif // !PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
|
|
#if !COMPILER_SUPPORTS_WAVE_VOTE
|
|
|
|
groupshared uint SharedSampleCount;
|
|
groupshared uint SharedFastPath;
|
|
|
|
#endif
|
|
|
|
|
|
//------------------------------------------------------- FUNCTIONS
|
|
|
|
// Normalizes an accumulated value by its total weight and clamps the result to be
// non-negative. The -min(-x, 0) form is kept deliberately rather than max(x, 0);
// NOTE(review): presumably chosen so a NaN produced by 0 * rcp(0) (zero weight)
// resolves to 0 through min() — confirm on target hardware before changing.
float NormalizeAccumulator(float Accumulator, mb_half AccumulatorWeight)
{
	// Use a float literal for the clamp bound, consistent with the float4 overload,
	// since this overload computes and returns full-precision float.
	return -min(-Accumulator * rcp(AccumulatorWeight), float(0.0));
}
|
|
|
|
// Normalizes a float4 accumulator by its scalar weight, clamped to be non-negative.
// NOTE(review): written as -min(-x, 0) rather than max(x, 0); presumably so a NaN
// produced by 0 * rcp(0) (zero weight) resolves to 0 through min() — confirm
// before simplifying.
float4 NormalizeAccumulator(float4 Accumulator, float AccumulatorWeight)
{
	return -min(-Accumulator * rcp(AccumulatorWeight), float(0.0));
}
|
|
|
|
#if CONFIG_MOTION_BLUR_COMPILE_FP16

// Half-precision overloads of NormalizeAccumulator, compiled only when the pass
// runs fp16 arithmetic. Same contract as the float versions: divide the
// accumulator by its weight and clamp the result to be non-negative.
// The -min(-x, 0) form mirrors the float overloads (see note there).

mb_half NormalizeAccumulator(mb_half Accumulator, mb_half AccumulatorWeight)
{
	return -min(-Accumulator * rcp(AccumulatorWeight), mb_half(0.0));
}

mb_half4 NormalizeAccumulator(mb_half4 Accumulator, mb_half AccumulatorWeight)
{
	return -min(-Accumulator * rcp(AccumulatorWeight), mb_half(0.0));
}

#endif // CONFIG_MOTION_BLUR_COMPILE_FP16
|
|
|
|
// Normalizes a directional color accumulator, folding in a hole-filling color
// weighted by HoleFillWeight, and derives an opacity for blending against the
// full-resolution center.
//
//  ColorOutput       - normalized color after hole filling.
//  OpacityOutput     - blend opacity, bounded by both the gathered color weight
//                      and the depth-aware weight.
//  bValidColorOutput - true when any weight was accumulated at all.
void NormalizeAccumulatorWithHoleFill(
	mb_half4 Color,
	mb_half ColorWeight,
	mb_half HoleFillWeight,
	mb_half DepthAware,
	mb_half4 HoleFillingColor,
	mb_half InvSampleCount,
	out mb_half4 ColorOutput,
	out mb_half OpacityOutput,
	out bool bValidColorOutput)
{
	// Total weight once the hole-filling contribution is folded in.
	mb_half TotalWeight = ColorWeight + HoleFillWeight;
	bValidColorOutput = TotalWeight > mb_half(0.0);

	// Blend the hole-fill color in proportion to its weight, then normalize.
	mb_half4 BlendedAccumulator = Color + HoleFillWeight * HoleFillingColor;
	ColorOutput = NormalizeAccumulator(BlendedAccumulator, TotalWeight);

	// Opacity is limited by whichever is smaller: the saturated color coverage
	// or the depth-aware weight, each rescaled against half the sample count.
	OpacityOutput = min(saturate(ColorWeight * InvSampleCount * mb_half(2)), (DepthAware * InvSampleCount * mb_half(2)));
}
|
|
|
|
|
|
// Compute the weight of the sample for hole filling.
|
|
// Weight of a sample for hole filling: ramps up linearly (then saturates) as the
// sample moves farther behind the center pixel, scaled by DepthScale.
mb_half ComputeSampleHoleFillWeight(mb_half CenterDepth, mb_half SampleDepth, mb_half DepthScale)
{
	// Positive when the sample is behind the center.
	mb_half DepthDelta = SampleDepth - CenterDepth;
	return saturate(DepthScale * DepthDelta);
}
|
|
|
|
// Computes the contribution weight of one sample.
|
|
// Contribution weight of one sample along a blur direction. Combines a length
// test (does the sample's velocity spread reach this pixel offset?) with a
// direction test (does the sample's velocity angle match the blur angle?).
// NOTE(review): SampleDepth is currently unused by the body; it is kept to
// preserve the call signature.
mb_half ComputeSampleConvolutionWeight(
	mb_half SampleDepth, mb_half SampleSpreadLength, mb_half SampleVelocityAngle,
	mb_half OffsetLength, mb_half BlurAngle, mb_half PixelToSampleScale)
{
	// Length test: weight falls off once the offset exceeds the sample's spread.
	mb_half ClampedOffset = max(OffsetLength - mb_half(1), mb_half(0));
	mb_half LengthWeight = saturate(PixelToSampleScale * SampleSpreadLength - ClampedOffset);

#if CONFIG_MAX_RANGE_SIZE > 1
	// Direction test: fade out samples whose velocity angle diverges from the
	// blur direction beyond a small tolerance.
	mb_half AngleDelta = max(GetPolarRelativeAngle(SampleVelocityAngle, BlurAngle) - mb_half(0.1), mb_half(0.0));
	mb_half AngleWeight = saturate(mb_half(1.0) - AngleDelta * mb_half(4.0));
#else
	// Single blur direction: every sample is direction-aligned by construction.
	mb_half AngleWeight = mb_half(1.0);
#endif

	return LengthWeight * AngleWeight;
}
|
|
|
|
// Selectively computes the contribution weight of the center or the sample based on whether sample is respectively behind center or not
|
|
// Blends between the center pixel's convolution weight and the sample's own,
// based on which of the two is closer: samples behind the center are weighted
// by the center's velocity data, samples in front keep their own.
mb_half ComputeCenterOrSampleWeight(
	mb_half CenterDepth, mb_half CenterSpreadLength, mb_half CenterVelocityAngle,
	mb_half SampleDepth, mb_half SampleSpreadLength, mb_half SampleVelocityAngle,
	mb_half OffsetLength, mb_half BlurAngle, mb_half PixelToSampleScale, mb_half DepthScale)
{
	// Signed, scaled depth difference; positive when the sample is behind the center.
	mb_half ScaledDepthDelta = DepthScale * (SampleDepth - CenterDepth);

	// Complementary ramps: these two saturated weights always sum to 1, giving a
	// smooth transition band of width 1/DepthScale around equal depth.
	mb_half CenterWeight = saturate(0.5 + ScaledDepthDelta);
	mb_half SampleWeight = saturate(0.5 - ScaledDepthDelta);

	mb_half CenterConvolutionWeight = ComputeSampleConvolutionWeight(
		CenterDepth, CenterSpreadLength, CenterVelocityAngle,
		OffsetLength, BlurAngle, PixelToSampleScale);

	mb_half SampleConvolutionWeight = ComputeSampleConvolutionWeight(
		SampleDepth, SampleSpreadLength, SampleVelocityAngle,
		OffsetLength, BlurAngle, PixelToSampleScale);

	return CenterWeight * CenterConvolutionWeight + SampleWeight * SampleConvolutionWeight;
}
|
|
|
|
// TODO: move that to velocity flatten.
|
|
// TODO: move that to velocity flatten.
// Extracts the velocity length (in pixels) from the flattened velocity encoding.
// Only .x is read here; .y (the encoded angle) is ignored by this helper.
mb_half GetVelocityLengthPixels(mb_half2 EncodedVelocity)
{
	// 11:11:10 (VelocityLength, VelocityAngle, Depth)
	return EncodedVelocity.x;
}
|
|
|
|
// Remaps a separate-translucency UV through the lens undistortion displacement map:
// translucency UV -> viewport UV -> undistorted viewport UV -> translucency UV.
float2 ApplyLensDistortionOnTranslucencyUV(float2 SeparateTranslucencyUV)
{
	// Convert into viewport space, where the displacement map is defined.
	float2 ViewportUV = ApplyScreenTransform(SeparateTranslucencyUV, SeparateTranslucencyUVToViewportUV);

	// Sample the undistorting displacement to get the corrected viewport position.
	float2 CorrectedViewportUV = ApplyLensDistortionOnViewportUV(UndistortingDisplacementTexture, UndistortingDisplacementSampler, ViewportUV);

	// Back into separate-translucency UV space.
	return ApplyScreenTransform(CorrectedViewportUV, ViewportUVToSeparateTranslucencyUV);
}
|
|
|
|
|
|
//------------------------------------------------------- ENTRY POINT
|
|
|
|
[numthreads(TILE_SIZE, TILE_SIZE, 1)]
|
|
void MainCS(
|
|
uint GroupId : SV_GroupID,
|
|
uint GroupThreadIndex : SV_GroupIndex)
|
|
{
|
|
float4 Debug = 0;
|
|
mb_half4 DebugOverlay = mb_half(1.0).xxxx;
|
|
|
|
uint PackedGroupOffset = TileListsBuffer[GroupId + TileListOffset];
|
|
uint2 GroupOffset = uint2(PackedGroupOffset, PackedGroupOffset >> 16) & 0xFFFF;
|
|
|
|
const bool bIsHalfRes = CONFIG_IS_HALF_RES ? true : false;
|
|
|
|
uint2 DispatchThreadId = (
|
|
ZOrder2D(GroupThreadIndex, log2(TILE_SIZE)) +
|
|
GroupOffset * uint2(TILE_SIZE, TILE_SIZE));
|
|
|
|
uint2 iColorPixelPos = DispatchThreadId + Color_ViewportMin;
|
|
|
|
float2 ColorUV;
|
|
BRANCH
|
|
if (bIsHalfRes)
|
|
{
|
|
ColorUV = (float2(DispatchThreadId * 2 + Color_ViewportMin) + 1.0) * Color_ExtentInverse;
|
|
}
|
|
else
|
|
{
|
|
ColorUV = (float2(iColorPixelPos) + 0.5) * Color_ExtentInverse;
|
|
}
|
|
|
|
const float PixelToTileScale = rcp(float(VELOCITY_FLATTEN_TILE_SIZE));
|
|
|
|
float Random = InterleavedGradientNoise(iColorPixelPos, 0);
|
|
float Random2 = InterleavedGradientNoise(iColorPixelPos, 1);
|
|
|
|
// [-0.25, 0.25]
|
|
float2 TileJitter = (float2(Random, Random2) - 0.5) * 0.5;
|
|
|
|
// Map color UV to velocity UV space.
|
|
float2 VelocityUV = ApplyScreenTransform(ColorUV, ColorToVelocity);
|
|
VelocityUV = clamp(VelocityUV, Velocity_UVViewportBilinearMin, Velocity_UVViewportBilinearMax);
|
|
|
|
// Map velocity UV to velocity tile UV space with jitter.
|
|
float2 NearestVelocitySvPosition = floor(VelocityUV * Velocity_Extent) + 0.5;
|
|
float2 VelocityTileUV = ((NearestVelocitySvPosition - Velocity_ViewportMin) * PixelToTileScale + TileJitter) * VelocityTile_ExtentInverse;
|
|
|
|
// Velocity tile UV originates at [0,0]; only need to clamp max.
|
|
VelocityTileUV = min(VelocityTileUV, VelocityTile_UVViewportBilinearMax);
|
|
|
|
FVelocityRange VelocityRange = DecodeVelocityRange(
|
|
VelocityTileTextures_Textures_0.SampleLevel(SharedVelocityTileSampler, VelocityTileUV, 0),
|
|
VelocityTileTextures_Textures_1.SampleLevel(SharedVelocityTileSampler, VelocityTileUV, 0));
|
|
|
|
float2 MinVelocityPixels = VelocityRange.Min;
|
|
float2 Max0VelocityPixels = VelocityRange.Max[0];
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
float2 Max1VelocityPixels = VelocityRange.Max[1];
|
|
#endif
|
|
|
|
// Compute how many samples should be applied.
|
|
float MipLevel0;
|
|
float MipLevel1;
|
|
uint SampleCount;
|
|
float InvSampleCount;
|
|
bool bSkip;
|
|
bool bFastPath;
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
bool bDoOneDirectionOnly;
|
|
#else
|
|
const bool bDoOneDirectionOnly = true;
|
|
#endif
|
|
{
|
|
const uint SampleCountFactor = 4;
|
|
|
|
float MaxPixelLength0 = length(Max0VelocityPixels);
|
|
|
|
uint RecommendedSampleCount = clamp(SampleCountFactor * ceil(MaxPixelLength0 * rcp(float(SampleCountFactor))), SampleCountFactor, MaxSampleCount);
|
|
|
|
bFastPath = length2(MinVelocityPixels) > 0.4 * (MaxPixelLength0 * MaxPixelLength0);
|
|
|
|
#if COMPILER_SUPPORTS_WAVE_MINMAX && COMPILER_SUPPORTS_WAVE_VOTE
|
|
{
|
|
SampleCount = ToScalarMemory(WaveActiveMax(RecommendedSampleCount));
|
|
bFastPath = WaveActiveAllTrue(bFastPath);
|
|
}
|
|
#else
|
|
{
|
|
if (GroupThreadIndex == 0)
|
|
{
|
|
SharedSampleCount = 0;
|
|
SharedFastPath = 0;
|
|
}
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
InterlockedMax(SharedSampleCount, RecommendedSampleCount);
|
|
InterlockedAdd(SharedFastPath, bFastPath ? 1 : 0);
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
SampleCount = ToScalarMemory(SharedSampleCount);
|
|
bFastPath = (SharedFastPath == (TILE_SIZE * TILE_SIZE));
|
|
}
|
|
#endif
|
|
|
|
InvSampleCount = ToScalarMemory(rcp(float(SampleCount)));
|
|
bSkip = MaxPixelLength0 < CONFIG_MINIMAL_PIXEL_VELOCITY;
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
{
|
|
float MaxPixelLength1 = length(Max1VelocityPixels);
|
|
bDoOneDirectionOnly = MaxPixelLength1 < CONFIG_MINIMAL_PIXEL_VELOCITY;
|
|
if (bDoOneDirectionOnly)
|
|
{
|
|
MipLevel0 = saturate(MaxPixelLength0 * InvSampleCount - 1.0);
|
|
MipLevel1 = MipLevel0;
|
|
Max1VelocityPixels = Max0VelocityPixels;
|
|
}
|
|
else
|
|
{
|
|
MipLevel0 = saturate(MaxPixelLength0 * (InvSampleCount * 2.0) - 1.0);
|
|
MipLevel1 = saturate(MaxPixelLength1 * (InvSampleCount * 2.0) - 1.0);
|
|
}
|
|
}
|
|
#else
|
|
{
|
|
MipLevel0 = saturate(MaxPixelLength0 * InvSampleCount - 1.0);
|
|
MipLevel1 = MipLevel0;
|
|
}
|
|
#endif
|
|
|
|
#if DEBUG_OVERLAY_SAMPLES == 1
|
|
DebugOverlay = lerp(mb_half4(0.5, 1.0, 0.5, 1.0), mb_half4(1.0, 0.5, 0.5, 1.0), float(SampleCount) / float(MaxSampleCount));
|
|
#elif DEBUG_OVERLAY_SAMPLES == 2
|
|
DebugOverlay = lerp(mb_half4(0.5, 1.0, 0.5, 1.0), mb_half4(1.0, 0.5, 0.5, 1.0), float(SampleCount) / float(MaxSampleCount * (bIsHalfRes ? 4 : 1)));
|
|
#endif
|
|
}
|
|
|
|
mb_half2 SearchVector0 = mb_half2(Max0VelocityPixels * Color_ExtentInverse.xy);
|
|
|
|
mb_half4 MotionBlurColor;
|
|
mb_half FullResBlend;
|
|
|
|
BRANCH
|
|
if (bSkip)
|
|
{
|
|
MotionBlurColor = mb_half(0.0).xxxx;
|
|
FullResBlend = 1.0;
|
|
|
|
#if DEBUG_OVERLAY_TILE_CLASSIFICATION
|
|
DebugOverlay = mb_half4(0.5, 0.5, 1.0, 1.0);
|
|
#endif
|
|
|
|
#if DEBUG_OVERLAY_SAMPLES
|
|
DebugOverlay = mb_half4(0.5, 1.0, 0.5, 1.0);
|
|
#endif
|
|
}
|
|
#if DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_GATHER_HALF_RES || DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_GATHER_FULL_RES
|
|
else
|
|
#else
|
|
else if (bFastPath)
|
|
#endif
|
|
{
|
|
mb_half4 ColorAccum = 0;
|
|
|
|
LOOP
|
|
for (uint i = 0; i < SampleCount; i += 4)
|
|
{
|
|
UNROLL_N(2)
|
|
for (uint j = 0; j < 2; j ++)
|
|
{
|
|
float2 OffsetLength = float(i / 2 + j) + (0.5 + float2(Random - 0.5, 0.5 - Random));
|
|
float2 OffsetFraction = OffsetLength * (2.0 * InvSampleCount);
|
|
|
|
float2 SampleUV[2];
|
|
SampleUV[0] = ColorUV + OffsetFraction.x * SearchVector0;
|
|
SampleUV[1] = ColorUV - OffsetFraction.y * SearchVector0;
|
|
|
|
SampleUV[0] = clamp(SampleUV[0], Color_UVViewportBilinearMin, Color_UVViewportBilinearMax);
|
|
SampleUV[1] = clamp(SampleUV[1], Color_UVViewportBilinearMin, Color_UVViewportBilinearMax);
|
|
|
|
ColorAccum += ColorTexture.SampleLevel(ColorSampler, SampleUV[0], MipLevel0);
|
|
ColorAccum += ColorTexture.SampleLevel(ColorSampler, SampleUV[1], MipLevel0);
|
|
}
|
|
}
|
|
|
|
MotionBlurColor = ColorAccum * InvSampleCount;
|
|
FullResBlend = 0.0;
|
|
|
|
#if DEBUG_OVERLAY_TILE_CLASSIFICATION
|
|
DebugOverlay = mb_half4(0.5, 1.0, 0.5, 1.0);
|
|
#endif
|
|
|
|
#if DEBUG_OVERLAY_INPUT_RES
|
|
{
|
|
mb_half4 DebugMipLevel = lerp(mb_half4(1.0, 0.5, 0.5, 1.0), mb_half4(0.5, 1.0, 0.5, 1.0), mb_half(MipLevel0));
|
|
MotionBlurColor *= DebugMipLevel;
|
|
}
|
|
#endif
|
|
}
|
|
#if DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_SCATTER_AS_GATHER_1_VELOCITY_HALF_RES || DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_SCATTER_AS_GATHER_1_VELOCITY_FULL_RES || DIM_TILE_CLASSIFICATION == FILTER_TILE_CLASSIFY_SCATTER_AS_GATHER_2_VELOCITY_FULL_RES
|
|
else
|
|
{
|
|
const uint TotalSteps = SampleCount / 2;
|
|
const uint DirectionCount = CONFIG_MAX_RANGE_SIZE;
|
|
const uint StepPerDirectionCount = TotalSteps / DirectionCount;
|
|
const uint SamplePerDirection = SampleCount / DirectionCount;
|
|
|
|
mb_half3 CenterVelocityDepth = VelocityFlatTexture.SampleLevel(SharedVelocityFlatSampler, VelocityUV, 0).xyz;
|
|
mb_half CenterDepth = CenterVelocityDepth.z;
|
|
mb_half CenterVelocityLength = GetVelocityLengthPixels(CenterVelocityDepth.xy);
|
|
mb_half CenterVelocityAngle = CenterVelocityDepth.y * (2.0 * PI) - PI;
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
mb_half4 HoleFillColor = 0;
|
|
mb_half HoleFillColorWeight = 0;
|
|
#endif
|
|
|
|
mb_half4 DirectionalColor[CONFIG_MAX_RANGE_SIZE];
|
|
mb_half DirectionalColorWeight[CONFIG_MAX_RANGE_SIZE];
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
mb_half HoleFillWeightAccum[CONFIG_MAX_RANGE_SIZE];
|
|
|
|
mb_half DepthAccum[CONFIG_MAX_RANGE_SIZE];
|
|
mb_half DepthSquareAccum[CONFIG_MAX_RANGE_SIZE];
|
|
mb_half DepthAwareWeight[CONFIG_MAX_RANGE_SIZE];
|
|
#endif
|
|
|
|
// Iterate over the different directions.
|
|
UNROLL_N(CONFIG_MAX_RANGE_SIZE)
|
|
for (uint DirectionId = 0; DirectionId < CONFIG_MAX_RANGE_SIZE; DirectionId++)
|
|
{
|
|
float PixelToSampleScale = TotalSteps * rsqrt(dot(Max0VelocityPixels, Max0VelocityPixels));
|
|
float2 SearchVector = SearchVector0;
|
|
float BlurAngle = CartesianToPolar(Max0VelocityPixels).y;
|
|
float MipLevel = MipLevel0;
|
|
|
|
bool bAccumulateHoleFillColor = true;
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
if (DirectionId == 1)
|
|
{
|
|
PixelToSampleScale = TotalSteps * rsqrt(dot(Max1VelocityPixels, Max1VelocityPixels));
|
|
SearchVector = Max1VelocityPixels * Color_ExtentInverse.xy;
|
|
BlurAngle = CartesianToPolar(Max1VelocityPixels).y;
|
|
bAccumulateHoleFillColor = HoleFillColorWeight == 0.0;
|
|
MipLevel = MipLevel1;
|
|
}
|
|
#endif
|
|
|
|
DirectionalColor[DirectionId] = 0;
|
|
DirectionalColorWeight[DirectionId] = 0;
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
{
|
|
HoleFillWeightAccum[DirectionId] = 0;
|
|
|
|
DepthAccum[DirectionId] = 0;
|
|
DepthSquareAccum[DirectionId] = 0;
|
|
DepthAwareWeight[DirectionId] = 0;
|
|
}
|
|
#endif
|
|
|
|
// Iterate over steps of 2 samples in each direction.
|
|
LOOP
|
|
for (uint StepId = DirectionId; StepId < TotalSteps; StepId += CONFIG_MAX_RANGE_SIZE)
|
|
{
|
|
float2 SampleUV[2];
|
|
mb_half4 SampleColor[2];
|
|
mb_half SampleDepth[2];
|
|
mb_half SampleVelocityLength[2];
|
|
mb_half ConvolutionWeight[2];
|
|
mb_half HoleFillingWeight[2];
|
|
|
|
mb_half2 OffsetLength = mb_half(StepId + 0.5) + mb_half2(Random - 0.5, 0.5 - Random) * (bDoOneDirectionOnly ? mb_half(1.0) : mb_half(2.0));
|
|
mb_half2 OffsetFraction = OffsetLength * rcp(mb_half(TotalSteps));
|
|
|
|
mb_half WeightOffsetLength = mb_half(DirectionId + StepId) + 0.5;
|
|
|
|
SampleUV[0] = ColorUV + OffsetFraction.x * SearchVector;
|
|
SampleUV[1] = ColorUV - OffsetFraction.y * SearchVector;
|
|
|
|
SampleUV[0] = clamp(SampleUV[0], Color_UVViewportBilinearMin, Color_UVViewportBilinearMax);
|
|
SampleUV[1] = clamp(SampleUV[1], Color_UVViewportBilinearMin, Color_UVViewportBilinearMax);
|
|
|
|
UNROLL
|
|
for (uint j = 0; j < 2; j++)
|
|
{
|
|
mb_half3 SampleVelocityDepth = VelocityFlatTexture.SampleLevel(
|
|
SharedVelocityFlatSampler, ApplyScreenTransform(SampleUV[j], ColorToVelocity), 0).xyz;
|
|
|
|
SampleColor[j] = ColorTexture.SampleLevel(ColorSampler, SampleUV[j], MipLevel);
|
|
SampleDepth[j] = mb_half(SampleVelocityDepth.z);
|
|
|
|
// Decode
|
|
SampleVelocityDepth.x = GetVelocityLengthPixels(SampleVelocityDepth.x); // TODO: move in velocity flatten
|
|
SampleVelocityDepth.y = SampleVelocityDepth.y * mb_half(2.0 * PI) - mb_half(PI);
|
|
|
|
// in pixels
|
|
SampleVelocityLength[j] = SampleVelocityDepth.x;
|
|
|
|
ConvolutionWeight[j] = ComputeSampleConvolutionWeight(
|
|
SampleVelocityDepth.z, SampleVelocityDepth.x, SampleVelocityDepth.y,
|
|
OffsetLength.x, BlurAngle, PixelToSampleScale);
|
|
|
|
HoleFillingWeight[j] = ComputeCenterOrSampleWeight(
|
|
CenterDepth, CenterVelocityLength, CenterVelocityAngle,
|
|
SampleVelocityDepth.z, SampleVelocityDepth.x, SampleVelocityDepth.y,
|
|
WeightOffsetLength, BlurAngle, PixelToSampleScale, SOFT_Z_EXTENT);
|
|
}
|
|
|
|
{
|
|
bool2 Mirror = bool2(SampleDepth[0] > SampleDepth[1], SampleVelocityLength[0] < SampleVelocityLength[1]);
|
|
HoleFillingWeight[0] = all(Mirror) ? HoleFillingWeight[1] : HoleFillingWeight[0];
|
|
HoleFillingWeight[1] = any(Mirror) ? HoleFillingWeight[1] : HoleFillingWeight[0];
|
|
}
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
{
|
|
DirectionalColor[DirectionId] += ConvolutionWeight[0] * SampleColor[0] + ConvolutionWeight[1] * SampleColor[1];
|
|
DirectionalColorWeight[DirectionId] += ConvolutionWeight[0] + ConvolutionWeight[1];
|
|
|
|
DepthAccum[DirectionId] += HoleFillingWeight[0] * SampleDepth[0] + HoleFillingWeight[1] * SampleDepth[1];
|
|
DepthSquareAccum[DirectionId] += HoleFillingWeight[0] * Square(SampleDepth[0]) + HoleFillingWeight[1] * Square(SampleDepth[1]);
|
|
DepthAwareWeight[DirectionId] += HoleFillingWeight[0] + HoleFillingWeight[1];
|
|
|
|
|
|
// Measure how much should hole fill DirectionalColor with.
|
|
{
|
|
mb_half HoleFillingWeight0 = saturate(HoleFillingWeight[0] - ConvolutionWeight[0]);
|
|
mb_half HoleFillingWeight1 = saturate(HoleFillingWeight[1] - ConvolutionWeight[1]);
|
|
|
|
//mb_half HoleFillingWeight = ComputeSampleHoleFillWeight(CenterDepth, SampleDepth[j], SOFT_Z_EXTENT);
|
|
|
|
HoleFillWeightAccum[DirectionId] += HoleFillingWeight0 + HoleFillingWeight1;
|
|
|
|
// Build a hole filling along the major directional blur.
|
|
if (bAccumulateHoleFillColor)
|
|
{
|
|
HoleFillColor += HoleFillingWeight0 * SampleColor[0] + HoleFillingWeight1 * SampleColor[1];
|
|
HoleFillColorWeight += HoleFillingWeight0 + HoleFillingWeight1;
|
|
}
|
|
}
|
|
|
|
}
|
|
#else
|
|
{
|
|
DirectionalColor[DirectionId] += HoleFillingWeight[0] * SampleColor[0] + HoleFillingWeight[1] * SampleColor[1];
|
|
DirectionalColorWeight[DirectionId] += HoleFillingWeight[0] + HoleFillingWeight[1];
|
|
}
|
|
#endif
|
|
} // for (uint StepId = 0; StepId < StepCount; StepId += CONFIG_MAX_RANGE_SIZE)
|
|
|
|
#if DEBUG_OVERLAY_INPUT_RES
|
|
{
|
|
mb_half4 DebugMipLevel = lerp(mb_half4(1.0, 0.5, 0.5, 1.0), mb_half4(0.5, 1.0, 0.5, 1.0), mb_half(MipLevel0));
|
|
DirectionalColor[DirectionId] *= DebugMipLevel;
|
|
}
|
|
#endif
|
|
} // for (uint DirectionId = 0; DirectionId < CONFIG_MAX_RANGE_SIZE; DirectionId++)
|
|
|
|
#if CONFIG_MAX_RANGE_SIZE > 1
|
|
{
|
|
mb_half InvDirectionSampleCount = InvSampleCount * 2.0;
|
|
if (bDoOneDirectionOnly)
|
|
{
|
|
DirectionalColor[0] += DirectionalColor[1];
|
|
DirectionalColorWeight[0] += DirectionalColorWeight[1];
|
|
HoleFillWeightAccum[0] += HoleFillWeightAccum[1];
|
|
|
|
DirectionalColor[1] = mb_half(0.0);
|
|
DirectionalColorWeight[1] = mb_half(0.0);
|
|
HoleFillWeightAccum[1] = mb_half(0.0);
|
|
|
|
InvDirectionSampleCount = InvSampleCount;
|
|
}
|
|
|
|
mb_half Velocity1Translucency;
|
|
{
|
|
mb_half AvgDepthSquare0 = NormalizeAccumulator(DepthSquareAccum[0], mb_half(DepthAwareWeight[0]));
|
|
mb_half AvgDepth0 = NormalizeAccumulator(DepthAccum[0], mb_half(DepthAwareWeight[0]));
|
|
mb_half AvgDepth1 = NormalizeAccumulator(DepthAccum[1], mb_half(DepthAwareWeight[1]));
|
|
|
|
mb_half Variance0 = AvgDepthSquare0 - Square(AvgDepth0);
|
|
|
|
Velocity1Translucency = mb_half(saturate(2.0 * Variance0 / (Variance0 + Square(AvgDepth1 - AvgDepth0))));
|
|
}
|
|
|
|
mb_half4 NormalizedHoleFillColor0 = NormalizeAccumulator(HoleFillColor, HoleFillColorWeight);
|
|
mb_half4 NormalizedColor0;
|
|
mb_half Opacity0;
|
|
bool bValidColorOutput0;
|
|
NormalizeAccumulatorWithHoleFill(
|
|
DirectionalColor[0],
|
|
DirectionalColorWeight[0],
|
|
HoleFillWeightAccum[0],
|
|
DepthAwareWeight[0],
|
|
NormalizedHoleFillColor0,
|
|
InvDirectionSampleCount,
|
|
/* out */ NormalizedColor0,
|
|
/* out */ Opacity0,
|
|
/* out */ bValidColorOutput0);
|
|
|
|
mb_half4 NormalizedHoleFillColor1 = lerp(NormalizedColor0, NormalizedHoleFillColor0, bValidColorOutput0 ? Velocity1Translucency : 1.0);
|
|
mb_half4 NormalizedColor1;
|
|
mb_half Opacity1;
|
|
bool bValidColorOutput1;
|
|
NormalizeAccumulatorWithHoleFill(
|
|
DirectionalColor[1],
|
|
DirectionalColorWeight[1],
|
|
HoleFillWeightAccum[1],
|
|
DepthAwareWeight[1],
|
|
NormalizedHoleFillColor1,
|
|
InvDirectionSampleCount,
|
|
/* out */ NormalizedColor1,
|
|
/* out */ Opacity1,
|
|
/* out */ bValidColorOutput1);
|
|
|
|
Opacity1 *= saturate(1.0 - Velocity1Translucency);
|
|
|
|
mb_half CenterColorContribution = saturate(1.0 - Opacity0) * saturate(1.0 - Opacity1);
|
|
mb_half InvTotalWeight = rcp(Opacity0 + Opacity1);
|
|
|
|
MotionBlurColor = (NormalizedColor0 * Opacity0 + NormalizedColor1 * Opacity1) * -min(-InvTotalWeight * (1.0 - CenterColorContribution), 0.0);
|
|
FullResBlend = CenterColorContribution;
|
|
|
|
#if DEBUG_OVERLAY_TILE_CLASSIFICATION
|
|
DebugOverlay = mb_half4(1.0, 0.5, 0.5, 1.0);
|
|
#endif
|
|
}
|
|
#else // CONFIG_MAX_RANGE_SIZE == 1
|
|
{
|
|
const uint DirectionId = 0;
|
|
|
|
DirectionalColor[DirectionId] *= mb_half(0.5) / mb_half(StepPerDirectionCount);
|
|
DirectionalColorWeight[DirectionId] *= mb_half(0.5) / mb_half(StepPerDirectionCount);
|
|
|
|
MotionBlurColor = DirectionalColor[DirectionId];
|
|
FullResBlend = (mb_half(1.0) - DirectionalColorWeight[DirectionId]);
|
|
|
|
#if DEBUG_OVERLAY_TILE_CLASSIFICATION
|
|
DebugOverlay = mb_half4(1.0, 1.0, 0.5, 1.0);
|
|
#endif
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
#if CONFIG_SKIP_CENTER
|
|
bool bFetchCenter = WaveActiveAnyTrue(FullResBlend > 0.0);
|
|
#else
|
|
const bool bFetchCenter = true;
|
|
#endif
|
|
|
|
#if DEBUG_OVERLAY_GATHER_RES
|
|
{
|
|
mb_half4 DebugHalfResGatherLevel = lerp(mb_half4(1.0, 0.5, 0.5, 1.0), mb_half4(0.5, 1.0, 0.5, 1.0), bIsHalfRes ? mb_half(1.0) : mb_half(0.0));
|
|
MotionBlurColor *= DebugHalfResGatherLevel;
|
|
}
|
|
#endif
|
|
|
|
#if DEBUG_OVERLAY_SKIP_CENTER
|
|
DebugOverlay = lerp(mb_half4(1.0, 0.5, 0.5, 1.0), mb_half4(0.5, 1.0, 0.5, 1.0), bFetchCenter ? mb_half(0.0) : mb_half(1.0));
|
|
#endif
|
|
|
|
BRANCH
|
|
if (bIsHalfRes)
|
|
{
|
|
mb_short2 OutputPixelCoord = mb_short2(uint2(ColorUV * Color_Extent)) & mb_short(~0x1);
|
|
mb_short2 OutputPixelCoord0 = min(OutputPixelCoord + mb_short2(0, 0), mb_short2(Color_ViewportMax - 1));
|
|
mb_short2 OutputPixelCoord1 = min(OutputPixelCoord + mb_short2(1, 0), mb_short2(Color_ViewportMax - 1));
|
|
mb_short2 OutputPixelCoord2 = min(OutputPixelCoord + mb_short2(0, 1), mb_short2(Color_ViewportMax - 1));
|
|
mb_short2 OutputPixelCoord3 = min(OutputPixelCoord + mb_short2(1, 1), mb_short2(Color_ViewportMax - 1));
|
|
|
|
#if 1
|
|
float2 PostMotionBlurTranslucencyUV0 = ApplyScreenTransform((float2(uint2(OutputPixelCoord0)) + 0.5) * Color_ExtentInverse, ColorToTranslucency);
|
|
float2 PostMotionBlurTranslucencyUV1 = ApplyScreenTransform((float2(uint2(OutputPixelCoord1)) + 0.5) * Color_ExtentInverse, ColorToTranslucency);
|
|
float2 PostMotionBlurTranslucencyUV2 = ApplyScreenTransform((float2(uint2(OutputPixelCoord2)) + 0.5) * Color_ExtentInverse, ColorToTranslucency);
|
|
float2 PostMotionBlurTranslucencyUV3 = ApplyScreenTransform((float2(uint2(OutputPixelCoord3)) + 0.5) * Color_ExtentInverse, ColorToTranslucency);
|
|
#else
|
|
float2 PostMotionBlurTranslucencyUV = ApplyScreenTransform(ColorUV, ColorToTranslucency);
|
|
float2 PostMotionBlurTranslucencyUV0 = PostMotionBlurTranslucencyUV + float2(-0.5, -0.5) * TranslucencyExtentInverse;
|
|
float2 PostMotionBlurTranslucencyUV1 = PostMotionBlurTranslucencyUV + float2(+0.5, -0.5) * TranslucencyExtentInverse;
|
|
float2 PostMotionBlurTranslucencyUV2 = PostMotionBlurTranslucencyUV + float2(-0.5, +0.5) * TranslucencyExtentInverse;
|
|
float2 PostMotionBlurTranslucencyUV3 = PostMotionBlurTranslucencyUV + float2(+0.5, +0.5) * TranslucencyExtentInverse;
|
|
#endif
|
|
|
|
BRANCH
|
|
if (bLensDistortion)
|
|
{
|
|
PostMotionBlurTranslucencyUV0 = ApplyLensDistortionOnTranslucencyUV(PostMotionBlurTranslucencyUV0);
|
|
PostMotionBlurTranslucencyUV1 = ApplyLensDistortionOnTranslucencyUV(PostMotionBlurTranslucencyUV1);
|
|
PostMotionBlurTranslucencyUV2 = ApplyLensDistortionOnTranslucencyUV(PostMotionBlurTranslucencyUV2);
|
|
PostMotionBlurTranslucencyUV3 = ApplyLensDistortionOnTranslucencyUV(PostMotionBlurTranslucencyUV3);
|
|
}
|
|
|
|
PostMotionBlurTranslucencyUV0 = clamp(PostMotionBlurTranslucencyUV0, TranslucencyUVMin, TranslucencyUVMax);
|
|
PostMotionBlurTranslucencyUV1 = clamp(PostMotionBlurTranslucencyUV1, TranslucencyUVMin, TranslucencyUVMax);
|
|
PostMotionBlurTranslucencyUV2 = clamp(PostMotionBlurTranslucencyUV2, TranslucencyUVMin, TranslucencyUVMax);
|
|
PostMotionBlurTranslucencyUV3 = clamp(PostMotionBlurTranslucencyUV3, TranslucencyUVMin, TranslucencyUVMax);
|
|
|
|
mb_half4 CenterColor0;
|
|
mb_half4 CenterColor1;
|
|
mb_half4 CenterColor2;
|
|
mb_half4 CenterColor3;
|
|
|
|
mb_half4 PostMotionBlurTranslucency0;
|
|
mb_half4 PostMotionBlurTranslucency1;
|
|
mb_half4 PostMotionBlurTranslucency2;
|
|
mb_half4 PostMotionBlurTranslucency3;
|
|
|
|
// Fetch center and motion blur translucency with overlapped texture fetches
|
|
BRANCH
|
|
if (bFetchCenter)
|
|
{
|
|
CenterColor0 = ColorTexture[OutputPixelCoord0];
|
|
CenterColor1 = ColorTexture[OutputPixelCoord1];
|
|
CenterColor2 = ColorTexture[OutputPixelCoord2];
|
|
CenterColor3 = ColorTexture[OutputPixelCoord3];
|
|
|
|
PostMotionBlurTranslucency0 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV0, 0);
|
|
PostMotionBlurTranslucency1 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV1, 0);
|
|
PostMotionBlurTranslucency2 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV2, 0);
|
|
PostMotionBlurTranslucency3 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV3, 0);
|
|
}
|
|
else
|
|
{
|
|
CenterColor0 = mb_half(0.0).xxxx;
|
|
CenterColor1 = mb_half(0.0).xxxx;
|
|
CenterColor2 = mb_half(0.0).xxxx;
|
|
CenterColor3 = mb_half(0.0).xxxx;
|
|
|
|
PostMotionBlurTranslucency0 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV0, 0);
|
|
PostMotionBlurTranslucency1 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV1, 0);
|
|
PostMotionBlurTranslucency2 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV2, 0);
|
|
PostMotionBlurTranslucency3 = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV3, 0);
|
|
}
|
|
|
|
#if DEBUG_OVERLAY_INPUT_RES
|
|
{
|
|
CenterColor0 *= mb_half4(1.0, 0.5, 0.5, 1.0);
|
|
CenterColor1 *= mb_half4(1.0, 0.5, 0.5, 1.0);
|
|
CenterColor2 *= mb_half4(1.0, 0.5, 0.5, 1.0);
|
|
CenterColor3 *= mb_half4(1.0, 0.5, 0.5, 1.0);
|
|
}
|
|
#endif
|
|
|
|
// Swizzle the half res 2x2 motion blur color across the full res 4x4 quad
|
|
// 0 0 1 1 0 1 0 1
|
|
// 0 0 1 1 -> 2 3 2 3
|
|
// 2 2 3 3 0 1 0 1
|
|
// 2 2 3 3 2 3 2 3
|
|
mb_half4 MotionBlurColor0;
|
|
mb_half4 MotionBlurColor1;
|
|
mb_half4 MotionBlurColor2;
|
|
mb_half4 MotionBlurColor3;
|
|
mb_half FullResBlend0;
|
|
mb_half FullResBlend1;
|
|
mb_half FullResBlend2;
|
|
mb_half FullResBlend3;
|
|
#if CONFIG_SHUFFLE_HALF_RES && PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
{
|
|
const uint LaneGroupSize = 4;
|
|
const uint InnerLaneGroupSize = 1;
|
|
|
|
const FWaveBroadcastSettings Broadcast0 = InitWaveBroadcastLaneGroup(LaneGroupSize, InnerLaneGroupSize, /* InnerLaneGroupId = */ 0);
|
|
const FWaveBroadcastSettings Broadcast1 = InitWaveBroadcastLaneGroup(LaneGroupSize, InnerLaneGroupSize, /* InnerLaneGroupId = */ 1);
|
|
const FWaveBroadcastSettings Broadcast2 = InitWaveBroadcastLaneGroup(LaneGroupSize, InnerLaneGroupSize, /* InnerLaneGroupId = */ 2);
|
|
const FWaveBroadcastSettings Broadcast3 = InitWaveBroadcastLaneGroup(LaneGroupSize, InnerLaneGroupSize, /* InnerLaneGroupId = */ 3);
|
|
|
|
MotionBlurColor0 = WaveBroadcast(Broadcast0, MotionBlurColor);
|
|
MotionBlurColor1 = WaveBroadcast(Broadcast1, MotionBlurColor);
|
|
MotionBlurColor2 = WaveBroadcast(Broadcast2, MotionBlurColor);
|
|
MotionBlurColor3 = WaveBroadcast(Broadcast3, MotionBlurColor);
|
|
|
|
FullResBlend0 = WaveBroadcast(Broadcast0, FullResBlend);
|
|
FullResBlend1 = WaveBroadcast(Broadcast1, FullResBlend);
|
|
FullResBlend2 = WaveBroadcast(Broadcast2, FullResBlend);
|
|
FullResBlend3 = WaveBroadcast(Broadcast3, FullResBlend);
|
|
}
|
|
#elif CONFIG_SHUFFLE_HALF_RES && !PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
{
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
SharedArray0[GroupThreadIndex] = MotionBlurColor;
|
|
SharedArray1[GroupThreadIndex] = FullResBlend;
|
|
#else
|
|
SharedArray0[GroupThreadIndex] = float4(MotionBlurColor.rgb, FullResBlend);
|
|
#endif
|
|
|
|
MotionBlurColor0 = SharedArray0[(GroupThreadIndex & (~0x3)) | 0x0];
|
|
MotionBlurColor1 = SharedArray0[(GroupThreadIndex & (~0x3)) | 0x1];
|
|
MotionBlurColor2 = SharedArray0[(GroupThreadIndex & (~0x3)) | 0x2];
|
|
MotionBlurColor3 = SharedArray0[(GroupThreadIndex & (~0x3)) | 0x3];
|
|
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
FullResBlend0 = SharedArray1[(GroupThreadIndex & (~0x3)) | 0x0];
|
|
FullResBlend1 = SharedArray1[(GroupThreadIndex & (~0x3)) | 0x1];
|
|
FullResBlend2 = SharedArray1[(GroupThreadIndex & (~0x3)) | 0x2];
|
|
FullResBlend3 = SharedArray1[(GroupThreadIndex & (~0x3)) | 0x3];
|
|
#else
|
|
FullResBlend0 = MotionBlurColor0.a;
|
|
FullResBlend1 = MotionBlurColor1.a;
|
|
FullResBlend2 = MotionBlurColor2.a;
|
|
FullResBlend3 = MotionBlurColor3.a;
|
|
#endif
|
|
}
|
|
#else // !CONFIG_SHUFFLE_HALF_RES
|
|
{
|
|
MotionBlurColor0 = MotionBlurColor;
|
|
MotionBlurColor1 = MotionBlurColor;
|
|
MotionBlurColor2 = MotionBlurColor;
|
|
MotionBlurColor3 = MotionBlurColor;
|
|
|
|
FullResBlend0 = FullResBlend;
|
|
FullResBlend1 = FullResBlend;
|
|
FullResBlend2 = FullResBlend;
|
|
FullResBlend3 = FullResBlend;
|
|
}
|
|
#endif
|
|
|
|
// Blend full res and motion blur
|
|
mb_half4 OutputColor0 = CenterColor0 * FullResBlend0 + MotionBlurColor0;
|
|
mb_half4 OutputColor1 = CenterColor1 * FullResBlend1 + MotionBlurColor1;
|
|
mb_half4 OutputColor2 = CenterColor2 * FullResBlend2 + MotionBlurColor2;
|
|
mb_half4 OutputColor3 = CenterColor3 * FullResBlend3 + MotionBlurColor3;
|
|
|
|
// Adds debug overlay
|
|
#if DEBUG_OVERLAY_TILE_CLASSIFICATION || DEBUG_OVERLAY_SKIP_CENTER || DEBUG_OVERLAY_SAMPLES
|
|
{
|
|
OutputColor0 *= DebugOverlay;
|
|
OutputColor1 *= DebugOverlay;
|
|
OutputColor2 *= DebugOverlay;
|
|
OutputColor3 *= DebugOverlay;
|
|
}
|
|
#endif
|
|
|
|
// Blend post motion blur translucency
|
|
#if CONFIG_POST_MOTIONBLUR_TRANSLUCENCY
|
|
{
|
|
OutputColor0.rgb = OutputColor0.rgb * PostMotionBlurTranslucency0.a + PostMotionBlurTranslucency0.rgb;
|
|
OutputColor1.rgb = OutputColor1.rgb * PostMotionBlurTranslucency1.a + PostMotionBlurTranslucency1.rgb;
|
|
OutputColor2.rgb = OutputColor2.rgb * PostMotionBlurTranslucency2.a + PostMotionBlurTranslucency2.rgb;
|
|
OutputColor3.rgb = OutputColor3.rgb * PostMotionBlurTranslucency3.a + PostMotionBlurTranslucency3.rgb;
|
|
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
OutputColor0.a = OutputColor0.a * PostMotionBlurTranslucency0.a;
|
|
OutputColor1.a = OutputColor1.a * PostMotionBlurTranslucency1.a;
|
|
OutputColor2.a = OutputColor2.a * PostMotionBlurTranslucency2.a;
|
|
OutputColor3.a = OutputColor3.a * PostMotionBlurTranslucency3.a;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
// Ensure that alpha values that are expected to be opaque (but are only close to opaque) are forced to be opaque.
|
|
// (0.995 chosen to accommodate handling of 254/255)
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
{
|
|
OutputColor0[3] = select(OutputColor0[3] > mb_half(0.995), mb_half(1.0), OutputColor0[3]);
|
|
OutputColor1[3] = select(OutputColor1[3] > mb_half(0.995), mb_half(1.0), OutputColor1[3]);
|
|
OutputColor2[3] = select(OutputColor2[3] > mb_half(0.995), mb_half(1.0), OutputColor2[3]);
|
|
OutputColor3[3] = select(OutputColor3[3] > mb_half(0.995), mb_half(1.0), OutputColor3[3]);
|
|
|
|
OutputColor0[3] = select(OutputColor0[3] < mb_half(0.005), mb_half(0.0), OutputColor0[3]);
|
|
OutputColor1[3] = select(OutputColor1[3] < mb_half(0.005), mb_half(0.0), OutputColor1[3]);
|
|
OutputColor2[3] = select(OutputColor2[3] < mb_half(0.005), mb_half(0.0), OutputColor2[3]);
|
|
OutputColor3[3] = select(OutputColor3[3] < mb_half(0.005), mb_half(0.0), OutputColor3[3]);
|
|
}
|
|
#else
|
|
{
|
|
OutputColor0.a = 0;
|
|
OutputColor1.a = 0;
|
|
OutputColor2.a = 0;
|
|
OutputColor3.a = 0;
|
|
}
|
|
#endif
|
|
|
|
// Compute the half res.
|
|
mb_half4 HalfResOutput = mb_half(0.25) * (OutputColor0 + OutputColor1 + OutputColor2 + OutputColor3);
|
|
|
|
// Compute the quarter res
|
|
mb_half4 QuarterResOutput;
|
|
BRANCH
|
|
if (OutputMip2)
|
|
#if PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
{
|
|
FWaveBroadcastSettings Horizontal = InitWaveXorButterfly(/* XorButterFly = */ 0x1);
|
|
FWaveBroadcastSettings Vertical = InitWaveXorButterfly(/* XorButterFly = */ 0x2);
|
|
|
|
QuarterResOutput = HalfResOutput * mb_half(0.25);
|
|
QuarterResOutput += WaveBroadcast(Horizontal, QuarterResOutput);
|
|
QuarterResOutput += WaveBroadcast(Vertical, QuarterResOutput);
|
|
}
|
|
#else
|
|
{
|
|
QuarterResOutput = HalfResOutput * mb_half(0.25);
|
|
|
|
SharedArray0[GroupThreadIndex] = QuarterResOutput;
|
|
QuarterResOutput += SharedArray0[GroupThreadIndex ^ 0x1];
|
|
|
|
SharedArray0[GroupThreadIndex] = QuarterResOutput;
|
|
QuarterResOutput += SharedArray0[GroupThreadIndex ^ 0x2];
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
QuarterResOutput = 0.0;
|
|
}
|
|
|
|
// Needed to avoid a crash in the shader compiler of the Xclipse driver.
|
|
#if !CONFIG_SCENE_COLOR_ALPHA
|
|
QuarterResOutput.a = 0;
|
|
#endif
|
|
bool bIsValid = all(OutputPixelCoord < mb_short2(Color_ViewportMax));
|
|
mb_short2 OutputPixelCoord0Mip0 = (bIsValid) ? OutputPixelCoord : mb_short(~0).xx;
|
|
mb_short2 OutputPixelCoord1Mip0 = (bIsValid) ? OutputPixelCoord1 : mb_short(~0).xx;
|
|
mb_short2 OutputPixelCoord2Mip0 = (bIsValid) ? OutputPixelCoord2 : mb_short(~0).xx;
|
|
mb_short2 OutputPixelCoord3Mip0 = (bIsValid) ? OutputPixelCoord3 : mb_short(~0).xx;
|
|
mb_short2 OutputPixelCoordMip1 = select(and(bIsValid, OutputMip1 != 0), (OutputPixelCoord >> mb_short(1)), mb_short(~0).xx);
|
|
mb_short2 OutputPixelCoordMip2 = select(and(bIsValid, and(OutputMip2 != 0, (OutputPixelCoord & 0x3) == 0)), (OutputPixelCoord >> mb_short(2)), mb_short(~0).xx);
|
|
|
|
SceneColorOutputMip0[OutputPixelCoord0Mip0] = OutputColor0;
|
|
SceneColorOutputMip0[OutputPixelCoord1Mip0] = OutputColor1;
|
|
SceneColorOutputMip0[OutputPixelCoord2Mip0] = OutputColor2;
|
|
SceneColorOutputMip0[OutputPixelCoord3Mip0] = OutputColor3;
|
|
|
|
SceneColorOutputMip1[OutputPixelCoordMip1] = HalfResOutput;
|
|
SceneColorOutputMip2[OutputPixelCoordMip2] = QuarterResOutput;
|
|
|
|
#if DEBUG_MOTION_BLUR_OUTPUT
|
|
DebugOutput[OutputPixelCoord0Mip0] = Debug;
|
|
DebugOutput[OutputPixelCoord1Mip0] = Debug;
|
|
DebugOutput[OutputPixelCoord2Mip0] = Debug;
|
|
DebugOutput[OutputPixelCoord3Mip0] = Debug;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
mb_short2 OutputPixelCoord = mb_short2(ColorUV * Color_Extent);
|
|
|
|
float2 PostMotionBlurTranslucencyUV = ApplyScreenTransform(ColorUV, ColorToTranslucency);
|
|
BRANCH
|
|
if (bLensDistortion)
|
|
{
|
|
PostMotionBlurTranslucencyUV = ApplyLensDistortionOnTranslucencyUV(PostMotionBlurTranslucencyUV);
|
|
}
|
|
PostMotionBlurTranslucencyUV = clamp(PostMotionBlurTranslucencyUV, TranslucencyUVMin, TranslucencyUVMax);
|
|
|
|
mb_half4 CenterColor;
|
|
mb_half4 PostMotionBlurTranslucency;
|
|
|
|
// Fetch center and motion blur translucency with overlapped texture fetches
|
|
BRANCH
|
|
if (bFetchCenter)
|
|
{
|
|
CenterColor = ColorTexture[min(OutputPixelCoord, mb_short2(Color_ViewportMax - 1))];
|
|
PostMotionBlurTranslucency = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV, 0);
|
|
}
|
|
else
|
|
{
|
|
CenterColor = mb_half(0.0).xxxx;
|
|
PostMotionBlurTranslucency = TranslucencyTexture.SampleLevel(TranslucencySampler, PostMotionBlurTranslucencyUV, 0);
|
|
}
|
|
|
|
#if DEBUG_OVERLAY_INPUT_RES
|
|
{
|
|
CenterColor *= mb_half4(1.0, 0.5, 0.5, 1.0);
|
|
}
|
|
#endif
|
|
|
|
// Blend full res and motion blur
|
|
mb_half4 OutputColor = CenterColor * FullResBlend + MotionBlurColor;
|
|
|
|
// Adds debug overlay
|
|
#if DEBUG_OVERLAY_TILE_CLASSIFICATION || DEBUG_OVERLAY_SKIP_CENTER || DEBUG_OVERLAY_SAMPLES
|
|
{
|
|
OutputColor *= DebugOverlay;
|
|
}
|
|
#endif
|
|
|
|
// Blend post motion blur translucency
|
|
#if CONFIG_POST_MOTIONBLUR_TRANSLUCENCY
|
|
{
|
|
OutputColor.rgb = OutputColor.rgb * PostMotionBlurTranslucency.a + PostMotionBlurTranslucency.rgb;
|
|
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
OutputColor.a = OutputColor.a * PostMotionBlurTranslucency.a;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
// Ensure that alpha values that are expected to be opaque (but are only close to opaque) are forced to be opaque.
|
|
// (0.995 chosen to accommodate handling of 254/255)
|
|
#if CONFIG_SCENE_COLOR_ALPHA
|
|
{
|
|
OutputColor[3] = select(OutputColor[3] > mb_half(0.995), mb_half(1.0), OutputColor[3]);
|
|
OutputColor[3] = select(OutputColor[3] < mb_half(0.005), mb_half(0.0), OutputColor[3]);
|
|
}
|
|
#else
|
|
{
|
|
OutputColor.a = 0;
|
|
}
|
|
#endif
|
|
|
|
mb_half4 HalfResOutput;
|
|
BRANCH
|
|
if (OutputMip1 || OutputMip2)
|
|
#if PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
{
|
|
FWaveBroadcastSettings Horizontal = InitWaveXorButterfly(/* XorButterFly = */ 0x1);
|
|
FWaveBroadcastSettings Vertical = InitWaveXorButterfly(/* XorButterFly = */ 0x2);
|
|
|
|
HalfResOutput = OutputColor * mb_half(0.25);
|
|
HalfResOutput += WaveBroadcast(Horizontal, HalfResOutput);
|
|
HalfResOutput += WaveBroadcast(Vertical, HalfResOutput);
|
|
}
|
|
#else
|
|
{
|
|
HalfResOutput = OutputColor * mb_half(0.25);
|
|
|
|
SharedArray0[GroupThreadIndex] = HalfResOutput;
|
|
HalfResOutput += SharedArray0[GroupThreadIndex ^ 0x1];
|
|
|
|
SharedArray0[GroupThreadIndex] = HalfResOutput;
|
|
HalfResOutput += SharedArray0[GroupThreadIndex ^ 0x2];
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
HalfResOutput = 0.0;
|
|
}
|
|
|
|
// Needed to avoid a crash in the shader compiler of the Xclipse driver.
|
|
#if !CONFIG_SCENE_COLOR_ALPHA
|
|
HalfResOutput.a = 0;
|
|
#endif
|
|
mb_half4 QuarterResOutput;
|
|
BRANCH
|
|
if (OutputMip2)
|
|
#if PLATFORM_SUPPORTS_WAVE_BROADCAST
|
|
{
|
|
FWaveBroadcastSettings Horizontal = InitWaveXorButterfly(/* XorButterFly = */ 0x4);
|
|
FWaveBroadcastSettings Vertical = InitWaveXorButterfly(/* XorButterFly = */ 0x8);
|
|
|
|
QuarterResOutput = HalfResOutput * mb_half(0.25);
|
|
QuarterResOutput += WaveBroadcast(Horizontal, QuarterResOutput);
|
|
QuarterResOutput += WaveBroadcast(Vertical, QuarterResOutput);
|
|
}
|
|
#else
|
|
{
|
|
QuarterResOutput = HalfResOutput * mb_half(0.25);
|
|
|
|
SharedArray0[GroupThreadIndex] = QuarterResOutput;
|
|
QuarterResOutput += SharedArray0[GroupThreadIndex ^ 0x4];
|
|
|
|
SharedArray0[GroupThreadIndex] = QuarterResOutput;
|
|
QuarterResOutput += SharedArray0[GroupThreadIndex ^ 0x8];
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
QuarterResOutput = 0.0;
|
|
}
|
|
|
|
// Needed to avoid a crash in the shader compiler of the Xclipse driver.
|
|
#if !CONFIG_SCENE_COLOR_ALPHA
|
|
QuarterResOutput.a = 0;
|
|
#endif
|
|
bool bIsValid = all(OutputPixelCoord < mb_short2(Color_ViewportMax));
|
|
mb_short2 OutputPixelCoordMip0 = (bIsValid) ? OutputPixelCoord : mb_short(~0).xx;
|
|
mb_short2 OutputPixelCoordMip1 = select(and(bIsValid, and(OutputMip1 != 0, (OutputPixelCoordMip0 & 0x1) == 0)), (OutputPixelCoord >> mb_short(1)), mb_short(~0).xx);
|
|
mb_short2 OutputPixelCoordMip2 = select(and(bIsValid, and(OutputMip2 != 0, (OutputPixelCoordMip0 & 0x3) == 0)), (OutputPixelCoord >> mb_short(2)), mb_short(~0).xx);
|
|
|
|
SceneColorOutputMip0[OutputPixelCoordMip0] = OutputColor;
|
|
SceneColorOutputMip1[OutputPixelCoordMip1] = HalfResOutput;
|
|
SceneColorOutputMip2[OutputPixelCoordMip2] = QuarterResOutput;
|
|
|
|
#if DEBUG_MOTION_BLUR_OUTPUT
|
|
DebugOutput[OutputPixelCoordMip0] = Debug;
|
|
#endif
|
|
}
|
|
}
|