1688 lines
53 KiB
HLSL
1688 lines
53 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
PostProcessMobile.usf: Combined {bloom, sunshafts, depth of field}
|
|
=============================================================================*/
|
|
|
|
#include "Common.ush"
|
|
#include "PostProcessCommon.ush"
|
|
#include "PostprocessHistogramCommon.ush"
|
|
#include "ScreenPass.ush"
|
|
|
|
// Texture type used for inputs that are sampled with an extra array index when mobile
// multi-view is enabled (see SampleMultiViewTexture), and as ordinary 2D textures otherwise.
#if MOBILE_MULTI_VIEW
	#define MultiViewTexture2D Texture2DArray<float4>
#else
	#define MultiViewTexture2D Texture2D<float4>
#endif
|
|
|
|
// ---- Input textures and their samplers ----
// Textures declared as MultiViewTexture2D are read through the Sample*() helpers in this file,
// which take an array index for the multi-view case.

MultiViewTexture2D SceneColorTexture;
SamplerState SceneColorSampler;

Texture2D LastFrameSceneColorTexture;
SamplerState LastFrameSceneColorSampler;

// Single-channel texture whose .r holds the packed sun-shaft intensity / circle of confusion.
Texture2D SunShaftAndDofTexture;
SamplerState SunShaftAndDofSampler;

Texture2D DofNearTexture;
SamplerState DofNearSampler;

Texture2D DofDownTexture;
SamplerState DofDownSampler;

Texture2D DofBlurTexture;
SamplerState DofBlurSampler;

MultiViewTexture2D BloomDownSourceTexture;
SamplerState BloomDownSourceSampler;

MultiViewTexture2D BloomUpSourceATexture;
SamplerState BloomUpSourceASampler;

MultiViewTexture2D BloomUpSourceBTexture;
SamplerState BloomUpSourceBSampler;

Texture2D SunAlphaTexture;
SamplerState SunAlphaSampler;

Texture2D SunBlurTexture;
SamplerState SunBlurSampler;

MultiViewTexture2D BloomSetup_BloomTexture;
SamplerState BloomSetup_BloomSampler;

MultiViewTexture2D BloomUpTexture;
SamplerState BloomUpSampler;

Texture2D SunMergeTexture;
SamplerState SunMergeSampler;

Texture2D LastFrameSunMergeTexture;
SamplerState LastFrameSunMergeSampler;

// ---- Texture dimensions ----
// The vertex shaders in this file multiply texel offsets by the .zw components to get UV offsets
// (presumably xy = size in texels and zw = 1 / size, as the names suggest -- confirm on the CPU side).

float4 BufferSizeAndInvSize;
float4 DofBlurSizeAndInvSize;
float4 BufferASizeAndInvSize;
float4 BufferBSizeAndInvSize;
float4 BloomUpSizeAndInvSize;
|
|
|
|
// Point on the unit circle.
// The circle is divided into `Points` equal steps; the returned point sits at step (Point + Start).
// Returns float2(sin(angle), cos(angle)).
float2 Circle(float Start, float Points, float Point)
{
	float AnglePerStep = (3.141592 * 2.0 * (1.0 / Points));
	float Angle = AnglePerStep * (Point + Start);
	return float2(sin(Angle), cos(Angle));
}
|
|
|
|
// Upper clamp applied to the thresholded sun luminance in SunMaskPS_Mobile.
float BloomMaxBrightness;
// Luminance subtracted before bloom / sun-shaft masking (SunMaskPS_Mobile, BloomPS_Mobile).
float BloomThreshold;
|
|
|
|
// Far end of the in-focus depth range: focal distance plus the focal region.
half FocusDistFar()
{
	half FarEdge = View.DepthOfFieldFocalDistance + View.DepthOfFieldFocalRegion;
	return FarEdge;
}
|
|
|
|
// Near end of the in-focus depth range: the focal distance itself.
half FocusDistNear()
{
	half NearEdge = View.DepthOfFieldFocalDistance;
	return NearEdge;
}
|
|
|
|
// rgb: tint multiplied onto the scene color when building the sun mask (SunMaskPS_Mobile).
// a:   scale applied to the relative defocus in Coc(). Alpha = 0.5 is full size,
//      >0.5 is the rate at which near and far hit maximum.
float4 SunColorApertureDiv2;
|
|
|
|
// Circle of confusion from scene depth.
// Returns 0 = max near DOF, 0.5 = in focus, 1.0 = max far DOF.
half Coc(half Depth)
{
	half NearLimit = half(FocusDistNear());
	half FarLimit = half(FocusDistFar());

	// Closest depth inside the in-focus range; equals Depth when Depth is already in focus.
	half NearestInFocus = clamp(Depth, NearLimit, FarLimit);

	// Signed defocus relative to the depth itself (negative in front of the focus range).
	half RelativeDefocus = ((Depth - NearestInFocus) / Depth);

	return saturate(RelativeDefocus * SunColorApertureDiv2.a + 0.5);
}
|
|
|
|
|
|
//////////////////////////
|
|
|
|
|
|
// Near-field blur mask: 0 at or behind the focal distance, rising linearly to 1 once the depth is
// a full near transition region in front of it.
float ComputeDOFNearFocalMask(float SceneDepth)
{
	float DistanceInFront = View.DepthOfFieldFocalDistance - SceneDepth;
	return saturate(DistanceInFront / View.DepthOfFieldNearTransitionRegion);
}
|
|
|
|
// todo move to central place
// Far-field blur mask: 0 up to the end of the focal region, rising linearly to 1 once the depth is
// a full far transition region beyond it.
float ComputeDOFFarFocalMask(float SceneDepth)
{
	float FocalRegionEnd = View.DepthOfFieldFocalDistance + View.DepthOfFieldFocalRegion;
	float DistanceBehind = SceneDepth - FocalRegionEnd;
	return saturate(DistanceBehind / View.DepthOfFieldFarTransitionRegion);
}
|
|
|
|
// Circle of confusion built from the near/far focal masks.
// Returns 0 = max near DOF, 0.5 = in focus, 1.0 = max far DOF.
half Coc2(half Depth)
{
	half NearMask = ComputeDOFNearFocalMask(Depth);
	half FarMask = ComputeDOFFarFocalMask(Depth);

	// Far mask wins -> map {0..1} onto {0.5..1}; otherwise map the near mask {0..1} onto {0.5..0}.
	return (FarMask > NearMask)
		? ((FarMask * 0.5) + 0.5)
		: ((1.0 - NearMask) * 0.5);
}
|
|
|
|
//////////////////////////////////
|
|
// Constant (scale, bias) pair that maps depth onto a 0..1 ramp:
// depth * scale + bias is 0 at 65504 - 16384 and 1 at 65504.
half2 SunConstDepthMaskScaleBias()
{
	half RampStart = 65504.0 - 16384.0;
	half RampEnd = 65504.0 - 0.0;

	half InvRange = 1.0/(RampEnd-RampStart);
	half Offset = -RampStart * InvRange;
	return half2(InvRange, Offset);
}
|
|
|
|
// Samples a texture that is an array under MOBILE_MULTI_VIEW and a plain 2D texture otherwise.
// ArrayIndex selects the array slice and is ignored in the non-multi-view build.
half4 SampleMultiViewTexture(MultiViewTexture2D Texture, SamplerState Sampler, float2 SceneUV, float ArrayIndex)
{
#if MOBILE_MULTI_VIEW
	const float3 SliceUV = float3(SceneUV.xy,ArrayIndex);
	return Texture.Sample(Sampler, SliceUV);
#else
	return Texture.Sample(Sampler, SceneUV);
#endif
}
|
|
|
|
// Samples SceneColorTexture at SceneUV (slice ArrayIndex when multi-view is enabled).
float4 SampleSceneColor(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(
		SceneColorTexture,
		SceneColorSampler,
		SceneUV,
		ArrayIndex);
}
|
|
|
|
// Samples BloomDownSourceTexture at SceneUV (slice ArrayIndex when multi-view is enabled).
half4 SampleBloomDown(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(
		BloomDownSourceTexture,
		BloomDownSourceSampler,
		SceneUV,
		ArrayIndex);
}
|
|
|
|
// Samples BloomUpSourceATexture at SceneUV (slice ArrayIndex when multi-view is enabled).
half4 SampleBloomUpA(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(
		BloomUpSourceATexture,
		BloomUpSourceASampler,
		SceneUV,
		ArrayIndex);
}
|
|
|
|
// Samples BloomUpSourceBTexture at SceneUV (slice ArrayIndex when multi-view is enabled).
// The return type is deliberately float4 rather than half4: a half return caused precision
// artifacts, e.g. a blue tint all over the view when looking partially at the sky.
float4 SampleBloomUpB(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(
		BloomUpSourceBTexture,
		BloomUpSourceBSampler,
		SceneUV,
		ArrayIndex);
}
|
|
|
|
// Samples BloomSetup_BloomTexture at SceneUV (slice ArrayIndex when multi-view is enabled).
half4 SampleBloomSetup(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(
		BloomSetup_BloomTexture,
		BloomSetup_BloomSampler,
		SceneUV,
		ArrayIndex);
}
|
|
|
|
// Samples BloomUpTexture at SceneUV (slice ArrayIndex when multi-view is enabled).
half4 SampleBloomUp(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(
		BloomUpTexture,
		BloomUpSampler,
		SceneUV,
		ArrayIndex);
}
|
|
|
|
//
|
|
// Convert depth in alpha into combined circle of confusion and sun intensity.
|
|
//
|
|
|
|
#if SHADER_SUN_MASK

// Packs sun-shaft source intensity and circle of confusion into one channel.
//   InUVPos.xy        - UV used for the scene color and depth fetches.
//   InUVPos.zw        - position remapped with * 0.5 + 0.5 for the screen-edge fade
//                       (presumably a -1..1 screen position -- confirm against the vertex shader).
//   OutSunShaftAndDof - sun amount (MOBILE_USESUN, else 0) plus Coc2(depth) (MOBILE_USEDOF).
//   OutColor          - decoded scene color, only for MOBILE_USESUN && METAL_MSAA_HDR_DECODE.
void SunMaskPS_Mobile(
	float4 InUVPos : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF_TYPE OutSunShaftAndDof : SV_Target0
#if MOBILE_USESUN && METAL_MSAA_HDR_DECODE
	, out HALF4_TYPE OutColor : SV_Target1
#endif
	)
{
	half4 SceneColor = SampleSceneColor(InUVPos.xy,GetEyeIndex(StereoInput));

	// Linear depth comes from whichever mobile scene depth texture this permutation uses.
#if MOBILE_USEDEPTHTEXTURE
	half InDepth = ConvertFromDeviceZ(Texture2DSampleLevel(MobileSceneTextures.SceneDepthTexture, MobileSceneTextures.SceneDepthTextureSampler, InUVPos.xy, 0).r);
#else
	half InDepth = ConvertFromDeviceZ(Texture2DSampleLevel(MobileSceneTextures.SceneDepthAuxTexture, MobileSceneTextures.SceneDepthAuxTextureSampler, InUVPos.xy, 0).r);
#endif

#if MOBILE_USESUN

	#if METAL_MSAA_HDR_DECODE
	// Undo the pre-tonemap: rgb / (1 - luma).
	SceneColor.rgb *= rcp(SceneColor.r*(-0.299) + SceneColor.g*(-0.587) + SceneColor.b*(-0.114) + 1.0);
	OutColor = SceneColor;
	#endif

	// 0..1 ramp over the last 16384 depth units below 65504.
	half2 FarRamp = SunConstDepthMaskScaleBias();
	half FarAmount = saturate(InDepth * FarRamp.x + FarRamp.y);

	half3 SunAmount = SceneColor.rgb * SunColorApertureDiv2.rgb;

	// Threshold and clamp the luminance while keeping the color ratio.
	// The max() keeps the divisor away from zero.
	float SunLuminance = max(Luminance(SunAmount), 6.10352e-5);
	float AdjustedLuminance = clamp(SunLuminance - BloomThreshold, 0.0f, BloomMaxBrightness);
	SunAmount = SunAmount / SunLuminance * AdjustedLuminance * 2.0f;

	// Fade toward the screen border: the mask is 1 on the border (FarAmount becomes 0)
	// and 0.25 in the exact center.
	half2 Pos = InUVPos.zw * 0.5 + 0.5;
	half EdgeMask = 1.0f - Pos.x * (1.0f - Pos.x) * Pos.y * (1.0f - Pos.y) * 8.0f;
	EdgeMask = EdgeMask * EdgeMask;
	FarAmount *= 1.0-EdgeMask;

	// The smallest color channel is used as the sun intensity.
	OutSunShaftAndDof = min(min(SunAmount.r, SunAmount.g), SunAmount.b) * FarAmount;
#else
	OutSunShaftAndDof = 0.0;
#endif

#if MOBILE_USEDOF
	// Circle of confusion is added on top of the sun amount in the same channel.
	OutSunShaftAndDof += Coc2(InDepth);
#endif
}

#endif
|
|
|
|
//
|
|
// Pre-tonemap before hardware box-filtered resolve.
|
|
//
|
|
|
|
// Pre-tonemaps the color returned by SubpassFetchRGBA_0() so that the later MSAA resolve
// averages compressed values: rgb *= 1 / (luma + 1) with luma = 0.299 r + 0.587 g + 0.114 b.
// Only the Metal ES3.1 (non-Mac) build does real work. InUVPos is not used.
void PreTonemapMSAA_Mobile(
	float4 InUVPos : TEXCOORD0,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
#if (METAL_ES3_1_PROFILE && !MAC)
	// On-chip pre-tonemap before MSAA resolve.
	OutColor = SubpassFetchRGBA_0();
	OutColor.rgb *= rcp(OutColor.r*0.299 + OutColor.g*0.587 + OutColor.b*0.114 + 1.0);
#else
	// Nothing to do on other platforms, but an `out` parameter must still be written:
	// leaving it unassigned yields an undefined render-target value (and some compilers reject it).
	OutColor = 0;
#endif
}
|
|
|
|
// Generic input read by SampleInput() and MSAADecodeAndCopyRectPS().
// It is a texture array when mobile multi-view is enabled, a plain 2D texture otherwise.
#if MOBILE_MULTI_VIEW
	Texture2DArray InputTexture;
#else
	Texture2D InputTexture;
#endif
SamplerState InputSampler;
|
|
|
|
// Samples InputTexture at SceneUV. ArrayIndex selects the array slice under MOBILE_MULTI_VIEW
// and is ignored otherwise.
float4 SampleInput(float2 SceneUV, float ArrayIndex)
{
#if MOBILE_MULTI_VIEW
	const float3 SliceUV = float3(SceneUV.xy,ArrayIndex);
	return Texture2DArraySample(InputTexture, InputSampler, SliceUV);
#else
	return Texture2DSample(InputTexture, InputSampler, SceneUV);
#endif
}
|
|
|
|
// Copies InputTexture while undoing the MSAA pre-tonemap: rgb *= 1 / (1 - luma) with
// luma = 0.299 r + 0.587 g + 0.114 b, the inverse of the encode in PreTonemapMSAA_Mobile.
// Only the Metal ES3.1 (non-Mac) build does real work. Only UVAndScreenPos.xy is read.
void MSAADecodeAndCopyRectPS(
	noperspective float4 UVAndScreenPos : TEXCOORD0,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
#if (METAL_ES3_1_PROFILE && !MAC)
	float2 UV = UVAndScreenPos.xy;
	OutColor = Texture2DSample(InputTexture, InputSampler, UV);
	OutColor.rgb *= rcp(OutColor.r*(-0.299) + OutColor.g*(-0.587) + OutColor.b*(-0.114) + 1.0);
#else
	// Nothing to decode on other platforms, but an `out` parameter must still be written:
	// leaving it unassigned yields an undefined render-target value (and some compilers reject it).
	OutColor = 0;
#endif
}
|
|
|
|
//
|
|
// Bloom Setup - Mask Bloom and Downsample 1/16 Area
|
|
//
|
|
|
|
// Vertex shader for the bloom setup pass.
// Outputs four UVs placed one BufferSizeAndInvSize.zw step diagonally around the rectangle UV.
void BloomVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[4] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 CenterUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, CenterUV);

	const float2 TexelStep = BufferSizeAndInvSize.zw;

	OutTexCoords[0] = CenterUV + TexelStep * float2(-1, -1);
	OutTexCoords[1] = CenterUV + TexelStep * float2( 1, -1);
	OutTexCoords[2] = CenterUV + TexelStep * float2(-1,  1);
	OutTexCoords[3] = CenterUV + TexelStep * float2( 1,  1);
}
|
|
|
|
|
|
|
|
// Cheap luma estimate scaled by 4: r + 2 g + b.
float Luma4(float3 Color)
{
	float DoubleGreen = (Color.g * 2.0);
	float RedPlusBlue = (Color.r + Color.b);
	return DoubleGreen + RedPlusBlue;
}
|
|
|
|
// Weight that compresses an HDR color before averaging: 1 / (Luma4(Color) + 4).
float HdrWeight4(float3 Color)
{
	float Denominator = Luma4(Color) + 4.0;
	return rcp(Denominator);
}
|
|
|
|
// Weight that expands a color previously scaled by HdrWeight4: 4 / (1 - Luma4(Color)).
// For C' = C * HdrWeight4(C) this evaluates to Luma4(C) + 4, i.e. it undoes the compression.
float HdrWeightInv4(float3 Color)
{
	float Remainder = 1.0 - Luma4(Color);
	return 4.0 * rcp(Remainder);
}
|
|
|
|
// Bloom setup pixel shader: averages four scene color taps and, depending on the permutation, writes
//   OutColor          (MOBILE_USEBLOOM)                 - thresholded bloom color, alpha 0.
//   OutSunShaftAndDof (MOBILE_USEDOF || MOBILE_USESUN)  - near circle of confusion and/or sun amount.
//   OutEyeAdaptation  (MOBILE_USEEYEADAPTATION)         - scaled/biased log2 luminance.
// Outputs take consecutive SV_Target slots in that order; disabled outputs do not take a slot.
void BloomPS_Mobile(
	float2 InUVs[4] : TEXCOORD0,
	in FStereoPSInput StereoInput
#if MOBILE_USEBLOOM
	, out HALF4_TYPE OutColor : SV_Target0
	#if MOBILE_USEDOF || MOBILE_USESUN
	, out HALF_TYPE OutSunShaftAndDof : SV_Target1
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target2
		#endif
	#else
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target1
		#endif
	#endif
#else
	#if MOBILE_USEDOF || MOBILE_USESUN
	, out HALF_TYPE OutSunShaftAndDof : SV_Target0
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target1
		#endif
	#else
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target0
		#endif
	#endif
#endif
	)
{
	half3 AverageColor = 0.0f;

#if MOBILE_USEBLOOM || MOBILE_USESUN || MOBILE_USEEYEADAPTATION

	const uint EyeIndex = GetEyeIndex(StereoInput);

	// Only rgb is needed; taking it explicitly avoids implicit float4 -> float3 truncation
	// when calling HdrWeight4().
	float3 C0 = SampleSceneColor(InUVs[0], EyeIndex).rgb;
	float3 C1 = SampleSceneColor(InUVs[1], EyeIndex).rgb;
	float3 C2 = SampleSceneColor(InUVs[2], EyeIndex).rgb;
	float3 C3 = SampleSceneColor(InUVs[3], EyeIndex).rgb;

	// Compress each tap before averaging so a single very bright texel does not dominate.
	C0 *= HdrWeight4(C0);
	C1 *= HdrWeight4(C1);
	C2 *= HdrWeight4(C2);
	C3 *= HdrWeight4(C3);

	// Output color is average, expanded back afterwards.
	AverageColor.rgb = (C0 * 0.25) + (C1 * 0.25) + (C2 * 0.25) + (C3 * 0.25);
	AverageColor.rgb *= HdrWeightInv4(AverageColor);

	#if METAL_MSAA_HDR_DECODE
	// This should really happen before the average, instead doing after average as optimization.
	AverageColor.rgb *= rcp(AverageColor.r*(-0.299) + AverageColor.g*(-0.587) + AverageColor.b*(-0.114) + 1.0);
	#endif

	// Try to kill negatives and NaNs here
	AverageColor.rgb = max(AverageColor.rgb, 0);

	#if MOBILE_USEBLOOM || MOBILE_USESUN
	// Trim bloom and sunshafts black level.
	half TotalLuminance = Luminance(AverageColor.rgb);
	half BloomLuminance = TotalLuminance - BloomThreshold;
	half Amount = saturate(BloomLuminance * 0.5f);
	#endif

	#if MOBILE_USEBLOOM
	OutColor.rgb = AverageColor;
	OutColor.rgb *= Amount;
	OutColor.a = 0;
	#endif
#endif

#if MOBILE_USEDOF || MOBILE_USESUN
	half A0 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0]).r;
	half A1 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1]).r;
	half A2 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2]).r;
	half A3 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3]).r;
#endif

	// In the case of both DOF and SUN,
	// split out alpha back into dual components (circle of confusion size and sun amount).

#if MOBILE_USEDOF
	// Expand {near to in-focus} {0.0 to 0.5} to {0.0 to 1.0} for near DOF dilation.
	// Must keep 1.0 the in-focus here (sunshaft pass will use this data).
	// (Locals are named NearCoc* so they do not shadow the Coc2() function.)
	half NearCoc0 = saturate(A0*2.0);
	half NearCoc1 = saturate(A1*2.0);
	half NearCoc2 = saturate(A2*2.0);
	half NearCoc3 = saturate(A3*2.0);

	// Take min of COC (which is maximum near radius).
	OutSunShaftAndDof = min(min(NearCoc0,NearCoc1),min(NearCoc2,NearCoc3));

	// Improve the quality of near dilation: 1 - (1 - x)^2.
	OutSunShaftAndDof = 1.0 - OutSunShaftAndDof;
	OutSunShaftAndDof *= OutSunShaftAndDof;
	OutSunShaftAndDof = 1.0 - OutSunShaftAndDof;
#elif MOBILE_USESUN
	OutSunShaftAndDof = 0.0f;
#endif

#if MOBILE_USESUN
	#if MOBILE_USEDOF
	// With DOF packed into the same channel, sun intensity is whatever exceeds 1.0.
	half Sun0 = max(0.0, A0-1.0);
	half Sun1 = max(0.0, A1-1.0);
	half Sun2 = max(0.0, A2-1.0);
	half Sun3 = max(0.0, A3-1.0);
	#else
	half Sun0 = A0;
	half Sun1 = A1;
	half Sun2 = A2;
	half Sun3 = A3;
	#endif

	// Take average of sun intensity and adjust by bloom threshold.
	Amount *= 0.25;
	OutSunShaftAndDof += (Sun0 * Amount) + (Sun1 * Amount) + (Sun2 * Amount) + (Sun3 * Amount);
#endif

#if MOBILE_USEEYEADAPTATION
	const float Intensity = CalculateEyeAdaptationLuminance(AverageColor * View.OneOverPreExposure);
	const float LogIntensity = clamp(log2(Intensity), -10.0f, 20.0f);
	// Store log intensity: scale to 0,1 range.
	OutEyeAdaptation = EyeAdaptation_HistogramScale * LogIntensity + EyeAdaptation_HistogramBias;
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
// Bloom Downsample
|
|
//
|
|
|
|
// Radius, in source texels, of the ring of taps generated by BloomDownVS_Mobile.
float BloomDownScale;
|
|
|
|
// Vertex shader for the bloom downsample.
// Packs 15 UVs into 8 float4s: the rectangle UV itself in [0].xy, then 14 taps on a ring of radius
// BloomDownScale texels (starting 2/14 of a step into the ring), two taps per float4.
// [7].zw is unused and set to zero.
void BloomDownVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoords[8] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 CenterUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, CenterUV);

	const float Start = 2.0/14.0;
	const float Scale = BloomDownScale;

	// UV of ring tap number `Point` (0..13).
	#define BLOOM_DOWN_RING_UV(Point) (CenterUV + Circle(Start, 14.0, Point) * Scale * BufferSizeAndInvSize.zw)

	OutTexCoords[0].xy = CenterUV;
	OutTexCoords[0].zw = BLOOM_DOWN_RING_UV(0.0);
	OutTexCoords[1].xy = BLOOM_DOWN_RING_UV(1.0);
	OutTexCoords[1].zw = BLOOM_DOWN_RING_UV(2.0);
	OutTexCoords[2].xy = BLOOM_DOWN_RING_UV(3.0);
	OutTexCoords[2].zw = BLOOM_DOWN_RING_UV(4.0);
	OutTexCoords[3].xy = BLOOM_DOWN_RING_UV(5.0);
	OutTexCoords[3].zw = BLOOM_DOWN_RING_UV(6.0);
	OutTexCoords[4].xy = BLOOM_DOWN_RING_UV(7.0);
	OutTexCoords[4].zw = BLOOM_DOWN_RING_UV(8.0);
	OutTexCoords[5].xy = BLOOM_DOWN_RING_UV(9.0);
	OutTexCoords[5].zw = BLOOM_DOWN_RING_UV(10.0);
	OutTexCoords[6].xy = BLOOM_DOWN_RING_UV(11.0);
	OutTexCoords[6].zw = BLOOM_DOWN_RING_UV(12.0);
	OutTexCoords[7].xy = BLOOM_DOWN_RING_UV(13.0);
	OutTexCoords[7].zw = float2(0.0, 0.0);

	#undef BLOOM_DOWN_RING_UV
}
|
|
|
|
// Bloom downsample pixel shader.
// Averages, with equal weight 1/15, the 15 taps laid out by BloomDownVS_Mobile
// (InUVs[0].xy through InUVs[7].xy; InUVs[7].zw is unused). Output alpha is 0.
void BloomDownPS_Mobile(
	float4 InUVs[8] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	const uint EyeIndex = GetEyeIndex(StereoInput);

	// Only rgb is written, so fetch rgb explicitly instead of truncating a 4-component sum
	// implicitly on assignment to OutColor.rgb.
	half3 N0 = SampleBloomDown(InUVs[0].xy, EyeIndex).rgb;
	half3 N1 = SampleBloomDown(InUVs[0].zw, EyeIndex).rgb;
	half3 N2 = SampleBloomDown(InUVs[1].xy, EyeIndex).rgb;
	half3 N3 = SampleBloomDown(InUVs[1].zw, EyeIndex).rgb;
	half3 N4 = SampleBloomDown(InUVs[2].xy, EyeIndex).rgb;
	half3 N5 = SampleBloomDown(InUVs[2].zw, EyeIndex).rgb;
	half3 N6 = SampleBloomDown(InUVs[3].xy, EyeIndex).rgb;
	half3 N7 = SampleBloomDown(InUVs[3].zw, EyeIndex).rgb;
	half3 N8 = SampleBloomDown(InUVs[4].xy, EyeIndex).rgb;
	half3 N9 = SampleBloomDown(InUVs[4].zw, EyeIndex).rgb;
	half3 N10 = SampleBloomDown(InUVs[5].xy, EyeIndex).rgb;
	half3 N11 = SampleBloomDown(InUVs[5].zw, EyeIndex).rgb;
	half3 N12 = SampleBloomDown(InUVs[6].xy, EyeIndex).rgb;
	half3 N13 = SampleBloomDown(InUVs[6].zw, EyeIndex).rgb;
	half3 N14 = SampleBloomDown(InUVs[7].xy, EyeIndex).rgb;

	// Equal weight for all 15 taps.
	float W = 1.0/15.0;
	OutColor.rgb =
		(N0 * W) +
		(N1 * W) +
		(N2 * W) +
		(N3 * W) +
		(N4 * W) +
		(N5 * W) +
		(N6 * W) +
		(N7 * W) +
		(N8 * W) +
		(N9 * W) +
		(N10 * W) +
		(N11 * W) +
		(N12 * W) +
		(N13 * W) +
		(N14 * W);
	OutColor.a = 0;
}
|
|
|
|
|
|
//
|
|
// Bloom Upsample
|
|
//
|
|
|
|
// Tap ring radii used by BloomUpVS_Mobile: x scales BufferASizeAndInvSize.zw offsets,
// y scales BufferBSizeAndInvSize.zw offsets.
float2 BloomUpScales;
|
|
|
|
// Vertex shader for the bloom upsample.
// Packs two rings of 7 taps plus the rectangle UV into 8 float4s:
//   [0].xy .. [3].xy : ring of radius BloomUpScales.x in BufferA texels
//   [3].zw           : the rectangle UV itself
//   [4].xy .. [7].xy : ring of radius BloomUpScales.y in BufferB texels
//   [7].zw           : unused, set to zero
// Both rings start 2/7 of a step into the circle.
void BloomUpVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoords[8] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 CenterUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, CenterUV);

	const float Start = 2.0/7.0;

	// UV of ring tap number `Point` (0..6) for a ring of radius RingScale texels of size InvSize.
	#define BLOOM_UP_RING_UV(Point, RingScale, InvSize) (CenterUV + Circle(Start, 7.0, Point) * (RingScale) * (InvSize))

	// Ring for source A.
	OutTexCoords[0].xy = BLOOM_UP_RING_UV(0.0, BloomUpScales.x, BufferASizeAndInvSize.zw);
	OutTexCoords[0].zw = BLOOM_UP_RING_UV(1.0, BloomUpScales.x, BufferASizeAndInvSize.zw);
	OutTexCoords[1].xy = BLOOM_UP_RING_UV(2.0, BloomUpScales.x, BufferASizeAndInvSize.zw);
	OutTexCoords[1].zw = BLOOM_UP_RING_UV(3.0, BloomUpScales.x, BufferASizeAndInvSize.zw);
	OutTexCoords[2].xy = BLOOM_UP_RING_UV(4.0, BloomUpScales.x, BufferASizeAndInvSize.zw);
	OutTexCoords[2].zw = BLOOM_UP_RING_UV(5.0, BloomUpScales.x, BufferASizeAndInvSize.zw);
	OutTexCoords[3].xy = BLOOM_UP_RING_UV(6.0, BloomUpScales.x, BufferASizeAndInvSize.zw);

	// Shared center tap.
	OutTexCoords[3].zw = CenterUV;

	// Ring for source B.
	OutTexCoords[4].xy = BLOOM_UP_RING_UV(0.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[4].zw = BLOOM_UP_RING_UV(1.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[5].xy = BLOOM_UP_RING_UV(2.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[5].zw = BLOOM_UP_RING_UV(3.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[6].xy = BLOOM_UP_RING_UV(4.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[6].zw = BLOOM_UP_RING_UV(5.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[7].xy = BLOOM_UP_RING_UV(6.0, BloomUpScales.y, BufferBSizeAndInvSize.zw);
	OutTexCoords[7].zw = float2(0.0, 0.0);

	#undef BLOOM_UP_RING_UV
}
|
|
|
|
// Per-tap rgb weight applied to the source A taps in BloomUpPS_Mobile.
float4 BloomTintA;
// Per-tap rgb weight applied to the source B taps in BloomUpPS_Mobile.
float4 BloomTintB;
|
|
|
|
// Bloom upsample pixel shader.
// Sums 8 taps of source A weighted by BloomTintA.rgb and 8 taps of source B weighted by
// BloomTintB.rgb, using the UV layout produced by BloomUpVS_Mobile (InUVs[3].zw is the center tap
// shared by both sources). Output alpha is 0.
void BloomUpPS_Mobile(
	float4 InUVs[8] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	const uint EyeIndex = GetEyeIndex(StereoInput);

	// A is the same size source.
	half3 WA = BloomTintA.rgb;
	// B is the upsampled source.
	half3 WB = BloomTintB.rgb;

	// Source A: seven ring taps followed by the center tap.
	half3 TapA0 = SampleBloomUpA(InUVs[0].xy, EyeIndex).rgb;
	half3 TapA1 = SampleBloomUpA(InUVs[0].zw, EyeIndex).rgb;
	half3 TapA2 = SampleBloomUpA(InUVs[1].xy, EyeIndex).rgb;
	half3 TapA3 = SampleBloomUpA(InUVs[1].zw, EyeIndex).rgb;
	half3 TapA4 = SampleBloomUpA(InUVs[2].xy, EyeIndex).rgb;
	half3 TapA5 = SampleBloomUpA(InUVs[2].zw, EyeIndex).rgb;
	half3 TapA6 = SampleBloomUpA(InUVs[3].xy, EyeIndex).rgb;
	half3 TapA7 = SampleBloomUpA(InUVs[3].zw, EyeIndex).rgb;

	// Source B: the center tap followed by seven ring taps.
	half3 TapB0 = SampleBloomUpB(InUVs[3].zw, EyeIndex).rgb;
	half3 TapB1 = SampleBloomUpB(InUVs[4].xy, EyeIndex).rgb;
	half3 TapB2 = SampleBloomUpB(InUVs[4].zw, EyeIndex).rgb;
	half3 TapB3 = SampleBloomUpB(InUVs[5].xy, EyeIndex).rgb;
	half3 TapB4 = SampleBloomUpB(InUVs[5].zw, EyeIndex).rgb;
	half3 TapB5 = SampleBloomUpB(InUVs[6].xy, EyeIndex).rgb;
	half3 TapB6 = SampleBloomUpB(InUVs[6].zw, EyeIndex).rgb;
	half3 TapB7 = SampleBloomUpB(InUVs[7].xy, EyeIndex).rgb;

	// Accumulate in tap order (A0..A7, then B0..B7).
	half3 Sum = TapA0 * WA;
	Sum += TapA1 * WA;
	Sum += TapA2 * WA;
	Sum += TapA3 * WA;
	Sum += TapA4 * WA;
	Sum += TapA5 * WA;
	Sum += TapA6 * WA;
	Sum += TapA7 * WA;
	Sum += TapB0 * WB;
	Sum += TapB1 * WB;
	Sum += TapB2 * WB;
	Sum += TapB3 * WB;
	Sum += TapB4 * WB;
	Sum += TapB5 * WB;
	Sum += TapB6 * WB;
	Sum += TapB7 * WB;

	OutColor.rgb = Sum;
	OutColor.a = 0;
}
|
|
|
|
|
|
//
|
|
// Near Setup - Generate near dilation for DOF.
|
|
//
|
|
|
|
// Vertex shader for the DOF near pass.
// Outputs the rectangle UV plus eight surrounding UVs (two per float4), each offset by
// +-0.5 / +-1.0 BufferSizeAndInvSize.zw steps.
void DofNearVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords2 : TEXCOORD0,
	out float4 OutTexCoords4[4] : TEXCOORD1,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 CenterUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, CenterUV);

	const float2 TexelStep = BufferSizeAndInvSize.zw;

	OutTexCoords2 = CenterUV;

	OutTexCoords4[0].xy = CenterUV + TexelStep * float2(-0.5,-1.0);
	OutTexCoords4[0].zw = CenterUV + TexelStep * float2( 1.0,-0.5);
	OutTexCoords4[1].xy = CenterUV + TexelStep * float2( 0.5, 1.0);
	OutTexCoords4[1].zw = CenterUV + TexelStep * float2(-1.0, 0.5);
	OutTexCoords4[2].xy = CenterUV + TexelStep * float2( 0.5,-1.0);
	OutTexCoords4[2].zw = CenterUV + TexelStep * float2( 1.0, 0.5);
	OutTexCoords4[3].xy = CenterUV + TexelStep * float2(-0.5, 1.0);
	OutTexCoords4[3].zw = CenterUV + TexelStep * float2(-1.0,-0.5);
}
|
|
|
|
// DOF near pass pixel shader: builds the near dilation value.
// Reads SunShaftAndDofTexture.r at the center and at eight surrounding UVs, reverses each value
// (1 - x), averages them with the center tap at quarter weight, and outputs the square root.
void DofNearPS_Mobile(
	float2 InUVs2 : TEXCOORD0,
	float4 InUVs[4] : TEXCOORD1,
	in FStereoPSInput StereoInput,
	out HALF_TYPE OutColor : SV_Target0
	)
{
	half Center = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs2).r;
	half Tap1 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0].xy).r;
	half Tap2 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0].zw).r;
	half Tap3 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1].xy).r;
	half Tap4 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1].zw).r;
	half Tap5 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2].xy).r;
	half Tap6 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2].zw).r;
	half Tap7 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3].xy).r;
	half Tap8 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3].zw).r;

	// Remove sunshaft intensity component and reverse.
#if MOBILE_USESUN
	#define DOF_NEAR_REVERSE(X) saturate(1.0 - (X))
#else
	// If no sun-shafts then don't need the saturate.
	#define DOF_NEAR_REVERSE(X) (1.0 - (X))
#endif

	Center = DOF_NEAR_REVERSE(Center);
	Tap1 = DOF_NEAR_REVERSE(Tap1);
	Tap2 = DOF_NEAR_REVERSE(Tap2);
	Tap3 = DOF_NEAR_REVERSE(Tap3);
	Tap4 = DOF_NEAR_REVERSE(Tap4);
	Tap5 = DOF_NEAR_REVERSE(Tap5);
	Tap6 = DOF_NEAR_REVERSE(Tap6);
	Tap7 = DOF_NEAR_REVERSE(Tap7);
	Tap8 = DOF_NEAR_REVERSE(Tap8);

	#undef DOF_NEAR_REVERSE

	// The first sample is 1/4 the size as the rest of the samples.
	half Dilation = (Center * 0.25 + Tap1 + Tap2 + Tap3 + Tap4 + Tap5 + Tap6 + Tap7 + Tap8) / 8.25;

	// Only take the square root of strictly positive values.
	if(Dilation > 0.0) Dilation = sqrt(Dilation);

	OutColor = Dilation;
}
|
|
|
|
|
|
//
|
|
// DOF Setup - Downsample to 1/4 area
|
|
//
|
|
|
|
|
|
|
|
// Vertex shader for the DOF downsample.
//   OutTexCoords[0]    - UV derived from the output position (used for the near dilation texture).
//   OutTexCoords[1..4] - the rectangle UV offset by half a BufferSizeAndInvSize.zw step diagonally.
void DofDownVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[5] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 SourceUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, SourceUV);

	// Near UV is computed from the output position rather than from the source UV.
	OutTexCoords[0] = OutPosition.xy * float2(0.5,-0.5) + 0.5;

	// Other source UVs based on possible non-full texture.
	const float2 TexelStep = BufferSizeAndInvSize.zw;
	OutTexCoords[1] = SourceUV + TexelStep * float2(-0.5, -0.5);
	OutTexCoords[2] = SourceUV + TexelStep * float2( 0.5, -0.5);
	OutTexCoords[3] = SourceUV + TexelStep * float2(-0.5,  0.5);
	OutTexCoords[4] = SourceUV + TexelStep * float2( 0.5,  0.5);
}
|
|
|
|
// Loads one tap for DofDownPS_Mobile.
// rgb is the scene color at UV; alpha is the tap's circle of confusion size in the
// {0 to 16384} range, never smaller than NearDilation or 1/8.
float4 DofDownLoadTap_Mobile(float2 UV, uint EyeIndex, float NearDilation)
{
	float4 Tap = SampleSceneColor(UV, EyeIndex);
	Tap.a = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, UV).r;

#if MOBILE_USESUN
	// The {0.0 to 1.0} range is focus.
	// The {1.0 to 65504.0} range is light shaft source intensity (always at fully out of focus).
	// Must clamp back to {0.0 to 1.0} range.
	Tap.a = min(1.0, Tap.a);
#endif

	// To support near DOF the {0.0 to 1.0} maps to {-16384.0 to 16384.0}.
	Tap.a = Tap.a * (2.0 * 16384.0) - 16384.0;

	// Make sure there are no zeros.
	// Alpha ends up as circle of confusion size.
	// Near dilation factor applied here.
	// The 1/8 factor is to workaround mobile hardware lack of precision.
	Tap.a = max(NearDilation, abs(Tap.a) + 1.0/8.0);

	return Tap;
}

// DOF downsample pixel shader.
// Blends four scene color taps weighted by their circle of confusion size and outputs
// rgb pre-multiplied by the blended circle of confusion, which is stored in alpha.
void DofDownPS_Mobile(
	float2 InUVs[5] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	// This shader needs float precision to work.

	// Fetch near dilation and scale to (0 to 16384.0) range.
	float N = DofNearTexture.Sample(DofNearSampler, InUVs[0]).r * 16384.0;

	const uint EyeIndex = GetEyeIndex(StereoInput);

	float4 A = DofDownLoadTap_Mobile(InUVs[1], EyeIndex, N);
	float4 B = DofDownLoadTap_Mobile(InUVs[2], EyeIndex, N);
	float4 C = DofDownLoadTap_Mobile(InUVs[3], EyeIndex, N);
	float4 D = DofDownLoadTap_Mobile(InUVs[4], EyeIndex, N);

	// Mix weighted by circle of confusion.
	// This tends to erode the effect of more infocus samples (removes bleeding artifacts).
	OutColor = ((A * A.a) + (B * B.a) + (C * C.a) + (D * D.a)) * rcp(A.a + B.a + C.a + D.a);

	// Clamp rgb to prevent overflow during scale.
	OutColor.rgb = min(OutColor.rgb, 65504.0/16384.125);

	OutColor.rgb *= OutColor.a;
}
|
|
|
|
|
|
//
|
|
// DOF Blur
|
|
//
|
|
|
|
// DOF BOKEH SAMPLING PATTERN
|
|
// --------------------
|
|
// # = bilinear tap
|
|
// * = the single point tap to get the current pixel
|
|
//
|
|
// 1 1
|
|
// 4 4 1 * 2 2
|
|
// 4 4 3 3 2 2
|
|
// 3 3
|
|
//
|
|
// This pattern is very important.
|
|
// All bilinear taps are not always exactly in the middle of 4 texels.
|
|
// It is an asymmetric pattern (minimize overlap, allow for different radii).
|
|
// Offsets of the four bilinear taps, in texels relative to the current pixel.
#define DOF_1 half2(-0.50,  0.50)
#define DOF_2 half2( 0.75, -0.50)
#define DOF_3 half2(-0.50, -1.25)
#define DOF_4 half2(-1.75, -0.50)
|
|
|
|
|
|
|
|
// This will compute a constant half2 from a constant half2.
// Returns the (scale, bias) of the soft blend factor for the intersection test
// (does circle of confusion intersect pixel center) of a tap at `Offset`.
// Large feather here to make transitions smooth with a few samples.
half2 DofIntersectionScaleBias(half2 Offset)
{
	// Distances are normalized by the length of the farthest tap (DOF_4).
	half InvFarthest = rcp(sqrt(dot(DOF_4, DOF_4)));

	// The ramp ends at the tap's own distance and starts at the DOF_1 distance,
	// but never more than 0.25 before the end.
	half RampStart = sqrt(dot(DOF_1, DOF_1));
	half RampEnd = sqrt(dot(Offset, Offset));
	RampStart = max(RampStart, RampEnd - 0.25);

	RampStart *= InvFarthest;
	RampEnd *= InvFarthest;

	// Linear remap: RampStart -> 0, RampEnd -> 1.
	half Scale = 1.0/(RampEnd - RampStart);
	half Bias = (-RampStart) * Scale;
	return half2(Scale, Bias);
}
|
|
|
|
// Soft 0..1 factor telling whether a tap's circle of confusion (CocTap, in the 16384-scaled range)
// reaches the pixel center from the tap position `Offset`.
half DofIntersect(half CocTap, half2 Offset)
{
	half2 Ramp = DofIntersectionScaleBias(Offset);

	// Undo the scale factor.
	Ramp.x *= 1.0/16384.0;

	return saturate(CocTap * Ramp.x + Ramp.y);
}
|
|
|
|
// Weight shared by the outer taps, derived from a circle of confusion in the 16384-scaled range:
// 0 up to the normalized DOF_3 distance, rising linearly to 1 at the normalized DOF_4 distance.
half DofWeight(half Coc)
{
	half RampStart = sqrt(dot(DOF_3, DOF_3)) / sqrt(dot(DOF_4, DOF_4));
	half RampEnd = sqrt(dot(DOF_4, DOF_4)) / sqrt(dot(DOF_4, DOF_4));

	half Scale = 1.0/(RampEnd - RampStart);
	half Bias = (-RampStart) * Scale;

	// Undo the 16384.0 scale factor in this constant.
	Scale *= 1.0/16384.0;

	// Scale and Bias should be compile time constants.
	return saturate(Coc * Scale + Bias);
}
|
|
|
|
// Vertex shader for the DOF blur.
// Outputs the rectangle UV in [0] and the four bokeh tap UVs (DOF_1..DOF_4 offsets, in
// BufferSizeAndInvSize.zw steps) in [1..4].
void DofBlurVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[5] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 CenterUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, CenterUV);

	const float2 TexelStep = BufferSizeAndInvSize.zw;

	OutTexCoords[0] = CenterUV.xy;
	OutTexCoords[1] = CenterUV.xy + float2(DOF_1) * TexelStep;
	OutTexCoords[2] = CenterUV.xy + float2(DOF_2) * TexelStep;
	OutTexCoords[3] = CenterUV.xy + float2(DOF_3) * TexelStep;
	OutTexCoords[4] = CenterUV.xy + float2(DOF_4) * TexelStep;
}
|
|
|
|
// DOF blur pixel shader: weighted average of four bilinear taps of the downsampled DOF texture.
// rgb is the blurred color; alpha carries the near dilation value through unchanged.
void DofBlurPS_Mobile(
	float2 InUVs[5] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	// Near dilation size is copied into alpha for the tonemapper pass.
	OutColor.a = DofNearTexture.Sample(DofNearSampler, InUVs[0]).r;

	half4 Tap1 = DofDownTexture.Sample(DofDownSampler, InUVs[1]);
	half4 Tap2 = DofDownTexture.Sample(DofDownSampler, InUVs[2]);
	half4 Tap3 = DofDownTexture.Sample(DofDownSampler, InUVs[3]);
	half4 Tap4 = DofDownTexture.Sample(DofDownSampler, InUVs[4]);

	// Restore color (colors are weighted by CoC to help remove bleeding).
	Tap1.rgb *= rcp(Tap1.a);
	Tap2.rgb *= rcp(Tap2.a);
	Tap3.rgb *= rcp(Tap3.a);
	Tap4.rgb *= rcp(Tap4.a);

	// First bilinear tap always has 1.0 weight, the rest start from a weight based on its CoC.
	half BaseWeight = DofWeight(Tap1.a);
	half W1 = 1.0;

	// Remove contribution of taps whose circle of confusion does not intersect the pixel.
	half W2 = BaseWeight * DofIntersect(Tap2.a, DOF_2);
	half W3 = BaseWeight * DofIntersect(Tap3.a, DOF_3);
	half W4 = BaseWeight * DofIntersect(Tap4.a, DOF_4);

	OutColor.rgb = ((Tap1.rgb * W1) + (Tap2.rgb * W2) + (Tap3.rgb * W3) + (Tap4.rgb * W4)) * rcp(W1 + W2 + W3 + W4);
}
|
|
|
|
|
|
// Integrate DOF
|
|
|
|
// Vertex shader for the DOF integration pass.
//   OutTexCoords.xy - rectangle UV into the full resolution scene color.
//   OutTexCoords.zw - UV into the DOF blur texture, derived from the output position.
//   OutFineDofGrain - scene UV shifted by (-0.5, 0.5) BufferSizeAndInvSize.zw steps.
void IntegrateDOFVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoords : TEXCOORD0,
	out float2 OutFineDofGrain : TEXCOORD1,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 SceneUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, SceneUV);
	OutTexCoords.xy = SceneUV;

	// Fine adjustment is inside the possible non-full viewport in the full resolution texture.
	OutFineDofGrain.xy = SceneUV + BufferSizeAndInvSize.zw * float2(-0.5, 0.5);

	// Want grain and a second UV based on the knowledge that the source texture has a full viewport.
	float2 FullViewUV = OutPosition.xy * float2(0.5, -0.5) + 0.5;

	// For DOF attempt to undo sampling bias for the first transition region.
	// This is better for the fine transition, breaks down for the larger bokeh.
	// This is the best compromise for mobile using 4 bilinear taps only.
	OutTexCoords.zw = FullViewUV.xy + DofBlurSizeAndInvSize.zw * float2(0.25, -0.5);
}
|
|
|
|
// Pixel shader for the DOF integration pass.
// Blends the sharp scene color first with a half-texel-shifted "fine" scene
// tap and then with the blurred DOF texture, both driven by a circle of
// confusion value. The scene color's alpha is passed through unchanged.
void IntegrateDOFPS_Mobile(
    in float4 TexCoords : TEXCOORD0,
    in float2 FineDofGrain : TEXCOORD1,
    in FStereoPSInput StereoInput,
    out HALF4_TYPE OutColor : SV_Target0
    )
{
    const uint EyeIndex = GetEyeIndex(StereoInput);

    half4 SharpColor = SampleSceneColor(TexCoords.xy, EyeIndex);
    half4 FineColor = SampleSceneColor(FineDofGrain.xy, EyeIndex);

    half4 BlurredColor = Texture2DSample(DofBlurTexture, DofBlurSampler, TexCoords.zw);
    half StoredCoc = Texture2DSample(SunShaftAndDofTexture, SunShaftAndDofSampler, TexCoords.xy).r;

    // Scene alpha goes straight to the output.
    OutColor.a = SharpColor.a;

    // Convert the stored value back into a circle of confusion: the larger
    // of the blur texture's alpha and the remapped |2x - 1| value.
    const half Coc = max(BlurredColor.a, abs(StoredCoc * 2.0 - 1.0));

    // Convert circle of confusion into blend factors.
    half2 CoarseScaleBias = CocBlendScaleBias(); // Constant.
    half CoarseAmount = saturate(Coc * CoarseScaleBias.x + CoarseScaleBias.y);
    half2 FineScaleBias = CocBlendScaleBiasFine(); // Constant.
    half FineAmount = saturate(Coc * FineScaleBias.x + FineScaleBias.y);

    // Blend in fine DOF, then coarse DOF on top of it.
    OutColor.rgb = lerp(SharpColor.rgb, FineColor.rgb, FineAmount);
    OutColor.rgb = lerp(OutColor.rgb, BlurredColor.rgb, CoarseAmount);
}
|
|
|
|
//
|
|
// First sun shaft blur and move sun intensity from alpha to single channel output.
|
|
//
|
|
|
|
// Maps a channel value x to x / (1 + x).
half HighlightCompression(half Channel)
{
    const half Denominator = 1.0 + Channel;
    return Channel * rcp(Denominator);
}
|
|
|
|
// Inverse of HighlightCompression: maps a channel value x to x / (1 - x).
half HighlightDecompression(half Channel)
{
    const half Denominator = 1.0 - Channel;
    return Channel * rcp(Denominator);
}
|
|
|
|
|
|
// Convert from [-1 to 1] to view rectangle in texture which is somewhere in [0 to 1].
|
|
// Maps a [-1, 1] position to a [0, 1] UV, flipping the Y axis.
float2 SunShaftPosToUV(float2 Pos)
{
    const float2 HalfScaleFlipY = float2(0.5,-0.5);
    return Pos.xy * HalfScaleFlipY + 0.5;
}
|
|
|
|
// Center of light shaft.
|
|
float2 LightShaftCenter;
|
|
|
|
// Position in {-1 to 1} space.
|
|
// Returns the light shaft center (LightShaftCenter), a position in {-1 to 1} space.
float2 SunPos()
{
    const float2 Center = LightShaftCenter.xy;
    return Center;
}
|
|
|
|
// Interpolates from the sun position toward InPosition by `amount`
// (0 = sun position, 1 = InPosition) and converts the result to a UV.
float2 SunShaftRect(float2 InPosition, float amount)
{
    const float2 TowardPosition = lerp(SunPos(), InPosition, amount);
    return SunShaftPosToUV(TowardPosition);
}
|
|
|
|
// Positions for sun shaft steps.
|
|
// The very tight first position makes direct light to eye bloom a little.
|
|
// Otherwise want even spacing.
|
|
#define SUN_P0 (31.0/32.0)
|
|
#define SUN_P1 (27.0/32.0)
|
|
#define SUN_P2 (23.0/32.0)
|
|
#define SUN_P3 (19.0/32.0)
|
|
#define SUN_P4 (15.0/32.0)
|
|
#define SUN_P5 (11.0/32.0)
|
|
#define SUN_P6 (7.0/32.0)
|
|
// SUN_P7 is fixed at zero.
|
|
|
|
#define SUN_M 1.0
|
|
|
|
// Vertex shader for the first sun shaft blur.
// Emits seven UVs stepped between the sun position and the vertex (step
// positions SUN_P0..SUN_P6, scaled by SUN_M) plus the plain rectangle UV.
void SunAlphaVS_Mobile(
    in float4 InPosition : ATTRIBUTE0,
    in float2 InTexCoord : ATTRIBUTE1,
    in FStereoVSInput StereoInput,
    out float2 OutTexCoords[8] : TEXCOORD0,
    out FStereoVSOutput StereoOutput,
    out float4 OutPosition : SV_POSITION
    )
{
    StereoSetupVS(StereoInput, StereoOutput);
    DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

    // Step positions for taps 0..6; tap 7 (SUN_P7) is fixed at zero and
    // therefore uses the rectangle UV directly.
    const float StepPositions[7] = { SUN_P0, SUN_P1, SUN_P2, SUN_P3, SUN_P4, SUN_P5, SUN_P6 };

    UNROLL for (uint StepIndex = 0; StepIndex < 7; ++StepIndex)
    {
        OutTexCoords[StepIndex] = SunShaftRect(OutPosition.xy, 1.0 - StepPositions[StepIndex] * SUN_M);
    }

    OutTexCoords[7] = InTexCoord.xy;
}
|
|
|
|
#undef SUN_M
|
|
|
|
// Remove the +1 bias.
|
|
// This sets negatives to zero because 0-1 is used for DOF.
|
|
// Removes the +1 bias from a stored value when MOBILE_USEDOF is set,
// clamping negatives to zero (the 0-1 range is used for DOF).
// Without MOBILE_USEDOF the value is returned unchanged.
half SunUnBias(half A)
{
#if MOBILE_USEDOF
    const half Unbiased = A - 1.0;
    return max(0.0, Unbiased);
#else
    return A;
#endif
}
|
|
|
|
// Pixel shader for the first sun shaft blur.
// Averages eight un-biased taps of the red channel of SunShaftAndDofTexture
// (equal weight 1/8 each) and outputs the highlight-compressed result.
void SunAlphaPS_Mobile(
    float2 InUVs[8] : TEXCOORD0,
    in FStereoPSInput StereoInput,
    out HALF_TYPE OutColor : SV_Target0
    )
{
    // Accumulate in half so the arithmetic type matches SunUnBias's result.
    half SunSum = 0.0;

    UNROLL for (uint TapIndex = 0; TapIndex < 8; ++TapIndex)
    {
        SunSum += SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[TapIndex]).r) * 0.125;
    }

    OutColor = SunSum;
    OutColor = HighlightCompression(OutColor);
}
|
|
|
|
|
|
|
|
|
|
//
|
|
// Second sun shaft blur.
|
|
//
|
|
|
|
#define SUN_M 0.5
|
|
|
|
// Vertex shader for the second sun shaft blur.
// Same layout as the first pass: seven UVs stepped between the sun position
// and the vertex (SUN_P0..SUN_P6 scaled by this pass's SUN_M) plus the plain
// rectangle UV in slot 7.
void SunBlurVS_Mobile(
    in float4 InPosition : ATTRIBUTE0,
    in float2 InTexCoord : ATTRIBUTE1,
    in FStereoVSInput StereoInput,
    out float2 OutTexCoords[8] : TEXCOORD0,
    out FStereoVSOutput StereoOutput,
    out float4 OutPosition : SV_POSITION
    )
{
    StereoSetupVS(StereoInput, StereoOutput);
    DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

    const float StepPositions[7] = { SUN_P0, SUN_P1, SUN_P2, SUN_P3, SUN_P4, SUN_P5, SUN_P6 };

    UNROLL for (uint StepIndex = 0; StepIndex < 7; ++StepIndex)
    {
        OutTexCoords[StepIndex] = SunShaftRect(OutPosition.xy, 1.0 - StepPositions[StepIndex] * SUN_M);
    }

    // The eighth step position is zero, i.e. the unshifted rectangle UV.
    OutTexCoords[7] = InTexCoord.xy;
}
|
|
|
|
#undef SUN_M
|
|
|
|
// Pixel shader for the second sun shaft blur.
// Outputs the equal-weight (1/8 each) average of eight taps of the red
// channel of SunAlphaTexture.
void SunBlurPS_Mobile(
    float2 InUVs[8] : TEXCOORD0,
    in FStereoPSInput StereoInput,
    out HALF_TYPE OutColor : SV_Target0
    )
{
    float SunSum = 0.0;

    UNROLL for (uint TapIndex = 0; TapIndex < 8; ++TapIndex)
    {
        SunSum += SunAlphaTexture.Sample(SunAlphaSampler, InUVs[TapIndex]).r * 0.125;
    }

    OutColor = SunSum;
}
|
|
|
|
|
|
//
|
|
// Third sun shaft blur, composite with bloom, vignette.
|
|
//
|
|
|
|
#define SUN_M 0.25
|
|
|
|
// Vertex shader for the third sun shaft blur / bloom composite.
// OutTexCoordVignette.xy : rectangle UV, .zw : vignette-space position.
// OutTexCoords[0..5].xy  : six bloom taps placed on a circle around the UV.
// OutTexCoords[0..5].zw  : sun shaft taps for SUN_P0..SUN_P5.
// OutTexCoords[6].xy     : sun shaft tap for SUN_P6; .zw is zeroed.
void SunMergeVS_Mobile(
    in float4 InPosition : ATTRIBUTE0,
    in float2 InTexCoord : ATTRIBUTE1,
    in FStereoVSInput StereoInput,
    out float4 OutTexCoordVignette : TEXCOORD0,
    out float4 OutTexCoords[7] : TEXCOORD1,
    out FStereoVSOutput StereoOutput,
    out float4 OutPosition : SV_POSITION
    )
{
    StereoSetupVS(StereoInput, StereoOutput);
    DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

    OutTexCoordVignette.xy = InTexCoord.xy;
    OutTexCoordVignette.zw = VignetteSpace(OutPosition.xy);

    // Parameters of the six-point bloom tap circle.
    const float Start = 2.0/6.0;
    const float Scale = 0.66/2.0;

    const float StepPositions[7] = { SUN_P0, SUN_P1, SUN_P2, SUN_P3, SUN_P4, SUN_P5, SUN_P6 };

    UNROLL for (uint TapIndex = 0; TapIndex < 6; ++TapIndex)
    {
        OutTexCoords[TapIndex].xy = InTexCoord.xy + Circle(Start, 6.0, float(TapIndex)) * Scale * BloomUpSizeAndInvSize.zw;
        OutTexCoords[TapIndex].zw = SunShaftRect(OutPosition.xy, 1.0 - StepPositions[TapIndex] * SUN_M);
    }

    // The seventh interpolant only carries the last sun shaft tap.
    OutTexCoords[6].xy = SunShaftRect(OutPosition.xy, 1.0 - StepPositions[6] * SUN_M);
    OutTexCoords[6].zw = float2(0.0, 0.0);
}
|
|
|
|
#undef SUN_M
|
|
|
|
float4 SunColorVignetteIntensity;
|
|
float3 BloomColor;
|
|
|
|
Texture2D BloomDirtMaskTexture;
|
|
SamplerState BloomDirtMaskSampler;
|
|
float4 BloomDirtMaskTint;
|
|
|
|
// Pixel shader for the third sun shaft blur, composited with bloom.
// With MOBILE_USEBLOOM: combines the upsampled bloom with a 7-tap average of
//   the bloom setup texture and a dirt mask term.
// With MOBILE_USESUN: adds the decompressed 8-tap sun shaft average tinted
//   by SunColorVignetteIntensity.rgb.
// Alpha is always written as 1.
void SunMergePS_Mobile(
    float4 InUVVignette : TEXCOORD0,
    float4 InUVs[7] : TEXCOORD1,
    in FStereoPSInput StereoInput,
    out HALF4_TYPE OutColor : SV_Target0
    )
{
    const uint EyeIndex = GetEyeIndex(StereoInput);

#if MOBILE_USEBLOOM

    // Center tap and the six ring taps all use the same 1/7 weight.
    float CenterWeight = 1.0/7.0;
    float RingWeight = 1.0/7.0;

    float4 BloomSum = SampleBloomSetup(InUVVignette.xy, EyeIndex).rgba * CenterWeight;

    UNROLL for (uint BloomTap = 0; BloomTap < 6; ++BloomTap)
    {
        BloomSum += SampleBloomSetup(InUVs[BloomTap].xy, EyeIndex).rgba * RingWeight;
    }

    // Normalize by the total weight and keep only the color channels.
    half3 Bloom2 = (BloomSum * rcp(CenterWeight * 1.0 + RingWeight * 6.0)).rgb;

    OutColor.rgb = SampleBloomUp(InUVVignette.xy, EyeIndex);

    half3 BloomDirtMaskColor = BloomDirtMaskTexture.Sample(BloomDirtMaskSampler, InUVVignette.xy).rgb * BloomDirtMaskTint.rgb;

    // Have 5 layers on mobile.
    half LayerWeight = 1.0/5.0;

    // Scale the existing color first ...
    OutColor.rgb *= LayerWeight;

    // ... then add the scaled bloom separately to prevent overflow before scaling.
    OutColor.rgb += Bloom2 * LayerWeight * BloomColor + BloomDirtMaskColor * OutColor.rgb;

#else
    OutColor.rgb = half3(0.0, 0.0, 0.0);
#endif

#if MOBILE_USESUN
    float SunSum = 0.0;

    // Taps 0..5 live in the .zw halves of the interpolants.
    UNROLL for (uint SunTap = 0; SunTap < 6; ++SunTap)
    {
        SunSum += SunBlurTexture.Sample(SunBlurSampler, InUVs[SunTap].zw).r * 0.125;
    }

    // The last two taps: the seventh interpolant's .xy and the rectangle UV.
    SunSum += SunBlurTexture.Sample(SunBlurSampler, InUVs[6].xy).r * 0.125;
    SunSum += SunBlurTexture.Sample(SunBlurSampler, InUVVignette.xy).r * 0.125;

    half Sun = SunSum;
    Sun = HighlightDecompression(Sun);
    OutColor.rgb += SunColorVignetteIntensity.rgb * Sun;
#endif

    OutColor.a = 1.0f;
}
|
|
|
|
#undef SUN_P0
|
|
#undef SUN_P1
|
|
#undef SUN_P2
|
|
#undef SUN_P3
|
|
#undef SUN_P4
|
|
#undef SUN_P5
|
|
#undef SUN_P6
|
|
|
|
// EyeAdaptation
|
|
|
|
StructuredBuffer<float4> EyeAdaptationBuffer;
|
|
|
|
static const float FLOAT_PRECISION = 1e+5;
|
|
|
|
static const float INV_FLOAT_PRECISION = 1e-5;
|
|
|
|
#if CLEAR_UAV_UINT_COMPUTE_SHADER
|
|
uint NumEntries;
|
|
|
|
uint ClearValue;
|
|
RWBuffer<uint> UAV;
|
|
|
|
// Writes ClearValue into the first NumEntries elements of UAV,
// one element per thread; threads past the end do nothing.
[numthreads(64, 1, 1)]
void ClearUAVUIntCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint EntryIndex = DispatchThreadId.x;

    if (EntryIndex >= NumEntries)
    {
        return;
    }

    UAV[EntryIndex] = ClearValue;
}
|
|
#endif
|
|
|
|
#if AVERAGE_LUMINANCE_COMPUTE_SHADER || HISTOGRAM_COMPUTE_SHADER
|
|
|
|
float4 SourceSizeAndInvSize;
|
|
|
|
#endif
|
|
|
|
#if AVERAGE_LUMINANCE_COMPUTE_SHADER
|
|
|
|
RWBuffer<uint> OutputUIntBuffer;
|
|
|
|
groupshared float2 SharedLuminance[THREADGROUP_SIZEX * THREADGROUP_SIZEY];
|
|
|
|
// Each thread takes LOOP_SIZEX * LOOP_SIZEY bilinear taps, covering a TileSize footprint of input texels.
|
|
const static uint2 TileSize = uint2(LOOP_SIZEX, LOOP_SIZEY) * 2; // Multiply 2 because we use bilinear sampler
|
|
|
|
const static uint ThreadGroupSize = THREADGROUP_SIZEX * THREADGROUP_SIZEY;
|
|
|
|
// Accumulates a weighted sum of the input's x channel and the sum of weights.
// Each thread takes LOOP_SIZEX * LOOP_SIZEY bilinear taps (one tap per 2x2
// texels), the per-thread sums are reduced through groupshared memory, and
// threads 0 and 1 add the fixed-point (x FLOAT_PRECISION) weighted sum and
// weight sum into OutputUIntBuffer[0] and OutputUIntBuffer[1] respectively.
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void AverageLuminance_MainCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID,
    uint GroupIndex : SV_GroupIndex)
{
    // Top left input texel for this thread.
    uint2 LeftTop = DispatchThreadId.xy * TileSize;

    uint2 Tile, TexelPos;
    // Must be full precision: the tap position is an odd texel coordinate
    // (TexelPos + 1), and odd integers above 2048 are not representable in a
    // 16-bit float, which would shift taps on large inputs. This also matches
    // the float2 UV used by Histogram_MainCS.
    float2 BufferUV;
    float Weight;
    float Value;

    float2 GroupLuminance = float2(0.0f, 0.0f);

    LOOP for (uint y = 0; y < LOOP_SIZEY; ++y)
    {
        LOOP for (uint x = 0; x < LOOP_SIZEX; ++x)
        {
            Tile = uint2(x, y) * 2; // Multiply 2 because we use bilinear sampler
            TexelPos = LeftTop + Tile;

            if (TexelPos.x < SourceSizeAndInvSize.x && TexelPos.y < SourceSizeAndInvSize.y)
            {
                BufferUV = (float2)TexelPos + float2(1.0f, 1.0f); // offset pixel pos by 1 to use bilinear filter
                BufferUV = BufferUV * SourceSizeAndInvSize.zw;

                // Screen-space weight, never allowed below 0.05.
                Weight = max(AdaptationWeightTexture(BufferUV), 0.05f);

                Value = SampleInput(BufferUV, 0).x;

                {
                    // apply EyeAdaptation_BlackHistogramBucketInfluence using similar logic as Histogram method
                    float fBucket = saturate(Value) * (HISTOGRAM_SIZE - 1);

                    // Samples that fall into the first (black) bucket get their weight scaled.
                    uint Bucket0 = (uint)(fBucket);
                    if (Bucket0 == 0)
                    {
                        Weight *= EyeAdaptation_BlackHistogramBucketInfluence;
                    }
                }

                GroupLuminance.x += Value * Weight;
                GroupLuminance.y += Weight;
            }
        }
    }

    SharedLuminance[GroupIndex] = GroupLuminance;

    GroupMemoryBarrierWithGroupSync();

    // Tree reduction: each step folds the upper half of the active range into the lower half.
    UNROLL for (uint cutoff = (ThreadGroupSize >> 1); cutoff > 0; cutoff >>= 1)
    {
        if (GroupIndex < cutoff)
        {
            SharedLuminance[GroupIndex] += SharedLuminance[GroupIndex + cutoff];
        }

        // The barrier is skipped for the last steps (cutoff <= 4).
        if (cutoff > 4) // https://www.anandtech.com/show/12834/arm-announces-the-mali-g76-scaling-up-bifrost/2 said 4 is the wavefront for bifrost mali gpu
        {
            GroupMemoryBarrierWithGroupSync();
        }
    }

    // Thread 0 publishes the weighted sum (.x), thread 1 the weight sum (.y).
    if (GroupIndex <= 1)
    {
        float NormalizeFactor = SourceSizeAndInvSize.z * SourceSizeAndInvSize.w * 4.0f; // multiply 4 because bilinear filter, so sample only the 1/4 resolution

        uint2 LuminanceInt = SharedLuminance[0] * NormalizeFactor * FLOAT_PRECISION;

        InterlockedAdd(OutputUIntBuffer[GroupIndex], LuminanceInt[GroupIndex]);
    }
}
|
|
|
|
#endif
|
|
|
|
Buffer<uint> LogLuminanceWeightBuffer;
|
|
|
|
// Derives exposure values from the two accumulators in LogLuminanceWeightBuffer
// ([0] = weighted log-luminance sum, [1] = weight sum).
// Returns:
//   x = exposure scale after temporal smoothing,
//   y = target (unsmoothed) exposure scale,
//   z = average scene luminance,
//   w = exposure compensation without the grey multiplier.
float4 BasicEyeAdaptation_Mobile()
{
    const float WeightedLogLumTotal = LogLuminanceWeightBuffer[0];
    const float WeightTotal = LogLuminanceWeightBuffer[1];

    // Weighted average in histogram space; falls back to 1.0 when nothing was accumulated.
    float NormalizedLogLum = 1.0f;
    if (WeightTotal != 0.0f)
    {
        NormalizedLogLum = WeightedLogLumTotal / WeightTotal;
    }

    // Correct for [0,1] scaling.
    const float LogLum = (NormalizedLogLum - EyeAdaptation_HistogramBias) / EyeAdaptation_HistogramScale;

    // Convert the log-luminance average to an average intensity.
    const float SceneLuminance = exp2(LogLum);

    // We want the average luminance remapped to 0.18, not 1.0.
    const float ExposureCompensation = EyeAdaptation_ExposureCompensationSettings * EyeAdaptation_ExposureCompensationCurve * EyeAdaptation_GreyMult;

    const float TargetLuminance = clamp(SceneLuminance, EyeAdaptation_MinAverageLuminance, EyeAdaptation_MaxAverageLuminance);

    // The exposure scale (and thus intensity) used in the previous frame.
    const float PreviousExposureScale = EyeAdaptationBuffer[0].x;
    const float PreviousLuminance = ExposureCompensation / (PreviousExposureScale != 0.0f ? PreviousExposureScale : 1.0f);

    // Time-based exponential blend of the intensity so the adaptation ramps over a few frames.
    const float BlendedLuminance = ComputeEyeAdaptation(PreviousLuminance, TargetLuminance, EyeAdaptation_DeltaWorldTime);

    const float BlendedExposureScale = 1.0f / max(0.0001f, BlendedLuminance);
    const float UnsmoothedExposureScale = 1.0f / max(0.0001f, TargetLuminance);

    return float4(
        ExposureCompensation * BlendedExposureScale,      // rescales the image intensity
        ExposureCompensation * UnsmoothedExposureScale,   // target value
        SceneLuminance,
        ExposureCompensation / EyeAdaptation_GreyMult);
}
|
|
|
|
#if BASIC_EYEADAPTATION_COMPUTE_SHADER
|
|
RWStructuredBuffer<float4> OutputBuffer;
|
|
|
|
// Single-thread compute entry point: stores the basic eye adaptation result
// in OutputBuffer[0] and the constant (1, 0, 0, 0) in OutputBuffer[1].
[numthreads(1, 1, 1)]
void BasicEyeAdaptationCS_Mobile()
{
    const float4 Adaptation = BasicEyeAdaptation_Mobile();
    const float4 SecondEntry = float4(1.0f, 0.0f, 0.0f, 0.0f);

    OutputBuffer[0] = Adaptation;
    OutputBuffer[1] = SecondEntry;
}
|
|
|
|
#endif
|
|
|
|
const static float InvHistogramSize = 1.0f / (float)HISTOGRAM_SIZE;
|
|
|
|
const static float InvHistogramSizeMinusOne = 1.0f / (float)(HISTOGRAM_SIZE - 1);
|
|
|
|
|
|
#if HISTOGRAM_COMPUTE_SHADER
|
|
// Output histogram buffer (UAV)
|
|
RWBuffer<uint> RWHistogramBuffer;
|
|
|
|
const static uint QUARTER_HISTOGRAM_SIZE = HISTOGRAM_SIZE / 4;
|
|
|
|
// Each thread takes LOOP_SIZEX * LOOP_SIZEY bilinear taps, covering a TileSize footprint of input texels.
|
|
const static uint2 TileSize = uint2(LOOP_SIZEX, LOOP_SIZEY) * 2; // Multiply 2 because we use bilinear sampler
|
|
|
|
const static uint ThreadGroupSize = THREADGROUP_SIZEX * THREADGROUP_SIZEY;
|
|
|
|
// THREADGROUP_SIZEX*THREADGROUP_SIZEY histograms of the size HISTOGRAM_SIZE
|
|
groupshared float4 SharedHistogram[(HISTOGRAM_SIZE / 4) * THREADGROUP_SIZEX * THREADGROUP_SIZEY];
|
|
|
|
// Adds one reduced histogram bin to RWHistogramBuffer.
// The bin is read from the first float4 of its bin quad in SharedHistogram,
// scaled by NormalizeFactor and FLOAT_PRECISION, and converted to uint
// before the atomic add.
void WriteToHistogramBuffer(uint HitogramIndex, float NormalizeFactor)
{
    const uint QuadSlot = (HitogramIndex / 4) * ThreadGroupSize;
    const uint Component = HitogramIndex % 4;

    uint4 ScaledQuad = SharedHistogram[QuadSlot] * NormalizeFactor * FLOAT_PRECISION;

    InterlockedAdd(RWHistogramBuffer[HitogramIndex], ScaledQuad[Component]);
}
|
|
|
|
// Builds a weighted luminance histogram of the input.
// Every thread accumulates its LOOP_SIZEX * LOOP_SIZEY bilinear taps into a
// private histogram in groupshared memory, the private histograms are then
// summed per bin quad, and finally every bin is added (as fixed point) to
// RWHistogramBuffer.
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void Histogram_MainCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID,
    uint GroupIndex: SV_GroupIndex)
{
    // todo: can be cleared more efficiently
    // Each thread clears its own histogram; together this clears all
    // THREADGROUP_SIZEX*THREADGROUP_SIZEY histograms.
    UNROLL for (uint i = 0; i < QUARTER_HISTOGRAM_SIZE; ++i)
    {
        SharedHistogram[i * ThreadGroupSize + GroupIndex] = float4(0.0f, 0.0f, 0.0f, 0.0f);
    }

    // Top left input texel for this thread.
    uint2 LeftTop = DispatchThreadId.xy * TileSize;

    uint HistogramSizeMinusOne = HISTOGRAM_SIZE - 1;
    uint2 Tile, TexelPos;
    float2 BufferUV;
    float LogLuminance, ScreenWeight, fBucket, Weight1, Weight0;
    uint x, y, Bucket0, Bucket1;

    // Accumulate all pixels into THREADGROUP_SIZEX*THREADGROUP_SIZEY histograms
    LOOP for (y = 0; y < LOOP_SIZEY; ++y)
    {
        LOOP for (x = 0; x < LOOP_SIZEX; ++x)
        {
            Tile = uint2(x, y) * 2; // Multiply 2 because we use bilinear sampler
            TexelPos = LeftTop + Tile;

            if(TexelPos.x < SourceSizeAndInvSize.x && TexelPos.y < SourceSizeAndInvSize.y)
            {
                BufferUV = (float2)TexelPos + float2(1.0f, 1.0f); // offset pixel pos by 1 to use bilinear filter
                BufferUV = BufferUV * SourceSizeAndInvSize.zw;

                LogLuminance = SampleInput(BufferUV, 0).x;
                ScreenWeight = AdaptationWeightTexture(BufferUV);

                // Map the normalized histogram position into texels.
                fBucket = LogLuminance * HistogramSizeMinusOne;

                // Find two discrete buckets that straddle the continuous histogram position.
                Bucket0 = (uint)(fBucket);
                Bucket1 = Bucket0 + 1;

                Bucket0 = min(Bucket0, HistogramSizeMinusOne);
                Bucket1 = min(Bucket1, HistogramSizeMinusOne);

                // Weighted blend between the two buckets.
                Weight1 = frac(fBucket);
                Weight0 = 1.0f - Weight1;

                // When EyeAdaptation_BlackHistogramBucketInfluence=.0, we will ignore the first (black) bucket.
                // The main use case is so the black background pixels in the editor have no effect. But if we
                // have cases where pixel values can actually be black, we want to set
                // EyeAdaptation_BlackHistogramBucketInfluence=1.0.
                // This value is controlled by the cvar "r.EyeAdaptation.BlackHistogramBucketInfluence"
                if (Bucket0 == 0)
                {
                    Weight0 *= EyeAdaptation_BlackHistogramBucketInfluence;
                }

                // Accumulate the weight to the nearby histogram buckets.
#if IOS // The IOS A8 and lower devices seems don't support using a float4 array as a two dimension array, separate the operations to two steps.
                float4 Histogram0 = float4(0.0f, 0.0f, 0.0f, 0.0f);
                float4 Histogram1 = float4(0.0f, 0.0f, 0.0f, 0.0f);

                Histogram0[Bucket0 % 4] = Weight0 * ScreenWeight;
                Histogram1[Bucket1 % 4] = Weight1 * ScreenWeight;

                SharedHistogram[(Bucket0 / 4) * ThreadGroupSize + GroupIndex] += Histogram0;
                SharedHistogram[(Bucket1 / 4) * ThreadGroupSize + GroupIndex] += Histogram1;
#else
                SharedHistogram[(Bucket0 / 4) * ThreadGroupSize + GroupIndex][Bucket0 % 4] += Weight0 * ScreenWeight;
                SharedHistogram[(Bucket1 / 4) * ThreadGroupSize + GroupIndex][Bucket1 % 4] += Weight1 * ScreenWeight;
#endif
            }
        }
    }

    GroupMemoryBarrierWithGroupSync();

    // Reduction.
    //
    // float4 SharedHistogram[] is laid out like this:
    // [ Histogram 0, bins 0-3] [ Histogram 1, bins 0-3] ... [ Histogram N-1, bins 0-3] [Histogram 0, bins 4-7] [Histogram 1, bins 4-7] ...
    // where N is ThreadGroupSize.
    //
    // To reduce we use HISTOGRAM_SIZE/4 threads to accumulate, where thread 0 accumulates bins 0-3 from all histograms, thread 1 bins 4-7, etc.
    if (GroupIndex < QUARTER_HISTOGRAM_SIZE)
    {
        float4 Sum = float4(0.0f, 0.0f, 0.0f, 0.0f);
        UNROLL for (uint i = 0; i < ThreadGroupSize; ++i)
        {
            // Accumulate bins from histogram i
            Sum += SharedHistogram[GroupIndex * ThreadGroupSize + i];
        }
        // The reduced quad replaces histogram 0's slot, where WriteToHistogramBuffer reads it.
        SharedHistogram[GroupIndex * ThreadGroupSize] = Sum;
    }

    GroupMemoryBarrierWithGroupSync();

    // Write-out. Every bin must be added to the output exactly once per group.
    // Threads stride over the bins in steps of ThreadGroupSize, so this holds
    // both when the group has at least HISTOGRAM_SIZE threads (one bin per
    // thread) and when it has fewer (several bins per thread, e.g. on
    // LOW_SHARED_COMPUTE_MEMORY devices). The previous GroupIndex and
    // GroupIndex * 2 + 1 pairing wrote some bins twice and skipped others.
    const float NormalizeFactor = SourceSizeAndInvSize.z * SourceSizeAndInvSize.w * 4.0f; // multiply 4 because bilinear filter, so sample only the 1/4 resolution
    const uint BinsPerThread = (HISTOGRAM_SIZE + ThreadGroupSize - 1) / ThreadGroupSize;

    UNROLL for (uint BinPass = 0; BinPass < BinsPerThread; ++BinPass)
    {
        const uint BinIndex = BinPass * ThreadGroupSize + GroupIndex;

        if (BinIndex < HISTOGRAM_SIZE)
        {
            WriteToHistogramBuffer(BinIndex, NormalizeFactor);
        }
    }
}
|
|
|
|
#endif
|
|
|
|
#if HISTOGRAM_EYEADAPTATION_COMPUTE_SHADER
|
|
Buffer<uint> HistogramBuffer;
|
|
|
|
// Reads one entry of HistogramBuffer and returns it converted to float.
float GetHistogramBucket_Mobile(uint BucketIndex)
{
    const float BucketValue = HistogramBuffer[BucketIndex];
    return BucketValue;
}
|
|
|
|
// Returns the sum of all HISTOGRAM_SIZE histogram buckets.
float ComputeHistogramSum_Mobile()
{
    float Total = 0;
    uint BucketIndex = 0;

    while (BucketIndex < HISTOGRAM_SIZE)
    {
        Total += GetHistogramBucket_Mobile(BucketIndex);
        ++BucketIndex;
    }

    return Total;
}
|
|
|
|
// @param MinFractionSum e.g. ComputeHistogramSum_Mobile() * 0.5f for the 50th percentile
|
|
// @param MaxFractionSum e.g. ComputeHistogramSum_Mobile() * 0.9f for the 90th percentile
|
|
// Averages the histogram's log luminance while discarding outliers, and
// returns exp2 of that average.
// Histogram mass below MinFractionSum (counted from the first bucket) is
// dropped, and only mass up to MaxFractionSum is kept.
float ComputeAverageLuminanceWithoutOutlier_Mobile(float MinFractionSum, float MaxFractionSum)
{
    // Remaining mass still to be skipped at the low end / still allowed in total.
    float LowBudget = MinFractionSum;
    float HighBudget = MaxFractionSum;

    float WeightedLogLuminance = 0;
    float KeptWeight = 0;

    UNROLL for (uint BucketIndex = 0; BucketIndex < HISTOGRAM_SIZE; ++BucketIndex)
    {
        float BucketValue = GetHistogramBucket_Mobile(BucketIndex);

        // Remove outliers at the lower end.
        const float Discarded = min(BucketValue, LowBudget);
        BucketValue = BucketValue - Discarded;
        LowBudget -= Discarded;
        HighBudget -= Discarded;

        // Remove outliers at the upper end.
        BucketValue = min(BucketValue, HighBudget);
        HighBudget -= BucketValue;

        const float BucketLogLuminance = ComputeLogLuminanceFromHistogramPosition(float(BucketIndex) * InvHistogramSizeMinusOne);

        WeightedLogLuminance += BucketLogLuminance * BucketValue;
        KeptWeight += BucketValue;
    }

    // The divisor is floored at 0.0001 so an empty selection cannot divide by zero.
    const float AverageLogLuminance = WeightedLogLuminance / max(0.0001f, KeptWeight);

    return exp2(AverageLogLuminance);
}
|
|
|
|
// Returns the outlier-free average scene luminance, using the
// EyeAdaptation_ExposureLowPercent / EyeAdaptation_ExposureHighPercent
// fractions of the total histogram sum as the cut-offs.
float ComputeEyeAdaptationExposure_Mobile()
{
    const float TotalWeight = ComputeHistogramSum_Mobile();
    const float LowCutoff = TotalWeight * EyeAdaptation_ExposureLowPercent;
    const float HighCutoff = TotalWeight * EyeAdaptation_ExposureHighPercent;

    return ComputeAverageLuminanceWithoutOutlier_Mobile(LowCutoff, HighCutoff);
}
|
|
|
|
RWStructuredBuffer<float4> OutputBuffer;
|
|
|
|
// Single-thread compute entry point for histogram-based eye adaptation.
// Writes to OutputBuffer[0]:
//   x = exposure scale after temporal smoothing and clamping,
//   y = target (unsmoothed) exposure scale,
//   z = average scene luminance,
//   w = exposure compensation,
// and the constant (1, 0, 0, 0) to OutputBuffer[1].
[numthreads(1, 1, 1)]
void HistogramEyeAdaptationCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
    const float SceneLuminance = ComputeEyeAdaptationExposure_Mobile();

    const float ClampedLuminance = clamp(SceneLuminance, EyeAdaptation_MinAverageLuminance, EyeAdaptation_MaxAverageLuminance);

    const float InvGreyMult = 5.5555555556f; // 1.0f/0.18f

    // White point luminance is target luminance divided by 0.18 (18% grey).
    const float WantedExposure = ClampedLuminance * InvGreyMult;

    const float PreviousExposureScale = EyeAdaptationBuffer[0].x;

    // We want the average luminance remapped to 0.18, not 1.0.
    const float ExposureCompensation = EyeAdaptation_ExposureCompensationSettings * EyeAdaptation_ExposureCompensationCurve;
    const float PreviousExposure = ExposureCompensation / (PreviousExposureScale != 0 ? PreviousExposureScale : 1.0f);

    // Eye adaptation changes over time.
    const float BlendedExposure = ComputeEyeAdaptation(PreviousExposure, WantedExposure, EyeAdaptation_DeltaWorldTime);

    // maybe make this an option to avoid hard clamping when transitioning between different exposure volumes?
    const float LimitedExposure = clamp(BlendedExposure, EyeAdaptation_MinAverageLuminance * InvGreyMult, EyeAdaptation_MaxAverageLuminance * InvGreyMult);

    const float LimitedExposureScale = 1.0f / max(0.0001f, LimitedExposure);
    const float WantedExposureScale = 1.0f / max(0.0001f, WantedExposure);

    OutputBuffer[0] = float4(
        ExposureCompensation * LimitedExposureScale,
        ExposureCompensation * WantedExposureScale,
        SceneLuminance,
        ExposureCompensation);
    OutputBuffer[1] = float4(1.0f, 0.0f, 0.0f, 0.0f);
}
|
|
#endif
|