// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	PostProcessMobile.usf: Combined {bloom, sunshafts, depth of field}
=============================================================================*/

#include "Common.ush"
#include "PostProcessCommon.ush"
#include "PostprocessHistogramCommon.ush"
#include "ScreenPass.ush"

// Stereo-aware inputs are declared through this alias: a texture array when
// MOBILE_MULTI_VIEW is enabled, a plain 2D texture otherwise.
#if MOBILE_MULTI_VIEW
	#define MultiViewTexture2D Texture2DArray
#else
	#define MultiViewTexture2D Texture2D
#endif

// Scene color of the current frame (multi-view aware) and of the previous frame.
MultiViewTexture2D SceneColorTexture;
SamplerState SceneColorSampler;

Texture2D LastFrameSceneColorTexture;
SamplerState LastFrameSceneColorSampler;

// Combined sun shaft / depth of field term; the passes in this file read its .r channel.
Texture2D SunShaftAndDofTexture;
SamplerState SunShaftAndDofSampler;

// Depth of field intermediates (near dilation, downsample, blur).
Texture2D DofNearTexture;
SamplerState DofNearSampler;

Texture2D DofDownTexture;
SamplerState DofDownSampler;

Texture2D DofBlurTexture;
SamplerState DofBlurSampler;

// Bloom downsample / upsample sources (multi-view aware).
MultiViewTexture2D BloomDownSourceTexture;
SamplerState BloomDownSourceSampler;

MultiViewTexture2D BloomUpSourceATexture;
SamplerState BloomUpSourceASampler;

MultiViewTexture2D BloomUpSourceBTexture;
SamplerState BloomUpSourceBSampler;

// Sun shaft intermediates.
Texture2D SunAlphaTexture;
SamplerState SunAlphaSampler;

Texture2D SunBlurTexture;
SamplerState SunBlurSampler;

// Bloom setup / upsample results (multi-view aware).
MultiViewTexture2D BloomSetup_BloomTexture;
SamplerState BloomSetup_BloomSampler;

MultiViewTexture2D BloomUpTexture;
SamplerState BloomUpSampler;

// Sun merge result of the current and previous frame.
Texture2D SunMergeTexture;
SamplerState SunMergeSampler;

Texture2D LastFrameSunMergeTexture;
SamplerState LastFrameSunMergeSampler;

// Buffer extents. The vertex shaders in this file use the .zw components as the
// per-texel UV step when offsetting sample positions.
float4 BufferSizeAndInvSize;
float4 DofBlurSizeAndInvSize;
float4 BufferASizeAndInvSize;
float4 BufferBSizeAndInvSize;
float4 BloomUpSizeAndInvSize;

// Point on circle.
// Maps step `Point` (shifted by `Start`) out of `Points` evenly spaced steps around a
// full turn to the 2D offset (sin, cos) of that angle.
float2 Circle(float Start, float Points, float Point)
{
	const float Angle = (3.141592 * 2.0 * (1.0 / Points)) * (Point + Start);
	return float2(sin(Angle), cos(Angle));
}

// Brightness controls read by SunMaskPS_Mobile and BloomPS_Mobile.
float BloomMaxBrightness;
float BloomThreshold;

// Far edge of the in-focus range: focal distance plus focal region.
half FocusDistFar()
{
	return View.DepthOfFieldFocalDistance + View.DepthOfFieldFocalRegion;
}

// Near edge of the in-focus range: the focal distance itself.
half FocusDistNear()
{
	return View.DepthOfFieldFocalDistance;
}

// Alpha = 0.5 is full size, >0.5 rate at which near and far hit maximum.
float4 SunColorApertureDiv2;

// Returns 0=max near DOF, 0.5=in focus, 1.0=max far DOF
half Coc(half Depth)
{
	// Depth clamped into the in-focus range; the difference to Depth is zero while in focus.
	half ClampedFocusDepth = clamp(Depth,half(FocusDistNear()),half(FocusDistFar()));
	half RelativeDefocus = ((Depth - ClampedFocusDepth) / Depth);
	return saturate(RelativeDefocus * SunColorApertureDiv2.a + 0.5);
}

//////////////////////////

// 0 at (or beyond) the focal distance, rising to 1 across the near transition region
// as SceneDepth gets closer than the focal distance.
float ComputeDOFNearFocalMask(float SceneDepth)
{
	float NearPlane = View.DepthOfFieldFocalDistance;
	return saturate((NearPlane - SceneDepth) / View.DepthOfFieldNearTransitionRegion);
}

// todo move to central place
// 0 up to the end of the focal region, rising to 1 across the far transition region.
float ComputeDOFFarFocalMask(float SceneDepth)
{
	float FarPlane = View.DepthOfFieldFocalDistance + View.DepthOfFieldFocalRegion;
	return saturate((SceneDepth - FarPlane) / View.DepthOfFieldFarTransitionRegion);
}

// Returns 0=max near DOF, 0.5=in focus, 1.0=max far DOF
half Coc2(half Depth)
{
	half NearMask = ComputeDOFNearFocalMask(Depth);
	half FarMask = ComputeDOFFarFocalMask(Depth);

	if (FarMask > NearMask)
	{
		// Far side: {0 to 1} maps to {0.5 to 1}.
		return (FarMask * 0.5) + 0.5;
	}
	else
	{
		// Near side (or in focus): {0 to 1} maps to {0.5 to 0}.
		return (1.0-NearMask) * 0.5;
	}
}

//////////////////////////////////

// Scale (x) and bias (y) that remap depth in {65504 - 16384 to 65504} to {0 to 1}.
half2 SunConstDepthMaskScaleBias()
{
	half DepthMin = 65504.0 - 16384.0;
	half DepthMax = 65504.0 - 0.0;

	// Compute scale and bias.
	half Scale = 1.0/(DepthMax-DepthMin);
	half Bias = -DepthMin * Scale;
	return half2(Scale, Bias);
}

// Samples a stereo-aware texture: slice `ArrayIndex` of the array with MOBILE_MULTI_VIEW,
// the plain 2D texture otherwise (ArrayIndex is then ignored).
half4 SampleMultiViewTexture(MultiViewTexture2D Texture, SamplerState Sampler, float2 SceneUV, float ArrayIndex)
{
#if MOBILE_MULTI_VIEW
	return Texture.Sample(Sampler, float3(SceneUV.xy,ArrayIndex));
#else
	return Texture.Sample(Sampler, SceneUV);
#endif
}

// Thin per-input wrappers around SampleMultiViewTexture.

float4 SampleSceneColor(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(SceneColorTexture, SceneColorSampler, SceneUV.xy, ArrayIndex);
}

half4 SampleBloomDown(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(BloomDownSourceTexture, BloomDownSourceSampler, SceneUV.xy, ArrayIndex);
}

half4 SampleBloomUpA(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(BloomUpSourceATexture, BloomUpSourceASampler, SceneUV.xy, ArrayIndex);
}

// Sampling BloomUpB requires float return type to avoid precision issues leading to artifacts
// e.g. blue tint all over the view when looking partially at the sky.
float4 SampleBloomUpB(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(BloomUpSourceBTexture, BloomUpSourceBSampler, SceneUV.xy, ArrayIndex);
}

half4 SampleBloomSetup(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(BloomSetup_BloomTexture, BloomSetup_BloomSampler, SceneUV.xy, ArrayIndex);
}

half4 SampleBloomUp(float2 SceneUV, float ArrayIndex)
{
	return SampleMultiViewTexture(BloomUpTexture, BloomUpSampler, SceneUV.xy, ArrayIndex);
}

//
// Convert depth in alpha into combined circle of confusion and sun intensity.
//
#if SHADER_SUN_MASK
// Writes the combined sun shaft / depth of field term for one pixel:
//   - with MOBILE_USESUN: thresholded sun intensity, masked by depth and a screen-edge falloff,
//   - with MOBILE_USEDOF: Coc2(depth) is added on top.
// With MOBILE_USESUN && METAL_MSAA_HDR_DECODE the decoded scene color is also written to SV_Target1.
void SunMaskPS_Mobile(
	float4 InUVPos : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF_TYPE OutSunShaftAndDof : SV_Target0
#if MOBILE_USESUN && METAL_MSAA_HDR_DECODE
	, out HALF4_TYPE OutColor : SV_Target1
#endif
	)
{
	half4 SceneColor = SampleSceneColor(InUVPos.xy,GetEyeIndex(StereoInput));

#if MOBILE_USEDEPTHTEXTURE
	half InDepth = ConvertFromDeviceZ(Texture2DSampleLevel(MobileSceneTextures.SceneDepthTexture, MobileSceneTextures.SceneDepthTextureSampler, InUVPos.xy, 0).r);
#else
	half InDepth = ConvertFromDeviceZ(Texture2DSampleLevel(MobileSceneTextures.SceneDepthAuxTexture, MobileSceneTextures.SceneDepthAuxTextureSampler, InUVPos.xy, 0).r);
#endif

#if MOBILE_USESUN
	#if METAL_MSAA_HDR_DECODE
	// Undo the x / (1 + luma) pre-tonemap (see PreTonemapMSAA_Mobile).
	SceneColor.rgb *= rcp(SceneColor.r*(-0.299) + SceneColor.g*(-0.587) + SceneColor.b*(-0.114) + 1.0);
	OutColor = SceneColor;
	#endif

	// Only the far end of the depth range contributes to the sun mask.
	half2 DepthMaskScaleBias = SunConstDepthMaskScaleBias();
	half FarAmount = saturate(InDepth * DepthMaskScaleBias.x + DepthMaskScaleBias.y);
	half3 SunAmount = SceneColor.rgb * SunColorApertureDiv2.rgb;

	// Subtract the bloom threshold from the luminance and clamp it, keeping the hue.
	// The 6.10352e-5 floor keeps the division below away from zero.
	float SunLuminance = max(Luminance(SunAmount), 6.10352e-5);
	float AdjustedLuminance = clamp(SunLuminance - BloomThreshold, 0.0f, BloomMaxBrightness);
	SunAmount = SunAmount / SunLuminance * AdjustedLuminance * 2.0f;

	// Fade the mask out towards the edges.
	// NOTE(review): presumably InUVPos.zw is the screen position in {-1 to 1} - confirm against the vertex shader.
	half2 Pos = InUVPos.zw * 0.5 + 0.5;
	half EdgeMask = 1.0f - Pos.x * (1.0f - Pos.x) * Pos.y * (1.0f - Pos.y) * 8.0f;
	EdgeMask = EdgeMask * EdgeMask;

	FarAmount *= 1.0-EdgeMask;
	OutSunShaftAndDof = min(min(SunAmount.r, SunAmount.g), SunAmount.b) * FarAmount;
#else
	OutSunShaftAndDof = 0.0;
#endif

#if MOBILE_USEDOF
	OutSunShaftAndDof += Coc2(InDepth);
#endif
}
#endif

//
// Pre-tonemap before hardware box-filtered resolve.
//
// NOTE(review): OutColor is only written when (METAL_ES3_1_PROFILE && !MAC); confirm this
// entry point is never compiled for other targets.
void PreTonemapMSAA_Mobile(
	float4 InUVPos : TEXCOORD0,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
#if (METAL_ES3_1_PROFILE && !MAC)
	// On-chip pre-tonemap before MSAA resolve.
	OutColor = SubpassFetchRGBA_0();
	OutColor.rgb *= rcp(OutColor.r*0.299 + OutColor.g*0.587 + OutColor.b*0.114 + 1.0);
#endif
}

#if MOBILE_MULTI_VIEW
Texture2DArray InputTexture;
#else
Texture2D InputTexture;
#endif
SamplerState InputSampler;

// Samples InputTexture; ArrayIndex selects the slice when MOBILE_MULTI_VIEW is enabled.
float4 SampleInput(float2 SceneUV, float ArrayIndex)
{
#if MOBILE_MULTI_VIEW
	return Texture2DArraySample(InputTexture, InputSampler, float3(SceneUV.xy,ArrayIndex));
#else
	return Texture2DSample(InputTexture, InputSampler, SceneUV);
#endif
}

// Copies InputTexture while undoing the x / (1 + luma) pre-tonemap.
// NOTE(review): OutColor is only written when (METAL_ES3_1_PROFILE && !MAC), and the 2D sample
// below bypasses SampleInput although InputTexture is an array under MOBILE_MULTI_VIEW - confirm
// those configurations are never combined.
void MSAADecodeAndCopyRectPS(
	noperspective float4 UVAndScreenPos : TEXCOORD0,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
#if (METAL_ES3_1_PROFILE && !MAC)
	float2 UV = UVAndScreenPos.xy;
	OutColor = Texture2DSample(InputTexture, InputSampler, UV);
	OutColor.rgb *= rcp(OutColor.r*(-0.299) + OutColor.g*(-0.587) + OutColor.b*(-0.114) + 1.0);
#endif
}

//
// Bloom Setup - Mask Bloom and Downsample 1/16 Area
//

// Emits four sample UVs, one texel step (BufferSizeAndInvSize.zw) diagonally away from the center.
void BloomVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[4] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);

	float2 TransformedUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, TransformedUV);

	OutTexCoords[0] = TransformedUV + BufferSizeAndInvSize.zw * float2(-1, -1);
	OutTexCoords[1] = TransformedUV + BufferSizeAndInvSize.zw * float2( 1, -1);
	OutTexCoords[2] = TransformedUV + BufferSizeAndInvSize.zw * float2(-1,  1);
	OutTexCoords[3] = TransformedUV + BufferSizeAndInvSize.zw * float2( 1,  1);
}

// Cheap luma estimate: r + 2g + b.
float Luma4(float3 Color)
{
	return (Color.g * 2.0) + (Color.r + Color.b);
}

// Weight 1 / (Luma4 + 4) applied to each sample before averaging.
float HdrWeight4(float3 Color)
{
	return rcp(Luma4(Color) + 4.0);
}

// Factor that undoes HdrWeight4 when applied to an already weighted color.
float HdrWeightInv4(float3 Color)
{
	return 4.0 * rcp(1.0 - Luma4(Color));
}

// Bloom setup pixel shader. Depending on the enabled features it writes, in this order of
// render targets: the thresholded bloom color (MOBILE_USEBLOOM), the downsampled sun shaft /
// DOF term (MOBILE_USEDOF || MOBILE_USESUN) and the eye adaptation value (MOBILE_USEEYEADAPTATION).
void BloomPS_Mobile(
	float2 InUVs[4] : TEXCOORD0,
	in FStereoPSInput StereoInput
#if MOBILE_USEBLOOM
	, out HALF4_TYPE OutColor : SV_Target0
	#if MOBILE_USEDOF || MOBILE_USESUN
	,out HALF_TYPE OutSunShaftAndDof : SV_Target1
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target2
		#endif
	#else
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target1
		#endif
	#endif
#else
	#if MOBILE_USEDOF || MOBILE_USESUN
	, out HALF_TYPE OutSunShaftAndDof : SV_Target0
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target1
		#endif
	#else
		#if MOBILE_USEEYEADAPTATION
	, out HALF_TYPE OutEyeAdaptation : SV_Target0
		#endif
	#endif
#endif
	)
{
	half3 AverageColor = 0.0f;

#if MOBILE_USEBLOOM || MOBILE_USESUN || MOBILE_USEEYEADAPTATION
	float4 C0 = SampleSceneColor(InUVs[0],GetEyeIndex(StereoInput));
	float4 C1 = SampleSceneColor(InUVs[1],GetEyeIndex(StereoInput));
	float4 C2 = SampleSceneColor(InUVs[2],GetEyeIndex(StereoInput));
	float4 C3 = SampleSceneColor(InUVs[3],GetEyeIndex(StereoInput));

	// Weight each sample by its own brightness before averaging.
	// The weight is computed from rgb only (explicit swizzle avoids an implicit float4 -> float3 truncation).
	C0 *= HdrWeight4(C0.rgb);
	C1 *= HdrWeight4(C1.rgb);
	C2 *= HdrWeight4(C2.rgb);
	C3 *= HdrWeight4(C3.rgb);

	// Output color is average.
	AverageColor.rgb = (C0.rgb * 0.25) + (C1.rgb * 0.25) + (C2.rgb * 0.25) + (C3.rgb * 0.25);
	AverageColor.rgb *= HdrWeightInv4(AverageColor);

	#if METAL_MSAA_HDR_DECODE
	// This should really happen before the average, instead doing after average as optimization.
	AverageColor.rgb *= rcp(AverageColor.r*(-0.299) + AverageColor.g*(-0.587) + AverageColor.b*(-0.114) + 1.0);
	#endif

	// Try to kill negatives and NaNs here
	AverageColor.rgb = max(AverageColor.rgb, 0);

	#if MOBILE_USEBLOOM || MOBILE_USESUN
	// Trim bloom and sunshafts black level.
	half TotalLuminance = Luminance(AverageColor.rgb);
	half BloomLuminance = TotalLuminance - BloomThreshold;
	half Amount = saturate(BloomLuminance * 0.5f);
	#endif

	#if MOBILE_USEBLOOM
	OutColor.rgb = AverageColor;
	OutColor.rgb *= Amount;
	OutColor.a = 0;
	#endif
#endif

#if MOBILE_USEDOF || MOBILE_USESUN
	half A0 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0]).r;
	half A1 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1]).r;
	half A2 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2]).r;
	half A3 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3]).r;
#endif

	// In the case of both DOF and SUN,
	// Split out alpha back into dual components (circle of confusion size and sun amount).
#if MOBILE_USEDOF
	// Expand {near to in-focus} {0.0 to 0.5} to {0.0 to 1.0} for near DOF dilation.
	// Must keep 1.0 the in-focus here (sunshaft pass will use this data).
	// (Locals are named NearCocN so they do not shadow the Coc2() function.)
	half NearCoc0 = saturate(A0*2.0);
	half NearCoc1 = saturate(A1*2.0);
	half NearCoc2 = saturate(A2*2.0);
	half NearCoc3 = saturate(A3*2.0);

	// Take min of COC (which is maximum near radius).
	OutSunShaftAndDof = min(min(NearCoc0,NearCoc1),min(NearCoc2,NearCoc3));

	// Improve the quality of near dilation.
	OutSunShaftAndDof = 1.0 - OutSunShaftAndDof;
	OutSunShaftAndDof *= OutSunShaftAndDof;
	OutSunShaftAndDof = 1.0 - OutSunShaftAndDof;
#elif MOBILE_USESUN
	OutSunShaftAndDof = 0.0f;
#endif

#if MOBILE_USESUN
	#if MOBILE_USEDOF
	// With DOF enabled the {0.0 to 1.0} part of the value is the CoC; only the excess is sun.
	half Sun0 = max(0.0, A0-1.0);
	half Sun1 = max(0.0, A1-1.0);
	half Sun2 = max(0.0, A2-1.0);
	half Sun3 = max(0.0, A3-1.0);
	#else
	half Sun0 = A0;
	half Sun1 = A1;
	half Sun2 = A2;
	half Sun3 = A3;
	#endif

	// Take average of sun intensity and adjust by bloom threshold.
	Amount *= 0.25;
	OutSunShaftAndDof += (Sun0 * Amount) + (Sun1 * Amount) + (Sun2 * Amount) + (Sun3 * Amount);
#endif

#if MOBILE_USEEYEADAPTATION
	const float Intensity = CalculateEyeAdaptationLuminance(AverageColor * View.OneOverPreExposure);
	const float LogIntensity = clamp(log2(Intensity), -10.0f, 20.0f);

	// Store log intensity in the eye adaptation target: scale to 0,1 range.
	OutEyeAdaptation = EyeAdaptation_HistogramScale * LogIntensity + EyeAdaptation_HistogramBias;
#endif
}

//
// Bloom Downsample
//

float BloomDownScale;

// Emits the center UV plus 14 UVs on a circle of radius BloomDownScale texels,
// packed two per float4 (.xy / .zw). OutTexCoords[7].zw is unused and zeroed.
void BloomDownVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoords[8] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	float Start = 2.0/14.0;
	float Scale = BloomDownScale;

	OutTexCoords[0].xy = InTexCoord.xy;
	OutTexCoords[0].zw = InTexCoord.xy + Circle(Start, 14.0, 0.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[1].xy = InTexCoord.xy + Circle(Start, 14.0, 1.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[1].zw = InTexCoord.xy + Circle(Start, 14.0, 2.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[2].xy = InTexCoord.xy + Circle(Start, 14.0, 3.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[2].zw = InTexCoord.xy + Circle(Start, 14.0, 4.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[3].xy = InTexCoord.xy + Circle(Start, 14.0, 5.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[3].zw = InTexCoord.xy + Circle(Start, 14.0, 6.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[4].xy = InTexCoord.xy + Circle(Start, 14.0, 7.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[4].zw = InTexCoord.xy + Circle(Start, 14.0, 8.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[5].xy = InTexCoord.xy + Circle(Start, 14.0, 9.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[5].zw = InTexCoord.xy + Circle(Start, 14.0, 10.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[6].xy = InTexCoord.xy + Circle(Start, 14.0, 11.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[6].zw = InTexCoord.xy + Circle(Start, 14.0, 12.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[7].xy = InTexCoord.xy + Circle(Start, 14.0, 13.0) * Scale * BufferSizeAndInvSize.zw;
	OutTexCoords[7].zw = float2(0.0, 0.0);
}

// Averages the 15 taps produced by BloomDownVS_Mobile with equal weight; alpha is written as 0.
void BloomDownPS_Mobile(
	float4 InUVs[8] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	const uint EyeIndex = GetEyeIndex(StereoInput);

	// Only rgb is used, so fetch rgb explicitly instead of relying on an implicit
	// 4 -> 3 component truncation in the assignment below.
	half3 N0 = SampleBloomDown(InUVs[0].xy, EyeIndex).rgb;
	half3 N1 = SampleBloomDown(InUVs[0].zw, EyeIndex).rgb;
	half3 N2 = SampleBloomDown(InUVs[1].xy, EyeIndex).rgb;
	half3 N3 = SampleBloomDown(InUVs[1].zw, EyeIndex).rgb;
	half3 N4 = SampleBloomDown(InUVs[2].xy, EyeIndex).rgb;
	half3 N5 = SampleBloomDown(InUVs[2].zw, EyeIndex).rgb;
	half3 N6 = SampleBloomDown(InUVs[3].xy, EyeIndex).rgb;
	half3 N7 = SampleBloomDown(InUVs[3].zw, EyeIndex).rgb;
	half3 N8 = SampleBloomDown(InUVs[4].xy, EyeIndex).rgb;
	half3 N9 = SampleBloomDown(InUVs[4].zw, EyeIndex).rgb;
	half3 N10 = SampleBloomDown(InUVs[5].xy, EyeIndex).rgb;
	half3 N11 = SampleBloomDown(InUVs[5].zw, EyeIndex).rgb;
	half3 N12 = SampleBloomDown(InUVs[6].xy, EyeIndex).rgb;
	half3 N13 = SampleBloomDown(InUVs[6].zw, EyeIndex).rgb;
	half3 N14 = SampleBloomDown(InUVs[7].xy, EyeIndex).rgb;

	float W = 1.0/15.0;

	OutColor.rgb =
		(N0 * W) +
		(N1 * W) +
		(N2 * W) +
		(N3 * W) +
		(N4 * W) +
		(N5 * W) +
		(N6 * W) +
		(N7 * W) +
		(N8 * W) +
		(N9 * W) +
		(N10 * W) +
		(N11 * W) +
		(N12 * W) +
		(N13 * W) +
		(N14 * W);

	OutColor.a = 0;
}

//
// Bloom Upsample
//

float2 BloomUpScales;

// Emits two 7-point circles around the center: radius BloomUpScales.x in source A texels
// (OutTexCoords[0..3].xy) and radius BloomUpScales.y in source B texels (OutTexCoords[4..7].xy).
// OutTexCoords[3].zw carries the center UV; OutTexCoords[7].zw is unused and zeroed.
void BloomUpVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoords[8] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	float Start;
	float Scale;

	Start = 2.0/7.0;
	Scale = BloomUpScales.x;

	OutTexCoords[0].xy = InTexCoord.xy + Circle(Start, 7.0, 0.0) * Scale * BufferASizeAndInvSize.zw;
	OutTexCoords[0].zw = InTexCoord.xy + Circle(Start, 7.0, 1.0) * Scale * BufferASizeAndInvSize.zw;
	OutTexCoords[1].xy = InTexCoord.xy + Circle(Start, 7.0, 2.0) * Scale * BufferASizeAndInvSize.zw;
	OutTexCoords[1].zw = InTexCoord.xy + Circle(Start, 7.0, 3.0) * Scale * BufferASizeAndInvSize.zw;
	OutTexCoords[2].xy = InTexCoord.xy + Circle(Start, 7.0, 4.0) * Scale * BufferASizeAndInvSize.zw;
	OutTexCoords[2].zw = InTexCoord.xy + Circle(Start, 7.0, 5.0) * Scale * BufferASizeAndInvSize.zw;
	OutTexCoords[3].xy = InTexCoord.xy + Circle(Start, 7.0, 6.0) * Scale * BufferASizeAndInvSize.zw;

	OutTexCoords[3].zw = InTexCoord.xy;

	Start = 2.0/7.0;
	Scale = BloomUpScales.y;

	OutTexCoords[4].xy = InTexCoord.xy + Circle(Start, 7.0, 0.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[4].zw = InTexCoord.xy + Circle(Start, 7.0, 1.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[5].xy = InTexCoord.xy + Circle(Start, 7.0, 2.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[5].zw = InTexCoord.xy + Circle(Start, 7.0, 3.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[6].xy = InTexCoord.xy + Circle(Start, 7.0, 4.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[6].zw = InTexCoord.xy + Circle(Start, 7.0, 5.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[7].xy = InTexCoord.xy + Circle(Start, 7.0, 6.0) * Scale * BufferBSizeAndInvSize.zw;
	OutTexCoords[7].zw = float2(0.0, 0.0);
}

float4 BloomTintA;
float4 BloomTintB;

// Sums 8 taps of source A tinted by BloomTintA and 8 taps of source B tinted by BloomTintB
// (7 circle taps plus the center tap for each source); alpha is written as 0.
void BloomUpPS_Mobile(
	float4 InUVs[8] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	const uint EyeIndex = GetEyeIndex(StereoInput);

	half3 A0 = SampleBloomUpA(InUVs[0].xy, EyeIndex).rgb;
	half3 A1 = SampleBloomUpA(InUVs[0].zw, EyeIndex).rgb;
	half3 A2 = SampleBloomUpA(InUVs[1].xy, EyeIndex).rgb;
	half3 A3 = SampleBloomUpA(InUVs[1].zw, EyeIndex).rgb;
	half3 A4 = SampleBloomUpA(InUVs[2].xy, EyeIndex).rgb;
	half3 A5 = SampleBloomUpA(InUVs[2].zw, EyeIndex).rgb;
	half3 A6 = SampleBloomUpA(InUVs[3].xy, EyeIndex).rgb;
	half3 A7 = SampleBloomUpA(InUVs[3].zw, EyeIndex).rgb;

	// InUVs[3].zw is the center UV and is sampled from both sources.
	half3 B0 = SampleBloomUpB(InUVs[3].zw, EyeIndex).rgb;
	half3 B1 = SampleBloomUpB(InUVs[4].xy, EyeIndex).rgb;
	half3 B2 = SampleBloomUpB(InUVs[4].zw, EyeIndex).rgb;
	half3 B3 = SampleBloomUpB(InUVs[5].xy, EyeIndex).rgb;
	half3 B4 = SampleBloomUpB(InUVs[5].zw, EyeIndex).rgb;
	half3 B5 = SampleBloomUpB(InUVs[6].xy, EyeIndex).rgb;
	half3 B6 = SampleBloomUpB(InUVs[6].zw, EyeIndex).rgb;
	half3 B7 = SampleBloomUpB(InUVs[7].xy, EyeIndex).rgb;

	// A is the same size source.
	half3 WA = BloomTintA.rgb;
	// B is the upsampled source.
	half3 WB = BloomTintB.rgb;

	OutColor.rgb =
		A0 * WA +
		A1 * WA +
		A2 * WA +
		A3 * WA +
		A4 * WA +
		A5 * WA +
		A6 * WA +
		A7 * WA +
		B0 * WB +
		B1 * WB +
		B2 * WB +
		B3 * WB +
		B4 * WB +
		B5 * WB +
		B6 * WB +
		B7 * WB;

	OutColor.a = 0;
}

//
// Near Setup - Generate near dilation for DOF.
//

// Emits the center UV plus eight UVs offset by up to one texel around it.
void DofNearVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords2 : TEXCOORD0,
	out float4 OutTexCoords4[4] : TEXCOORD1,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	OutTexCoords2 = InTexCoord;
	OutTexCoords4[0].xy = InTexCoord + BufferSizeAndInvSize.zw * float2(-0.5,-1.0);
	OutTexCoords4[0].zw = InTexCoord + BufferSizeAndInvSize.zw * float2( 1.0,-0.5);
	OutTexCoords4[1].xy = InTexCoord + BufferSizeAndInvSize.zw * float2( 0.5, 1.0);
	OutTexCoords4[1].zw = InTexCoord + BufferSizeAndInvSize.zw * float2(-1.0, 0.5);
	OutTexCoords4[2].xy = InTexCoord + BufferSizeAndInvSize.zw * float2( 0.5,-1.0);
	OutTexCoords4[2].zw = InTexCoord + BufferSizeAndInvSize.zw * float2( 1.0, 0.5);
	OutTexCoords4[3].xy = InTexCoord + BufferSizeAndInvSize.zw * float2(-0.5, 1.0);
	OutTexCoords4[3].zw = InTexCoord + BufferSizeAndInvSize.zw * float2(-1.0,-0.5);
}

// Blurs the reversed (1 - value) sun shaft / DOF term over nine taps and writes the square
// root of the weighted average.
void DofNearPS_Mobile(
	float2 InUVs2 : TEXCOORD0,
	float4 InUVs[4] : TEXCOORD1,
	in FStereoPSInput StereoInput,
	out HALF_TYPE OutColor : SV_Target0
	)
{
	half N0 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs2).r;
	half N1 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0].xy).r;
	half N2 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0].zw).r;
	half N3 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1].xy).r;
	half N4 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1].zw).r;
	half N5 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2].xy).r;
	half N6 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2].zw).r;
	half N7 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3].xy).r;
	half N8 = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3].zw).r;

	// Remove sunshaft intensity component and reverse.
#if MOBILE_USESUN
	N0 = saturate(1.0 - N0);
	N1 = saturate(1.0 - N1);
	N2 = saturate(1.0 - N2);
	N3 = saturate(1.0 - N3);
	N4 = saturate(1.0 - N4);
	N5 = saturate(1.0 - N5);
	N6 = saturate(1.0 - N6);
	N7 = saturate(1.0 - N7);
	N8 = saturate(1.0 - N8);
#else
	// If no sun-shafts then don't need the saturate.
	N0 = 1.0 - N0;
	N1 = 1.0 - N1;
	N2 = 1.0 - N2;
	N3 = 1.0 - N3;
	N4 = 1.0 - N4;
	N5 = 1.0 - N5;
	N6 = 1.0 - N6;
	N7 = 1.0 - N7;
	N8 = 1.0 - N8;
#endif

	// The first sample is 1/4 the size as the rest of the samples.
	half Out = (N0 * 0.25 + N1 + N2 + N3 + N4 + N5 + N6 + N7 + N8) / 8.25;

	// sqrt is only taken for strictly positive values.
	if(Out > 0.0)
	{
		Out = sqrt(Out);
	}

	OutColor = Out;
}

//
// DOF Setup - Downsample to 1/4 area
//

// Emits one UV derived from the output position (for the near dilation texture) and four
// half-texel offset UVs for the scene color / DOF term taps.
void DofDownVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[5] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	// Near position fixed to use UV based on output position.
	OutTexCoords[0] = OutPosition.xy * float2(0.5,-0.5) + 0.5;

	// Other source UVs based on possible non-full texture.
	OutTexCoords[1] = InTexCoord + BufferSizeAndInvSize.zw * float2(-0.5, -0.5);
	OutTexCoords[2] = InTexCoord + BufferSizeAndInvSize.zw * float2( 0.5, -0.5);
	OutTexCoords[3] = InTexCoord + BufferSizeAndInvSize.zw * float2(-0.5,  0.5);
	OutTexCoords[4] = InTexCoord + BufferSizeAndInvSize.zw * float2( 0.5,  0.5);
}

// Downsamples four scene color taps, weighting each by its circle of confusion, and writes
// color pre-multiplied by the resulting circle of confusion (which is stored in alpha).
void DofDownPS_Mobile(
	float2 InUVs[5] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	// This shader needs float precision to work.

	// Fetch near dilation and scale to (0 to 16384.0) range.
	float N = DofNearTexture.Sample(DofNearSampler, InUVs[0]).r * 16384.0;

	const uint EyeIndex = GetEyeIndex(StereoInput);

	float4 A = SampleSceneColor(InUVs[1], EyeIndex);
	float4 B = SampleSceneColor(InUVs[2], EyeIndex);
	float4 C = SampleSceneColor(InUVs[3], EyeIndex);
	float4 D = SampleSceneColor(InUVs[4], EyeIndex);

	// Alpha of each tap is replaced by the sun shaft / DOF term at the same UV.
	A.a = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1]).r;
	B.a = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2]).r;
	C.a = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3]).r;
	D.a = SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[4]).r;

#if MOBILE_USESUN
	// The {0.0 to 1.0} range is focus.
	// The {1.0 to 65504.0} range is light shaft source intensity (always at fully out of focus).
	// Must clamp back to {0.0 to 1.0} range.
	A.a = min(1.0, A.a);
	B.a = min(1.0, B.a);
	C.a = min(1.0, C.a);
	D.a = min(1.0, D.a);
#endif

	// To support near DOF the {0.0 to 1.0} maps to {-16384.0 to 16384.0}.
	A.a = A.a * (2.0 * 16384.0) - 16384.0;
	B.a = B.a * (2.0 * 16384.0) - 16384.0;
	C.a = C.a * (2.0 * 16384.0) - 16384.0;
	D.a = D.a * (2.0 * 16384.0) - 16384.0;

	// Make sure there are no zeros.
	// Alpha ends up as circle of confusion size.
	// Near dilation factor applied here.
	// The 1/8 factor is to workaround mobile hardware lack of precision.
	A.a = max(N, abs(A.a) + 1.0/8.0);
	B.a = max(N, abs(B.a) + 1.0/8.0);
	C.a = max(N, abs(C.a) + 1.0/8.0);
	D.a = max(N, abs(D.a) + 1.0/8.0);

	// Mix weighted by circle of confusion.
	// This tends to erode the effect of more infocus samples (removes bleeding artifacts).
	OutColor = ((A * A.a) + (B * B.a) + (C * C.a) + (D * D.a)) * rcp(A.a + B.a + C.a + D.a);

	// Clamp rgb to prevent overflow during scale.
	OutColor.rgb = min(OutColor.rgb, 65504.0/16384.125);
	OutColor.rgb *= OutColor.a;
}

//
// DOF Blur
//

// DOF BOKEH SAMPLING PATTERN
// --------------------
// # = bilinear tap
// * = the single point tap to get the current pixel
//
//       1 1
//   4 4 1 * 2 2
//   4 4 3 3 2 2
//       3 3
//
// This pattern is very important.
// All bilinear taps are not always exactly in the middle of 4 texels.
// It is an asymmetric pattern (minimize overlap, allow for different radii).
#define DOF_1 half2(-0.500, 0.50)
#define DOF_2 half2( 0.75,-0.50)
#define DOF_3 half2(-0.500,-1.25)
#define DOF_4 half2(-1.75,-0.50)

// This will compute a constant half2 from a constant half2.
// This computes the soft blend factor for intersection test
// (does circle of confusion intersect pixel center).
// Large feather here to make transitions smooth with a few samples.
half2 DofIntersectionScaleBias(half2 Offset)
{
	// Working in distances (sqrt of the squared length), normalized by the maximum tap distance.
	half RcpMaxDst = rcp(sqrt(dot(DOF_4, DOF_4)));
	half Dst0 = sqrt(dot(DOF_1, DOF_1));
	half Dst1 = sqrt(dot(Offset, Offset));

	// The feather starts at the first tap's distance, but never more than 0.25 before this tap.
	Dst0 = max(Dst0, Dst1 - 0.25);
	Dst0 *= RcpMaxDst;
	Dst1 *= RcpMaxDst;

	half Scale = 1.0/(Dst1 - Dst0);
	half Bias = (-Dst0) * Scale;
	return half2(Scale, Bias);
}

// Soft 0..1 factor for whether a tap with circle of confusion CocTap (still carrying the
// 16384.0 scale) reaches the pixel center from the given tap offset.
half DofIntersect(half CocTap, half2 Offset)
{
	half2 ConstScaleBias = DofIntersectionScaleBias(Offset);

	// Undo the scale factor.
	ConstScaleBias.x *= 1.0/16384.0;

	return saturate(CocTap * ConstScaleBias.x + ConstScaleBias.y);
}

// Weight shared by the outer taps: ramps from 0 to 1 as Coc (still carrying the 16384.0 scale)
// grows from the normalized DOF_3 distance to the normalized DOF_4 distance.
half DofWeight(half Coc)
{
	half Dst0 = sqrt(dot(DOF_3, DOF_3)) / sqrt(dot(DOF_4, DOF_4));
	half Dst1 = sqrt(dot(DOF_4, DOF_4)) / sqrt(dot(DOF_4, DOF_4));

	half Scale = 1.0/(Dst1 - Dst0);
	half Bias = (-Dst0) * Scale;

	// Undo the 16384.0 scale factor in this constant.
	Scale *= 1.0/16384.0;

	// Scale and Bias should be compile time constants.
	return saturate(Coc * Scale + Bias);
}

// Emits the center UV and the four bokeh tap UVs (DOF_1..DOF_4, in texels).
void DofBlurVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[5] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	OutTexCoords[0] = InTexCoord.xy;
	OutTexCoords[1] = InTexCoord.xy + float2(DOF_1) * BufferSizeAndInvSize.zw;
	OutTexCoords[2] = InTexCoord.xy + float2(DOF_2) * BufferSizeAndInvSize.zw;
	OutTexCoords[3] = InTexCoord.xy + float2(DOF_3) * BufferSizeAndInvSize.zw;
	OutTexCoords[4] = InTexCoord.xy + float2(DOF_4) * BufferSizeAndInvSize.zw;
}

// Weighted blend of the four bokeh taps; alpha carries the near dilation value.
void DofBlurPS_Mobile(
	float2 InUVs[5] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	// Near dilation size is copied into alpha for the tonemapper pass.
	OutColor.a = DofNearTexture.Sample(DofNearSampler, InUVs[0]).r;

	half4 C1 = DofDownTexture.Sample(DofDownSampler, InUVs[1]);
	half4 C2 = DofDownTexture.Sample(DofDownSampler, InUVs[2]);
	half4 C3 = DofDownTexture.Sample(DofDownSampler, InUVs[3]);
	half4 C4 = DofDownTexture.Sample(DofDownSampler, InUVs[4]);

	// Restore color (colors are weighted by CoC to help remove bleeding).
	C1.rgb *= rcp(C1.a);
	C2.rgb *= rcp(C2.a);
	C3.rgb *= rcp(C3.a);
	C4.rgb *= rcp(C4.a);

	// First bilinear tap always has 1.0 weight, the rest are weighted.
	half W1 = 1.0, W2, W3, W4;
	W2 = W3 = W4 = DofWeight(C1.a);

	// Remove contribution of taps whose circle of confusion does not intersect the pixel.
	W2 *= DofIntersect(C2.a, DOF_2);
	W3 *= DofIntersect(C3.a, DOF_3);
	W4 *= DofIntersect(C4.a, DOF_4);

	OutColor.rgb = ((C1.rgb * W1) + (C2.rgb * W2) + (C3.rgb * W3) + (C4.rgb * W4)) * rcp(W1 + W2 + W3 + W4);
}

// Integrate DOF

// Emits the scene UV (.xy), the DOF blur UV (.zw) and a half-texel shifted "fine" scene UV.
void IntegrateDOFVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoords : TEXCOORD0,
	out float2 OutFineDofGrain : TEXCOORD1,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, OutTexCoords.xy);

	// Fine adjustment is inside the possible non-full viewport in the full resolution texture.
	OutFineDofGrain.xy = OutTexCoords.xy + BufferSizeAndInvSize.zw * float2(-0.5, 0.5);

	// Want grain and a second UV based on the knowledge that the source texture has a full viewport.
	float2 FullViewUV = OutPosition.xy * float2(0.5, -0.5) + 0.5;

	// For DOF attempt to undo sampling bias for the first transition region.
	// This is better for the fine transition, breaks down for the larger bokeh.
	// This is the best compromise for mobile using 4 bilinear taps only.
	OutTexCoords.zw = FullViewUV.xy + DofBlurSizeAndInvSize.zw * float2(0.25, -0.5);
}

// Blends the sharp scene color with the fine (half-texel shifted) and coarse (blurred) DOF
// colors according to the circle of confusion. Scene alpha is passed through unchanged.
void IntegrateDOFPS_Mobile(
	in float4 TexCoords : TEXCOORD0,
	in float2 FineDofGrain : TEXCOORD1,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	const uint EyeIndex = GetEyeIndex(StereoInput);

	half4 SceneColor = SampleSceneColor(TexCoords.xy, EyeIndex);
	half4 DofFine = SampleSceneColor(FineDofGrain.xy, EyeIndex);
	half4 Dof = Texture2DSample(DofBlurTexture, DofBlurSampler, TexCoords.zw);
	half DofCoc = Texture2DSample(SunShaftAndDofTexture, SunShaftAndDofSampler, TexCoords.xy).r;

	// Convert alpha back into circle of confusion.
	// (The original scene alpha is written out first because SceneColor.a is reused below.)
	OutColor.a = SceneColor.a;
	SceneColor.a = max(Dof.a, abs(DofCoc * 2.0 - 1.0));

	// Convert circle of confusion into blend factors.
	half2 ScaleBias = CocBlendScaleBias(); // Constant.
	half DofAmount = saturate(SceneColor.a * ScaleBias.x + ScaleBias.y);
	half2 ScaleBias2 = CocBlendScaleBiasFine(); // Constant.
	half DofAmountFine = saturate(SceneColor.a * ScaleBias2.x + ScaleBias2.y);

	// Blend in fine DOF.
	OutColor.rgb = lerp(SceneColor.rgb, DofFine.rgb, DofAmountFine);

	// Blend in coarse DOF.
	OutColor.rgb = lerp(OutColor.rgb, Dof.rgb, DofAmount);
}

//
// First sun shaft blur and move sun intensity from alpha to single channel output.
//

// Maps x to x / (1 + x).
half HighlightCompression(half Channel)
{
	return Channel * rcp(1.0 + Channel);
}

// Maps x to x / (1 - x), the inverse of HighlightCompression.
half HighlightDecompression(half Channel)
{
	return Channel * rcp(1.0 - Channel);
}

// Convert from [-1 to 1] to view rectangle in texture which is somewhere in [0 to 1].
float2 SunShaftPosToUV(float2 Pos)
{
//	return (Pos.xy * ScreenPosToPixel.xy + ScreenPosToPixel.zw + 0.5f) * PostprocessInput0Size.zw;
	return Pos.xy * float2(0.5,-0.5) + 0.5;
}

// Center of light shaft.
float2 LightShaftCenter;

// Position in {-1 to 1} space.
float2 SunPos()
{
	return LightShaftCenter.xy;
}

// UV of the point that lies `amount` of the way from the sun position to InPosition
// (amount = 0 gives the sun itself, amount = 1 gives InPosition).
float2 SunShaftRect(float2 InPosition, float amount)
{
	const float2 BlendedPos = lerp(SunPos(), InPosition, amount);
	return SunShaftPosToUV(BlendedPos);
}

// Positions for sun shaft steps.
// The very tight first position makes direct light to eye bloom a little.
// Otherwise want even spacing.
#define SUN_P0 (31.0/32.0)
#define SUN_P1 (27.0/32.0)
#define SUN_P2 (23.0/32.0)
#define SUN_P3 (19.0/32.0)
#define SUN_P4 (15.0/32.0)
#define SUN_P5 (11.0/32.0)
#define SUN_P6 (7.0/32.0)
// SUN_P7 is fixed at zero.

// Step scale of the first sun shaft blur pass.
#define SUN_M 1.0

// Emits seven UVs stepped between the sun position and this vertex (steps SUN_P0..SUN_P6
// scaled by SUN_M), plus the unmodified UV as the eighth tap.
void SunAlphaVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[8] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	// Taps 0..6: pulled towards the sun position by SUN_Pn * SUN_M.
	OutTexCoords[0] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P0 * SUN_M);
	OutTexCoords[1] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P1 * SUN_M);
	OutTexCoords[2] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P2 * SUN_M);
	OutTexCoords[3] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P3 * SUN_M);
	OutTexCoords[4] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P4 * SUN_M);
	OutTexCoords[5] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P5 * SUN_M);
	OutTexCoords[6] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P6 * SUN_M);

	// Tap 7: the pixel itself.
	OutTexCoords[7] = InTexCoord.xy;
}

#undef SUN_M

// Remove the +1 bias.
// This sets negatives to zero because 0-1 is used for DOF.
half SunUnBias(half A)
{
	// With depth of field enabled the value carries a +1 bias; without it the value is used as is.
	#if MOBILE_USEDOF
		return max(0.0, A - 1.0);
	#else
		return A;
	#endif
}

// Pixel shader of the first sun shaft pass.
// Averages eight un-biased taps of SunShaftAndDofTexture.r (weight 1/8 each) and
// stores the result highlight-compressed.
void SunAlphaPS_Mobile(
	float2 InUVs[8] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF_TYPE OutColor : SV_Target0
	)
{
	OutColor =
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[0]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[1]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[2]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[3]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[4]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[5]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[6]).r) * 0.125 +
		SunUnBias(SunShaftAndDofTexture.Sample(SunShaftAndDofSampler, InUVs[7]).r) * 0.125;

	OutColor = HighlightCompression(OutColor);
}

//
// Second sun shaft blur.
//

// Step multiplier of the second sun shaft pass.
#define SUN_M 0.5

// Vertex shader of the second sun shaft pass.
// Same tap layout as the first pass, with the SUN_P* steps scaled by this pass's SUN_M.
void SunBlurVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float2 OutTexCoords[8] : TEXCOORD0,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	OutTexCoords[0] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P0 * SUN_M);
	OutTexCoords[1] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P1 * SUN_M);
	OutTexCoords[2] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P2 * SUN_M);
	OutTexCoords[3] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P3 * SUN_M);
	OutTexCoords[4] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P4 * SUN_M);
	OutTexCoords[5] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P5 * SUN_M);
	OutTexCoords[6] = SunShaftRect(OutPosition.xy, 1.0 - SUN_P6 * SUN_M);
	OutTexCoords[7] = InTexCoord.xy;
}

#undef SUN_M

// Pixel shader of the second sun shaft pass.
// Plain average (weight 1/8 each) of eight taps of SunAlphaTexture.r; the value stays
// in the compressed form written by the first pass.
void SunBlurPS_Mobile(
	float2 InUVs[8] : TEXCOORD0,
	in FStereoPSInput StereoInput,
	out HALF_TYPE OutColor : SV_Target0
	)
{
	OutColor =
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[0]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[1]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[2]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[3]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[4]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[5]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[6]).r * 0.125 +
		SunAlphaTexture.Sample(SunAlphaSampler, InUVs[7]).r * 0.125;
}

//
// Third sun shaft blur, composite with bloom, vignette.
//

// Step multiplier of the third sun shaft pass.
#define SUN_M 0.25

// Vertex shader of the merge pass.
//   OutTexCoordVignette.xy - unmodified texture coordinate.
//   OutTexCoordVignette.zw - VignetteSpace() of the output position.
//   OutTexCoords[0..5].xy  - six bloom taps on a circle around the texture coordinate.
//   OutTexCoords[0..5].zw  - sun shaft taps for SUN_P0..SUN_P5.
//   OutTexCoords[6].xy     - sun shaft tap for SUN_P6; .zw is unused and zeroed.
void SunMergeVS_Mobile(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	in FStereoVSInput StereoInput,
	out float4 OutTexCoordVignette : TEXCOORD0,
	out float4 OutTexCoords[7] : TEXCOORD1,
	out FStereoVSOutput StereoOutput,
	out float4 OutPosition : SV_POSITION
	)
{
	StereoSetupVS(StereoInput, StereoOutput);
	DrawRectangle(InPosition, InTexCoord, OutPosition, InTexCoord);

	OutTexCoordVignette.xy = InTexCoord.xy;
	OutTexCoordVignette.zw = VignetteSpace(OutPosition.xy);

	// Six-point circle: phase offset and radius, the radius being in BloomUp texels.
	const float Start = 2.0/6.0;
	const float Scale = 0.66/2.0;

	OutTexCoords[0].xy = InTexCoord.xy + Circle(Start, 6.0, 0.0) * Scale * BloomUpSizeAndInvSize.zw;
	OutTexCoords[1].xy = InTexCoord.xy + Circle(Start, 6.0, 1.0) * Scale * BloomUpSizeAndInvSize.zw;
	OutTexCoords[2].xy = InTexCoord.xy + Circle(Start, 6.0, 2.0) * Scale * BloomUpSizeAndInvSize.zw;
	OutTexCoords[3].xy = InTexCoord.xy + Circle(Start, 6.0, 3.0) * Scale * BloomUpSizeAndInvSize.zw;
	OutTexCoords[4].xy = InTexCoord.xy + Circle(Start, 6.0, 4.0) * Scale * BloomUpSizeAndInvSize.zw;
	OutTexCoords[5].xy = InTexCoord.xy + Circle(Start, 6.0, 5.0) * Scale * BloomUpSizeAndInvSize.zw;

	OutTexCoords[0].zw = SunShaftRect(OutPosition.xy, 1.0 - SUN_P0 * SUN_M);
	OutTexCoords[1].zw = SunShaftRect(OutPosition.xy, 1.0 - SUN_P1 * SUN_M);
	OutTexCoords[2].zw = SunShaftRect(OutPosition.xy, 1.0 - SUN_P2 * SUN_M);
	OutTexCoords[3].zw = SunShaftRect(OutPosition.xy, 1.0 - SUN_P3 * SUN_M);
	OutTexCoords[4].zw = SunShaftRect(OutPosition.xy, 1.0 - SUN_P4 * SUN_M);
	OutTexCoords[5].zw = SunShaftRect(OutPosition.xy, 1.0 - SUN_P5 * SUN_M);
	OutTexCoords[6].xy = SunShaftRect(OutPosition.xy, 1.0 - SUN_P6 * SUN_M);
	OutTexCoords[6].zw = float2(0.0, 0.0);
}

#undef SUN_M

float4 SunColorVignetteIntensity;
float3 BloomColor;

Texture2D BloomDirtMaskTexture;
SamplerState BloomDirtMaskSampler;
float4 BloomDirtMaskTint;

// Pixel shader of the merge pass: combines bloom (MOBILE_USEBLOOM) and the third
// sun shaft blur (MOBILE_USESUN) into one colour. Alpha is always written as 1.
void SunMergePS_Mobile(
	float4 InUVVignette : TEXCOORD0,
	float4 InUVs[7] : TEXCOORD1,
	in FStereoPSInput StereoInput,
	out HALF4_TYPE OutColor : SV_Target0
	)
{
	const uint EyeIndex = GetEyeIndex(StereoInput);

	#if MOBILE_USEBLOOM
		float Scale1 = 1.0/7.0;
		float Scale2 = 1.0/7.0;

		// Weighted average of the centre tap and the six circle taps.
		// Only rgb is kept, so only rgb is summed (the previous .rgba sum was
		// implicitly truncated when assigned to a half3).
		half3 Bloom2 = (
			SampleBloomSetup(InUVVignette.xy, EyeIndex).rgb * Scale1 +
			SampleBloomSetup(InUVs[0].xy, EyeIndex).rgb * Scale2 +
			SampleBloomSetup(InUVs[1].xy, EyeIndex).rgb * Scale2 +
			SampleBloomSetup(InUVs[2].xy, EyeIndex).rgb * Scale2 +
			SampleBloomSetup(InUVs[3].xy, EyeIndex).rgb * Scale2 +
			SampleBloomSetup(InUVs[4].xy, EyeIndex).rgb * Scale2 +
			SampleBloomSetup(InUVs[5].xy, EyeIndex).rgb * Scale2) * rcp(Scale1 * 1.0 + Scale2 * 6.0);

		OutColor.rgb = SampleBloomUp(InUVVignette.xy, EyeIndex);

		half3 BloomDirtMaskColor = BloomDirtMaskTexture.Sample(BloomDirtMaskSampler, InUVVignette.xy).rgb * BloomDirtMaskTint.rgb;

		// Have 5 layers on mobile.
		half Scale3 = 1.0/5.0;

		// scale existing color first
		OutColor.rgb *= Scale3;

		// add scaled bloom separately to prevent overflow before scaling
		// NOTE(review): the dirt mask term reads OutColor.rgb as it is before this
		// statement, i.e. it tints only the up-sampled bloom and not the Bloom2
		// contribution - confirm that this is the intended combination.
		OutColor.rgb += Bloom2 * Scale3 * BloomColor + BloomDirtMaskColor * OutColor.rgb;
	#else
		OutColor.rgb = half3(0.0, 0.0, 0.0);
	#endif

	#if MOBILE_USESUN
		// Third blur: eight taps, weight 1/8 each. The last two taps come from
		// InUVs[6].xy and from the pixel's own UV.
		half Sun =
			SunBlurTexture.Sample(SunBlurSampler, InUVs[0].zw).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVs[1].zw).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVs[2].zw).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVs[3].zw).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVs[4].zw).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVs[5].zw).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVs[6].xy).r * 0.125 +
			SunBlurTexture.Sample(SunBlurSampler, InUVVignette.xy).r * 0.125;

		// Undo the compression applied by the first pass before tinting.
		Sun = HighlightDecompression(Sun);
		OutColor.rgb += SunColorVignetteIntensity.rgb * Sun;
	#endif

	OutColor.a = 1.0f;
}

#undef SUN_P0
#undef SUN_P1
#undef SUN_P2
#undef SUN_P3
#undef SUN_P4
#undef SUN_P5
#undef SUN_P6

// EyeAdaptation
// Element type restored: StructuredBuffer needs an explicit element type and the
// shaders in this file read `.x` of element 0 and write float4 elements.
StructuredBuffer<float4> EyeAdaptationBuffer;

// Scale used when float sums are accumulated through integer atomics, and its inverse.
static const float FLOAT_PRECISION = 1e+5;
static const float INV_FLOAT_PRECISION = 1e-5;

#if CLEAR_UAV_UINT_COMPUTE_SHADER
uint NumEntries;
uint ClearValue;
// Element type restored: the buffer is filled with the uint ClearValue.
RWBuffer<uint> UAV;

// Writes ClearValue into the first NumEntries elements of UAV, one element per thread.
[numthreads(64, 1, 1)]
void ClearUAVUIntCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	// Threads past the end of the buffer do nothing.
	if (DispatchThreadId.x < NumEntries)
	{
		UAV[DispatchThreadId.x] = ClearValue;
	}
}
#endif

#if AVERAGE_LUMINANCE_COMPUTE_SHADER || HISTOGRAM_COMPUTE_SHADER
// xy: source size in texels, zw: reciprocal of the size.
float4 SourceSizeAndInvSize;
#endif

#if AVERAGE_LUMINANCE_COMPUTE_SHADER
// Element type restored: the buffer is the destination of InterlockedAdd with uint values.
RWBuffer<uint> OutputUIntBuffer;

// One (weighted luminance sum, weight sum) pair per thread of the group.
groupshared float2 SharedLuminance[THREADGROUP_SIZEX * THREADGROUP_SIZEY];

// Each thread covers a tile of (2 * LOOP_SIZEX) x (2 * LOOP_SIZEY) texels of the input.
const static uint2 TileSize = uint2(LOOP_SIZEX, LOOP_SIZEY) * 2; // Multiply 2 because we use bilinear sampler
const static uint ThreadGroupSize = THREADGROUP_SIZEX * THREADGROUP_SIZEY;

// Accumulates a weighted luminance sum and the matching weight sum over the input.
// Every thread takes LOOP_SIZEX x LOOP_SIZEY bilinear samples (each covering 2x2 texels),
// the group reduces the per-thread sums in shared memory, and the group total is added
// to OutputUIntBuffer[0] (weighted sum) and OutputUIntBuffer[1] (weight sum) as
// fixed-point integers scaled by FLOAT_PRECISION.
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void AverageLuminance_MainCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint GroupIndex : SV_GroupIndex)
{
	// Top left input texel of the tile handled by this thread.
	uint2 LeftTop = DispatchThreadId.xy * TileSize;

	uint2 Tile, TexelPos;
	// Full float precision: a half cannot represent texel indices above 2048 exactly,
	// and Histogram_MainCS computes the same coordinate in float2.
	float2 BufferUV;
	float Weight;
	float Value;
	float2 GroupLuminance = float2(0.0f, 0.0f);

	LOOP for (uint y = 0; y < LOOP_SIZEY; ++y)
	{
		LOOP for (uint x = 0; x < LOOP_SIZEX; ++x)
		{
			Tile = uint2(x, y) * 2; // Multiply 2 because we use bilinear sampler
			TexelPos = LeftTop + Tile;

			// Skip samples that start outside the source.
			if (TexelPos.x < SourceSizeAndInvSize.x && TexelPos.y < SourceSizeAndInvSize.y)
			{
				BufferUV = (float2)TexelPos + float2(1.0f, 1.0f); // offset pixel pos by 1 to use bilinear filter
				BufferUV = BufferUV * SourceSizeAndInvSize.zw;

				// The screen weight never drops below 0.05.
				Weight = max(AdaptationWeightTexture(BufferUV), 0.05f);
				Value = SampleInput(BufferUV, 0).x;

				{
					// apply EyeAdaptation_BlackHistogramBucketInfluence using similar logic as Histogram method
					float fBucket = saturate(Value) * (HISTOGRAM_SIZE - 1);

					// Only the lower of the two straddling buckets matters here.
					uint Bucket0 = (uint)(fBucket);
					if (Bucket0 == 0)
					{
						Weight *= EyeAdaptation_BlackHistogramBucketInfluence;
					}
				}

				GroupLuminance.x += Value * Weight;
				GroupLuminance.y += Weight;
			}
		}
	}

	SharedLuminance[GroupIndex] = GroupLuminance;
	GroupMemoryBarrierWithGroupSync();

	// Pairwise reduction: each step folds the upper half of the live range onto the lower half.
	UNROLL for (uint cutoff = (ThreadGroupSize >> 1); cutoff > 0; cutoff >>= 1)
	{
		if (GroupIndex < cutoff)
		{
			SharedLuminance[GroupIndex] += SharedLuminance[GroupIndex + cutoff];
		}

		// No barrier is issued for the last steps (cutoff <= 4).
		if (cutoff > 4) // https://www.anandtech.com/show/12834/arm-announces-the-mali-g76-scaling-up-bifrost/2 said 4 is the wavefront for bifrost mali gpu
		{
			GroupMemoryBarrierWithGroupSync();
		}
	}

	// Thread 0 publishes the weighted sum (.x), thread 1 the weight sum (.y).
	if (GroupIndex <= 1)
	{
		float NormalizeFactor = SourceSizeAndInvSize.z * SourceSizeAndInvSize.w * 4.0f; // multiply 4 because bilinear filter, so sample only the 1/4 resolution
		uint2 LuminanceInt = SharedLuminance[0] * NormalizeFactor * FLOAT_PRECISION;
		InterlockedAdd(OutputUIntBuffer[GroupIndex], LuminanceInt[GroupIndex]);
	}
}
#endif

// Element 0: weighted log-luminance sum, element 1: weight sum.
// NOTE(review): the element type is not spelled out in this declaration. Presumably this
// is the read view of OutputUIntBuffer, which is written as uint - confirm the declared
// type matches the bound resource.
Buffer LogLuminanceWeightBuffer;

// Computes the eye adaptation values from the average log luminance.
// Returns:
//   x - exposure scale after the time based blend
//   y - exposure scale of the target (unblended) luminance
//   z - average scene luminance
//   w - exposure compensation without the grey multiplier
float4 BasicEyeAdaptation_Mobile()
{
	float LogLumSum = LogLuminanceWeightBuffer[0];
	float WeightSum = LogLuminanceWeightBuffer[1];

	// Both sums carry the same scale, so it cancels in the ratio.
	// A zero weight sum falls back to 1.
	float LogLumAve = WeightSum == 0.0f ? 1.0f : (LogLumSum / WeightSum);

	// Correct for [0,1] scaling
	LogLumAve = (LogLumAve - EyeAdaptation_HistogramBias) / EyeAdaptation_HistogramScale;

	// Convert LogLuminanceAverage to Average Intensity
	const float AverageSceneLuminance = exp2(LogLumAve);

	const float MiddleGreyExposureCompensation = EyeAdaptation_ExposureCompensationSettings * EyeAdaptation_ExposureCompensationCurve * EyeAdaptation_GreyMult; // we want the average luminance remapped to 0.18, not 1.0

	const float ClampedLumAve = clamp(AverageSceneLuminance, EyeAdaptation_MinAverageLuminance, EyeAdaptation_MaxAverageLuminance);

	// The Exposure Scale (and thus intensity) used in the previous frame
	const float ExposureScaleOld = EyeAdaptationBuffer[0].x;
	// A stored scale of 0 is treated as 1 to avoid dividing by zero.
	const float LuminanceAveOld = MiddleGreyExposureCompensation / (ExposureScaleOld != 0.0f ? ExposureScaleOld : 1.0f);

	// Time-based exponential blend of the intensity to allow the eye adaptation to ramp up over a few frames.
	const float SmoothedLuminance = ComputeEyeAdaptation(LuminanceAveOld, ClampedLumAve, EyeAdaptation_DeltaWorldTime);

	const float SmoothedExposureScale = 1.0f / max(0.0001f, SmoothedLuminance);
	const float TargetExposureScale = 1.0f / max(0.0001f, ClampedLumAve);

	float4 OutColor;
	// Output the number that will rescale the image intensity
	OutColor.x = MiddleGreyExposureCompensation * SmoothedExposureScale;
	// Output the target value
	OutColor.y = MiddleGreyExposureCompensation * TargetExposureScale;
	OutColor.z = AverageSceneLuminance;
	OutColor.w = MiddleGreyExposureCompensation / EyeAdaptation_GreyMult;

	return OutColor;
}

#if BASIC_EYEADAPTATION_COMPUTE_SHADER
// Element type restored: RWStructuredBuffer needs an explicit element type and the
// entry point stores float4 values.
RWStructuredBuffer<float4> OutputBuffer;

// Single-thread entry point: element 0 receives the adaptation result,
// element 1 the constant (1, 0, 0, 0).
[numthreads(1, 1, 1)]
void BasicEyeAdaptationCS_Mobile()
{
	OutputBuffer[0] = BasicEyeAdaptation_Mobile();
	OutputBuffer[1] = float4(1.0f, 0.0f, 0.0f, 0.0f);
}
#endif

const static float InvHistogramSize = 1.0f / (float)HISTOGRAM_SIZE;
const static float InvHistogramSizeMinusOne = 1.0f / (float)(HISTOGRAM_SIZE - 1);

#if HISTOGRAM_COMPUTE_SHADER
// Output histogram buffer (UAV)
// Element type restored: the buffer is the destination of InterlockedAdd with uint values.
RWBuffer<uint> RWHistogramBuffer;

// Number of float4 elements needed to hold one histogram.
const static uint QUARTER_HISTOGRAM_SIZE = HISTOGRAM_SIZE / 4;

// Each thread covers a tile of (2 * LOOP_SIZEX) x (2 * LOOP_SIZEY) texels of the input.
const static uint2 TileSize = uint2(LOOP_SIZEX, LOOP_SIZEY) * 2; // Multiply 2 because we use bilinear sampler
const static uint ThreadGroupSize = THREADGROUP_SIZEX * THREADGROUP_SIZEY;

// THREADGROUP_SIZEX*THREADGROUP_SIZEY histograms of the size HISTOGRAM_SIZE
groupshared float4 SharedHistogram[(HISTOGRAM_SIZE / 4) * THREADGROUP_SIZEX * THREADGROUP_SIZEY];

// Adds one reduced histogram bin to RWHistogramBuffer.
// Reads the bin from the first float4 of its row in SharedHistogram (where the
// reduction in Histogram_MainCS leaves the total), scales it by
// NormalizeFactor * FLOAT_PRECISION and adds it atomically as an integer.
void WriteToHistogramBuffer(uint HitogramIndex, float NormalizeFactor)
{
	// Row (HitogramIndex / 4) holds four consecutive bins; pick the right component.
	uint4 LuminanceInt = SharedHistogram[(HitogramIndex / 4) * ThreadGroupSize] * NormalizeFactor * FLOAT_PRECISION;
	uint LuminanceIntIndex = HitogramIndex % 4;
	InterlockedAdd(RWHistogramBuffer[HitogramIndex], LuminanceInt[LuminanceIntIndex]);
}

// Builds the luminance histogram of the input.
// Every thread accumulates its samples into a private histogram in shared memory,
// the group then sums the private histograms and adds the totals to RWHistogramBuffer.
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void Histogram_MainCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint GroupIndex: SV_GroupIndex)
{
	// todo: can be cleared more efficiently
	// clear all THREADGROUP_SIZEX*THREADGROUP_SIZEY histograms
	// (each thread clears the histogram it is going to fill itself)
	UNROLL for (uint ClearIndex = 0; ClearIndex < QUARTER_HISTOGRAM_SIZE; ++ClearIndex)
	{
		SharedHistogram[ClearIndex * ThreadGroupSize + GroupIndex] = float4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	// Top left input texel of the tile handled by this thread.
	uint2 LeftTop = DispatchThreadId.xy * TileSize;
	uint HistogramSizeMinusOne = HISTOGRAM_SIZE - 1;

	uint2 Tile, TexelPos;
	float2 BufferUV;
	float LogLuminance, ScreenWeight, fBucket, Weight1, Weight0;
	uint x, y, Bucket0, Bucket1;

	// Accumulate all pixels into THREADGROUP_SIZEX*THREADGROUP_SIZEY histograms
	LOOP for (y = 0; y < LOOP_SIZEY; ++y)
	{
		LOOP for (x = 0; x < LOOP_SIZEX; ++x)
		{
			Tile = uint2(x, y) * 2; // Multiply 2 because we use bilinear sampler
			TexelPos = LeftTop + Tile;

			// Skip samples that start outside the source.
			if(TexelPos.x < SourceSizeAndInvSize.x && TexelPos.y < SourceSizeAndInvSize.y)
			{
				BufferUV = (float2)TexelPos + float2(1.0f, 1.0f); // offset pixel pos by 1 to use bilinear filter
				BufferUV = BufferUV * SourceSizeAndInvSize.zw;

				LogLuminance = SampleInput(BufferUV, 0).x;
				ScreenWeight = AdaptationWeightTexture(BufferUV);

				// Map the normalized histogram position into texels.
				// The position is saturated first (as the average luminance path does) so that
				// a negative value cannot reach the unsigned conversion below.
				fBucket = saturate(LogLuminance) * HistogramSizeMinusOne;

				// Find two discrete buckets that straddle the continuous histogram position.
				Bucket0 = (uint)(fBucket);
				Bucket1 = Bucket0 + 1;

				Bucket0 = min(Bucket0, HistogramSizeMinusOne);
				Bucket1 = min(Bucket1, HistogramSizeMinusOne);

				// Weighted blend between the two buckets.
				Weight1 = frac(fBucket);
				Weight0 = 1.0f - Weight1;

				// When EyeAdaptation_BlackHistogramBucketInfluence=.0, the contribution to the first
				// (black) bucket is ignored. The main use case is so the black background pixels in the
				// editor have no effect. But if we have cases where pixel values can actually be black,
				// we want to set EyeAdaptation_BlackHistogramBucketInfluence=1.0.
				// This value is controlled by the cvar "r.EyeAdaptation.BlackHistogramBucketInfluence"
				if (Bucket0 == 0)
				{
					Weight0 *= EyeAdaptation_BlackHistogramBucketInfluence;
				}

				// Accumulate the weight to the nearby history buckets.
#if IOS
				// The IOS A8 and lower devices seems don't support using a float4 array as a two dimension array, separate the operations to two steps.
				float4 Histogram0 = float4(0.0f, 0.0f, 0.0f, 0.0f);
				float4 Histogram1 = float4(0.0f, 0.0f, 0.0f, 0.0f);
				Histogram0[Bucket0 % 4] = Weight0 * ScreenWeight;
				Histogram1[Bucket1 % 4] = Weight1 * ScreenWeight;
				SharedHistogram[(Bucket0 / 4) * ThreadGroupSize + GroupIndex] += Histogram0;
				SharedHistogram[(Bucket1 / 4) * ThreadGroupSize + GroupIndex] += Histogram1;
#else
				SharedHistogram[(Bucket0 / 4) * ThreadGroupSize + GroupIndex][Bucket0 % 4] += Weight0 * ScreenWeight;
				SharedHistogram[(Bucket1 / 4) * ThreadGroupSize + GroupIndex][Bucket1 % 4] += Weight1 * ScreenWeight;
#endif
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Reduction.
	//
	// float4 SharedHistogram[] is laid out like this:
	// [ float4                ] [ float4                ] ... [ float4                    ] [ float4               ] ...
	// [ Histogram 0, bins 0-3 ] [ Histogram 1, bins 0-3 ] ... [ Histogram N-1, bins 0-3   ] [ Histogram 0, bins 4-7] ...
	// where N is ThreadGroupSize.
	//
	// To reduce we use HISTOGRAM_SIZE/4 threads to accumulate, where thread 0 accumulates bins 0-3 from all histograms, thread 1 bins 4-7, etc.
	if (GroupIndex < QUARTER_HISTOGRAM_SIZE)
	{
		float4 Sum = float4(0.0f, 0.0f, 0.0f, 0.0f);

		UNROLL for (uint HistogramIndex = 0; HistogramIndex < ThreadGroupSize; ++HistogramIndex)
		{
			// Accumulate bins from histogram HistogramIndex
			Sum += SharedHistogram[GroupIndex * ThreadGroupSize + HistogramIndex];
		}

		// The total replaces the first entry of the row; WriteToHistogramBuffer reads it from there.
		SharedHistogram[GroupIndex * ThreadGroupSize] = Sum;
	}

	GroupMemoryBarrierWithGroupSync();

	// Publish every bin exactly once. Threads stride over the bins, so a group with fewer
	// threads than bins (e.g. the 32-thread groups used on LowSharedComputeMemory devices)
	// writes several bins per thread, while a group with at least HISTOGRAM_SIZE threads
	// writes one bin per thread as before.
	// The previous low-memory path wrote bins GroupIndex and GroupIndex * 2 + 1, which adds
	// some bins twice (bin 1 by threads 0 and 1) and, with 32 threads and 64 bins, never
	// writes the even bins from 32 upwards.
	float NormalizeFactor = SourceSizeAndInvSize.z * SourceSizeAndInvSize.w * 4.0f; // multiply 4 because bilinear filter, so sample only the 1/4 resolution
	for (uint Bin = GroupIndex; Bin < HISTOGRAM_SIZE; Bin += ThreadGroupSize)
	{
		WriteToHistogramBuffer(Bin, NormalizeFactor);
	}
}
#endif

#if HISTOGRAM_EYEADAPTATION_COMPUTE_SHADER

// NOTE(review): the element type is not spelled out in this declaration. Presumably this
// is the read view of the histogram written as uint by Histogram_MainCS - confirm the
// declared type matches the bound resource.
Buffer HistogramBuffer;

// Returns the stored value of one histogram bucket.
float GetHistogramBucket_Mobile(uint BucketIndex)
{
	return HistogramBuffer[BucketIndex];
}

// Returns the sum of all HISTOGRAM_SIZE buckets.
float ComputeHistogramSum_Mobile()
{
	float Sum = 0;

	for (uint i = 0; i < HISTOGRAM_SIZE; ++i)
	{
		Sum += GetHistogramBucket_Mobile(i);
	}

	return Sum;
}

// Average luminance of the histogram with the darkest and brightest parts removed.
// @param MinFractionSum e.g. ComputeHistogramSum() * 0.5f for 50% percentil
// @param MaxFractionSum e.g. ComputeHistogramSum() * 0.9f for 90% percentil
// @return exp2 of the weighted average log luminance of what remains
float ComputeAverageLuminanceWithoutOutlier_Mobile(float MinFractionSum, float MaxFractionSum)
{
	// x: weighted log luminance sum, y: weight sum
	float2 SumWithoutOutliers = 0;

	UNROLL for (uint i = 0; i < HISTOGRAM_SIZE; ++i)
	{
		float LocalValue = GetHistogramBucket_Mobile(i);

		// remove outlier at lower end
		float Sub = min(LocalValue, MinFractionSum);
		LocalValue = LocalValue - Sub;
		MinFractionSum -= Sub;
		MaxFractionSum -= Sub;

		// remove outlier at upper end
		LocalValue = min(LocalValue, MaxFractionSum);
		MaxFractionSum -= LocalValue;

		float LogLuminanceAtBucket = ComputeLogLuminanceFromHistogramPosition(float(i) * InvHistogramSizeMinusOne);

		SumWithoutOutliers += float2(LogLuminanceAtBucket, 1) * LocalValue;
	}

	// The weight sum is clamped away from zero before dividing.
	float AvgLogLuminance = SumWithoutOutliers.x / max(0.0001f, SumWithoutOutliers.y);

	return exp2(AvgLogLuminance);
}

// Average scene luminance between the EyeAdaptation_ExposureLowPercent and
// EyeAdaptation_ExposureHighPercent fractions of the histogram sum.
float ComputeEyeAdaptationExposure_Mobile()
{
	const float HistogramSum = ComputeHistogramSum_Mobile();
	const float AverageSceneLuminance = ComputeAverageLuminanceWithoutOutlier_Mobile(HistogramSum * EyeAdaptation_ExposureLowPercent, HistogramSum * EyeAdaptation_ExposureHighPercent);
	const float LumAve = AverageSceneLuminance;

	return LumAve;
}

// Element type restored: RWStructuredBuffer needs an explicit element type and the
// entry point stores float4 values.
RWStructuredBuffer<float4> OutputBuffer;

// Single-thread entry point of the histogram based eye adaptation.
// OutputBuffer[0]:
//   x - exposure scale after the time based blend
//   y - exposure scale of the target (unblended) exposure
//   z - average scene luminance
//   w - exposure compensation
// OutputBuffer[1] receives the constant (1, 0, 0, 0).
[numthreads(1, 1, 1)]
void HistogramEyeAdaptationCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	float4 OutColor = 0;

	const float AverageSceneLuminance = ComputeEyeAdaptationExposure_Mobile();

	const float TargetAverageLuminance = clamp(AverageSceneLuminance, EyeAdaptation_MinAverageLuminance, EyeAdaptation_MaxAverageLuminance);

	const float InvGreyMult = 5.5555555556f; // 1.0f/0.18f

	// White point luminance is target luminance divided by 0.18 (18% grey).
	const float TargetExposure = TargetAverageLuminance * InvGreyMult;

	// The exposure scale stored by the previous frame.
	const float OldExposureScale = EyeAdaptationBuffer[0].x;
	const float MiddleGreyExposureCompensation = EyeAdaptation_ExposureCompensationSettings * EyeAdaptation_ExposureCompensationCurve; // we want the average luminance remapped to 0.18, not 1.0
	// A stored scale of 0 is treated as 1 to avoid dividing by zero.
	const float OldExposure = MiddleGreyExposureCompensation / (OldExposureScale != 0 ? OldExposureScale : 1.0f);

	// eye adaptation changes over time
	const float EstimatedExposure = ComputeEyeAdaptation(OldExposure, TargetExposure, EyeAdaptation_DeltaWorldTime);

	// maybe make this an option to avoid hard clamping when transitioning between different exposure volumes?
	const float SmoothedExposure = clamp(EstimatedExposure, EyeAdaptation_MinAverageLuminance * InvGreyMult, EyeAdaptation_MaxAverageLuminance * InvGreyMult);

	const float SmoothedExposureScale = 1.0f / max(0.0001f, SmoothedExposure);
	const float TargetExposureScale = 1.0f / max(0.0001f, TargetExposure);

	OutColor.x = MiddleGreyExposureCompensation * SmoothedExposureScale;
	OutColor.y = MiddleGreyExposureCompensation * TargetExposureScale;
	OutColor.z = AverageSceneLuminance;
	OutColor.w = MiddleGreyExposureCompensation;

	OutputBuffer[0] = OutColor;
	OutputBuffer[1] = float4(1.0f, 0.0f, 0.0f, 0.0f);
}
#endif