// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	LUTBlender.usf: Filter pixel shader source.

	Note: Any changes to device encoding logic in CombineLUTsCommon() must also
	be made in PostProcessDeviceEncodingOnly.usf.
=============================================================================*/

#define USE_COLOR_MATRIX	1
#define USE_SHADOW_TINT		1
#define USE_CONTRAST		1

#include "Common.ush"
#include "PostProcessCommon.ush"
#include "TonemapCommon.ush"

// ---------------------------------------------------

// Texture0 is the neutral one and is computed in the shader
Texture2D Textures_1;
SamplerState Samplers_1;
Texture2D Textures_2;
SamplerState Samplers_2;
Texture2D Textures_3;
SamplerState Samplers_3;
Texture2D Textures_4;
SamplerState Samplers_4;

// 0 is for neutral, 1 for Textures_1, 2 for ...
DECLARE_SCALAR_ARRAY(float, LUTWeights, 5);
half3 ColorScale;
half4 OverlayColor;

uint bUseMobileTonemapper;
uint bIsTemperatureWhiteBalance;

float LUTSize;

float WhiteTemp;
float WhiteTint;

// Global color correction parameters
float4 ColorSaturation;
float4 ColorContrast;
float4 ColorGamma;
float4 ColorGain;
float4 ColorOffset;

// Per-range color correction parameters, combined with the global ones in ColorCorrectAll()
float4 ColorSaturationShadows;
float4 ColorContrastShadows;
float4 ColorGammaShadows;
float4 ColorGainShadows;
float4 ColorOffsetShadows;

float4 ColorSaturationMidtones;
float4 ColorContrastMidtones;
float4 ColorGammaMidtones;
float4 ColorGainMidtones;
float4 ColorOffsetMidtones;

float4 ColorSaturationHighlights;
float4 ColorContrastHighlights;
float4 ColorGammaHighlights;
float4 ColorGainHighlights;
float4 ColorOffsetHighlights;

// Luma thresholds used to build the shadow / highlight blend weights
float ColorCorrectionShadowsMax;
float ColorCorrectionHighlightsMin;
float ColorCorrectionHighlightsMax;

float4 ACESMinMaxData;
float4 ACESMidData;
float4 ACESCoefsLow_0;
float4 ACESCoefsHigh_0;
float ACESCoefsLow_4;
float ACESCoefsHigh_4;
float ACESSceneColorMultiplier;
float ACESGamutCompression;

// ACES 2.0
Texture2D ACESReachTable;
Texture2D ACESGamutTable;
Texture2D ACESGammaTable;

// White balance is skipped when WhiteTemp / WhiteTint are within Epsilon of these defaults
static const float DefaultTemperature = 6500.0;
static const float DefaultTint = 0.0;
static const float Epsilon = 1e-8;

/**
 * Applies saturation, contrast, gamma, gain and offset (in that order) to a color.
 *
 * For saturation, contrast, gamma and gain the effective per-channel value is xyz * w;
 * for offset it is xyz + w. Note that the parameters shadow the globals of the same name.
 *
 * @param WorkingColor		Color to correct; luma is computed with AP1_RGB2Y.
 * @param ColorSaturation	Blend factor between luma (0) and the input color (1).
 * @param ColorContrast		Exponent applied around a 0.18 pivot.
 * @param ColorGamma		Gamma; the color is raised to 1 / gamma.
 * @param ColorGain			Multiplier.
 * @param ColorOffset		Value added after the gain.
 * @return					The corrected color.
 */
float3 ColorCorrect( float3 WorkingColor,
	float4 ColorSaturation,
	float4 ColorContrast,
	float4 ColorGamma,
	float4 ColorGain,
	float4 ColorOffset )
{
	// TODO optimize
	float Luma = dot( WorkingColor, AP1_RGB2Y );
	// Clamp to >= 0 so the pow() calls below never see a negative base
	WorkingColor = max( 0, lerp( Luma.xxx, WorkingColor, ColorSaturation.xyz*ColorSaturation.w ) );
	WorkingColor = pow( WorkingColor * (1.0 / 0.18), ColorContrast.xyz*ColorContrast.w ) * 0.18;
	WorkingColor = pow( WorkingColor, 1.0 / (ColorGamma.xyz*ColorGamma.w) );
	WorkingColor = WorkingColor * (ColorGain.xyz * ColorGain.w) + (ColorOffset.xyz + ColorOffset.w);
	return WorkingColor;
}

/**
 * Nuke-style Color Correct.
 *
 * Runs ColorCorrect() three times (shadows, midtones, highlights), each with the
 * per-range parameters combined with the global ones (multiplied, except offsets
 * which are added), then blends the three results with luma-based weights that sum to 1.
 *
 * @param WorkingColor	Color to correct; luma is computed with AP1_RGB2Y.
 * @return				The blended shadow / midtone / highlight corrected color.
 */
float3 ColorCorrectAll( float3 WorkingColor )
{
	float Luma = dot( WorkingColor, AP1_RGB2Y );

	// Shadow CC
	float3 CCColorShadows = ColorCorrect(WorkingColor,
		ColorSaturationShadows*ColorSaturation,
		ColorContrastShadows*ColorContrast,
		ColorGammaShadows*ColorGamma,
		ColorGainShadows*ColorGain,
		ColorOffsetShadows+ColorOffset);
	float CCWeightShadows = 1- smoothstep(0, ColorCorrectionShadowsMax, Luma);

	// Highlight CC
	float3 CCColorHighlights = ColorCorrect(WorkingColor,
		ColorSaturationHighlights*ColorSaturation,
		ColorContrastHighlights*ColorContrast,
		ColorGammaHighlights*ColorGamma,
		ColorGainHighlights*ColorGain,
		ColorOffsetHighlights+ColorOffset);
	float CCWeightHighlights = smoothstep(ColorCorrectionHighlightsMin, ColorCorrectionHighlightsMax, Luma);

	// Midtone CC
	float3 CCColorMidtones = ColorCorrect(WorkingColor,
		ColorSaturationMidtones*ColorSaturation,
		ColorContrastMidtones*ColorContrast,
		ColorGammaMidtones*ColorGamma,
		ColorGainMidtones*ColorGain,
		ColorOffsetMidtones+ColorOffset);
	// Midtones take whatever weight shadows and highlights leave over
	float CCWeightMidtones = 1 - CCWeightShadows - CCWeightHighlights;

	// Blend Shadow, Midtone and Highlight CCs
	float3 WorkingColorSMH = CCColorShadows*CCWeightShadows + CCColorMidtones*CCWeightMidtones + CCColorHighlights*CCWeightHighlights;

	return WorkingColorSMH;
}

uint OutputDevice;
uint OutputGamut;
float OutputMaxLuminance;

/**
 * Returns the output device used to select the encoding branch.
 * When OUTPUT_DEVICE_SRGB is set at compile time this is always TONEMAPPER_OUTPUT_sRGB,
 * otherwise it is the OutputDevice shader parameter.
 */
uint GetOutputDevice()
{
#if OUTPUT_DEVICE_SRGB
	return TONEMAPPER_OUTPUT_sRGB;
#else
	return OutputDevice;
#endif
}

float BlueCorrection;
float ExpandGamut;
float ToneCurveAmount;

/**
 * Computes one texel of the combined color grading LUT.
 *
 * The texel position is turned into a neutral LUT color, decoded to linear, white
 * balanced, graded, tone mapped, optionally blended with the legacy LDR LUT textures
 * (BLENDCOUNT > 1) and finally encoded for the device returned by GetOutputDevice().
 *
 * @param InUV			UV of the texel inside the LUT (slice UV when USE_VOLUME_LUT == 1,
 *						UV in the unwrapped 2D LUT otherwise).
 * @param InLayerIndex	Slice index; only read when USE_VOLUME_LUT == 1.
 * @return				rgb = device encoded color divided by 1.05, a = 0.
 */
float4 CombineLUTsCommon(float2 InUV, uint InLayerIndex)
{
#if USE_VOLUME_LUT == 1
	// construct the neutral color from a 3d position volume texture
	float4 Neutral;
	{
		float2 UV = InUV - float2(0.5f / LUTSize, 0.5f / LUTSize);

		Neutral = float4(UV * LUTSize / (LUTSize - 1), InLayerIndex / (LUTSize - 1), 0);
	}
#else
	// construct the neutral color from a 2d position in 256x16
	float4 Neutral;
	{
		float2 UV = InUV;

		// 0.49999f instead of 0.5f to avoid getting into negative values
		UV -= float2(0.49999f / (LUTSize * LUTSize), 0.49999f / LUTSize);

		float Scale = LUTSize / (LUTSize - 1);

		float3 RGB;

		RGB.r = frac(UV.x * LUTSize);
		RGB.b = UV.x - RGB.r / LUTSize;
		RGB.g = UV.y;

		Neutral = float4(RGB * Scale, 0);
	}
#endif

	float4 OutColor = 0;

	const float3x3 AP1_2_sRGB = mul( XYZ_2_sRGB_MAT, mul( D60_2_D65_CAT, AP1_2_XYZ_MAT ) );
	const float3x3 AP0_2_AP1 = mul( XYZ_2_AP1_MAT, AP0_2_XYZ_MAT );
	const float3x3 AP1_2_AP0 = mul( XYZ_2_AP0_MAT, AP1_2_XYZ_MAT );

	const float3x3 AP1_2_Output = OuputGamutMappingMatrix( OutputGamut );

	float3 LUTEncodedColor = Neutral.rgb;
	float3 LinearColor;

	// Decode texture values as ST-2084 (Dolby PQ)
	BRANCH
	if (GetOutputDevice() >= TONEMAPPER_OUTPUT_ACES1000nitST2084)
	{
		// Since ST2084 returns linear values in nits, divide by a scale factor to convert
		// the reference nit result to be 1.0 in linear.
		// (for efficiency multiply by precomputed inverse)
		LinearColor = ST2084ToLinear(LUTEncodedColor) * LinearToNitsScaleInverse;
	}
	// Decode log values
	else
	{
		LinearColor = LogToLin(LUTEncodedColor) - LogToLin(0);
	}

	// White balance is only evaluated when temperature or tint differ from their defaults
	float3 BalancedColor = LinearColor;
	BRANCH
	if( abs(WhiteTemp - DefaultTemperature) > Epsilon || abs(WhiteTint - DefaultTint) > Epsilon )
	{
		BalancedColor = WhiteBalance(LinearColor, WhiteTemp, WhiteTint, bIsTemperatureWhiteBalance, (float3x3)WorkingColorSpace.ToXYZ, (float3x3)WorkingColorSpace.FromXYZ);
	}

	float3 ColorAP1 = mul( (float3x3)WorkingColorSpace.ToAP1, BalancedColor );

	// Expand bright saturated colors outside the sRGB gamut to fake wide gamut rendering.
	float LumaAP1 = dot( ColorAP1, AP1_RGB2Y );
	// NOTE(review): LumaAP1 is 0 when ColorAP1 is black, so this division is 0/0 for that
	// texel. Confirm the max( 0, ... ) in ColorCorrect() is what keeps the result finite.
	float3 ChromaAP1 = ColorAP1 / LumaAP1;

	float ChromaDistSqr = dot( ChromaAP1 - 1, ChromaAP1 - 1 );
	float ExpandAmount = ( 1 - exp2( -4 * ChromaDistSqr ) ) * ( 1 - exp2( -4 * ExpandGamut * LumaAP1*LumaAP1 ) );

	// Bizarre matrix but this expands sRGB to between P3 and AP1
	// CIE 1931 chromaticities:	x		y
	//				Red:		0.6965	0.3065
	//				Green:		0.245	0.718
	//				Blue:		0.1302	0.0456
	//				White:		0.3127	0.329
	const float3x3 Wide_2_XYZ_MAT =
	{
		0.5441691, 0.2395926, 0.1666943,
		0.2394656, 0.7021530, 0.0583814,
		-0.0023439, 0.0361834, 1.0552183,
	};

	const float3x3 Wide_2_AP1 = mul( XYZ_2_AP1_MAT, Wide_2_XYZ_MAT );
	const float3x3 ExpandMat = mul( Wide_2_AP1, AP1_2_sRGB );

	float3 ColorExpand = mul( ExpandMat, ColorAP1 );
	ColorAP1 = lerp( ColorAP1, ColorExpand, ExpandAmount );

	ColorAP1 = ColorCorrectAll( ColorAP1 );

	// Store for Linear HDR output without tone curve
	float3 GradedColor = mul( (float3x3)WorkingColorSpace.FromAP1, ColorAP1 );

	const float3x3 BlueCorrect =
	{
		0.9404372683, -0.0183068787, 0.0778696104,
		0.0083786969, 0.8286599939, 0.1629613092,
		0.0005471261, -0.0008833746, 1.0003362486
	};
	const float3x3 BlueCorrectInv =
	{
		1.06318, 0.0233956, -0.0865726,
		-0.0106337, 1.20632, -0.19569,
		-0.000590887, 0.00105248, 0.999538
	};
	const float3x3 BlueCorrectAP1 = mul( AP0_2_AP1, mul( BlueCorrect, AP1_2_AP0 ) );
	const float3x3 BlueCorrectInvAP1 = mul( AP0_2_AP1, mul( BlueCorrectInv, AP1_2_AP0 ) );

	// Blue correction
	ColorAP1 = lerp( ColorAP1, mul( BlueCorrectAP1, ColorAP1 ), BlueCorrection );

	// Tonemapped color in the AP1 gamut
	float3 ToneMappedColorAP1 = FilmToneMap( ColorAP1 );
	ColorAP1 = lerp(ColorAP1, ToneMappedColorAP1, ToneCurveAmount);

	// Uncorrect blue to maintain white point
	ColorAP1 = lerp( ColorAP1, mul( BlueCorrectInvAP1, ColorAP1 ), BlueCorrection );

	// Convert from AP1 to the working color space and clip out-of-gamut values
	float3 FilmColor = max(0, mul( (float3x3)WorkingColorSpace.FromAP1, ColorAP1 ));

#if BLENDCOUNT > 1
	{
		// Legacy LDR LUT color grading

		// FIXME
		// LUTs are in sRGB transfer function space
		// LUTs applied in sRGB gamut as well
		half3 GammaColor = LinearToSrgb( saturate( FilmColor ) );

		// Remap [0, 1] to texel centers of a 16-entry LUT axis
		float3 UVW = GammaColor * (15.0 / 16.0) + (0.5f / 16.0);

		GammaColor = GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 0) * GammaColor;

		// BLENDCOUNT is the number of LUT that are blended together including the neutral one
#if BLENDCOUNT >= 2
		GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 1) * UnwrappedTexture3DSample( Textures_1, Samplers_1, UVW, 16, 1 / 16.0 ).rgb;
#endif

#if BLENDCOUNT >= 3
		GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 2) * UnwrappedTexture3DSample( Textures_2, Samplers_2, UVW, 16, 1 / 16.0 ).rgb;
#endif

#if BLENDCOUNT >= 4
		GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 3) * UnwrappedTexture3DSample( Textures_3, Samplers_3, UVW, 16, 1 / 16.0 ).rgb;
#endif

#if BLENDCOUNT >= 5
		GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 4) * UnwrappedTexture3DSample( Textures_4, Samplers_4, UVW, 16, 1 / 16.0 ).rgb;
#endif

		// Back to linear space, AP1 gamut
		FilmColor = sRGBToLinear( GammaColor );
	}
#endif

	// apply math color correction on top to texture based solution
	FilmColor = ColorCorrection( FilmColor );

	// blend with custom LDR color, used for Fade track in Cinematics
	float3 FilmColorNoGamma = lerp( FilmColor * ColorScale, OverlayColor.rgb, OverlayColor.a );
	// Apply Fade track to linear outputs also
	GradedColor = lerp(GradedColor * ColorScale, OverlayColor.rgb, OverlayColor.a);

	// Apply "gamma" curve adjustment.
	FilmColor = pow( max(0, FilmColorNoGamma), InverseGamma.y );

	// Note: Any changes to the following device encoding logic must also be made
	// in PostProcessDeviceEncodingOnly.usf's corresponding pixel shader.
	half3 OutDeviceColor = 0;
	BRANCH

	// sRGB, user specified gamut
	if( GetOutputDevice() == TONEMAPPER_OUTPUT_sRGB)
	{
		// Convert from sRGB to specified output gamut
		//float3 OutputGamutColor = mul( AP1_2_Output, mul( sRGB_2_AP1, FilmColor ) );

		// FIXME: Workaround for UE-29935, pushing all colors with a 0 component to black output
		// Default parameters seem to cancel out (sRGB->XYZ->AP1->XYZ->sRGB), so should be okay for a temp fix
		float3 OutputGamutColor = WorkingColorSpace.bIsSRGB ? FilmColor : mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColor ) );

		// Apply conversion to sRGB (this must be an exact sRGB conversion else darks are bad).
		OutDeviceColor = LinearToSrgb( OutputGamutColor );
	}
	// Rec 709, user specified gamut
	else if( GetOutputDevice() == TONEMAPPER_OUTPUT_Rec709)
	{
		// Convert from the working color space to specified output gamut
		float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColor ) );

		// Didn't profile yet if the branching version would be faster (different linear segment).
		OutDeviceColor = LinearTo709Branchless( OutputGamutColor );
	}
	// ACES transform with PQ/2084 encoding, user specified gamut
	else if( GetOutputDevice() == TONEMAPPER_OUTPUT_ACES1000nitST2084 || GetOutputDevice() == TONEMAPPER_OUTPUT_ACES2000nitST2084)
	{
#if USE_ACES_2
		float3 ODTColor = ACESOutputTransform(GradedColor * ACESSceneColorMultiplier, (float3x3) WorkingColorSpace.ToAP0, OutputMaxLuminance, ACESReachTable, ACESGamutTable, ACESGammaTable);
#else
		FACESTonemapParams AcesParams = ComputeACESTonemapParams(ACESMinMaxData, ACESMidData, ACESCoefsLow_0, ACESCoefsHigh_0, ACESCoefsLow_4, ACESCoefsHigh_4, ACESSceneColorMultiplier, ACESGamutCompression);
		float3 ODTColor = ACESOutputTransform( GradedColor, (float3x3)WorkingColorSpace.ToAP0, AcesParams);
#endif

		ODTColor = mul(AP1_2_Output, ODTColor);

		// Apply conversion to ST-2084 (Dolby PQ)
		OutDeviceColor = LinearToST2084( ODTColor );
	}
	// ACES transform for scRGB output: converted with AP1_2_sRGB and scaled by
	// 1 / UE_SCRGB_WHITE_NITS, no PQ encoding and no user specified gamut
	else if( GetOutputDevice() == TONEMAPPER_OUTPUT_ACES1000nitScRGB || GetOutputDevice() == TONEMAPPER_OUTPUT_ACES2000nitScRGB)
	{
#if USE_ACES_2
		float3 ODTColor = ACESOutputTransform(GradedColor * ACESSceneColorMultiplier, (float3x3) WorkingColorSpace.ToAP0, OutputMaxLuminance, ACESReachTable, ACESGamutTable, ACESGammaTable);
#else
		FACESTonemapParams AcesParams = ComputeACESTonemapParams(ACESMinMaxData, ACESMidData, ACESCoefsLow_0, ACESCoefsHigh_0, ACESCoefsLow_4, ACESCoefsHigh_4, ACESSceneColorMultiplier, ACESGamutCompression);
		float3 ODTColor = ACESOutputTransform( GradedColor, (float3x3)WorkingColorSpace.ToAP0, AcesParams);
#endif

		OutDeviceColor = mul(AP1_2_sRGB, ODTColor / UE_SCRGB_WHITE_NITS);
	}
	// Graded color (no tone curve) in the user specified gamut, encoded with LinearToST2084
	else if( GetOutputDevice() == TONEMAPPER_OUTPUT_LinearEXR)
	{
		float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, GradedColor ) );
		OutDeviceColor = LinearToST2084( OutputGamutColor );
	}
	// Linear HDR, including all color correction, but no tone curve
	else if( GetOutputDevice() == TONEMAPPER_OUTPUT_NoToneCurve)
	{
		OutDeviceColor = GradedColor;
	}
	// "Linear" including all color correction and the tone curve, but no device gamma
	else if (GetOutputDevice() == TONEMAPPER_OUTPUT_WithToneCurve)
	{
		float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColorNoGamma ) );

		OutDeviceColor = OutputGamutColor;
	}
	// OutputDevice == TONEMAPPER_OUTPUT_ExplicitGammaMapping
	// Gamma 2.2, user specified gamut
	else
	{
		// Convert from the working color space to specified output gamut
		float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColor ) );

		// This is different than the prior "gamma" curve adjustment (but reusing the variable).
		// For displays set to a gamma colorspace.
		// Note, MacOSX native output is raw gamma 2.2 not sRGB!
		OutDeviceColor = pow( OutputGamutColor, InverseGamma.z );
	}

	// Better to saturate(lerp(a,b,t)) than lerp(saturate(a),saturate(b),t)
	// NOTE(review): the 1 / 1.05 scale is presumably undone by whoever samples this LUT - confirm.
	OutColor.rgb = OutDeviceColor / 1.05;
	OutColor.a = 0;

	return OutColor;
}

// todo: Weight[0] should be used for neutral, Texture* name should start with 1, color correction should apply on top of that
#if USE_VOLUME_LUT == 1
/** Pixel shader entry point for the volume LUT: one invocation per texel of the slice given by Input.LayerIndex. */
void MainPS(FWriteToSliceGeometryOutput Input, out float4 OutColor : SV_Target0)
{
	OutColor = CombineLUTsCommon(Input.Vertex.UV, Input.LayerIndex);
}
#else
/** Pixel shader entry point for the unwrapped 2D LUT; the layer index is always 0. */
void MainPS(noperspective float4 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	OutColor = CombineLUTsCommon(InUV.xy, 0);
}
#endif

#if COMPUTESHADER
// Scale applied to (thread index + 0.5) in MainCS to get the UV passed to CombineLUTsCommon()
float2 OutputExtentInverse;

#if USE_VOLUME_LUT == 1
RWTexture3D<float4> RWOutputTexture;

/** Compute shader entry point for the volume LUT: thread xy selects the texel, z selects the slice. */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	float2 UV = ((float2)DispatchThreadId.xy + 0.5f) * OutputExtentInverse;
	uint LayerIndex = DispatchThreadId.z;

	float4 OutColor = CombineLUTsCommon(UV, LayerIndex);

	uint3 PixelPos = DispatchThreadId;
	RWOutputTexture[PixelPos] = OutColor;
}
#else
RWTexture2D<float4> RWOutputTexture;

/** Compute shader entry point for the unwrapped 2D LUT; the layer index is always 0. */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void MainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	float2 UV = ((float2)DispatchThreadId + 0.5f) * OutputExtentInverse;
	uint LayerIndex = 0;

	float4 OutColor = CombineLUTsCommon(UV, LayerIndex);

	uint2 PixelPos = DispatchThreadId;
	RWOutputTexture[PixelPos] = OutColor;
}
#endif
#endif