Files
UnrealEngine/Engine/Shaders/Private/PostProcessCombineLUTs.usf
2025-05-18 13:04:45 +08:00

472 lines
16 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessCombineLUTs.usf: Pixel and compute shader source for combining color grading LUTs.
Note: Any changes to device encoding logic in CombineLUTsCommon() must
also be made in PostProcessDeviceEncodingOnly.usf.
=============================================================================*/
#define USE_COLOR_MATRIX 1
#define USE_SHADOW_TINT 1
#define USE_CONTRAST 1
#include "Common.ush"
#include "PostProcessCommon.ush"
#include "TonemapCommon.ush"
// ---------------------------------------------------
// Legacy LDR color grading LUT inputs. Textures_N is only sampled when BLENDCOUNT >= N + 1,
// through UnwrappedTexture3DSample() with a size of 16.
// Texture0 is the neutral one and is computed in the shader
Texture2D Textures_1;
SamplerState Samplers_1;
Texture2D Textures_2;
SamplerState Samplers_2;
Texture2D Textures_3;
SamplerState Samplers_3;
Texture2D Textures_4;
SamplerState Samplers_4;
// Blend weight of each LUT.
// 0 is for neutral, 1 for Textures_1, 2 for ...
DECLARE_SCALAR_ARRAY(float, LUTWeights, 5);
// Fade controls: the color becomes lerp(Color * ColorScale, OverlayColor.rgb, OverlayColor.a).
half3 ColorScale;
half4 OverlayColor;
// NOTE(review): not referenced by the code visible in this file - confirm where it is consumed.
uint bUseMobileTonemapper;
// Forwarded unchanged to WhiteBalance().
uint bIsTemperatureWhiteBalance;
// LUT dimension used to reconstruct the neutral color from the output texel position.
float LUTSize;
// White balance inputs; WhiteBalance() is skipped while both stay within Epsilon of
// DefaultTemperature / DefaultTint.
float WhiteTemp;
float WhiteTint;
// Global grading controls. ColorCorrectAll() combines them with the per-range values below
// (multiplied together, except the offsets which are added). ColorCorrect() then combines
// .xyz with .w for each control.
float4 ColorSaturation;
float4 ColorContrast;
float4 ColorGamma;
float4 ColorGain;
float4 ColorOffset;
// Per-range grading controls: shadows.
float4 ColorSaturationShadows;
float4 ColorContrastShadows;
float4 ColorGammaShadows;
float4 ColorGainShadows;
float4 ColorOffsetShadows;
// Per-range grading controls: midtones.
float4 ColorSaturationMidtones;
float4 ColorContrastMidtones;
float4 ColorGammaMidtones;
float4 ColorGainMidtones;
float4 ColorOffsetMidtones;
// Per-range grading controls: highlights.
float4 ColorSaturationHighlights;
float4 ColorContrastHighlights;
float4 ColorGammaHighlights;
float4 ColorGainHighlights;
float4 ColorOffsetHighlights;
// Luma thresholds used as smoothstep() edges for the shadow and highlight weights.
float ColorCorrectionShadowsMax;
float ColorCorrectionHighlightsMin;
float ColorCorrectionHighlightsMax;
// ACES 1.x tone map parameters, forwarded to ComputeACESTonemapParams() when USE_ACES_2 is not set.
float4 ACESMinMaxData;
float4 ACESMidData;
float4 ACESCoefsLow_0;
float4 ACESCoefsHigh_0;
float ACESCoefsLow_4;
float ACESCoefsHigh_4;
// Also multiplied into the graded color directly on the USE_ACES_2 path.
float ACESSceneColorMultiplier;
float ACESGamutCompression;
// ACES 2.0
// Lookup tables forwarded to ACESOutputTransform() on the USE_ACES_2 path.
Texture2D<float> ACESReachTable;
Texture2D<float3> ACESGamutTable;
Texture2D<float> ACESGammaTable;
// Values of WhiteTemp / WhiteTint for which white balance is treated as a no-op.
static const float DefaultTemperature = 6500.0;
static const float DefaultTint = 0.0;
// Tolerance used when comparing WhiteTemp / WhiteTint against the defaults above.
static const float Epsilon = 1e-8;
/**
 * Applies saturation, contrast, gamma, gain and offset to a color, in that order.
 *
 * Note: the parameters intentionally share their names with the global grading uniforms and
 * shadow them inside this function.
 *
 * @param WorkingColor    Color to grade; luma is computed with AP1_RGB2Y.
 * @param ColorSaturation Saturation, applied as xyz * w.
 * @param ColorContrast   Contrast exponent around a 0.18 pivot, applied as xyz * w.
 * @param ColorGamma      Gamma, applied as the exponent 1 / (xyz * w).
 * @param ColorGain       Multiplicative gain, applied as xyz * w.
 * @param ColorOffset     Additive offset, applied as xyz + w.
 * @return The graded color.
 */
float3 ColorCorrect( float3 WorkingColor,
	float4 ColorSaturation,
	float4 ColorContrast,
	float4 ColorGamma,
	float4 ColorGain,
	float4 ColorOffset )
{
	// TODO optimize

	// Collapse each control into a single per-channel factor.
	const float3 SaturationRGB = ColorSaturation.xyz*ColorSaturation.w;
	const float3 ContrastRGB = ColorContrast.xyz*ColorContrast.w;
	const float3 GammaRGB = ColorGamma.xyz*ColorGamma.w;
	const float3 GainRGB = ColorGain.xyz * ColorGain.w;
	const float3 OffsetRGB = ColorOffset.xyz + ColorOffset.w;

	// Saturation: blend between the grey of equal luma and the color, clamped to non-negative.
	const float Luma = dot( WorkingColor, AP1_RGB2Y );
	const float3 Saturated = max( 0, lerp( Luma.xxx, WorkingColor, SaturationRGB ) );

	// Contrast: power curve that leaves 0.18 unchanged.
	const float3 Contrasted = pow( Saturated * (1.0 / 0.18), ContrastRGB ) * 0.18;

	// Gamma: inverse power.
	const float3 GammaAdjusted = pow( Contrasted, 1.0 / GammaRGB );

	// Gain and offset.
	return GammaAdjusted * GainRGB + OffsetRGB;
}
// Nuke-style Color Correct
// Grades the color three times (shadows, midtones, highlights), each time combining the global
// controls with the per-range controls, then blends the results with luma-based weights that
// sum to 1.
float3 ColorCorrectAll( float3 WorkingColor )
{
	// Blend weights derived from the luma of the ungraded color.
	const float Luma = dot( WorkingColor, AP1_RGB2Y );
	const float ShadowWeight = 1- smoothstep(0, ColorCorrectionShadowsMax, Luma);
	const float HighlightWeight = smoothstep(ColorCorrectionHighlightsMin, ColorCorrectionHighlightsMax, Luma);
	// Midtones take whatever weight remains.
	const float MidtoneWeight = 1 - ShadowWeight - HighlightWeight;

	// Shadow grade
	const float3 ShadowGraded = ColorCorrect(
		WorkingColor,
		ColorSaturationShadows*ColorSaturation,
		ColorContrastShadows*ColorContrast,
		ColorGammaShadows*ColorGamma,
		ColorGainShadows*ColorGain,
		ColorOffsetShadows+ColorOffset);

	// Midtone grade
	const float3 MidtoneGraded = ColorCorrect(
		WorkingColor,
		ColorSaturationMidtones*ColorSaturation,
		ColorContrastMidtones*ColorContrast,
		ColorGammaMidtones*ColorGamma,
		ColorGainMidtones*ColorGain,
		ColorOffsetMidtones+ColorOffset);

	// Highlight grade
	const float3 HighlightGraded = ColorCorrect(
		WorkingColor,
		ColorSaturationHighlights*ColorSaturation,
		ColorContrastHighlights*ColorContrast,
		ColorGammaHighlights*ColorGamma,
		ColorGainHighlights*ColorGain,
		ColorOffsetHighlights+ColorOffset);

	// Weighted sum, kept in shadow / midtone / highlight order.
	return ShadowGraded*ShadowWeight + MidtoneGraded*MidtoneWeight + HighlightGraded*HighlightWeight;
}
// Output device selector, compared against the TONEMAPPER_OUTPUT_* constants.
uint OutputDevice;
// Passed to OuputGamutMappingMatrix() to build the AP1 -> output gamut matrix.
uint OutputGamut;
// Forwarded to ACESOutputTransform() on the USE_ACES_2 path.
float OutputMaxLuminance;
// Returns the output device to encode for: the OutputDevice uniform, unless the shader was
// compiled with OUTPUT_DEVICE_SRGB, in which case it is fixed to TONEMAPPER_OUTPUT_sRGB.
uint GetOutputDevice()
{
#if !OUTPUT_DEVICE_SRGB
	return OutputDevice;
#else
	return TONEMAPPER_OUTPUT_sRGB;
#endif
}
// Blend factor for the blue correction matrices applied before and after the tone curve.
float BlueCorrection;
// Scales the luminance term of the wide gamut expansion amount.
float ExpandGamut;
// Blend factor between the untonemapped color and the FilmToneMap() result.
float ToneCurveAmount;
/**
 * Computes one texel of the combined color grading LUT.
 *
 * The texel position is turned into a "neutral" LUT-encoded color, decoded to linear, and run
 * through white balance, gamut expansion, color correction, blue correction, the film tone
 * curve, optional legacy LDR LUT blending (BLENDCOUNT > 1), the fade overlay and finally the
 * encoding for the selected output device.
 *
 * @param InUV         UV of the LUT texel being written.
 * @param InLayerIndex Slice index of the texel; only read when USE_VOLUME_LUT == 1.
 * @return Device-encoded color divided by 1.05 in rgb, 0 in alpha.
 */
float4 CombineLUTsCommon(float2 InUV, uint InLayerIndex)
{
#if USE_VOLUME_LUT == 1
// construct the neutral color from a 3d position volume texture
float4 Neutral;
{
// Remove the half texel offset so that the first and last texel map to exactly 0 and 1.
float2 UV = InUV - float2(0.5f / LUTSize, 0.5f / LUTSize);
Neutral = float4(UV * LUTSize / (LUTSize - 1), InLayerIndex / (LUTSize - 1), 0);
}
#else
// construct the neutral color from a 2d position in 256x16
float4 Neutral;
{
float2 UV = InUV;
// 0.49999f instead of 0.5f to avoid getting into negative values
UV -= float2(0.49999f / (LUTSize * LUTSize), 0.49999f / LUTSize);
float Scale = LUTSize / (LUTSize - 1);
float3 RGB;
// Red repeats LUTSize times along x, blue is the remaining coarse x position, green is y.
RGB.r = frac(UV.x * LUTSize);
RGB.b = UV.x - RGB.r / LUTSize;
RGB.g = UV.y;
Neutral = float4(RGB * Scale, 0);
}
#endif
float4 OutColor = 0;
// Color space conversion matrices built from the constant matrices of the included headers.
const float3x3 AP1_2_sRGB = mul( XYZ_2_sRGB_MAT, mul( D60_2_D65_CAT, AP1_2_XYZ_MAT ) );
const float3x3 AP0_2_AP1 = mul( XYZ_2_AP1_MAT, AP0_2_XYZ_MAT );
const float3x3 AP1_2_AP0 = mul( XYZ_2_AP0_MAT, AP1_2_XYZ_MAT );
const float3x3 AP1_2_Output = OuputGamutMappingMatrix( OutputGamut );
float3 LUTEncodedColor = Neutral.rgb;
float3 LinearColor;
// Decode texture values as ST-2084 (Dolby PQ)
BRANCH
if (GetOutputDevice() >= TONEMAPPER_OUTPUT_ACES1000nitST2084)
{
// Since ST2084 returns linear values in nits, divide by a scale factor to convert
// the reference nit result to be 1.0 in linear.
// (for efficiency multiply by precomputed inverse)
LinearColor = ST2084ToLinear(LUTEncodedColor) * LinearToNitsScaleInverse;
}
// Decode log values
else
{
// Subtracting LogToLin(0) makes an encoded value of 0 decode to exactly 0.
LinearColor = LogToLin(LUTEncodedColor) - LogToLin(0);
}
float3 BalancedColor = LinearColor;
// Only run white balance when temperature or tint differ from their defaults.
BRANCH
if( abs(WhiteTemp - DefaultTemperature) > Epsilon || abs(WhiteTint - DefaultTint) > Epsilon )
{
BalancedColor = WhiteBalance(LinearColor, WhiteTemp, WhiteTint, bIsTemperatureWhiteBalance, (float3x3)WorkingColorSpace.ToXYZ, (float3x3)WorkingColorSpace.FromXYZ);
}
float3 ColorAP1 = mul( (float3x3)WorkingColorSpace.ToAP1, BalancedColor );
// Expand bright saturated colors outside the sRGB gamut to fake wide gamut rendering.
float LumaAP1 = dot( ColorAP1, AP1_RGB2Y );
// NOTE(review): there is no guard against LumaAP1 == 0 here - confirm this is handled downstream.
float3 ChromaAP1 = ColorAP1 / LumaAP1;
float ChromaDistSqr = dot( ChromaAP1 - 1, ChromaAP1 - 1 );
// The amount grows with the distance of the chroma from grey and with ExpandGamut * luma^2.
float ExpandAmount = ( 1 - exp2( -4 * ChromaDistSqr ) ) * ( 1 - exp2( -4 * ExpandGamut * LumaAP1*LumaAP1 ) );
// Bizarre matrix but this expands sRGB to between P3 and AP1
// CIE 1931 chromaticities: x y
// Red: 0.6965 0.3065
// Green: 0.245 0.718
// Blue: 0.1302 0.0456
// White: 0.3127 0.329
const float3x3 Wide_2_XYZ_MAT =
{
0.5441691, 0.2395926, 0.1666943,
0.2394656, 0.7021530, 0.0583814,
-0.0023439, 0.0361834, 1.0552183,
};
const float3x3 Wide_2_AP1 = mul( XYZ_2_AP1_MAT, Wide_2_XYZ_MAT );
// Converts AP1 to sRGB, then treats the result as "Wide" primaries and converts it back to AP1.
const float3x3 ExpandMat = mul( Wide_2_AP1, AP1_2_sRGB );
float3 ColorExpand = mul( ExpandMat, ColorAP1 );
ColorAP1 = lerp( ColorAP1, ColorExpand, ExpandAmount );
// Shadow / midtone / highlight grading.
ColorAP1 = ColorCorrectAll( ColorAP1 );
// Store for Linear HDR output without tone curve
float3 GradedColor = mul( (float3x3)WorkingColorSpace.FromAP1, ColorAP1 );
const float3x3 BlueCorrect =
{
0.9404372683, -0.0183068787, 0.0778696104,
0.0083786969, 0.8286599939, 0.1629613092,
0.0005471261, -0.0008833746, 1.0003362486
};
const float3x3 BlueCorrectInv =
{
1.06318, 0.0233956, -0.0865726,
-0.0106337, 1.20632, -0.19569,
-0.000590887, 0.00105248, 0.999538
};
// The blue correction matrices are applied in AP0, so wrap them with AP1 <-> AP0 conversions.
const float3x3 BlueCorrectAP1 = mul( AP0_2_AP1, mul( BlueCorrect, AP1_2_AP0 ) );
const float3x3 BlueCorrectInvAP1 = mul( AP0_2_AP1, mul( BlueCorrectInv, AP1_2_AP0 ) );
// Blue correction
ColorAP1 = lerp( ColorAP1, mul( BlueCorrectAP1, ColorAP1 ), BlueCorrection );
// Tonemapped color in the AP1 gamut
float3 ToneMappedColorAP1 = FilmToneMap( ColorAP1 );
ColorAP1 = lerp(ColorAP1, ToneMappedColorAP1, ToneCurveAmount);
// Uncorrect blue to maintain white point
ColorAP1 = lerp( ColorAP1, mul( BlueCorrectInvAP1, ColorAP1 ), BlueCorrection );
// Convert from AP1 to the working color space and clip out-of-gamut values
float3 FilmColor = max(0, mul( (float3x3)WorkingColorSpace.FromAP1, ColorAP1 ));
#if BLENDCOUNT > 1
{
// Legacy LDR LUT color grading
// FIXME
// LUTs are in sRGB transfer function space
// LUTs applied in sRGB gamut as well
half3 GammaColor = LinearToSrgb( saturate( FilmColor ) );
// Map [0, 1] onto the texel centers of a LUT that is 16 texels wide per axis.
float3 UVW = GammaColor * (15.0 / 16.0) + (0.5f / 16.0);
// Weight 0 applies to the neutral (unmodified) color.
GammaColor = GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 0) * GammaColor;
// BLENDCOUNT is the number of LUT that are blended together including the neutral one
#if BLENDCOUNT >= 2
GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 1) * UnwrappedTexture3DSample( Textures_1, Samplers_1, UVW, 16, 1 / 16.0 ).rgb;
#endif
#if BLENDCOUNT >= 3
GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 2) * UnwrappedTexture3DSample( Textures_2, Samplers_2, UVW, 16, 1 / 16.0 ).rgb;
#endif
#if BLENDCOUNT >= 4
GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 3) * UnwrappedTexture3DSample( Textures_3, Samplers_3, UVW, 16, 1 / 16.0 ).rgb;
#endif
#if BLENDCOUNT >= 5
GammaColor += GET_SCALAR_ARRAY_ELEMENT(LUTWeights, 4) * UnwrappedTexture3DSample( Textures_4, Samplers_4, UVW, 16, 1 / 16.0 ).rgb;
#endif
// Back to linear space (undoes the LinearToSrgb() above).
// NOTE(review): this comment used to say "AP1 gamut", but FilmColor was converted from AP1 to
// the working color space before this block - confirm which gamut is intended.
FilmColor = sRGBToLinear( GammaColor );
}
#endif
// apply math color correction on top to texture based solution
FilmColor = ColorCorrection( FilmColor );
// blend with custom LDR color, used for Fade track in Cinematics
float3 FilmColorNoGamma = lerp( FilmColor * ColorScale, OverlayColor.rgb, OverlayColor.a );
// Apply Fade track to linear outputs also
GradedColor = lerp(GradedColor * ColorScale, OverlayColor.rgb, OverlayColor.a);
// Apply "gamma" curve adjustment.
FilmColor = pow( max(0, FilmColorNoGamma), InverseGamma.y );
// Note: Any changes to the following device encoding logic must also be made
// in PostProcessDeviceEncodingOnly.usf's corresponding pixel shader.
half3 OutDeviceColor = 0;
BRANCH
// sRGB, user specified gamut
if( GetOutputDevice() == TONEMAPPER_OUTPUT_sRGB)
{
// Convert from sRGB to specified output gamut
//float3 OutputGamutColor = mul( AP1_2_Output, mul( sRGB_2_AP1, FilmColor ) );
// FIXME: Workaround for UE-29935, pushing all colors with a 0 component to black output
// Default parameters seem to cancel out (sRGB->XYZ->AP1->XYZ->sRGB), so should be okay for a temp fix
float3 OutputGamutColor = WorkingColorSpace.bIsSRGB ? FilmColor : mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColor ) );
// Apply conversion to sRGB (this must be an exact sRGB conversion else darks are bad).
OutDeviceColor = LinearToSrgb( OutputGamutColor );
}
// Rec 709, user specified gamut
else if( GetOutputDevice() == TONEMAPPER_OUTPUT_Rec709)
{
// Convert from the working color space to specified output gamut
float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColor ) );
// Didn't profile yet if the branching version would be faster (different linear segment).
OutDeviceColor = LinearTo709Branchless( OutputGamutColor );
}
// ACES transform with PQ/2084 encoding, user specified gamut
else if( GetOutputDevice() == TONEMAPPER_OUTPUT_ACES1000nitST2084 || GetOutputDevice() == TONEMAPPER_OUTPUT_ACES2000nitST2084)
{
// The ACES paths start from GradedColor, i.e. the color without the film tone curve.
#if USE_ACES_2
float3 ODTColor = ACESOutputTransform(GradedColor * ACESSceneColorMultiplier, (float3x3) WorkingColorSpace.ToAP0, OutputMaxLuminance, ACESReachTable, ACESGamutTable, ACESGammaTable);
#else
FACESTonemapParams AcesParams = ComputeACESTonemapParams(ACESMinMaxData, ACESMidData, ACESCoefsLow_0, ACESCoefsHigh_0, ACESCoefsLow_4, ACESCoefsHigh_4, ACESSceneColorMultiplier, ACESGamutCompression);
float3 ODTColor = ACESOutputTransform( GradedColor, (float3x3)WorkingColorSpace.ToAP0, AcesParams);
#endif
ODTColor = mul(AP1_2_Output, ODTColor);
// Apply conversion to ST-2084 (Dolby PQ)
OutDeviceColor = LinearToST2084( ODTColor );
}
// ACES transform with scRGB output: converted with AP1_2_sRGB and scaled by
// 1 / UE_SCRGB_WHITE_NITS, no PQ/2084 encoding is applied in this branch
else if( GetOutputDevice() == TONEMAPPER_OUTPUT_ACES1000nitScRGB || GetOutputDevice() == TONEMAPPER_OUTPUT_ACES2000nitScRGB)
{
#if USE_ACES_2
float3 ODTColor = ACESOutputTransform(GradedColor * ACESSceneColorMultiplier, (float3x3) WorkingColorSpace.ToAP0, OutputMaxLuminance, ACESReachTable, ACESGamutTable, ACESGammaTable);
#else
FACESTonemapParams AcesParams = ComputeACESTonemapParams(ACESMinMaxData, ACESMidData, ACESCoefsLow_0, ACESCoefsHigh_0, ACESCoefsLow_4, ACESCoefsHigh_4, ACESSceneColorMultiplier, ACESGamutCompression);
float3 ODTColor = ACESOutputTransform( GradedColor, (float3x3)WorkingColorSpace.ToAP0, AcesParams);
#endif
OutDeviceColor = mul(AP1_2_sRGB, ODTColor / UE_SCRGB_WHITE_NITS);
}
// Linear EXR: graded color (no tone curve) in the output gamut, stored ST-2084 encoded
else if( GetOutputDevice() == TONEMAPPER_OUTPUT_LinearEXR)
{
float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, GradedColor ) );
OutDeviceColor = LinearToST2084( OutputGamutColor );
}
// Linear HDR, including all color correction, but no tone curve
else if( GetOutputDevice() == TONEMAPPER_OUTPUT_NoToneCurve)
{
OutDeviceColor = GradedColor;
}
// "Linear" including all color correction and the tone curve, but no device gamma
else if (GetOutputDevice() == TONEMAPPER_OUTPUT_WithToneCurve)
{
float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColorNoGamma ) );
OutDeviceColor = OutputGamutColor;
}
// OutputDevice == TONEMAPPER_OUTPUT_ExplicitGammaMapping
// Gamma 2.2, user specified gamut
else
{
// Convert from the working color space to specified output gamut
float3 OutputGamutColor = mul( AP1_2_Output, mul( (float3x3)WorkingColorSpace.ToAP1, FilmColor ) );
// This is different than the prior "gamma" curve adjustment (but reusing the variable).
// For displays set to a gamma colorspace.
// Note, MacOSX native output is raw gamma 2.2 not sRGB!
OutDeviceColor = pow( OutputGamutColor, InverseGamma.z );
}
// Better to saturate(lerp(a,b,t)) than lerp(saturate(a),saturate(b),t)
// NOTE(review): the comment above does not describe the line below. The stored color is scaled
// by 1 / 1.05; presumably the code sampling this LUT multiplies by 1.05 again - confirm.
OutColor.rgb = OutDeviceColor / 1.05;
OutColor.a = 0;
return OutColor;
}
// todo: Weight[0] should be used for neutral, Texture* name should start with 1, color correction should apply on top of that
#if USE_VOLUME_LUT == 1
// Pixel shader entry point for the volume LUT: one invocation per texel of the slice selected
// by the geometry shader output.
void MainPS(FWriteToSliceGeometryOutput Input, out float4 OutColor : SV_Target0)
{
	const float2 TexelUV = Input.Vertex.UV;
	const uint SliceIndex = Input.LayerIndex;
	OutColor = CombineLUTsCommon(TexelUV, SliceIndex);
}
#else
// Pixel shader entry point for the unwrapped 2d LUT; the layer index is always 0.
void MainPS(noperspective float4 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	const float2 TexelUV = InUV.xy;
	const uint SliceIndex = 0;
	OutColor = CombineLUTsCommon(TexelUV, SliceIndex);
}
#endif
#if COMPUTESHADER
// Scale that maps (thread id + 0.5) to the UV handed to CombineLUTsCommon().
float2 OutputExtentInverse;
#if USE_VOLUME_LUT == 1
// Volume LUT target; written at the 3d dispatch thread id.
RWTexture3D<float4> RWOutputTexture;
// Compute entry point for the volume LUT: one thread per texel, z selects the slice.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const float2 TexelCenterUV = ((float2)DispatchThreadId.xy + 0.5f) * OutputExtentInverse;
	RWOutputTexture[DispatchThreadId] = CombineLUTsCommon(TexelCenterUV, DispatchThreadId.z);
}
#else
// Unwrapped 2d LUT target; written at the 2d dispatch thread id.
RWTexture2D<float4> RWOutputTexture;
// Compute entry point for the unwrapped 2d LUT: one thread per texel, layer index is always 0.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void MainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const float2 TexelCenterUV = ((float2)DispatchThreadId + 0.5f) * OutputExtentInverse;
	const uint SliceIndex = 0;
	RWOutputTexture[DispatchThreadId] = CombineLUTsCommon(TexelCenterUV, SliceIndex);
}
#endif
#endif