// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	ETCCompressionCommon.ush:
	Helpers for compute shader ETC2 texture compression
=============================================================================*/

#pragma once

#include "GammaCorrectionCommon.ush"

#define BLOCK_MODE_INDIVIDUAL		0
#define BLOCK_MODE_DIFFERENTIAL		1u
#define NUM_RGB_TABLES				8
#define NUM_ALPHA_TABLES			16

/** Weighted sum of the three channels used as the luminance metric when picking pixel selectors. */
float EtcLuminance(in float3 c)
{
	return dot(c, float3(0.3f, 0.5f, 0.2f));
}

/**
 * Quantizes a [0,1] color to 8 bits per channel and keeps only the top 5 bits of each channel.
 * R ends up in bits 3-7, G in bits 11-15 and B in bits 19-23 of the result; the low 3 bits
 * of every byte are left at zero.
 */
uint EtcPackColorBits(in float3 FloatColor)
{
	uint3 ColorScaled = uint3(round(saturate(FloatColor) * 255.f));
	uint ColorPacked = (ColorScaled.r & 0x000000f8) | ((ColorScaled.g & 0x000000f8) << 8u) | ((ColorScaled.b & 0x000000f8) << 16u);
	return ColorPacked;
}

/** Reverses the byte order of a 32-bit value. */
uint EtcSwapEndian32(in uint x)
{
	return
		((x & 0x0000ff) << 24u) |
		((x & 0x00ff00) <<  8u) |
		((x & 0xff0000) >>  8u) |
		(x >> 24u);
}

// Get a single scale factor to use for a YCoCg color block
// This increases precision at the expense of potential blending artifacts across blocks
// Returns 4, 2 or 1 depending on how far the CoCg extents are from the 128/255 midpoint.
float EtcGetYCoCgScale(float2 MinCoCg, float2 MaxCoCg)
{
	MinCoCg = abs(MinCoCg - 128.f / 255.f);
	MaxCoCg = abs(MaxCoCg - 128.f / 255.f);

	float MaxComponent = max(max(MinCoCg.x, MinCoCg.y), max(MaxCoCg.x, MaxCoCg.y));

	return (MaxComponent < 32.f / 255.f) ? 4.f : (MaxComponent < 64.f / 255.f) ? 2.f : 1.f;
}

/** Scales CoCg in place around the 128/255 midpoint. */
void EtcApplyYCoCgScale(inout float2 CoCg, float Scale)
{
	CoCg = (CoCg - 128.f / 255.f) * Scale + 128.f / 255.f;
}

/**
 * Picks a 2-bit selector for each of the 16 pixels by comparing the pixel luminance with the
 * luminance of BaseColor offset by the two distances of the chosen table.
 *
 * @param Block		16 pixels, indexed as 4 * Y + X
 * @param BaseColor	color the modifiers are applied to
 * @param TableIdx	index into EtcParameters.RGB_DISTANCE_TABLES
 * @return			selector LSBs in bits 0-15 and selector MSBs in bits 16-31, each at bit X * 4 + Y
 */
uint EtcGetBlockWeights(in float3 Block[16], in float3 BaseColor, in uint TableIdx)
{
	// weights
	// 3 - big negative distance
	// 2 - small negative distance
	// 1 - big positive distance
	// 0 - small positive distance
	uint BlockWeights = 0;

	// .z is used as the small distance and .w as the big one
	float Distance0 = EtcParameters.RGB_DISTANCE_TABLES[TableIdx].z / 255.f;
	float Distance1 = EtcParameters.RGB_DISTANCE_TABLES[TableIdx].w / 255.f;

	float BaseLum = EtcLuminance(BaseColor);

	float Lum1 = min(1.f, BaseLum + Distance1);
	float Lum0 = BaseLum + Distance0;
	float Lum3 = max(0.f, BaseLum - Distance1);
	float Lum2 = BaseLum - Distance0;

	for (uint Y = 0; Y < 4; ++Y)
	{
		for (uint X = 0; X < 4; ++X)
		{
			float3 Color = Block[4u * Y + X];
			float ColorLum = EtcLuminance(Color);

			uint EncIndex = 0u;
			if (ColorLum < BaseLum || ColorLum == 0.f) // make sure fully Black block is encoded correctly
			{
				EncIndex = (ColorLum - Lum3) < (Lum2 - ColorLum) ? 3u : 2u;
			}
			else
			{
				EncIndex = (Lum1 - ColorLum) < (ColorLum - Lum0) ? 1u : 0u;
			}

			// selectors are stored transposed relative to the 4 * Y + X layout of Block
			uint IndexInBlock = X * 4u + Y;
			BlockWeights |= ((EncIndex & 1u) << IndexInBlock) | ((EncIndex >> 1u) << (16u + IndexInBlock));
		}
	}

	return BlockWeights;
}

/**
 * Maps a luminance half-range (in [0,1] units) to one of the 8 RGB distance tables.
 * Larger ranges select higher table indices.
 */
uint EtcSelectRGBTableIndex(float LuminanceR)
{
	// guess a table using sub-block luminance range
	float Range = LuminanceR*255.f + 1.f;
	if (Range < 8.0)
	{
		return 0u;
	}
	else if (Range < 17.0)
	{
		return 1u;
	}
	else if (Range < 29.0)
	{
		return 2u;
	}
	else if (Range < 42.0)
	{
		return 3u;
	}
	else if (Range < 60.0)
	{
		return 4u;
	}
	else if (Range < 80.0)
	{
		return 5u;
	}
	else if (Range < 106.0)
	{
		return 6u;
	}
	return 7u;
}

/**
 * Encodes a 4x4 color block using a single base color (the block average) and a single
 * distance table shared by both sub-blocks.
 *
 * @param Block	16 input pixels
 * @param bSRGB	when true the base color is converted with LinearToSrgb before packing
 * @param bCoCg	when true .xy is treated as CoCg: it is rescaled around the midpoint and the
 *				scale (minus one) is stored starting at bit 19 of the color bits
 * @return		x = mode/table/color word, y = byte-swapped pixel selector word
 */
uint2 CompressBlock_ETC1S(in float3 Block[16], bool bSRGB, bool bCoCg)
{
	// ETC1S: differential with color delta=0
	uint FlipBit = 0u;
	uint BlockModeBits = (BLOCK_MODE_DIFFERENTIAL << 25u);

	float3 BaseColorMin = 1;
	float3 BaseColorMax = 0;
	float3 BaseColorAvg = 0;
	for (int i = 0; i < 16; ++i)
	{
		BaseColorMin = min(BaseColorMin, Block[i]);
		BaseColorMax = max(BaseColorMax, Block[i]);
		BaseColorAvg = BaseColorAvg + Block[i];
	}
	BaseColorAvg = BaseColorAvg / 16.f;

	float3 BaseColor = (bSRGB ? LinearToSrgb(BaseColorAvg) : BaseColorAvg);

	uint ColorBits = 0u;
	if (bCoCg)
	{
		float CoCgScale = EtcGetYCoCgScale(BaseColorMin.xy, BaseColorMax.xy);
		EtcApplyYCoCgScale(BaseColor.xy, CoCgScale);
		ColorBits = EtcPackColorBits(BaseColor);
		// scale is 1, 2 or 4, stored as 0, 1 or 3
		ColorBits |= (((uint)CoCgScale - 1) << 19u);
	}
	else
	{
		ColorBits = EtcPackColorBits(BaseColor);
	}

	float LumMin = EtcLuminance(BaseColorMin);
	float LumMax = EtcLuminance(BaseColorMax);
	float LumRange = (LumMax - LumMin) * 0.5f;

	uint BlockTableIdx = EtcSelectRGBTableIndex(LumRange);
	// NOTE(review): selectors are chosen against the unquantized, unconverted average rather
	// than the packed BaseColor - confirm this is intended for the sRGB and CoCg paths.
	uint BlockWeights = EtcGetBlockWeights(Block, BaseColorAvg, BlockTableIdx);

	// Both these values need to be big-endian. We can build ModeBits directly in big-endian layout, but for IndexBits
	// it's too hard, so we'll just swap here.
	uint ModeBits = (BlockTableIdx << 29u) | (BlockTableIdx << 26u) | (BlockModeBits) | (FlipBit << 24u) | ColorBits;
	uint IndexBits = EtcSwapEndian32(BlockWeights);

	return uint2(ModeBits, IndexBits);
}

/** ETC1S encode with the base color converted by LinearToSrgb. */
uint2 CompressBlock_ETC2_SRGB(in float3 Block[16])
{
	const bool bSRGB = true;
	const bool bCoCg = false;
	return CompressBlock_ETC1S(Block, bSRGB, bCoCg);
}

/** ETC1S encode with no color-space conversion. */
uint2 CompressBlock_ETC2_RGB(in float3 Block[16])
{
	const bool bSRGB = false;
	const bool bCoCg = false;
	return CompressBlock_ETC1S(Block, bSRGB, bCoCg);
}

/**
 * Keeps track of the best candidate so far: when EncodedAlpha is strictly closer to SourceAlpha
 * than MinDif, MinDif and SelectedIndex are updated to this candidate.
 */
void SelectAlphaMod(in float SourceAlpha, in float EncodedAlpha, int IndexInTable, inout int SelectedIndex, inout float MinDif)
{
	float Dif = abs(EncodedAlpha - SourceAlpha);
	if (Dif < MinDif)
	{
		MinDif = Dif;
		SelectedIndex = IndexInTable;
	}
}

/**
 * Encodes 16 single-channel values in [0,1] into a 64-bit block made of a base value,
 * a multiplier, a table index and sixteen 3-bit selectors.
 *
 * @param BlockA	16 input values, indexed row-major
 * @return			x = byte-swapped word holding base value, multiplier, table index and the top
 *					16 selector bits, y = byte-swapped word holding the low 32 selector bits
 */
uint2 CompressBlock_ETC2_Alpha(in float BlockA[16])
{
	float MinAlpha = 1.f;
	float MaxAlpha = 0.f;
	for (int k = 0; k < 16; ++k)
	{
		float A = BlockA[k];
		MinAlpha = min(A, MinAlpha);
		MaxAlpha = max(A, MaxAlpha);
	}

	MinAlpha = round(MinAlpha*255.f);
	MaxAlpha = round(MaxAlpha*255.f);

	float AlphaRange = MaxAlpha - MinAlpha;
	const float MidRange = 21.f;// an average range in ALPHA_DISTANCE_TABLES
	float Multiplier = clamp(round(AlphaRange/MidRange), 1.f, 15.f);

	int TableIdx = 0;
	float4 TableValueNeg = float4(0,0,0,0);
	float4 TableValuePos = float4(0,0,0,0);

	// iterating through all tables to find a best fit is quite slow
	// instead guess the best table based on alpha range
	const int TablesToTest[5] = {15,11,6,2,0};
	for (int i = 0; i < 5; ++i)
	{
		TableIdx = TablesToTest[i];
		TableValuePos = EtcParameters.ALPHA_DISTANCE_TABLES[TableIdx];

		float TableRange = (TableValuePos.w*2 + 1)*Multiplier;
		float Dif = TableRange - AlphaRange;
		if (Dif > 0.f)
		{
			break;
		}
	}
	// negative modifiers are derived from the positive ones
	TableValueNeg = -(TableValuePos + float4(1,1,1,1));

	TableValuePos*=Multiplier;
	TableValueNeg*=Multiplier;

	// make sure an exact value of MinAlpha can always be decoded from a BaseValue
	float BaseValue = clamp(round(-TableValueNeg.w + MinAlpha), 0.f, 255.f);

	// The decoder clamps base + modifier * multiplier to [0,255], so compare against the clamped
	// values. Without this a block whose BaseValue saturates at 255 (e.g. a fully opaque block)
	// only sees positive candidates above 255 and falls back to a negative modifier.
	TableValueNeg = clamp(TableValueNeg + BaseValue.xxxx, 0.f, 255.f);
	TableValuePos = clamp(TableValuePos + BaseValue.xxxx, 0.f, 255.f);

	// Candidates are ordered Neg.w <= Neg.z <= Neg.y <= Neg.x <= Pos.x <= ... <= Pos.w (assumes the
	// table entries ascend from .x to .w, as the range estimate above already does), so the nearest
	// candidate lies on the negative side exactly when alpha is below the midpoint of the two
	// innermost values.
	const float SplitAlpha = 0.5f * (TableValueNeg.x + TableValuePos.x);

	uint2 BlockWeights = 0;

	for (int PixelIndex = 0; PixelIndex < 16; ++PixelIndex)
	{
		float Alpha = BlockA[PixelIndex]*255.f;
		int SelectedIndex = 0;
		float MinDif = 100000.f;

		if (Alpha < SplitAlpha)
		{
			SelectAlphaMod(Alpha, TableValueNeg.x, 0, SelectedIndex, MinDif);
			SelectAlphaMod(Alpha, TableValueNeg.y, 1, SelectedIndex, MinDif);
			SelectAlphaMod(Alpha, TableValueNeg.z, 2, SelectedIndex, MinDif);
			SelectAlphaMod(Alpha, TableValueNeg.w, 3, SelectedIndex, MinDif);
		}
		else
		{
			SelectAlphaMod(Alpha, TableValuePos.x, 4, SelectedIndex, MinDif);
			SelectAlphaMod(Alpha, TableValuePos.y, 5, SelectedIndex, MinDif);
			SelectAlphaMod(Alpha, TableValuePos.z, 6, SelectedIndex, MinDif);
			SelectAlphaMod(Alpha, TableValuePos.w, 7, SelectedIndex, MinDif);
		}

		// ETC uses column-major indexing for the pixels in a block...
		const uint TransposedIndex = ((uint)PixelIndex >> 2u) | (((uint)PixelIndex & 3u) << 2u);
		const uint StartBit = (15u - TransposedIndex) * 3u;
		const uint Selector = (uint)SelectedIndex;

		// StartBit is a multiple of 3 in [0,45]; only 30 straddles the two 32-bit words.
		// Explicit branches keep every shift amount below 32.
		if (StartBit < 30u)
		{
			BlockWeights.x |= Selector << StartBit;
		}
		else if (StartBit == 30u)
		{
			BlockWeights.x |= Selector << 30u;	// low two selector bits land in bits 30-31
			BlockWeights.y |= Selector >> 2u;	// top selector bit lands in bit 0 of the high word
		}
		else
		{
			BlockWeights.y |= Selector << (StartBit - 32u);
		}
	}

	int MultiplierInt = Multiplier;
	int BaseValueInt = BaseValue;

	uint2 AlphaBits;
	AlphaBits.x = EtcSwapEndian32(BlockWeights.y | (TableIdx << 16) | (MultiplierInt << 20) | (BaseValueInt << 24));
	AlphaBits.y = EtcSwapEndian32(BlockWeights.x);

	return AlphaBits;
}

/** Encodes color without sRGB conversion plus alpha; returns (alpha block, color block). */
uint4 CompressBlock_ETC2_RGBA(in float3 BlockRGB[16], in float BlockA[16])
{
	uint2 CompressedRGB = CompressBlock_ETC2_RGB(BlockRGB);
	uint2 CompressedAlpha = CompressBlock_ETC2_Alpha(BlockA);
	return uint4(CompressedAlpha, CompressedRGB);
}

/** Encodes color with sRGB conversion plus alpha; returns (alpha block, color block). */
uint4 CompressBlock_ETC2_SRGBA(in float3 BlockRGB[16], in float BlockA[16])
{
	uint2 CompressedRGB = CompressBlock_ETC2_SRGB(BlockRGB);
	uint2 CompressedAlpha = CompressBlock_ETC2_Alpha(BlockA);
	return uint4(CompressedAlpha, CompressedRGB);
}

/**
 * Converts the block with RGB2YCoCg, then encodes (Co, Cg, 0) through the color encoder
 * and Y through the alpha encoder; returns (Y block, CoCg block).
 */
uint4 CompressBlock_ETC2_YCoCg(in float3 BlockRGB[16])
{
	float BlockY[16];
	for (int i = 0; i < 16; ++i)
	{
		float3 YCoCg = RGB2YCoCg(BlockRGB[i]);
		BlockRGB[i] = float3(YCoCg.yz, 0.0);
		BlockY[i] = YCoCg.x;
	}

	const bool bSRGB = false;
	const bool bCoCg = true;
	uint2 CompressedRGB = CompressBlock_ETC1S(BlockRGB, bSRGB, bCoCg);
	uint2 CompressedAlpha = CompressBlock_ETC2_Alpha(BlockY);
	return uint4(CompressedAlpha, CompressedRGB);
}

/** Encodes two independent channels through the alpha encoder; returns (U block, V block). */
uint4 CompressBlock_ETC2_RG(in float BlockU[16], in float BlockV[16])
{
	uint2 R = CompressBlock_ETC2_Alpha(BlockU);
	uint2 G = CompressBlock_ETC2_Alpha(BlockV);
	return uint4(R, G);
}