Files
UnrealEngine/Engine/Shaders/Private/ASTCCompressionCommon.ush
2025-05-18 13:04:45 +08:00

745 lines
23 KiB
HLSL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
ASTCCompressionCommon.ush:
Helpers for compute shader ASTC texture compression
=============================================================================*/
#pragma once
#include "BlockCompressionCommon.ush"
#include "BCCompressionCommon.ush"
#include "GammaCorrectionCommon.ush"
#include "IntegerSequenceEncoding.ush"
// Returns the largest absolute component of a 3-component vector (its max / infinity norm).
float MaxNorm3(float3 v)
{
	const float3 Magnitude = abs(v);
	const float LargestXY = max(Magnitude.x, Magnitude.y);
	return max(LargestXY, Magnitude.z);
}
// Returns the largest absolute component of a 4-component vector (its max / infinity norm).
float MaxNorm4(float4 v)
{
	const float4 Magnitude = abs(v);
	const float LargestXY = max(Magnitude.x, Magnitude.y);
	const float LargestXYZ = max(LargestXY, Magnitude.z);
	return max(LargestXYZ, Magnitude.w);
}
// Refines the RGB endpoints of a 4x4 block by fitting a principal axis through the texel colours.
//
// The mean and the (unnormalised) covariance of the 16 RGB values are accumulated, the dominant
// direction is approximated with three power iterations, and the endpoints become the two extreme
// projections of the texels onto that axis, clamped to [0, 1].
//
// BlockRGBA - the 16 texels of the block; only .rgb is read.
// BlockMin  - in/out; .rgb receives the minimum-projection end of the axis, .a is left untouched.
// BlockMax  - in/out; .rgb receives the maximum-projection end of the axis, .a is left untouched.
//
// The sign of the axis is arbitrary, so BlockMin is not guaranteed to be component-wise below BlockMax.
void PCAOptimizeEndpoints3f(in float4 BlockRGBA[16], inout float4 BlockMin, inout float4 BlockMax)
{
	// Accumulate the colour sum together with the raw second moments.
	// SqSum holds (xx, yy, zz), CrossSum holds (xy, xz, yz).
	float3 Mean = 0.0f;
	float3 SqSum = 0.0f;
	float3 CrossSum = 0.0f;
	for (uint TexelIndex = 0; TexelIndex < 16; TexelIndex++)
	{
		const float3 Color = BlockRGBA[TexelIndex].rgb;
		Mean += Color;
		SqSum += Color * Color;
		CrossSum += Color.xxy * Color.yzz;
	}
	Mean /= 16.0f;

	// Turn the raw moments into central moments: sum(c c^T) - N * mean mean^T.
	SqSum -= 16.0f * Mean * Mean;
	CrossSum -= 16.0f * Mean.xxy * Mean.yzz;

	// Rows (== columns, the matrix is symmetric) of the covariance matrix.
	const float3 CovCol0 = float3(SqSum.x, CrossSum.x, CrossSum.y);
	const float3 CovCol1 = float3(CrossSum.x, SqSum.y, CrossSum.z);
	const float3 CovCol2 = float3(CrossSum.y, CrossSum.z, SqSum.z);

	// Seed the iteration with whichever row has the largest max-norm.
	float3 CurrentAxis = CovCol0;
	float CurrentNorm = MaxNorm3(CovCol0);

	const float Norm1 = MaxNorm3(CovCol1);
	if (Norm1 > CurrentNorm)
	{
		CurrentAxis = CovCol1;
		CurrentNorm = Norm1;
	}

	const float Norm2 = MaxNorm3(CovCol2);
	if (Norm2 > CurrentNorm)
	{
		CurrentAxis = CovCol2;
		CurrentNorm = Norm2;
	}

	if (CurrentNorm == 0.0f)
	{
		// All rows are zero (no colour variation): fall back to an arbitrary unit axis.
		CurrentAxis = float3(1.0f, 0.0f, 0.0f);
	}
	else
	{
		CurrentAxis = normalize(CurrentAxis);
		// Power iteration: repeatedly multiply by the covariance matrix and renormalise.
		for (uint Iteration = 0; Iteration < 3; Iteration++)
		{
			CurrentAxis = normalize(CovCol0 * CurrentAxis.x + CovCol1 * CurrentAxis.y + CovCol2 * CurrentAxis.z);
		}
	}

	// Extent of the mean-centred texels along the axis.
	float MinProj = 999999.0f;
	float MaxProj = -999999.0f;
	for (uint ProjIndex = 0; ProjIndex < 16; ProjIndex++)
	{
		const float Projection = dot(BlockRGBA[ProjIndex].rgb - Mean, CurrentAxis);
		MinProj = min(MinProj, Projection);
		MaxProj = max(MaxProj, Projection);
	}

	// Walk from the mean to both extremes and clamp the result to [0, 1].
	BlockMin.rgb = saturate(Mean + CurrentAxis * MinProj);
	BlockMax.rgb = saturate(Mean + CurrentAxis * MaxProj);
}
// Refines the RGB endpoints of a 6x6 block by fitting a principal axis through the texel colours.
//
// Same procedure as the 4x4 RGB variant, over 36 texels: accumulate mean and (unnormalised)
// covariance, approximate the dominant direction with three power iterations, then use the two
// extreme projections onto that axis (clamped to [0, 1]) as endpoints.
//
// BlockRGBA - the 36 texels of the block; only .rgb is read.
// BlockMin  - in/out; overwritten with the minimum-projection end of the axis.
// BlockMax  - in/out; overwritten with the maximum-projection end of the axis.
//
// The sign of the axis is arbitrary, so BlockMin is not guaranteed to be component-wise below BlockMax.
void PCAOptimizeEndpoints3f_6x6(in float4 BlockRGBA[36], inout float3 BlockMin, inout float3 BlockMax)
{
	// Accumulate the colour sum together with the raw second moments.
	// SqSum holds (xx, yy, zz), CrossSum holds (xy, xz, yz).
	float3 Mean = 0.0f;
	float3 SqSum = 0.0f;
	float3 CrossSum = 0.0f;
	for (uint TexelIndex = 0; TexelIndex < 36; TexelIndex++)
	{
		const float3 Color = BlockRGBA[TexelIndex].rgb;
		Mean += Color;
		SqSum += Color * Color;
		CrossSum += Color.xxy * Color.yzz;
	}
	Mean /= 36.0f;

	// Turn the raw moments into central moments: sum(c c^T) - N * mean mean^T.
	SqSum -= 36.0f * Mean * Mean;
	CrossSum -= 36.0f * Mean.xxy * Mean.yzz;

	// Rows (== columns, the matrix is symmetric) of the covariance matrix.
	const float3 CovCol0 = float3(SqSum.x, CrossSum.x, CrossSum.y);
	const float3 CovCol1 = float3(CrossSum.x, SqSum.y, CrossSum.z);
	const float3 CovCol2 = float3(CrossSum.y, CrossSum.z, SqSum.z);

	// Seed the iteration with whichever row has the largest max-norm.
	float3 CurrentAxis = CovCol0;
	float CurrentNorm = MaxNorm3(CovCol0);

	const float Norm1 = MaxNorm3(CovCol1);
	if (Norm1 > CurrentNorm)
	{
		CurrentAxis = CovCol1;
		CurrentNorm = Norm1;
	}

	const float Norm2 = MaxNorm3(CovCol2);
	if (Norm2 > CurrentNorm)
	{
		CurrentAxis = CovCol2;
		CurrentNorm = Norm2;
	}

	if (CurrentNorm == 0.0f)
	{
		// All rows are zero (no colour variation): fall back to an arbitrary unit axis.
		CurrentAxis = float3(1.0f, 0.0f, 0.0f);
	}
	else
	{
		CurrentAxis = normalize(CurrentAxis);
		// Power iteration: repeatedly multiply by the covariance matrix and renormalise.
		for (uint Iteration = 0; Iteration < 3; Iteration++)
		{
			CurrentAxis = normalize(CovCol0 * CurrentAxis.x + CovCol1 * CurrentAxis.y + CovCol2 * CurrentAxis.z);
		}
	}

	// Extent of the mean-centred texels along the axis.
	float MinProj = 999999.0f;
	float MaxProj = -999999.0f;
	for (uint ProjIndex = 0; ProjIndex < 36; ProjIndex++)
	{
		const float Projection = dot(BlockRGBA[ProjIndex].rgb - Mean, CurrentAxis);
		MinProj = min(MinProj, Projection);
		MaxProj = max(MaxProj, Projection);
	}

	// Walk from the mean to both extremes and clamp the result to [0, 1].
	BlockMin = saturate(Mean + CurrentAxis * MinProj);
	BlockMax = saturate(Mean + CurrentAxis * MaxProj);
}
// Refines the RGBA endpoints of a 4x4 block by fitting a principal axis through the texels in 4D.
//
// The mean and the (unnormalised) 4x4 covariance of the 16 RGBA values are accumulated, the
// dominant direction is approximated with five power iterations, and the endpoints become the two
// extreme projections of the texels onto that axis, clamped to [0, 1].
//
// BlockRGBA - the 16 texels of the block; all four channels are used.
// BlockMin  - in/out; overwritten with the minimum-projection end of the axis.
// BlockMax  - in/out; overwritten with the maximum-projection end of the axis.
//
// The sign of the axis is arbitrary, so BlockMin is not guaranteed to be component-wise below BlockMax.
void PCAOptimizeEndpoints4f(in float4 BlockRGBA[16], inout float4 BlockMin, inout float4 BlockMax)
{
	// Accumulate the texel sum together with the raw second moments of the upper triangle.
	float4 Mean = float4(0.0, 0.0, 0.0, 0.0);
	float4 SqSum = float4(0.0, 0.0, 0.0, 0.0);	// (xx, yy, zz, ww)
	float3 CrossX = float3(0.0, 0.0, 0.0);		// (xy, xz, xw)
	float2 CrossY = float2(0.0, 0.0);			// (yz, yw)
	float CrossZ = 0.0;							// zw
	for (uint TexelIndex = 0; TexelIndex < 16; TexelIndex++)
	{
		const float4 Color = BlockRGBA[TexelIndex];
		Mean += Color;
		SqSum += Color * Color;
		CrossX += Color.x * Color.yzw;
		CrossY += Color.y * Color.zw;
		CrossZ += Color.z * Color.w;
	}
	Mean /= 16.0;

	// Turn the raw moments into central moments: sum(c c^T) - N * mean mean^T.
	SqSum -= 16.0 * Mean * Mean;
	CrossX -= 16.0 * Mean.x * Mean.yzw;
	CrossY -= 16.0 * Mean.y * Mean.zw;
	CrossZ -= 16.0 * Mean.z * Mean.w;

	// Rows (== columns) of the symmetric 4x4 covariance matrix.
	const float4 CovCol0 = float4(SqSum.x, CrossX.x, CrossX.y, CrossX.z);
	const float4 CovCol1 = float4(CrossX.x, SqSum.y, CrossY.x, CrossY.y);
	const float4 CovCol2 = float4(CrossX.y, CrossY.x, SqSum.z, CrossZ);
	const float4 CovCol3 = float4(CrossX.z, CrossY.y, CrossZ, SqSum.w);

	// Seed the iteration with whichever row has the largest max-norm.
	float4 CurrentAxis = CovCol0;
	float CurrentNorm = MaxNorm4(CovCol0);

	const float Norm1 = MaxNorm4(CovCol1);
	if (Norm1 > CurrentNorm)
	{
		CurrentAxis = CovCol1;
		CurrentNorm = Norm1;
	}

	const float Norm2 = MaxNorm4(CovCol2);
	if (Norm2 > CurrentNorm)
	{
		CurrentAxis = CovCol2;
		CurrentNorm = Norm2;
	}

	const float Norm3 = MaxNorm4(CovCol3);
	if (Norm3 > CurrentNorm)
	{
		CurrentAxis = CovCol3;
		CurrentNorm = Norm3;
	}

	if (CurrentNorm > 0.0)
	{
		CurrentAxis = normalize(CurrentAxis);
		// Power iteration: repeatedly multiply by the covariance matrix and renormalise.
		for (uint Iteration = 0; Iteration < 5; Iteration++)
		{
			CurrentAxis = normalize(CovCol0 * CurrentAxis.x + CovCol1 * CurrentAxis.y + CovCol2 * CurrentAxis.z + CovCol3 * CurrentAxis.w);
		}
	}
	else
	{
		// No usable seed row (no variation): fall back to an arbitrary unit axis.
		CurrentAxis = float4(1.0, 0.0, 0.0, 0.0);
	}

	// Extent of the mean-centred texels along the axis.
	float LowestProj = 1e10;
	float HighestProj = -1e10;
	for (uint ProjIndex = 0; ProjIndex < 16; ProjIndex++)
	{
		const float Projection = dot(BlockRGBA[ProjIndex] - Mean, CurrentAxis);
		LowestProj = min(LowestProj, Projection);
		HighestProj = max(HighestProj, Projection);
	}

	// Walk from the mean to both extremes and clamp the result to [0, 1].
	BlockMin = saturate(Mean + CurrentAxis * LowestProj);
	BlockMax = saturate(Mean + CurrentAxis * HighestProj);
}
// Least-squares refinement of two RGBA endpoints for a 4x4 block.
//
// Each texel is first assigned the quantized interpolation weight (Beta, in steps of
// 1 / WeightRange) it would receive between the current endpoints. The endpoints that minimise the
// squared reconstruction error for those fixed weights are then obtained by solving the 2x2 normal
// equations. The endpoints are left unchanged when the system is (near) singular.
//
// Texels      - the 16 texels of the block.
// WeightRange - highest weight index (number of weight levels minus one).
// BlockMin    - in/out endpoint associated with weight 0.
// BlockMax    - in/out endpoint associated with weight WeightRange.
void LLSOptimizeEndpoints4f16(float4 Texels[16], uint WeightRange, inout float4 BlockMin, inout float4 BlockMax)
{
	// With no weight steps Beta would be 0 / 0; there is nothing to optimise, keep the endpoints.
	if (WeightRange == 0)
	{
		return;
	}

	float4 BlockVector = BlockMax - BlockMin;
	float EndPoint0Pos = dot(BlockMin, BlockVector);
	float EndPoint1Pos = dot(BlockMax, BlockVector);
	// Equals dot(BlockVector, BlockVector); it is zero when both endpoints coincide.
	float EndPointSpan = EndPoint1Pos - EndPoint0Pos;

	float4 TexelSum = 0.0f;
	float4 BetaTexelSum = 0.0f;
	float BetaSum = 0.0f;
	float BetaSqSum = 0.0f;
	for (uint i = 0; i < 16; ++i)
	{
		float Pos = dot(Texels[i], BlockVector);
		// Coincident endpoints would evaluate 0 / 0 here. Relying on saturate(NaN) == 0 is only safe
		// under D3D NaN rules, so select weight 0 explicitly instead.
		float NormalizedPos = (EndPointSpan != 0.0f) ? saturate((Pos - EndPoint0Pos) / EndPointSpan) : 0.0f;
		uint Index = (uint) (NormalizedPos * WeightRange + 0.5f);
		float Beta = Index / (float) WeightRange;
		TexelSum += Texels[i];
		BetaTexelSum += Texels[i] * Beta;
		BetaSum += Beta;
		BetaSqSum += Beta * Beta;
	}

	// With Alpha = 1 - Beta: sum(Alpha * t), sum(Alpha^2) and sum(Alpha * Beta).
	float4 AlphaTexelSum = TexelSum - BetaTexelSum;
	float AlphaSqSum = 16.0f - 2 * BetaSum + BetaSqSum;
	float AlphaBetaSum = BetaSum - BetaSqSum;

	// Determinant of the 2x2 normal-equation matrix; skip the update when it is too small to invert.
	float Det = AlphaSqSum * BetaSqSum - AlphaBetaSum * AlphaBetaSum;
	if (abs(Det) > 0.1f)
	{
		float RcpDet = 1.0f / Det;
		BlockMin = saturate((AlphaTexelSum * BetaSqSum - BetaTexelSum * AlphaBetaSum) * RcpDet);
		BlockMax = saturate((BetaTexelSum * AlphaSqSum - AlphaTexelSum * AlphaBetaSum) * RcpDet);
	}
}
// Least-squares refinement of two RGB endpoints for a 4x4 block.
//
// Each texel is first assigned the quantized interpolation weight (Beta, in steps of
// 1 / WeightRange) it would receive between the current endpoints. The endpoints that minimise the
// squared reconstruction error for those fixed weights are then obtained by solving the 2x2 normal
// equations. The endpoints are left unchanged when the system is (near) singular.
//
// Texels      - the 16 texels of the block; only .rgb is read.
// WeightRange - highest weight index (number of weight levels minus one).
// BlockMin    - in/out endpoint associated with weight 0.
// BlockMax    - in/out endpoint associated with weight WeightRange.
void LLSOptimizeEndpoints3f16(float4 Texels[16], uint WeightRange, inout float3 BlockMin, inout float3 BlockMax)
{
	// With no weight steps Beta would be 0 / 0; there is nothing to optimise, keep the endpoints.
	if (WeightRange == 0)
	{
		return;
	}

	float3 BlockVector = BlockMax - BlockMin;
	float EndPoint0Pos = dot(BlockMin, BlockVector);
	float EndPoint1Pos = dot(BlockMax, BlockVector);
	// Equals dot(BlockVector, BlockVector); it is zero when both endpoints coincide.
	float EndPointSpan = EndPoint1Pos - EndPoint0Pos;

	float3 TexelSum = float3(0.0f, 0.0f, 0.0f);
	float3 BetaTexelSum = float3(0.0f, 0.0f, 0.0f);
	float BetaSum = 0.0f;
	float BetaSqSum = 0.0f;
	for (uint i = 0; i < 16; ++i)
	{
		float Pos = dot(Texels[i].rgb, BlockVector);
		// Coincident endpoints would evaluate 0 / 0 here. Relying on saturate(NaN) == 0 is only safe
		// under D3D NaN rules, so select weight 0 explicitly instead.
		float NormalizedPos = (EndPointSpan != 0.0f) ? saturate((Pos - EndPoint0Pos) / EndPointSpan) : 0.0f;
		uint Index = (uint)(NormalizedPos * WeightRange + 0.5f);
		float Beta = Index / (float)WeightRange;
		TexelSum += Texels[i].rgb;
		BetaTexelSum += Texels[i].rgb * Beta;
		BetaSum += Beta;
		BetaSqSum += Beta * Beta;
	}

	// With Alpha = 1 - Beta: sum(Alpha * t), sum(Alpha^2) and sum(Alpha * Beta).
	float3 AlphaTexelSum = TexelSum - BetaTexelSum;
	float AlphaSqSum = 16.0f - 2.0f * BetaSum + BetaSqSum;
	float AlphaBetaSum = BetaSum - BetaSqSum;

	// Determinant of the 2x2 normal-equation matrix; skip the update when it is too small to invert.
	float Det = AlphaSqSum * BetaSqSum - AlphaBetaSum * AlphaBetaSum;
	if (abs(Det) > 0.1f)
	{
		float RcpDet = 1.0f / Det;
		BlockMin = saturate((AlphaTexelSum * BetaSqSum - BetaTexelSum * AlphaBetaSum) * RcpDet);
		BlockMax = saturate((BetaTexelSum * AlphaSqSum - AlphaTexelSum * AlphaBetaSum) * RcpDet);
	}
}
// https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html See 23.10 Block Mode for more details
//
// Assembles the low 11 bits of an ASTC block (the block mode) for a square weight grid:
//   bits 0-1 : upper two bits of the 3-bit weight range code
//   bit  4   : lowest bit of the weight range code
//   bits 5-6 : (BlockSize - 2) & 3
//   bits 7-8 : (BlockSize - 4) & 3
//   bit  9   : precision flag, set for WeightQuantMethod >= 6
//   bit  10  : dual-plane flag, always 0 here
// Both size fields are masked to two bits, so BlockSize is only represented faithfully for 4 and 5.
uint BuildBlockMode(uint WeightQuantMethod, uint BlockSize)
{
	const uint HeightField = (BlockSize - 2) & 0x3;
	const uint WidthField = (BlockSize - 4) & 0x3;
	const uint PrecisionBit = (WeightQuantMethod >= 6) ? 1 : 0;
	const uint DualPlaneBit = 0;
	// Quant methods 0..5 and 6..11 share the range codes 2..7; the precision bit tells them apart.
	const uint RangeCode = (WeightQuantMethod % 6) + 2;

	return ((RangeCode >> 1) & 0x3)
		| ((RangeCode & 0x1) << 4)
		| (HeightField << 5)
		| (WidthField << 7)
		| (PrecisionBit << 9)
		| (DualPlaneBit << 10);
}
// Computes, quantizes and packs the 16 per-texel interpolation weights of a 4x4 RGB block.
//
// Every texel is projected onto the line BlockMin -> BlockMax (RGB only), the normalized position
// is rounded to one of WeightRange + 1 levels, remapped through AstcParameters.ScrambleTable and
// handed to EncodeWeightsQuant16. The packed words are bit-reversed into BlockBuffer.w/.z/.y
// (overwriting them), i.e. the weight stream ends up mirrored at the top of the 128-bit block.
//
// BlockRGBA         - the 16 texels of the block; only .rgb is read.
// WeightRange       - highest weight level (number of levels minus one).
// WeightQuantMethod - quantization method id; selects the ScrambleTable row and the packing.
// BlockMin/BlockMax - endpoints the weights interpolate between; only .rgb is read.
// BlockBuffer       - in/out block; .y, .z and .w are overwritten, .x is not touched.
void EncodeWeights3f(in float4 BlockRGBA[16], uint WeightRange, uint WeightQuantMethod, float4 BlockMin, float4 BlockMax, inout uint4 BlockBuffer)
{
	float3 BlockVector = BlockMax.rgb - BlockMin.rgb;
	float EndPoint0Pos = dot(BlockMin.rgb, BlockVector);
	float EndPoint1Pos = dot(BlockMax.rgb, BlockVector);
	// Equals dot(BlockVector, BlockVector); it is zero when both endpoints coincide.
	float EndPointSpan = EndPoint1Pos - EndPoint0Pos;
	uint WeightedBlocks[16];
	// Quantize the weights using the endpoints and the Weight Range
	for (uint ColorIndex = 0; ColorIndex < 16; ++ColorIndex)
	{
		float Pos = dot(BlockRGBA[ColorIndex].rgb, BlockVector);
		// Coincident endpoints would evaluate 0 / 0 here. Relying on saturate(NaN) == 0 is only safe
		// under D3D NaN rules, so select weight 0 explicitly instead.
		float NormalizedPos = (EndPointSpan != 0.0f) ? saturate((Pos - EndPoint0Pos) / EndPointSpan) : 0.0f;
		// The clamp keeps the table lookup in range even if a non-finite value slipped through.
		uint QuantizedWeight = min((uint) (NormalizedPos * WeightRange + 0.5f), WeightRange);
		uint WeightIndex = WeightQuantMethod * WEIGHT_QUANTIZE_NUM + QuantizedWeight;
		WeightedBlocks[ColorIndex] = AstcParameters.ScrambleTable[WeightIndex];
	}
	uint4 QuantWeights = uint4(0, 0, 0, 0);
	uint QuantOffset = 0;
	EncodeWeightsQuant16(WeightQuantMethod, WeightedBlocks, QuantWeights, QuantOffset);
	BlockBuffer.w = reversebits(QuantWeights.x);
	BlockBuffer.z = reversebits(QuantWeights.y);
	BlockBuffer.y = reversebits(QuantWeights.z);
}
// Computes, quantizes and packs the 16 per-texel interpolation weights of a 4x4 RGBA block.
//
// Every texel is projected onto the line BlockMin -> BlockMax in 4D, the normalized position is
// rounded to one of WeightRange + 1 levels, remapped through AstcParameters.ScrambleTable and
// handed to EncodeWeightsQuant16. The packed words are bit-reversed into BlockBuffer.w/.z/.y
// (overwriting them), i.e. the weight stream ends up mirrored at the top of the 128-bit block.
//
// BlockRGBA         - the 16 texels of the block.
// WeightRange       - highest weight level (number of levels minus one).
// WeightQuantMethod - quantization method id; selects the ScrambleTable row and the packing.
// BlockMin/BlockMax - endpoints the weights interpolate between.
// BlockBuffer       - in/out block; .y, .z and .w are overwritten, .x is not touched.
void EncodeWeights4f(in float4 BlockRGBA[16], uint WeightRange, uint WeightQuantMethod, float4 BlockMin, float4 BlockMax, inout uint4 BlockBuffer)
{
	float4 BlockVector = BlockMax - BlockMin;
	float EndPoint0Pos = dot(BlockMin, BlockVector);
	float EndPoint1Pos = dot(BlockMax, BlockVector);
	// Equals dot(BlockVector, BlockVector); it is zero when both endpoints coincide.
	float EndPointSpan = EndPoint1Pos - EndPoint0Pos;
	uint WeightedBlocks[16];
	// Quantize the weights using the endpoints and the Weight Range
	for (uint ColorIndex = 0; ColorIndex < 16; ++ColorIndex)
	{
		float Pos = dot(BlockRGBA[ColorIndex], BlockVector);
		// Coincident endpoints would evaluate 0 / 0 here. Relying on saturate(NaN) == 0 is only safe
		// under D3D NaN rules, so select weight 0 explicitly instead.
		float NormalizedPos = (EndPointSpan != 0.0f) ? saturate((Pos - EndPoint0Pos) / EndPointSpan) : 0.0f;
		// The clamp keeps the table lookup in range even if a non-finite value slipped through.
		uint QuantizedWeight = min((uint) (NormalizedPos * WeightRange + 0.5f), WeightRange);
		uint WeightIndex = WeightQuantMethod * WEIGHT_QUANTIZE_NUM + QuantizedWeight;
		WeightedBlocks[ColorIndex] = AstcParameters.ScrambleTable[WeightIndex];
	}
	uint4 QuantWeights = uint4(0, 0, 0, 0);
	uint QuantOffset = 0;
	EncodeWeightsQuant16(WeightQuantMethod, WeightedBlocks, QuantWeights, QuantOffset);
	BlockBuffer.w = reversebits(QuantWeights.x);
	BlockBuffer.z = reversebits(QuantWeights.y);
	BlockBuffer.y = reversebits(QuantWeights.z);
}
// Compresses one 4x4 block of RGBA texels into a single 128-bit ASTC block (one partition, one plane).
//
// Three encodings are produced depending on the content:
//   - all texels identical : RGBA-direct endpoints with QUANT_4 weights that are all left at zero;
//   - minimum alpha == 1   : RGB-direct endpoints (CEM 8) with QUANT_12 weights;
//   - otherwise            : RGBA-direct endpoints (CEM 12) with QUANT_6 weights.
// Endpoints are always written as raw 8-bit values. With ASTC_HIGH_PROFILE defined the bounding-box
// endpoints are replaced by a PCA fit before they are inset and quantized.
//
// BlockRGBA - the 16 texels of the block; the 8-bit quantization assumes values in [0, 1].
// Returns the encoded block.
uint4 Compress_ASTC_RGBA_4x4(in float4 BlockRGBA[16])
{
	// Per-channel bounding box of the block; these are the starting endpoints.
	float4 BlockMin = BlockRGBA[0];
	float4 BlockMax = BlockRGBA[0];
	for (uint Texel = 1; Texel < 16; ++Texel)
	{
		const float4 Color = BlockRGBA[Texel];
		BlockMin = min(BlockMin, Color);
		BlockMax = max(BlockMax, Color);
	}

	const uint BlockSize = 4;
	const uint PartitionCount = 1;
	// Endpoints use 256 levels (plain 8-bit values) in every path below.
	const uint EndpointRange = 256u - 1u;
	// Endpoint data starts after BlockMode (11 bits) + (PartitionCount-1) (2 bits) + CEM (4 bits).
	const uint EndpointStartBit = 11 + 2 + 4;

	// Note: This is just a way to encode a void-extent like block - doesn't actually encode void-extent.
	if (all(BlockMin == BlockMax))
	{
		const uint ColorEndpointMode = 12; // LDR RGBA, direct
		const uint WeightQuantMethod = QUANT_4;

		uint4 SolidBlock = uint4(0, 0, 0, 0);
		SolidBlock.x = BuildBlockMode(WeightQuantMethod, BlockSize) | ((PartitionCount - 1) << 11) | (ColorEndpointMode << 13);

		// First endpoint carries the colour, second endpoint is 0xFF in every channel. The weights
		// stay zero, so only the first endpoint is meant to contribute.
		uint SolidOffset = EndpointStartBit;
		const uint4 SolidColor = uint4(BlockMin * EndpointRange + 0.5f);
		[unroll]
		for (uint Channel = 0; Channel < 4; ++Channel)
		{
			WriteBits(SolidBlock, SolidOffset, SolidColor[Channel], 8);
			WriteBits(SolidBlock, SolidOffset, 0xFF, 8);
		}
		return SolidBlock;
	}

	uint4 BlockBuffer = uint4(0, 0, 0, 0);
	uint BlockOffset = EndpointStartBit;

	// The block is treated as opaque when its smallest alpha is exactly 1.
	const bool NoAlpha = BlockMin.a == 1.0f;
	if (NoAlpha)
	{
		const uint ColorEndpointMode = 8; // LDR RGB, direct
		const uint WeightQuantMethod = QUANT_12;
		const uint WeightRange = 12u - 1u;
		BlockBuffer.x = BuildBlockMode(WeightQuantMethod, BlockSize) | ((PartitionCount - 1) << 11) | (ColorEndpointMode << 13);

#ifdef ASTC_HIGH_PROFILE
		PCAOptimizeEndpoints3f(BlockRGBA, BlockMin, BlockMax);
#endif
		// Least-squares refinement is currently disabled:
		//LLSOptimizeEndpoints3f16(BlockRGBA, WeightRange, BlockMin, BlockMax);

		// Inset the min/max, then snap outwards-in to the 8-bit grid.
		// See https://developer.download.nvidia.com/whitepapers/2007/Real-Time-YCoCg-DXT-Compression/Real-Time%20YCoCg-DXT%20Compression.pdf
		{
			const float3 Inset = (1.0f / ((WeightRange + 1) * 4.f)) * (BlockMax.rgb - BlockMin.rgb);
			BlockMax.rgb = ceil((BlockMax.rgb - Inset) * 255.0f) / 255.0f;
			BlockMin.rgb = floor((BlockMin.rgb + Inset) * 255.0f) / 255.0f;
		}

		// Order the endpoints so that the first one never has the larger RGB sum.
		if (BlockMin.x + BlockMin.y + BlockMin.z > BlockMax.x + BlockMax.y + BlockMax.z)
		{
			const float4 Swapped = BlockMax;
			BlockMax = BlockMin;
			BlockMin = Swapped;
		}

		EncodeWeights3f(BlockRGBA, WeightRange, WeightQuantMethod, BlockMin, BlockMax, BlockBuffer);

		// Endpoint values are interleaved per channel: R0 R1 G0 G1 B0 B1.
		const uint4 Endpoint0 = uint4(BlockMin * EndpointRange + 0.5f);
		const uint4 Endpoint1 = uint4(BlockMax * EndpointRange + 0.5f);
		[unroll]
		for (uint Channel = 0; Channel < 3; ++Channel)
		{
			WriteBits(BlockBuffer, BlockOffset, Endpoint0[Channel], 8);
			WriteBits(BlockBuffer, BlockOffset, Endpoint1[Channel], 8);
		}
	}
	else
	{
		const uint ColorEndpointMode = 12; // LDR RGBA, direct
		const uint WeightQuantMethod = QUANT_6;
		const uint WeightRange = 6u - 1u;
		BlockBuffer.x = BuildBlockMode(WeightQuantMethod, BlockSize) | ((PartitionCount - 1) << 11) | (ColorEndpointMode << 13);

#ifdef ASTC_HIGH_PROFILE
		PCAOptimizeEndpoints4f(BlockRGBA, BlockMin, BlockMax);
#endif
		// Least-squares refinement is currently disabled:
		//LLSOptimizeEndpoints4f16(BlockRGBA, WeightRange, BlockMin, BlockMax);

		// Inset the min/max (RGB only, alpha keeps its full range), then snap to the 8-bit grid.
		// See https://developer.download.nvidia.com/whitepapers/2007/Real-Time-YCoCg-DXT-Compression/Real-Time%20YCoCg-DXT%20Compression.pdf
		{
			const float3 Inset = (1.0f / ((WeightRange + 1) * 4.f)) * (BlockMax.rgb - BlockMin.rgb);
			BlockMax.rgb = ceil((BlockMax.rgb - Inset) * 255.0f) / 255.0f;
			BlockMin.rgb = floor((BlockMin.rgb + Inset) * 255.0f) / 255.0f;
		}

		// Order the endpoints so that the first one never has the larger RGB sum.
		if (BlockMin.x + BlockMin.y + BlockMin.z > BlockMax.x + BlockMax.y + BlockMax.z)
		{
			const float4 Swapped = BlockMax;
			BlockMax = BlockMin;
			BlockMin = Swapped;
		}

		EncodeWeights4f(BlockRGBA, WeightRange, WeightQuantMethod, BlockMin, BlockMax, BlockBuffer);

		// Endpoint values are interleaved per channel: R0 R1 G0 G1 B0 B1 A0 A1.
		const uint4 Endpoint0 = uint4(BlockMin * EndpointRange + 0.5f);
		const uint4 Endpoint1 = uint4(BlockMax * EndpointRange + 0.5f);
		[unroll]
		for (uint Channel = 0; Channel < 4; ++Channel)
		{
			WriteBits(BlockBuffer, BlockOffset, Endpoint0[Channel], 8);
			WriteBits(BlockBuffer, BlockOffset, Endpoint1[Channel], 8);
		}
	}
	return BlockBuffer;
}
// Assembles an ASTC block mode using the layout reserved for weight grids of 6..9 texels per side
// (bit 8 set, bits 0-1 clear). In that layout the ASTC specification places
//   bits 2-3  : upper two bits of the 3-bit weight range code
//   bit  4    : lowest bit of the weight range code
//   bits 5-6  : A = grid width  - 6
//   bits 9-10 : B = grid height - 6
// This layout has no precision or dual-plane bit, so WeightQuantMethod is only distinguished
// modulo 6 (methods 0..5).
//
// WeightQuantMethod - weight quantization method id.
// BlockX            - weight grid width, expected in 6..9.
// BlockY            - weight grid height, expected in 6..9.
// Values outside 6..9 wrap around through the 2-bit mask (e.g. 4 encodes as 8), so callers must
// not use this function for smaller grids.
uint BuildBlockMode2(uint WeightQuantMethod, uint BlockX, uint BlockY)
{
	// Width goes into field A and height into field B; these were previously swapped, which
	// transposed non-square grids (square grids encode identically either way).
	uint WidthField = (BlockX - 6) & 0x3;
	uint HeightField = (BlockY - 6) & 0x3;
	uint Range = (WeightQuantMethod % 6) + 2;
	uint BlockMode = 0x100;
	BlockMode |= (Range & 0x1) << 4;
	BlockMode |= ((Range >> 1) & 0x3) << 2;
	BlockMode |= WidthField << 5;
	BlockMode |= HeightField << 9;
	return BlockMode;
}
// Computes, quantizes and packs the 36 per-texel interpolation weights of a 6x6 RGB block.
//
// Every texel is projected onto the line BlockMin -> BlockMax, the normalized position is rounded
// to one of WeightRange + 1 levels, remapped through AstcParameters.ScrambleTable and handed to
// EncodeWeightsQuant36. The packed words are bit-reversed and OR-ed into BlockBuffer.w/.z/.y, so
// bits already present in those words are preserved.
//
// BlockRGBA         - the 36 texels of the block; only .rgb is read.
// WeightRange       - highest weight level (number of levels minus one).
// WeightQuantMethod - quantization method id; selects the ScrambleTable row and the packing.
// BlockMin/BlockMax - endpoints the weights interpolate between.
// BlockBuffer       - in/out block; .y, .z and .w receive the weight bits, .x is not touched.
void EncodeWeights3f_6x6(in float4 BlockRGBA[36], uint WeightRange, uint WeightQuantMethod, float3 BlockMin, float3 BlockMax, inout uint4 BlockBuffer)
{
	float3 BlockVector = BlockMax - BlockMin;
	float EndPoint0Pos = dot(BlockMin, BlockVector);
	float EndPoint1Pos = dot(BlockMax, BlockVector);
	// Equals dot(BlockVector, BlockVector); it is zero when both endpoints coincide.
	float EndPointSpan = EndPoint1Pos - EndPoint0Pos;
	uint WeightedBlocks[36];
	// Quantize the weights using the endpoints and the Weight Range
	for (uint ColorIndex = 0; ColorIndex < 36; ++ColorIndex)
	{
		float Pos = dot(BlockRGBA[ColorIndex].rgb, BlockVector);
		// Coincident endpoints would evaluate 0 / 0 here. Relying on saturate(NaN) == 0 is only safe
		// under D3D NaN rules, so select weight 0 explicitly instead.
		float NormalizedPos = (EndPointSpan != 0.0f) ? saturate((Pos - EndPoint0Pos) / EndPointSpan) : 0.0f;
		// The clamp keeps the table lookup in range even if a non-finite value slipped through.
		uint QuantizedWeight = min((uint) (NormalizedPos * WeightRange + 0.5f), WeightRange);
		uint WeightIndex = WeightQuantMethod * WEIGHT_QUANTIZE_NUM + QuantizedWeight;
		WeightedBlocks[ColorIndex] = AstcParameters.ScrambleTable[WeightIndex];
	}
	uint4 QuantWeights = uint4(0, 0, 0, 0);
	uint QuantOffset = 0;
	EncodeWeightsQuant36(WeightQuantMethod, WeightedBlocks, QuantWeights, QuantOffset);
	BlockBuffer.w |= reversebits(QuantWeights.x);
	BlockBuffer.z |= reversebits(QuantWeights.y);
	BlockBuffer.y |= reversebits(QuantWeights.z);
}
// Compresses one 6x6 block of RGB texels into a single 128-bit ASTC block (one partition, one plane).
//
// Two encodings are produced depending on the content:
//   - all texels share one RGB value : RGB-direct endpoints stored as raw 8-bit values on top of a
//     4x4 QUANT_4 weight grid whose weights are all left at zero;
//   - otherwise                      : RGB-direct endpoints quantized to 80 levels with a full
//     6x6 QUANT_4 weight grid.
// With ASTC_HIGH_PROFILE defined the bounding-box endpoints are replaced by a PCA fit before they
// are inset and quantized.
//
// BlockRGBA - the 36 texels of the block; only .rgb is read and values are assumed to lie in [0, 1].
// Returns the encoded block.
uint4 Compress_ASTC_RGB_6x6(in float4 BlockRGBA[36])
{
	// Compute initial endpoints
	float3 BlockMin = BlockRGBA[0].rgb;
	float3 BlockMax = BlockRGBA[0].rgb;
	for (uint TexelIndex = 1; TexelIndex < 36; ++TexelIndex)
	{
		BlockMin = min(BlockMin, BlockRGBA[TexelIndex].rgb);
		BlockMax = max(BlockMax, BlockRGBA[TexelIndex].rgb);
	}
	// Note: This is just a way to encode a void-extent like block - doesn't actually encode void-extent.
	if (all(BlockMin == BlockMax))
	{
		uint ColorEndpointMode = 8; // LDR RGB, direct
		uint WeightQuantMethod = QUANT_4;
		uint EndpointRange = 256u - 1u;
		uint PartitionCount = 1;
		// Use a 4x4 weight grid (legal inside a 6x6 footprint) so that enough bits remain for 8-bit
		// endpoints. BuildBlockMode2 only represents grid sizes 6..9: passing 4 wraps to an 8x8
		// grid, i.e. 64 weights * 2 bits = 128 weight bits, which exceeds the 96-bit limit and makes
		// the block illegal. BuildBlockMode encodes the intended 4x4 grid.
		uint BlockMode = BuildBlockMode(WeightQuantMethod, 4);
		BlockMode |= (PartitionCount - 1) << 11;
		BlockMode |= ColorEndpointMode << 13;
		uint4 BlockBuffer = uint4(BlockMode, 0, 0, 0);
		uint BlockOffset = 11 + 2 + 4; // BlockMode + (PartitionCount-1) + CEM
		// First endpoint carries the colour, second endpoint is 0xFF in every channel. The weights
		// stay zero, so only the first endpoint is meant to contribute.
		uint3 QuantizeEndpoint = uint3(BlockMin * EndpointRange + 0.5f);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint.x, 8);
		WriteBits(BlockBuffer, BlockOffset, 0xFF, 8);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint.y, 8);
		WriteBits(BlockBuffer, BlockOffset, 0xFF, 8);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint.z, 8);
		WriteBits(BlockBuffer, BlockOffset, 0xFF, 8);
		return BlockBuffer;
	}
	uint4 BlockBuffer = uint4(0, 0, 0, 0);
	uint BlockOffset = 11 + 2 + 4; // BlockMode + (PartitionCount-1) + CEM
	uint EndpointQuantMethod = QUANT_80;
	uint EndpointRange = 80u - 1u;
	uint ColorEndpointMode = 8; // LDR RGB, direct
	uint WeightQuantMethod = QUANT_4;
	uint WeightRange = 4u - 1u;
	uint PartitionCount = 1;
	uint BlockMode = BuildBlockMode2(WeightQuantMethod, 6, 6);
	BlockMode |= (PartitionCount - 1) << 11;
	BlockMode |= ColorEndpointMode << 13;
	BlockBuffer.x = BlockMode;
#ifdef ASTC_HIGH_PROFILE
	PCAOptimizeEndpoints3f_6x6(BlockRGBA, BlockMin, BlockMax);
#endif
	// Inset the min/max, then snap to the 80-level endpoint grid.
	// See https://developer.download.nvidia.com/whitepapers/2007/Real-Time-YCoCg-DXT-Compression/Real-Time%20YCoCg-DXT%20Compression.pdf
	{
		float3 offset = (1.0f / ((WeightRange + 1) * 4.f)) * (BlockMax - BlockMin);
		BlockMax = ceil((BlockMax - offset) * (float) EndpointRange) / (float) EndpointRange;
		BlockMin = floor((BlockMin + offset) * (float) EndpointRange) / (float) EndpointRange;
	}
	// Order the endpoints so that the first one never has the larger RGB sum.
	float SumMin = BlockMin.x + BlockMin.y + BlockMin.z;
	float SumMax = BlockMax.x + BlockMax.y + BlockMax.z;
	if (SumMin > SumMax)
	{
		float3 tmp = BlockMin;
		BlockMin = BlockMax;
		BlockMax = tmp;
	}
	EncodeWeights3f_6x6(BlockRGBA, WeightRange, WeightQuantMethod, BlockMin, BlockMax, BlockBuffer);
	// The endpoints are converted to 8-bit values and used as indices into ColorScrambleTable80
	// (presumably a 256-entry map from 8-bit value to the 80-level code - confirm against the table setup).
	uint3 QuantizeEndpoint0 = uint3(BlockMin * 255.0f + 0.5f);
	uint3 QuantizeEndpoint1 = uint3(BlockMax * 255.0f + 0.5f);
	// Endpoint values are interleaved per channel: R0 R1 G0 G1 B0 B1.
	uint EndpointNumbers[6] =
	{
		AstcParameters.ColorScrambleTable80[QuantizeEndpoint0.x], AstcParameters.ColorScrambleTable80[QuantizeEndpoint1.x],
		AstcParameters.ColorScrambleTable80[QuantizeEndpoint0.y], AstcParameters.ColorScrambleTable80[QuantizeEndpoint1.y],
		AstcParameters.ColorScrambleTable80[QuantizeEndpoint0.z], AstcParameters.ColorScrambleTable80[QuantizeEndpoint1.z]
	};
	EncodeEndpointsQuant6(EndpointQuantMethod, EndpointNumbers, BlockBuffer, BlockOffset);
	return BlockBuffer;
}
// Combines separate RGB and alpha arrays of a 4x4 block into RGBA texels and compresses them
// with Compress_ASTC_RGBA_4x4. The colour values are passed through unchanged.
uint4 CompressBlock_ASTC_RGBA(in float3 BlockRGB[16], in float BlockA[16])
{
	float4 Texels[16];
	[unroll]
	for (int TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		Texels[TexelIndex] = float4(BlockRGB[TexelIndex].x, BlockRGB[TexelIndex].y, BlockRGB[TexelIndex].z, BlockA[TexelIndex]);
	}
	return Compress_ASTC_RGBA_4x4(Texels);
}
// Same as the plain RGBA variant, except that every RGB value is run through LinearToSrgb
// before the block is compressed. Alpha is passed through unchanged.
uint4 CompressBlock_ASTC_SRGBA(in float3 BlockRGB[16], in float BlockA[16])
{
	float4 Texels[16];
	[unroll]
	for (int TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float3 EncodedRGB = LinearToSrgb(BlockRGB[TexelIndex]);
		Texels[TexelIndex] = float4(EncodedRGB, BlockA[TexelIndex]);
	}
	return Compress_ASTC_RGBA_4x4(Texels);
}
// Converts a 4x4 RGB block with RGB2YCoCg and compresses it as RGBA.
// The second and third components of the converted value are stored in R and G, B is set to zero
// and the first component is stored in alpha (going by the helper's name: Co, Cg -> RG and Y -> A).
uint4 CompressBlock_ASTC_YCoCg(in float3 BlockRGB[16])
{
	float4 Texels[16];
	[unroll]
	for (int TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float3 Converted = RGB2YCoCg(BlockRGB[TexelIndex]);
		Texels[TexelIndex] = float4(Converted.y, Converted.z, 0.0, Converted.x);
	}
	return Compress_ASTC_RGBA_4x4(Texels);
}