// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	ASTCompressionCommon.ush:
	Helpers for compute shader ASTC texture compression
=============================================================================*/
|
||
|
||
#pragma once
|
||
|
||
#include "BlockCompressionCommon.ush"
|
||
#include "BCCompressionCommon.ush"
|
||
#include "GammaCorrectionCommon.ush"
|
||
#include "IntegerSequenceEncoding.ush"
|
||
|
||
|
||
// Returns the largest absolute component of v (the max / infinity norm of a 3-vector).
float MaxNorm3(float3 v)
{
	const float3 Magnitude = abs(v);
	const float LargestXY = max(Magnitude.x, Magnitude.y);
	return max(LargestXY, Magnitude.z);
}
|
||
|
||
// Returns the largest absolute component of v (the max / infinity norm of a 4-vector).
float MaxNorm4(float4 v)
{
	const float4 Magnitude = abs(v);
	const float LargestXY = max(Magnitude.x, Magnitude.y);
	const float LargestXYZ = max(LargestXY, Magnitude.z);
	return max(LargestXYZ, Magnitude.w);
}
|
||
|
||
// Re-fits the RGB endpoints of a 16-texel block along the principal axis of its colors.
// The axis is estimated from the 3x3 covariance matrix with a few power iterations; every texel is then
// projected onto that axis and the extreme projections become the new endpoints (clamped to [0,1]).
// Only the .rgb of BlockMin / BlockMax is written; their alpha is left exactly as passed in.
void PCAOptimizeEndpoints3f(in float4 BlockRGBA[16], inout float4 BlockMin, inout float4 BlockMax)
{
	// Accumulate the color sum and the raw second moments. The matrix is symmetric, so only the
	// diagonal (xx, yy, zz) and the upper off-diagonal terms (xy, xz, yz) are tracked.
	float3 Mean = 0.0f;
	float3 Diagonal = 0.0f;
	float3 OffDiagonal = 0.0f;
	for (uint TexelIndex = 0; TexelIndex < 16; TexelIndex++)
	{
		const float3 Color = BlockRGBA[TexelIndex].rgb;
		Mean += Color;
		Diagonal += Color * Color;
		OffDiagonal += Color.xxy * Color.yzz;
	}
	Mean /= 16.0f;

	// Remove N * mean * mean^T to turn the raw moments into the (unnormalized) covariance.
	Diagonal -= 16.0f * Mean * Mean;
	OffDiagonal -= 16.0f * Mean.xxy * Mean.yzz;

	// Rows (== columns, by symmetry) of the covariance matrix.
	const float3 Cov0 = float3(Diagonal.x, OffDiagonal.x, OffDiagonal.y);
	const float3 Cov1 = float3(OffDiagonal.x, Diagonal.y, OffDiagonal.z);
	const float3 Cov2 = float3(OffDiagonal.y, OffDiagonal.z, Diagonal.z);

	// Seed the iteration with whichever row has the largest max-norm.
	float3 Axis = Cov0;
	float BestNorm = MaxNorm3(Cov0);

	const float Norm1 = MaxNorm3(Cov1);
	if (Norm1 > BestNorm)
	{
		Axis = Cov1;
		BestNorm = Norm1;
	}

	const float Norm2 = MaxNorm3(Cov2);
	if (Norm2 > BestNorm)
	{
		Axis = Cov2;
		BestNorm = Norm2;
	}

	if (BestNorm == 0.0f)
	{
		// Covariance is all zero: any direction works, pick the X axis.
		Axis = float3(1.0f, 0.0f, 0.0f);
	}
	else
	{
		Axis = normalize(Axis);

		// Power-method refinement: repeatedly apply the covariance matrix and renormalize.
		for (uint Iteration = 0; Iteration < 3; Iteration++)
		{
			Axis = normalize(Cov0 * Axis.x + Cov1 * Axis.y + Cov2 * Axis.z);
		}
	}

	// Find the extent of the block along the axis, measured from the mean.
	float LowestProjection = 999999.0f;
	float HighestProjection = -999999.0f;
	for (uint ProjectionIndex = 0; ProjectionIndex < 16; ProjectionIndex++)
	{
		const float Projection = dot(BlockRGBA[ProjectionIndex].rgb - Mean, Axis);
		LowestProjection = min(LowestProjection, Projection);
		HighestProjection = max(HighestProjection, Projection);
	}

	// Rebuild the endpoints on the axis and clamp them to the representable range.
	BlockMin.rgb = saturate(Mean + Axis * LowestProjection);
	BlockMax.rgb = saturate(Mean + Axis * HighestProjection);
}
|
||
|
||
// 36-texel (6x6) variant of the RGB principal-axis endpoint fit.
// Estimates the dominant color axis of the block from its 3x3 covariance matrix (power iteration),
// projects every texel onto it and returns the two extreme points, clamped to [0,1], in BlockMin / BlockMax.
// Alpha in BlockRGBA is ignored.
void PCAOptimizeEndpoints3f_6x6(in float4 BlockRGBA[36], inout float3 BlockMin, inout float3 BlockMax)
{
	// Color sum plus raw second moments: diagonal (xx, yy, zz) and upper off-diagonal (xy, xz, yz).
	float3 Mean = 0.0f;
	float3 Diagonal = 0.0f;
	float3 OffDiagonal = 0.0f;
	for (uint TexelIndex = 0; TexelIndex < 36; TexelIndex++)
	{
		const float3 Color = BlockRGBA[TexelIndex].rgb;
		Mean += Color;
		Diagonal += Color * Color;
		OffDiagonal += Color.xxy * Color.yzz;
	}
	Mean /= 36.0f;

	// Convert raw moments into the (unnormalized) covariance by removing N * mean * mean^T.
	Diagonal -= 36.0f * Mean * Mean;
	OffDiagonal -= 36.0f * Mean.xxy * Mean.yzz;

	// Rows of the symmetric covariance matrix double as the candidate start vectors.
	const float3 Cov0 = float3(Diagonal.x, OffDiagonal.x, OffDiagonal.y);
	const float3 Cov1 = float3(OffDiagonal.x, Diagonal.y, OffDiagonal.z);
	const float3 Cov2 = float3(OffDiagonal.y, OffDiagonal.z, Diagonal.z);

	// Start from the row with the largest max-norm.
	float3 Axis = Cov0;
	float BestNorm = MaxNorm3(Cov0);

	const float Norm1 = MaxNorm3(Cov1);
	if (Norm1 > BestNorm)
	{
		Axis = Cov1;
		BestNorm = Norm1;
	}

	const float Norm2 = MaxNorm3(Cov2);
	if (Norm2 > BestNorm)
	{
		Axis = Cov2;
		BestNorm = Norm2;
	}

	if (BestNorm == 0.0f)
	{
		// Zero covariance: fall back to the X axis.
		Axis = float3(1.0f, 0.0f, 0.0f);
	}
	else
	{
		// Three power iterations: apply the covariance matrix, then renormalize.
		Axis = normalize(Axis);
		for (uint Iteration = 0; Iteration < 3; Iteration++)
		{
			Axis = normalize(Cov0 * Axis.x + Cov1 * Axis.y + Cov2 * Axis.z);
		}
	}

	// Extent of the block along the axis, relative to the mean color.
	float LowestProjection = 999999.0f;
	float HighestProjection = -999999.0f;
	for (uint ProjectionIndex = 0; ProjectionIndex < 36; ProjectionIndex++)
	{
		const float Projection = dot(BlockRGBA[ProjectionIndex].rgb - Mean, Axis);
		LowestProjection = min(LowestProjection, Projection);
		HighestProjection = max(HighestProjection, Projection);
	}

	// New endpoints are the extreme points on the axis, clamped to [0,1].
	BlockMin = saturate(Mean + Axis * LowestProjection);
	BlockMax = saturate(Mean + Axis * HighestProjection);
}
|
||
|
||
// RGBA (4D) principal-axis endpoint fit for a 16-texel block.
// Builds the symmetric 4x4 covariance matrix of the texels, refines its dominant eigenvector with five
// power iterations, projects all texels onto that axis and writes the two extreme points, clamped to
// [0,1], into BlockMin / BlockMax (all four channels).
void PCAOptimizeEndpoints4f(in float4 BlockRGBA[16], inout float4 BlockMin, inout float4 BlockMax)
{
	// Raw second moments, upper triangle only:
	//   Diagonal = (xx, yy, zz, ww), OffX = (xy, xz, xw), OffY = (yz, yw), OffZ = zw.
	float4 Mean = float4(0.0, 0.0, 0.0, 0.0);
	float4 Diagonal = float4(0.0, 0.0, 0.0, 0.0);
	float3 OffX = float3(0.0, 0.0, 0.0);
	float2 OffY = float2(0.0, 0.0);
	float OffZ = 0.0;
	for (uint TexelIndex = 0; TexelIndex < 16; TexelIndex++)
	{
		const float4 Texel = BlockRGBA[TexelIndex];
		Mean += Texel;
		Diagonal += Texel * Texel;
		OffX += Texel.x * Texel.yzw;
		OffY += Texel.y * Texel.zw;
		OffZ += Texel.z * Texel.w;
	}
	Mean /= 16.0;

	// Remove N * mean * mean^T to obtain the (unnormalized) covariance.
	Diagonal -= (16.0 * Mean) * Mean;
	OffX -= (16.0 * Mean.x) * Mean.yzw;
	OffY -= (16.0 * Mean.y) * Mean.zw;
	OffZ -= 16.0 * Mean.z * Mean.w;

	// Rows of the symmetric 4x4 covariance matrix.
	const float4 Cov0 = float4(Diagonal.x, OffX.x, OffX.y, OffX.z);
	const float4 Cov1 = float4(OffX.x, Diagonal.y, OffY.x, OffY.y);
	const float4 Cov2 = float4(OffX.y, OffY.x, Diagonal.z, OffZ);
	const float4 Cov3 = float4(OffX.z, OffY.y, OffZ, Diagonal.w);

	// Seed with the row that has the largest max-norm.
	float4 Axis = Cov0;
	float BestNorm = MaxNorm4(Cov0);

	const float Norm1 = MaxNorm4(Cov1);
	if (Norm1 > BestNorm)
	{
		Axis = Cov1;
		BestNorm = Norm1;
	}

	const float Norm2 = MaxNorm4(Cov2);
	if (Norm2 > BestNorm)
	{
		Axis = Cov2;
		BestNorm = Norm2;
	}

	const float Norm3 = MaxNorm4(Cov3);
	if (Norm3 > BestNorm)
	{
		Axis = Cov3;
		BestNorm = Norm3;
	}

	if (BestNorm > 0.0)
	{
		// Power method: apply the covariance matrix and renormalize, five times.
		Axis = normalize(Axis);
		for (uint Iteration = 0; Iteration < 5; Iteration++)
		{
			Axis = normalize(Cov0 * Axis.x + Cov1 * Axis.y + Cov2 * Axis.z + Cov3 * Axis.w);
		}
	}
	else
	{
		// No usable covariance: fall back to the X axis.
		Axis = float4(1.0, 0.0, 0.0, 0.0);
	}

	// Extent of the block along the axis, measured from the mean.
	float LowestProjection = 1e10;
	float HighestProjection = -1e10;
	for (uint ProjectionIndex = 0; ProjectionIndex < 16; ProjectionIndex++)
	{
		const float Projection = dot(BlockRGBA[ProjectionIndex] - Mean, Axis);
		LowestProjection = min(LowestProjection, Projection);
		HighestProjection = max(HighestProjection, Projection);
	}

	BlockMin = saturate(Mean + Axis * LowestProjection);
	BlockMax = saturate(Mean + Axis * HighestProjection);
}
|
||
|
||
// Linear-least-squares refinement of RGBA endpoints for a 16-texel block.
// Each texel is assigned the nearest of (WeightRange + 1) evenly spaced weights between the current
// endpoints; the endpoints that minimize the squared error for those fixed weights are then solved for.
// BlockMin / BlockMax are only overwritten (clamped to [0,1]) when the 2x2 system is well conditioned
// (|determinant| > 0.1); otherwise they are left untouched.
void LLSOptimizeEndpoints4f16(float4 Texels[16], uint WeightRange, inout float4 BlockMin, inout float4 BlockMax)
{
	const float4 Direction = BlockMax - BlockMin;
	const float StartPos = dot(BlockMin, Direction);
	const float EndPos = dot(BlockMax, Direction);

	float4 SumTexel = 0.0f;
	float4 SumBetaTexel = 0.0f;
	float SumBeta = 0.0f;
	float SumBetaSq = 0.0f;

	for (uint TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float4 Texel = Texels[TexelIndex];

		// Position of the texel along the endpoint segment, snapped to the nearest weight step.
		const float Pos = dot(Texel, Direction);
		const float NormalizedPos = saturate((Pos - StartPos) / (EndPos - StartPos));
		const uint Index = (uint) (NormalizedPos * WeightRange + 0.5f);
		const float Beta = Index / (float) WeightRange;

		SumTexel += Texel;
		SumBetaTexel += Texel * Beta;
		SumBeta += Beta;
		SumBetaSq += Beta * Beta;
	}

	// With Alpha = 1 - Beta: sum(Alpha * Texel), sum(Alpha^2) and sum(Alpha * Beta) follow from the Beta sums.
	const float4 SumAlphaTexel = SumTexel - SumBetaTexel;
	const float SumAlphaSq = 16.0f - 2 * SumBeta + SumBetaSq;
	const float SumAlphaBeta = SumBeta - SumBetaSq;

	// Solve the 2x2 normal equations via the inverse of the determinant.
	const float Det = SumAlphaSq * SumBetaSq - SumAlphaBeta * SumAlphaBeta;
	if (abs(Det) > 0.1f)
	{
		const float RcpDet = 1.0f / Det;
		BlockMin = saturate((SumAlphaTexel * SumBetaSq - SumBetaTexel * SumAlphaBeta) * RcpDet);
		BlockMax = saturate((SumBetaTexel * SumAlphaSq - SumAlphaTexel * SumAlphaBeta) * RcpDet);
	}
}
|
||
|
||
// RGB-only linear-least-squares refinement of endpoints for a 16-texel block (texel alpha is ignored).
// Each texel is assigned the nearest of (WeightRange + 1) evenly spaced weights between the current
// endpoints; the endpoints that minimize the squared error for those fixed weights are then solved for.
// BlockMin / BlockMax are only overwritten (clamped to [0,1]) when |determinant| > 0.1.
void LLSOptimizeEndpoints3f16(float4 Texels[16], uint WeightRange, inout float3 BlockMin, inout float3 BlockMax)
{
	const float3 Direction = BlockMax - BlockMin;
	const float StartPos = dot(BlockMin, Direction);
	const float EndPos = dot(BlockMax, Direction);

	float3 SumTexel = float3(0.0f, 0.0f, 0.0f);
	float3 SumBetaTexel = float3(0.0f, 0.0f, 0.0f);
	float SumBeta = 0.0f;
	float SumBetaSq = 0.0f;

	for (uint TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float3 Color = Texels[TexelIndex].rgb;

		// Position of the texel along the endpoint segment, snapped to the nearest weight step.
		const float Pos = dot(Color, Direction);
		const float NormalizedPos = saturate((Pos - StartPos) / (EndPos - StartPos));
		const uint Index = (uint)(NormalizedPos * WeightRange + 0.5f);
		const float Beta = Index / (float)WeightRange;

		SumTexel += Color;
		SumBetaTexel += Color * Beta;
		SumBeta += Beta;
		SumBetaSq += Beta * Beta;
	}

	// With Alpha = 1 - Beta: sum(Alpha * Color), sum(Alpha^2) and sum(Alpha * Beta) follow from the Beta sums.
	const float3 SumAlphaTexel = SumTexel - SumBetaTexel;
	const float SumAlphaSq = 16.0f - 2.0f * SumBeta + SumBetaSq;
	const float SumAlphaBeta = SumBeta - SumBetaSq;

	// Solve the 2x2 normal equations via the inverse of the determinant.
	const float Det = SumAlphaSq * SumBetaSq - SumAlphaBeta * SumAlphaBeta;
	if (abs(Det) > 0.1f)
	{
		const float RcpDet = 1.0f / Det;
		BlockMin = saturate((SumAlphaTexel * SumBetaSq - SumBetaTexel * SumAlphaBeta) * RcpDet);
		BlockMax = saturate((SumBetaTexel * SumAlphaSq - SumAlphaTexel * SumAlphaBeta) * RcpDet);
	}
}
|
||
|
||
// Builds the 11-bit ASTC block mode for a single-plane block with a BlockSize x BlockSize weight grid.
// See "Block Mode" (23.10) in https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html
//
// Bit layout produced here:
//   [1:0]  upper two bits of the weight range code
//   [4]    lowest bit of the weight range code
//   [6:5]  (BlockSize - 2) & 3
//   [8:7]  (BlockSize - 4) & 3
//   [9]    precision flag (set when WeightQuantMethod >= 6)
//   [10]   dual-plane flag (always 0)
// Bits [3:2] are left at zero.
uint BuildBlockMode(uint WeightQuantMethod, uint BlockSize)
{
	const uint HeightField = (BlockSize - 2) & 0x3;
	const uint WidthField = (BlockSize - 4) & 0x3;

	const uint DualPlane = 0;
	const uint HighPrecision = (WeightQuantMethod < 6) ? 0 : 1;

	// Quant methods come in two groups of six; within a group the range code runs from 2 upwards.
	const uint Range = (WeightQuantMethod % 6) + 2;

	return ((Range >> 1) & 0x3)
		| ((Range & 0x1) << 4)
		| (HeightField << 5)
		| (WidthField << 7)
		| (HighPrecision << 9)
		| (DualPlane << 10);
}
|
||
|
||
// Quantizes the per-texel interpolation weights of a 16-texel block using only RGB, and stores the
// packed weight bits into BlockBuffer.
// Each texel is projected onto the BlockMin -> BlockMax segment, snapped to one of (WeightRange + 1)
// steps, remapped through AstcParameters.ScrambleTable and packed by EncodeWeightsQuant16.
// The packed words are bit-reversed and written to BlockBuffer.w/.z/.y (overwriting them); .x is untouched.
void EncodeWeights3f(in float4 BlockRGBA[16], uint WeightRange, uint WeightQuantMethod, float4 BlockMin, float4 BlockMax, inout uint4 BlockBuffer)
{
	const float3 Direction = BlockMax.rgb - BlockMin.rgb;
	const float StartPos = dot(BlockMin.rgb, Direction);
	const float EndPos = dot(BlockMax.rgb, Direction);

	// Each quantization method owns a WEIGHT_QUANTIZE_NUM-entry slice of the scramble table.
	const uint TableBase = WeightQuantMethod * WEIGHT_QUANTIZE_NUM;

	uint ScrambledWeights[16];
	for (uint TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float Pos = dot(BlockRGBA[TexelIndex].rgb, Direction);
		const float NormalizedPos = saturate((Pos - StartPos) / (EndPos - StartPos));
		const uint QuantizedWeight = (uint) (NormalizedPos * WeightRange + 0.5f);

		ScrambledWeights[TexelIndex] = AstcParameters.ScrambleTable[TableBase + QuantizedWeight];
	}

	uint4 PackedWeights = uint4(0, 0, 0, 0);
	uint PackedOffset = 0;
	EncodeWeightsQuant16(WeightQuantMethod, ScrambledWeights, PackedWeights, PackedOffset);

	// Packed words go in reverse word order with reversed bits: x -> w, y -> z, z -> y.
	BlockBuffer.wzy = reversebits(PackedWeights.xyz);
}
|
||
|
||
// Quantizes the per-texel interpolation weights of a 16-texel block using all four channels, and
// stores the packed weight bits into BlockBuffer.
// Each texel is projected onto the BlockMin -> BlockMax segment, snapped to one of (WeightRange + 1)
// steps, remapped through AstcParameters.ScrambleTable and packed by EncodeWeightsQuant16.
// The packed words are bit-reversed and written to BlockBuffer.w/.z/.y (overwriting them); .x is untouched.
void EncodeWeights4f(in float4 BlockRGBA[16], uint WeightRange, uint WeightQuantMethod, float4 BlockMin, float4 BlockMax, inout uint4 BlockBuffer)
{
	const float4 Direction = BlockMax - BlockMin;
	const float StartPos = dot(BlockMin, Direction);
	const float EndPos = dot(BlockMax, Direction);

	// Each quantization method owns a WEIGHT_QUANTIZE_NUM-entry slice of the scramble table.
	const uint TableBase = WeightQuantMethod * WEIGHT_QUANTIZE_NUM;

	uint ScrambledWeights[16];
	for (uint TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float Pos = dot(BlockRGBA[TexelIndex], Direction);
		const float NormalizedPos = saturate((Pos - StartPos) / (EndPos - StartPos));
		const uint QuantizedWeight = (uint) (NormalizedPos * WeightRange + 0.5f);

		ScrambledWeights[TexelIndex] = AstcParameters.ScrambleTable[TableBase + QuantizedWeight];
	}

	uint4 PackedWeights = uint4(0, 0, 0, 0);
	uint PackedOffset = 0;
	EncodeWeightsQuant16(WeightQuantMethod, ScrambledWeights, PackedWeights, PackedOffset);

	// Packed words go in reverse word order with reversed bits: x -> w, y -> z, z -> y.
	BlockBuffer.wzy = reversebits(PackedWeights.xyz);
}
|
||
|
||
// Compresses a 4x4 block of RGBA texels into one 128-bit ASTC block (single partition, single plane).
//
// Three encodings are produced:
//  - all texels identical:   RGBA direct endpoints (mode 12), QUANT_4 weights that stay zero, endpoint 1 = 0xFF.
//  - minimum alpha == 1.0:   RGB direct endpoints (mode 8) with QUANT_12 weights.
//  - otherwise:              RGBA direct endpoints (mode 12) with QUANT_6 weights.
// Endpoints are always stored as 8 bits per channel, interleaved endpoint0/endpoint1 per channel.
uint4 Compress_ASTC_RGBA_4x4(in float4 BlockRGBA[16])
{
	const uint BlockSize = 4;
	const uint PartitionCount = 1;
	const uint EndpointRange = 256u - 1u;
	// BlockMode + (PartitionCount-1) + CEM
	const uint HeaderBitCount = 11 + 2 + 4;

	// Initial endpoints: per-channel bounding box of the block.
	float4 BlockMin = BlockRGBA[0];
	float4 BlockMax = BlockRGBA[0];
	for (uint TexelIndex = 1; TexelIndex < 16; ++TexelIndex)
	{
		const float4 Texel = BlockRGBA[TexelIndex];
		BlockMin = min(BlockMin, Texel);
		BlockMax = max(BlockMax, Texel);
	}

	// Note: This is just a way to encode a void-extent like block - doesn't actually encode void-extent.
	if (all(BlockMin == BlockMax))
	{
		const uint SolidEndpointMode = 12; // LDR RGBA, direct

		uint SolidBlockMode = BuildBlockMode(QUANT_4, BlockSize);
		SolidBlockMode |= (PartitionCount - 1) << 11;
		SolidBlockMode |= SolidEndpointMode << 13;

		uint4 SolidBlock = uint4(SolidBlockMode, 0, 0, 0);
		uint SolidOffset = HeaderBitCount;

		const uint4 QuantizedColor = uint4(BlockMin * EndpointRange + 0.5f);

		// Endpoint 0 carries the color, endpoint 1 is written as 0xFF; weights are left at zero.
		[unroll]
		for (uint SolidChannel = 0; SolidChannel < 4; ++SolidChannel)
		{
			WriteBits(SolidBlock, SolidOffset, QuantizedColor[SolidChannel], 8);
			WriteBits(SolidBlock, SolidOffset, 0xFF, 8);
		}
		return SolidBlock;
	}

	// A block whose smallest alpha is exactly 1 is encoded as RGB, with more weight levels.
	const bool NoAlpha = BlockMin.a == 1.0f;

	const uint ColorEndpointMode = NoAlpha ? 8 : 12; // LDR RGB, direct : LDR RGBA, direct
	const uint WeightQuantMethod = NoAlpha ? QUANT_12 : QUANT_6;
	const uint WeightRange = NoAlpha ? (12u - 1u) : (6u - 1u);

	uint BlockMode = BuildBlockMode(WeightQuantMethod, BlockSize);
	BlockMode |= (PartitionCount - 1) << 11;
	BlockMode |= ColorEndpointMode << 13;

	uint4 BlockBuffer = uint4(BlockMode, 0, 0, 0);
	uint BlockOffset = HeaderBitCount;

#ifdef ASTC_HIGH_PROFILE
	if (NoAlpha)
	{
		PCAOptimizeEndpoints3f(BlockRGBA, BlockMin, BlockMax);
	}
	else
	{
		PCAOptimizeEndpoints4f(BlockRGBA, BlockMin, BlockMax);
	}
#endif
	// The LLSOptimizeEndpoints*16 refinement is currently not applied.

	// Inset the min/max (RGB only), then snap outwards to the 8-bit grid.
	// See https://developer.download.nvidia.com/whitepapers/2007/Real-Time-YCoCg-DXT-Compression/Real-Time%20YCoCg-DXT%20Compression.pdf
	{
		const float3 Inset = (1.0f / ((WeightRange + 1) * 4.f)) * (BlockMax.rgb - BlockMin.rgb);
		BlockMax.rgb = ceil((BlockMax.rgb - Inset) * 255.0f) / 255.0f;
		BlockMin.rgb = floor((BlockMin.rgb + Inset) * 255.0f) / 255.0f;
	}

	// Keep endpoint 0 as the one with the smaller RGB sum.
	// NOTE(review): presumably required by how the decoder interprets direct RGB/RGBA endpoints - confirm against the spec.
	const float SumMin = BlockMin.x + BlockMin.y + BlockMin.z;
	const float SumMax = BlockMax.x + BlockMax.y + BlockMax.z;
	if (SumMin > SumMax)
	{
		const float4 Swapped = BlockMin;
		BlockMin = BlockMax;
		BlockMax = Swapped;
	}

	if (NoAlpha)
	{
		EncodeWeights3f(BlockRGBA, WeightRange, WeightQuantMethod, BlockMin, BlockMax, BlockBuffer);
	}
	else
	{
		EncodeWeights4f(BlockRGBA, WeightRange, WeightQuantMethod, BlockMin, BlockMax, BlockBuffer);
	}

	const uint4 QuantizeEndpoint0 = uint4(BlockMin * EndpointRange + 0.5f);
	const uint4 QuantizeEndpoint1 = uint4(BlockMax * EndpointRange + 0.5f);

	// Endpoint values are interleaved per channel: R0 R1 G0 G1 B0 B1 [A0 A1].
	[unroll]
	for (uint Channel = 0; Channel < 3; ++Channel)
	{
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint0[Channel], 8);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint1[Channel], 8);
	}
	if (!NoAlpha)
	{
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint0.w, 8);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint1.w, 8);
	}

	return BlockBuffer;
}
|
||
|
||
// Builds the ASTC block mode for the layout with bit 8 set and bits [1:0] and [7] clear, which the
// Khronos spec lists as weight grid "A+6 x B+6" (single plane, low-precision weight ranges only).
//
// Bit layout produced here:
//   [3:2]   upper two bits of the weight range code
//   [4]     lowest bit of the weight range code
//   [6:5]   (BlockY - 6) & 3
//   [8]     always 1 (selects this layout)
//   [10:9]  (BlockX - 6) & 3
// Only sizes 6..9 are representable; smaller values wrap around through the "& 3".
// NOTE(review): the spec table puts width in bits [6:5] and height in bits [10:9], so BlockX/BlockY
// look transposed for non-square grids - harmless for 6x6, confirm before using other sizes.
uint BuildBlockMode2(uint WeightQuantMethod, uint BlockX, uint BlockY)
{
	const uint HighField = (BlockX - 6) & 0x3;
	const uint LowField = (BlockY - 6) & 0x3;
	const uint Range = (WeightQuantMethod % 6) + 2;

	return 0x100
		| ((Range & 0x1) << 4)
		| (((Range >> 1) & 0x3) << 2)
		| (HighField << 9)
		| (LowField << 5);
}
|
||
|
||
// Quantizes the per-texel interpolation weights of a 36-texel (6x6) block using RGB only, and merges
// the packed weight bits into BlockBuffer.
// Each texel is projected onto the BlockMin -> BlockMax segment, snapped to one of (WeightRange + 1)
// steps, remapped through AstcParameters.ScrambleTable and packed by EncodeWeightsQuant36.
// Unlike the 16-texel variants, the bit-reversed words are OR-ed into BlockBuffer.w/.z/.y, so bits
// already present there are preserved.
void EncodeWeights3f_6x6(in float4 BlockRGBA[36], uint WeightRange, uint WeightQuantMethod, float3 BlockMin, float3 BlockMax, inout uint4 BlockBuffer)
{
	const float3 Direction = BlockMax - BlockMin;
	const float StartPos = dot(BlockMin, Direction);
	const float EndPos = dot(BlockMax, Direction);

	// Each quantization method owns a WEIGHT_QUANTIZE_NUM-entry slice of the scramble table.
	const uint TableBase = WeightQuantMethod * WEIGHT_QUANTIZE_NUM;

	uint ScrambledWeights[36];
	for (uint TexelIndex = 0; TexelIndex < 36; ++TexelIndex)
	{
		const float Pos = dot(BlockRGBA[TexelIndex].rgb, Direction);
		const float NormalizedPos = saturate((Pos - StartPos) / (EndPos - StartPos));
		const uint QuantizedWeight = (uint) (NormalizedPos * WeightRange + 0.5f);

		ScrambledWeights[TexelIndex] = AstcParameters.ScrambleTable[TableBase + QuantizedWeight];
	}

	uint4 PackedWeights = uint4(0, 0, 0, 0);
	uint PackedOffset = 0;
	EncodeWeightsQuant36(WeightQuantMethod, ScrambledWeights, PackedWeights, PackedOffset);

	// Packed words go in reverse word order with reversed bits: x -> w, y -> z, z -> y.
	BlockBuffer.wzy |= reversebits(PackedWeights.xyz);
}
|
||
|
||
// Compresses a 6x6 block of texels (RGB only, alpha ignored) into one 128-bit ASTC block
// (single partition, single plane, LDR RGB direct endpoints).
//
//  - all texels identical: 4x4 weight grid with QUANT_4 weights left at zero and 8-bit endpoints
//    (endpoint 0 = color, endpoint 1 = 0xFF).
//  - otherwise:            6x6 weight grid with QUANT_4 weights and QUANT_80 endpoints.
uint4 Compress_ASTC_RGB_6x6(in float4 BlockRGBA[36])
{
	// Compute initial endpoints
	float3 BlockMin = BlockRGBA[0].rgb;
	float3 BlockMax = BlockRGBA[0].rgb;
	for (uint TexelIndex = 1; TexelIndex < 36; ++TexelIndex)
	{
		BlockMin = min(BlockMin, BlockRGBA[TexelIndex].rgb);
		BlockMax = max(BlockMax, BlockRGBA[TexelIndex].rgb);
	}

	// Note: This is just a way to encode a void-extent like block - doesn't actually encode void-extent.
	if (all(BlockMin == BlockMax))
	{
		uint ColorEndpointMode = 8; // LDR RGB, direct
		uint WeightQuantMethod = QUANT_4;
		uint EndpointRange = 256u - 1u;

		uint PartitionCount = 1;

		// Use a 4x4 weight grid here. BuildBlockMode2 can only describe grids of 6..9 per side, so
		// passing 4 to it wraps to an 8x8 grid, which is larger than the 6x6 footprint and therefore
		// not a legal block. The smaller grid also leaves room for the 8-bit endpoints written below.
		uint BlockMode = BuildBlockMode(WeightQuantMethod, 4);
		BlockMode |= (PartitionCount - 1) << 11;
		BlockMode |= ColorEndpointMode << 13;

		uint4 BlockBuffer = uint4(BlockMode, 0, 0, 0);
		uint BlockOffset = 11 + 2 + 4; // BlockMode + (PartitionCount-1) + CEM

		uint3 QuantizeEndpoint = uint3(BlockMin * EndpointRange + 0.5f);

		// Endpoint 0 carries the color, endpoint 1 is written as 0xFF; weights are left at zero.
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint.x, 8);
		WriteBits(BlockBuffer, BlockOffset, 0xFF, 8);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint.y, 8);
		WriteBits(BlockBuffer, BlockOffset, 0xFF, 8);
		WriteBits(BlockBuffer, BlockOffset, QuantizeEndpoint.z, 8);
		WriteBits(BlockBuffer, BlockOffset, 0xFF, 8);
		return BlockBuffer;
	}

	uint4 BlockBuffer = uint4(0, 0, 0, 0);
	uint BlockOffset = 11 + 2 + 4; // BlockMode + (PartitionCount-1) + CEM

	uint EndpointQuantMethod = QUANT_80;
	uint EndpointRange = 80u - 1u;

	uint ColorEndpointMode = 8; // LDR RGB, direct
	uint WeightQuantMethod = QUANT_4;
	uint WeightRange = 4u - 1u;
	uint PartitionCount = 1;

	uint BlockMode = BuildBlockMode2(WeightQuantMethod, 6, 6);
	BlockMode |= (PartitionCount - 1) << 11;
	BlockMode |= ColorEndpointMode << 13;
	BlockBuffer.x = BlockMode;

#ifdef ASTC_HIGH_PROFILE
	PCAOptimizeEndpoints3f_6x6(BlockRGBA, BlockMin, BlockMax);
#endif

	// Inset the min/max, then snap outwards to the 80-level endpoint grid.
	// See https://developer.download.nvidia.com/whitepapers/2007/Real-Time-YCoCg-DXT-Compression/Real-Time%20YCoCg-DXT%20Compression.pdf
	{
		float3 offset = (1.0f / ((WeightRange + 1) * 4.f)) * (BlockMax - BlockMin);
		BlockMax = ceil((BlockMax - offset) * (float) EndpointRange) / (float) EndpointRange;
		BlockMin = floor((BlockMin + offset) * (float) EndpointRange) / (float) EndpointRange;
	}

	// Keep endpoint 0 as the one with the smaller RGB sum.
	float SumMin = BlockMin.x + BlockMin.y + BlockMin.z;
	float SumMax = BlockMax.x + BlockMax.y + BlockMax.z;
	if (SumMin > SumMax)
	{
		float3 tmp = BlockMin;
		BlockMin = BlockMax;
		BlockMax = tmp;
	}

	// Weights are OR-ed into BlockBuffer.yzw; BlockBuffer.x already holds the header.
	EncodeWeights3f_6x6(BlockRGBA, WeightRange, WeightQuantMethod, BlockMin, BlockMax, BlockBuffer);

	// Endpoints are converted to 8-bit values, which index AstcParameters.ColorScrambleTable80.
	uint3 QuantizeEndpoint0 = uint3(BlockMin * 255.0f + 0.5f);
	uint3 QuantizeEndpoint1 = uint3(BlockMax * 255.0f + 0.5f);

	// Interleaved per channel: R0 R1 G0 G1 B0 B1.
	uint EndpointNumbers[6] =
	{
		AstcParameters.ColorScrambleTable80[QuantizeEndpoint0.x], AstcParameters.ColorScrambleTable80[QuantizeEndpoint1.x],
		AstcParameters.ColorScrambleTable80[QuantizeEndpoint0.y], AstcParameters.ColorScrambleTable80[QuantizeEndpoint1.y],
		AstcParameters.ColorScrambleTable80[QuantizeEndpoint0.z], AstcParameters.ColorScrambleTable80[QuantizeEndpoint1.z]
	};

	EncodeEndpointsQuant6(EndpointQuantMethod, EndpointNumbers, BlockBuffer, BlockOffset);
	return BlockBuffer;
}
|
||
|
||
// Combines separate RGB and alpha arrays into RGBA texels and compresses them as a 4x4 ASTC block.
// Colors are passed through unchanged.
uint4 CompressBlock_ASTC_RGBA(in float3 BlockRGB[16], in float BlockA[16])
{
	float4 Texels[16];

	[unroll]
	for (int TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float3 Color = BlockRGB[TexelIndex];
		const float Alpha = BlockA[TexelIndex];
		Texels[TexelIndex] = float4(Color, Alpha);
	}

	return Compress_ASTC_RGBA_4x4(Texels);
}
|
||
|
||
// Combines separate RGB and alpha arrays into RGBA texels and compresses them as a 4x4 ASTC block.
// RGB is passed through LinearToSrgb first; alpha is stored as given.
uint4 CompressBlock_ASTC_SRGBA(in float3 BlockRGB[16], in float BlockA[16])
{
	float4 Texels[16];

	[unroll]
	for (int TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float3 EncodedColor = LinearToSrgb(BlockRGB[TexelIndex]);
		const float Alpha = BlockA[TexelIndex];
		Texels[TexelIndex] = float4(EncodedColor, Alpha);
	}

	return Compress_ASTC_RGBA_4x4(Texels);
}
|
||
|
||
// Converts each RGB texel with RGB2YCoCg and compresses the result as a 4x4 RGBA ASTC block.
// Channel packing: the converted .y and .z go to red and green, blue is zero, and the converted .x
// goes to alpha.
uint4 CompressBlock_ASTC_YCoCg(in float3 BlockRGB[16])
{
	float4 Texels[16];

	[unroll]
	for (int TexelIndex = 0; TexelIndex < 16; ++TexelIndex)
	{
		const float3 Converted = RGB2YCoCg(BlockRGB[TexelIndex]);
		Texels[TexelIndex] = float4(Converted.y, Converted.z, 0.0, Converted.x);
	}

	return Compress_ASTC_RGBA_4x4(Texels);
}
|
||
|