Files
UnrealEngine/Engine/Shaders/Private/PackUnpack.ush
2025-05-18 13:04:45 +08:00

374 lines
8.8 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PackUnpack.ush: Common shader code for packing and unpacking data.
Automatically included from Common.ush.
=============================================================================*/
#pragma once
#include "/Engine/Public/Platform.ush"
//
// float2 <-> u32 as Unorm16.Unorm16
//
// Quantizes both components of v to 16-bit unorm and packs them into one uint.
// v is clamped to [0, 1], scaled by 65535 and rounded; v.x lands in bits 0..15, v.y in bits 16..31.
uint PackUnorm2x16(float2 v)
{
	const float2 Scaled = clamp(v, 0.0, 1.0) * 65535.0;
	const uint2 Quantized = uint2(round(Scaled));
	const uint LowHalf = Quantized.x;
	const uint HighHalf = Quantized.y << 16u;
	return (LowHalf | HighHalf);
}
// Inverse of PackUnorm2x16: splits p into its low and high 16-bit halves and
// rescales each from [0, 65535] to [0, 1]. Returns (low half, high half).
float2 UnpackUnorm2x16(uint p)
{
	const uint2 Halves = uint2(p & 0xffff, p >> 16u);
	return float2(Halves) * rcp(65535.0f);
}
//
// float2 <-> u32 as Snorm16.Snorm16
//
// Quantizes both components of v to a biased 16-bit snorm and packs them into one uint.
// v is clamped to [-1, 1], scaled by 32767, rounded, then offset by +32767 so the stored
// codes span 0..65534. v.x lands in bits 0..15, v.y in bits 16..31.
uint PackSnorm2x16(float2 v)
{
	const float2 Rounded = round(clamp(v, -1.0, 1.0) * 32767.0);
	const uint2 Biased = uint2(Rounded + 32767.0);
	const uint LowHalf = Biased.x;
	const uint HighHalf = Biased.y << 16u;
	return (LowHalf | HighHalf);
}
// Inverse of PackSnorm2x16: removes the +32767 bias from each 16-bit half and rescales to [-1, 1].
// The final clamp keeps the otherwise out-of-range code 65535 from decoding above 1.
float2 UnpackSnorm2x16(uint p)
{
	const float2 Biased = float2(float(p & 0xffff), float(p >> 16u));
	const float2 Signed = (Biased - 32767.0f) * rcp(32767.0f);
	return clamp(Signed, -1.0, 1.0);
}
//
// uint2 <-> u32 as u16.u16
//
// Fallback used when nothing has declared COMPILER_SUPPORTS_PACK_INTRINSICS before this point:
// the macro defaults to 0 and a portable two-argument PackUInt2ToUInt is supplied.
// NOTE(review): this fallback is keyed on the macro being *undefined*, whereas PackFloat2ToUInt
// further down is keyed on it being 0. Presumably any platform that defines the macro also
// provides its own PackUInt2ToUInt - confirm against the platform headers.
#ifndef COMPILER_SUPPORTS_PACK_INTRINSICS
#define COMPILER_SUPPORTS_PACK_INTRINSICS 0
// Packs X into bits 0..15 and Y into bits 16..31.
// X is not masked, so any bits of X above bit 15 are OR'ed into the Y half.
uint PackUInt2ToUInt(uint X, uint Y)
{
	const uint HighHalf = Y << 16u;
	return X | HighHalf;
}
#endif
// Vector convenience overload: forwards XY.x / XY.y to the two-argument PackUInt2ToUInt.
uint PackUInt2ToUInt(uint2 XY)
{
	const uint Low = XY.x;
	const uint High = XY.y;
	return PackUInt2ToUInt(Low, High);
}
// Splits Packed into its two 16-bit halves: x = bits 0..15, y = bits 16..31.
uint2 UnpackUInt2FromUInt(uint Packed)
{
	uint2 Halves;
	Halves.x = Packed & 0xffff;
	Halves.y = Packed >> 16;
	return Halves;
}
//
// float2 <-> u32 as f16.f16
//
#if !COMPILER_SUPPORTS_PACK_INTRINSICS
// Portable fallback: converts X and Y to half-float bit patterns with f32tof16 and
// packs them via PackUInt2ToUInt (X in the low 16 bits, Y in the high 16 bits).
uint PackFloat2ToUInt(float X, float Y)
{
	const uint HalfX = f32tof16(X);
	const uint HalfY = f32tof16(Y);
	return PackUInt2ToUInt(HalfX, HalfY);
}
#endif
// Vector convenience overload: forwards XY.x / XY.y to the two-argument PackFloat2ToUInt.
uint PackFloat2ToUInt(float2 XY)
{
	const float First = XY.x;
	const float Second = XY.y;
	return PackFloat2ToUInt(First, Second);
}
// Decodes two half floats from In: x from In as given, y from In shifted down by 16 bits.
// Both values go through f16tof32.
float2 UnpackFloat2FromUInt(uint In)
{
	float2 Result;
	Result.x = f16tof32(In);
	Result.y = f16tof32(In >> 16);
	return Result;
}
//
// float <-> u8
//
// Quantizes Value to an 8-bit unorm code in 0..255.
// Value is saturated to [0, 1] and scaled by 255; the uint conversion discards the fraction
// (no rounding to nearest is applied).
uint PackR8(float Value)
{
	const float Scaled = saturate(Value) * 255.0f;
	return uint(Scaled);
}
// Decodes the lowest byte of In as an 8-bit unorm in [0, 1].
// Uses the UnpackByte0 intrinsic when COMPILER_SUPPORTS_UNPACKBYTEN is set, otherwise masks manually.
float UnpackR8(uint In)
{
	const float InvMax = (1.0f / 255.0f);
#if COMPILER_SUPPORTS_UNPACKBYTEN
	return UnpackByte0(In) * InvMax;
#else
	const uint LowByte = In & 0xFF;
	return float(LowByte) * InvMax;
#endif
}
//
// float <-> u6
//
// Quantizes Value to a 6-bit unorm code in 0..63.
// Value is saturated to [0, 1] and scaled by 63; the uint conversion discards the fraction.
uint PackR6(float Value)
{
	const float Scaled = saturate(Value) * 63.0f;
	return uint(Scaled);
}
// Decodes the lowest 6 bits of In as a unorm in [0, 1]; higher bits are ignored.
float UnpackR6(uint In)
{
	const uint Code = In & 0x3F;
	return float(Code) * (1.0f / 63.0f);
}
//
// float4 <-> u32 as u8.u8.u8.u8
//
// Packs a float4 color into one uint as four 8-bit unorm channels:
// r in bits 0..7, g in bits 8..15, b in bits 16..23, a in bits 24..31.
// Each channel is saturated to [0, 1] and scaled by 255. The fallback path discards the
// fraction on conversion; the intrinsic path defers to PackFloat4ToUInt.
uint PackRGBA8(float4 In)
{
#if COMPILER_SUPPORTS_PACK_INTRINSICS
	return PackFloat4ToUInt(saturate(In) * 255.0f);
#else
	const uint4 Bytes = uint4(saturate(In) * 255.0f);
	return Bytes.x | (Bytes.y << 8) | (Bytes.z << 16) | (Bytes.w << 24);
#endif
}
// Inverse of PackRGBA8: decodes the four bytes of In (lowest byte first) into
// r, g, b, a and rescales each from [0, 255] to [0, 1].
float4 UnpackRGBA8(uint In)
{
#if COMPILER_SUPPORTS_UNPACKBYTEN
	return float4(
		UnpackByte0(In),
		UnpackByte1(In),
		UnpackByte2(In),
		UnpackByte3(In)) * (1.0f / 255.0f);
#else
	const uint4 Bytes = uint4(In >> 0, In >> 8, In >> 16, In >> 24) & 0xFF;
	return float4(Bytes) * (1.0f / 255.0f);
#endif
}
// Packs the low byte of each component of In into one uint:
// r in bits 0..7, g in bits 8..15, b in bits 16..23, a in bits 24..31.
uint PackUint4ToUint(uint4 In)
{
	// TODO use BitAlignU32
	const uint4 Bytes = In & 0xFFu;
	return Bytes.x | (Bytes.y << 8u) | (Bytes.z << 16u) | (Bytes.w << 24u);
}
// Inverse of PackUint4ToUint: returns the four bytes of In, lowest byte first, as (r, g, b, a).
uint4 UnpackUintToUint4(uint In)
{
	// TODO use BitAlignU32
	const uint4 Shifted = uint4(In >> 0u, In >> 8u, In >> 16u, In >> 24u);
	return Shifted & 0xFFu;
}
//
// float <-> u10 as Unorm10
//
// Quantizes Value to a 10-bit unorm code in 0..1023.
// Value is saturated to [0, 1] and scaled by 1023; the uint conversion discards the fraction.
uint PackUnorm10(float Value)
{
	const float Scaled = saturate(Value) * 1023.0f;
	return uint(Scaled);
}
// Decodes the lowest 10 bits of In as a unorm in [0, 1]; higher bits are ignored.
float UnpackUnorm10(uint In)
{
	const uint Code = In & 0x3FF;
	return float(Code) * (1.0f / 1023.0f);
}
//
// float <-> f10
//
// Encodes Value as a 10-bit float: the half-float bit pattern with its 5 lowest mantissa
// bits shifted out and everything above bit 9 (including the sign) masked away.
uint Pack10F(float Value)
{
	const uint HalfBits = f32tof16(Value);
	const uint Truncated = HalfBits >> 5;
	return Truncated & 0x000003FF;
}
// Inverse of Pack10F: moves the 10-bit code back to bits 5..14 of a half-float
// bit pattern (sign and low mantissa bits zero) and converts it with f16tof32.
float Unpack10F(uint Value)
{
	const uint HalfBits = (Value << 5) & 0x7FE0;
	return f16tof32(HalfBits);
}
//
// float3 <-> u32 as f11.f11.f10
//
// Packs rgb into one uint as truncated half floats without sign:
// r keeps 11 bits in bits 21..31, g keeps 11 bits in bits 10..20, b keeps 10 bits in bits 0..9.
uint PackR11G11B10F(float3 rgb)
{
	const uint3 HalfBits = f32tof16(rgb);
	const uint RedField   = (HalfBits.r << 17) & 0xFFE00000;
	const uint GreenField = (HalfBits.g << 6)  & 0x001FFC00;
	const uint BlueField  = (HalfBits.b >> 5)  & 0x000003FF;
	return RedField | GreenField | BlueField;
}
// Inverse of PackR11G11B10F: realigns each field to its position inside a half-float
// bit pattern (sign bit and dropped mantissa bits zero) and converts with f16tof32.
float3 UnpackR11G11B10F(uint rgb)
{
	const uint RedHalf   = (rgb >> 17) & 0x7FF0;
	const uint GreenHalf = (rgb >> 6)  & 0x7FF0;
	const uint BlueHalf  = (rgb << 5)  & 0x7FE0;
	return float3(f16tof32(RedHalf), f16tof32(GreenHalf), f16tof32(BlueHalf));
}
//
// float3 <-> u32 as f10.f10.f10 (top 2 bits unused)
//
// Packs rgb into one uint as three 10-bit truncated half floats without sign:
// r in bits 20..29, g in bits 10..19, b in bits 0..9. Bits 30..31 are left zero.
uint PackR10G10B10F(float3 rgb)
{
	const uint3 HalfBits = f32tof16(rgb);
	const uint RedField   = (HalfBits.r << 15) & 0x3FF00000; // 0011 1111 1111 0000 0000 0000 0000 0000
	const uint GreenField = (HalfBits.g << 5)  & 0x000FFC00; // 0000 0000 0000 1111 1111 1100 0000 0000
	const uint BlueField  = (HalfBits.b >> 5)  & 0x000003FF; // 0000 0000 0000 0000 0000 0011 1111 1111
	return RedField | GreenField | BlueField;
}
// Inverse of PackR10G10B10F: realigns each 10-bit field to bits 5..14 of a half-float
// bit pattern and converts with f16tof32.
float3 UnpackR10G10B10F(uint rgb)
{
	const uint RedHalf   = (rgb >> 15) & 0x7FE0;
	const uint GreenHalf = (rgb >> 5)  & 0x7FE0;
	const uint BlueHalf  = (rgb << 5)  & 0x7FE0;
	return float3(f16tof32(RedHalf), f16tof32(GreenHalf), f16tof32(BlueHalf));
}
//
// float3 <-> uint
// using a RGBE encoding with 9,9,8 bits for RGB and 6 bits for the shared exponent
// See a discussion of such encodings here:
// https://cbloomrants.blogspot.com/2020/06/widespread-error-in-radiance-hdr-rgbe.html
// https://cbloomrants.blogspot.com/2020/06/followup-tidbits-on-rgbe.html
// Because we are not using these functions in the context of Radiance .hdr files,
// we can deviate from the classic 888e8 format and apply the corrections suggested in
// the post above, while changing the bit depths to get a bit more color resolution at
// the expense of less dynamic range (exponent of 8 being excessive for colors anyway)
// The RGBE_plus encoding suggested in the second blog post is slightly better in some
// cases, but fairly close to the version below. This code has the advantage of being
// slightly easier to decode.
// Encodes a color as shared-exponent RGBE in one uint, laid out (high to low) as
// [exponent:6 | r:9 | g:9 | b:8]. The exponent field is stored with a bias of 32.
// Negative inputs encode as zero; inputs above the largest encodable value are clamped to it.
uint PackRGB998E6(float3 rgb)
{
	// Largest value that is safe to encode on every channel: an all-ones 8-bit mantissa at
	// the largest exponent field (63). UnpackRGB998E6 scales blue by 2^(field - 32 - 8),
	// hence the "- 8". Anything larger would push max_exp + 32 past 63 below and the
	// exponent would overflow out of its 6-bit field.
	const float MAX_C = float((1 << 8) - 1) * asfloat((63 - 32 - 8 + 127) << 23);
	// Clamp negatives to zero, then clear the sign bit that a -0.0 would still carry, so
	// that (cbits >> 23) is only the 8-bit biased exponent.
	uint3 cbits = asuint(clamp(rgb, 0.0, MAX_C)) & 0x7FFFFFFFu;
	int3 exp = int3(cbits >> 23) - 127;
	// 24-bit significand with the implicit leading one made explicit
	uint3 man = (cbits & ((1 << 23) - 1)) | (1 << 23);
	int max_exp = max3(exp.x, exp.y, exp.z);
	// change from [1,2) to [0.5,1)
	max_exp++;
	// smallest exponent the 6-bit field can hold (field value 0)
	max_exp = max(max_exp, -32);
	// per-channel right shift that aligns each significand to the shared exponent and keeps
	// 9/9/8 bits; capped at 31 so the shifts stay within 32 bits
	int3 shift = min(max_exp - exp - int3(9, 9, 8) + 23, 31);
	// center on encode
	man += 1u << (shift - 1);
	man >>= shift;
	if ((man.x >= (1 << 9)) ||
		(man.y >= (1 << 9)) ||
		(man.z >= (1 << 8)))
	{
		// overflow due to rounding, bump exponent so that color will fit
		max_exp++;
		man >>= 1;
	}
	return
		(uint(max_exp + 32) << (9 + 9 + 8)) |
		(uint(man.x       ) << (9 + 8    )) |
		(uint(man.y       ) << (8        )) |
		(uint(man.z       ));
}
// Inverse of PackRGB998E6: reads [exponent:6 | r:9 | g:9 | b:8] from bits and rebuilds the color.
// The exponent field carries a bias of 32; red/green are scaled by 2^(e - 9) and blue by 2^(e - 8).
float3 UnpackRGB998E6(uint bits)
{
	const int SharedExp = ((bits >> (9 + 9 + 8)) & ((1 << 6) - 1)) - 32;
	const int RedMan    = ((bits >> (9 + 8    )) & ((1 << 9) - 1));
	const int GreenMan  = ((bits >> (8        )) & ((1 << 9) - 1));
	const int BlueMan   = ((bits               ) & ((1 << 8) - 1));

	// Power-of-two scales built directly as float bit patterns (biased exponent, zero mantissa).
	const float Scale9 = asfloat(uint(SharedExp + 127 - 9) << 23);
	const float Scale8 = asfloat(uint(SharedExp + 127 - 8) << 23);

	const float3 Mantissas = float3(float(RedMan), float(GreenMan), float(BlueMan));
	const float3 Scales = float3(Scale9, Scale9, Scale8);
	return Mantissas * Scales;
}
// Packs rgb into one uint as unorm 11/11/10: r in bits 21..31, g in bits 10..20, b in bits 0..9.
// Each channel is saturated to [0, 1], scaled to its maximum code and rounded via floor(x + 0.5).
uint PackRGB111110(float3 rgb)
{
	// NOTE: Could add a simple gamma2 here to improve precision of dark colors, but this does not seem necessary for now.
	// unorm 11/11/10 encoding
	const float3 MaxCode = float3(2047.0, 2047.0, 1023.0);
	const uint3 Code = uint3(floor(saturate(rgb) * MaxCode + 0.5));
	return (Code.x << 21) | (Code.y << 10) | Code.z;
}
// Inverse of PackRGB111110: extracts the 11/11/10-bit codes and rescales each to [0, 1].
float3 UnpackRGB111110(uint rgb)
{
	const uint RedCode   = (rgb >> 21);          // top 11 bits, nothing above to mask
	const uint GreenCode = (rgb >> 10) & 2047;
	const uint BlueCode  = (rgb      ) & 1023;
	float3 Color;
	Color.r = float(RedCode)   * (1.0 / 2047.0);
	Color.g = float(GreenCode) * (1.0 / 2047.0);
	Color.b = float(BlueCode)  * (1.0 / 1023.0);
	return Color;
}
//
// float4 <-> uint2 as f16.f16.f16.f16
//
// Packs a float4 into two uints via PackFloat2ToUInt: .x holds In.xy, .y holds In.zw.
uint2 PackR16G16B16A16F(float4 In)
{
	uint2 Packed;
	Packed.x = PackFloat2ToUInt(In.xy);
	Packed.y = PackFloat2ToUInt(In.zw);
	return Packed;
}
// Inverse of PackR16G16B16A16F: In.x is unpacked into xy and In.y into zw via UnpackFloat2FromUInt.
float4 UnpackR16G16B16A16F(uint2 In)
{
	const float2 FirstPair = UnpackFloat2FromUInt(In.x);
	const float2 SecondPair = UnpackFloat2FromUInt(In.y);
	return float4(FirstPair, SecondPair);
}
//
// float <-> 24-bit float (1 sign bit, 8-bit exponent, 15-bit mantissa)
//
// Keeps the upper 24 bits of the 32-bit float bit pattern of In by shifting out
// the 8 lowest mantissa bits (truncation, no rounding).
uint PackR24F(float In)
{
	const uint FloatBits = asuint(In);
	return FloatBits >> 8;
}
// Inverse of PackR24F: shifts the 24-bit code back into the upper bits of a 32-bit
// float bit pattern (the 8 dropped mantissa bits come back as zero) and reinterprets it.
float UnpackR24F(uint In)
{
	const uint FloatBits = In << 8;
	return asfloat(FloatBits);
}
//
// Common tile coord encoding
//
// Packs the two components of In into one uint: In.x masked by InMask in the low bits,
// In.y masked by InMask and shifted up by InBits. Every argument is parenthesized so that
// compound expressions (e.g. a mask written as `A | B`) expand with the intended precedence.
// Note that In and InMask are each expanded twice.
#define PackTileCoordXXbits(In, InMask, InBits) ((((In).x) & (InMask)) | ((((In).y) & (InMask)) << (InBits)))
// Inverse of PackTileCoordXXbits: x comes from BitFieldExtractU32(In, InBits, 0) and y from
// BitFieldExtractU32(In, InBits, InBits) - presumably (value, width, offset); confirm against
// the helper's declaration. InMask is accepted for symmetry with the pack macro but is unused.
#define UnpackTileCoordXXbits(In, InMask, InBits) uint2(BitFieldExtractU32((In), (InBits), 0), BitFieldExtractU32((In), (InBits), (InBits)))
// Fixed-width tile coordinate helpers. Each Pack variant stores In.x in the low N bits and
// In.y in the next N bits (both masked to N bits); each Unpack variant extracts the same two fields.

// 8 bits per component (16 bits used).
uint PackTileCoord8bits(uint2 In)
{
	return PackTileCoordXXbits(In, 0xFF, 8u);
}
uint2 UnpackTileCoord8bits(uint In)
{
	return UnpackTileCoordXXbits(In, 0xFF, 8u);
}

// 10 bits per component (20 bits used).
uint PackTileCoord10bits(uint2 In)
{
	return PackTileCoordXXbits(In, 0x3FF, 10u);
}
uint2 UnpackTileCoord10bits(uint In)
{
	return UnpackTileCoordXXbits(In, 0x3FF, 10u);
}

// 12 bits per component (24 bits used).
uint PackTileCoord12bits(uint2 In)
{
	return PackTileCoordXXbits(In, 0xFFF, 12u);
}
uint2 UnpackTileCoord12bits(uint In)
{
	return UnpackTileCoordXXbits(In, 0xFFF, 12u);
}

// 14 bits per component (28 bits used).
uint PackTileCoord14bits(uint2 In)
{
	return PackTileCoordXXbits(In, 0x3FFF, 14u);
}
uint2 UnpackTileCoord14bits(uint In)
{
	return UnpackTileCoordXXbits(In, 0x3FFF, 14u);
}

// 16 bits per component (all 32 bits used).
uint PackTileCoord16bits(uint2 In)
{
	return PackTileCoordXXbits(In, 0xFFFF, 16u);
}
uint2 UnpackTileCoord16bits(uint In)
{
	return UnpackTileCoordXXbits(In, 0xFFFF, 16u);
}