297 lines
9.1 KiB
HLSL
297 lines
9.1 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#pragma once
|
|
|
|
/**
 * Split a packed uint into three unsigned fields.
 * Each field's width is taken from NumComponentBits; the offset handed to the extractor is 0 for x,
 * the width of x for y, and the combined width of x and y for z.
 */
uint3 UnpackToUint3(uint Value, uint3 NumComponentBits)
{
	const uint OffsetY = NumComponentBits.x;
	const uint OffsetZ = NumComponentBits.x + NumComponentBits.y;

	uint3 Result;
	Result.x = BitFieldExtractU32(Value, NumComponentBits.x, 0);
	Result.y = BitFieldExtractU32(Value, NumComponentBits.y, OffsetY);
	Result.z = BitFieldExtractU32(Value, NumComponentBits.z, OffsetZ);
	return Result;
}
|
|
|
|
/**
 * Split a packed uint into four unsigned fields.
 * Each field's width is taken from NumComponentBits; the offset handed to the extractor is the sum
 * of the widths of all preceding components (0 for x).
 */
uint4 UnpackToUint4(uint Value, uint4 NumComponentBits)
{
	const uint OffsetY = NumComponentBits.x;
	const uint OffsetZ = NumComponentBits.x + NumComponentBits.y;
	const uint OffsetW = NumComponentBits.x + NumComponentBits.y + NumComponentBits.z;

	uint4 Result;
	Result.x = BitFieldExtractU32(Value, NumComponentBits.x, 0);
	Result.y = BitFieldExtractU32(Value, NumComponentBits.y, OffsetY);
	Result.z = BitFieldExtractU32(Value, NumComponentBits.z, OffsetZ);
	Result.w = BitFieldExtractU32(Value, NumComponentBits.w, OffsetW);
	return Result;
}
|
|
|
|
/**
 * Split a packed uint into three fields and return them as floats.
 * Field widths come from NumComponentBits and must not add up to more than 32 bits.
 * How the extracted bits are turned into a float is defined by BitFieldExtractFloat (not shown in this file).
 */
float3 UnpackToFloat3(uint Value, uint3 NumComponentBits)
{
	checkSlow(NumComponentBits.x + NumComponentBits.y + NumComponentBits.z <= 32u);

	const uint OffsetY = NumComponentBits.x;
	const uint OffsetZ = NumComponentBits.x + NumComponentBits.y;

	float3 Result;
	Result.x = BitFieldExtractFloat(Value, NumComponentBits.x, 0);
	Result.y = BitFieldExtractFloat(Value, NumComponentBits.y, OffsetY);
	Result.z = BitFieldExtractFloat(Value, NumComponentBits.z, OffsetZ);
	return Result;
}
|
|
|
|
/**
 * Convenience overload of UnpackToFloat3 where all three components use the same bit width.
 */
float3 UnpackToFloat3(uint Value, uint BitsPerElement)
{
	const uint3 UniformBits = uint3(BitsPerElement, BitsPerElement, BitsPerElement);
	return UnpackToFloat3(Value, UniformBits);
}
|
|
|
|
/**
 * Multiply Value by Scale and convert to uint, rounding to nearest (adds 0.5 and takes the floor).
 */
uint FloatToUIntScaled(float Value, float Scale)
{
	const float Biased = Value * Scale + 0.5f;
	return (uint)floor(Biased);
}
|
|
|
|
/**
 * Pack a float4 into a 32-bit R10G10B10A2 UNORM value.
 * The input is saturated to [0..1]; x, y and z are quantized to 10 bits at bit 0, 10 and 20,
 * and w is quantized to 2 bits at bit 30.
 */
uint Pack_Float4_To_R10G10B10A2_UNORM(float4 Unpacked)
{
	const float4 Clamped = saturate(Unpacked);

	const uint R = FloatToUIntScaled(Clamped.x, 1023);
	const uint G = FloatToUIntScaled(Clamped.y, 1023);
	const uint B = FloatToUIntScaled(Clamped.z, 1023);
	const uint A = FloatToUIntScaled(Clamped.w, 3);

	return R | (G << 10) | (B << 20) | (A << 30);
}
|
|
|
|
/**
 * Unpack a 32-bit R10G10B10A2 UNORM value into a float4.
 * The three 10-bit fields (bits 0, 10, 20) are divided by 1023 and the 2-bit field (bit 30) by 3.
 */
float4 Unpack_R10G10B10A2_UNORM_To_Float4(uint Packed)
{
	const uint R = (Packed      ) & 0x000003FF;
	const uint G = (Packed >> 10) & 0x000003FF;
	const uint B = (Packed >> 20) & 0x000003FF;
	const uint A = (Packed >> 30) & 0x00000003;

	return float4(
		(float)R / 1023,
		(float)G / 1023,
		(float)B / 1023,
		(float)A / 3);
}
|
|
|
|
/**
 * Quantize a normalized float (range [0..1]) to an unsigned integer of BitsPerElement bits, rounding down.
 * The value is scaled by (2^BitsPerElement - 1) before the floor is taken.
 */
uint PackNormToUintFloor(float FloatNorm, uint BitsPerElement)
{
	const uint MaxValue = (1u << BitsPerElement) - 1u;
	const float Scaled = FloatNorm * float(MaxValue);
	return uint(floor(Scaled));
}
|
|
|
|
/**
 * Quantize a normalized float (range [0..1]) to an unsigned integer of BitsPerElement bits, rounding up.
 * The value is scaled by (2^BitsPerElement - 1) before the ceiling is taken.
 */
uint PackNormToUintCeil(float FloatNorm, uint BitsPerElement)
{
	const uint MaxValue = (1u << BitsPerElement) - 1u;
	const float Scaled = FloatNorm * float(MaxValue);
	return uint(ceil(Scaled));
}
|
|
|
|
/**
 * Combine the three components of a uint3 into a single uint.
 * x is placed at bit 0, y is shifted up by the width of x, and z by the combined width of x and y.
 * The widths must not add up to more than 32 bits. Components are OR-ed together unmasked, so each
 * one is expected to already fit in its width.
 */
uint PackToUint(uint3 Packed3, uint3 NumComponentBits)
{
	checkSlow(NumComponentBits.x + NumComponentBits.y + NumComponentBits.z <= 32u);

	const uint ShiftY = NumComponentBits.x;
	const uint ShiftZ = NumComponentBits.x + NumComponentBits.y;

	uint Result = Packed3.x;
	Result |= Packed3.y << ShiftY;
	Result |= Packed3.z << ShiftZ;
	return Result;
}
|
|
|
|
/**
 * Quantize a normalized float3 (range [0..1] per component) with per-component bit widths, rounding down,
 * and pack the three results into one uint via PackToUint.
 */
uint PackNormToUintFloor(float3 FloatNorm, uint3 NumComponentBits)
{
	uint3 Quantized;
	Quantized.x = PackNormToUintFloor(FloatNorm.x, NumComponentBits.x);
	Quantized.y = PackNormToUintFloor(FloatNorm.y, NumComponentBits.y);
	Quantized.z = PackNormToUintFloor(FloatNorm.z, NumComponentBits.z);

	return PackToUint(Quantized, NumComponentBits);
}
|
|
|
|
/**
 * Convenience overload of PackNormToUintFloor where all three components use the same bit width.
 */
uint PackNormToUintFloor(float3 FloatNorm, uint BitsPerElement)
{
	const uint3 UniformBits = uint3(BitsPerElement, BitsPerElement, BitsPerElement);
	return PackNormToUintFloor(FloatNorm, UniformBits);
}
|
|
|
|
/**
 * Quantize a normalized float3 (range [0..1] per component) with per-component bit widths, rounding up,
 * and pack the three results into one uint via PackToUint.
 */
uint PackNormToUintCeil(float3 FloatNorm, uint3 NumComponentBits)
{
	uint3 Quantized;
	Quantized.x = PackNormToUintCeil(FloatNorm.x, NumComponentBits.x);
	Quantized.y = PackNormToUintCeil(FloatNorm.y, NumComponentBits.y);
	Quantized.z = PackNormToUintCeil(FloatNorm.z, NumComponentBits.z);

	return PackToUint(Quantized, NumComponentBits);
}
|
|
|
|
/**
 * Convenience overload of PackNormToUintCeil where all three components use the same bit width.
 */
uint PackNormToUintCeil(float3 FloatNorm, uint BitsPerElement)
{
	const uint3 UniformBits = uint3(BitsPerElement, BitsPerElement, BitsPerElement);
	return PackNormToUintCeil(FloatNorm, UniformBits);
}
|
|
|
|
// Implement BitStreamReader for ByteAddressBuffer (RO), RWByteAddressBuffer (RW) and dynamic choice (RORW).
|
|
// State carried by the bit stream reader. The read logic itself lives in BitStreamReaderImplementation.ush;
// the notes below only describe how the BitStreamReader_Create* functions in this file initialize each field.
struct FBitStreamReaderState
{
	// Source position.
	uint	AlignedByteAddress;				// Dword-aligned byte address of the stream
	int		BitOffsetFromAddress;			// Bit offset relative to AlignedByteAddress

	// Buffered data (both start at zero).
	uint4	BufferBits;
	int		BufferOffset;

	// Bookkeeping fields; presumably meant to be compile-time constants so the compiler can
	// remove unneeded refills -- TODO confirm against BitStreamReaderImplementation.ush.
	int		CompileTimeMinBufferBits;		// Starts at 0
	int		CompileTimeMinDwordBits;		// Starts at 0
	int		CompileTimeMaxRemainingBits;	// Starts at the value given to the create function
};
|
|
|
|
/**
 * Create a reader state for a stream that starts BitOffset bits after AlignedByteAddress.
 * The buffered bits, buffer offset and the two CompileTimeMin* fields start at zero;
 * CompileTimeMaxRemainingBits is stored as given.
 * NOTE(review): the name implies AlignedByteAddress must be a multiple of 4; this function does not check it.
 */
FBitStreamReaderState BitStreamReader_Create_Aligned(uint AlignedByteAddress, uint BitOffset, uint CompileTimeMaxRemainingBits)
{
	// Zero everything first, then fill in the fields that have a non-zero starting value.
	FBitStreamReaderState State = (FBitStreamReaderState)0;

	State.AlignedByteAddress			= AlignedByteAddress;
	State.BitOffsetFromAddress			= BitOffset;
	State.CompileTimeMaxRemainingBits	= CompileTimeMaxRemainingBits;

	return State;
}
|
|
|
|
/**
 * Create a reader state from a byte address that does not have to be dword aligned.
 * The address is rounded down to a multiple of 4 and the dropped bytes are added to BitOffset as bits.
 */
FBitStreamReaderState BitStreamReader_Create(uint ByteAddress, uint BitOffset, uint CompileTimeMaxRemainingBits)
{
	const uint MisalignedBytes	= ByteAddress & 3u;
	const uint DwordAddress		= ByteAddress & ~3u;
	const uint TotalBitOffset	= BitOffset + (MisalignedBytes << 3);

	return BitStreamReader_Create_Aligned(DwordAddress, TotalBitOffset, CompileTimeMaxRemainingBits);
}
|
|
|
|
#define TYPE_SUFFIX RO
|
|
#define INPUT_BUFFER_TYPE ByteAddressBuffer
|
|
#include "BitStreamReaderImplementation.ush"
|
|
#undef TYPE_SUFFIX
|
|
#undef INPUT_BUFFER_TYPE
|
|
|
|
#define TYPE_SUFFIX RW
|
|
#define INPUT_BUFFER_TYPE RWByteAddressBuffer
|
|
#include "BitStreamReaderImplementation.ush"
|
|
#undef TYPE_SUFFIX
|
|
#undef INPUT_BUFFER_TYPE
|
|
|
|
// Put bits to RWByteAddressBuffer at bit offset. NumBits must be <= 31.
// Output:				Buffer to write to.
// AlignedBaseAddress:	Dword-aligned byte address that BitOffset is relative to.
// BitOffset:			Bit position of the least significant bit of the written field.
// Value:				Bits to write. Expected to fit in NumBits; higher bits are not masked off here.
// NumBits:				Width of the field.
// The destination bits are cleared and then set with two separate atomics per touched dword, so bits
// outside the field are left as they were, but the field update as a whole is not a single atomic operation.
void PutBits(RWByteAddressBuffer Output, uint AlignedBaseAddress, uint BitOffset, uint Value, uint NumBits)
{
	const uint BitOffsetInDword = (BitOffset & 31u);
	const uint Address = AlignedBaseAddress + ((BitOffset >> 5) << 2);
	const uint EndBitPos = BitOffsetInDword + NumBits;

	// Only touch the following dword when the field really spills into it. A field that ends exactly
	// on the dword boundary (EndBitPos == 32) has nothing to write there.
	if (EndBitPos > 32)
	{
		// Clear the low (EndBitPos - 32) bits of the next dword, then merge in the high part of Value.
		const uint Mask = 0xFFFFFFFFu << (EndBitPos & 31u);
		Output.InterlockedAnd(Address + 4, Mask);
		Output.InterlockedOr(Address + 4, Value >> (32 - BitOffsetInDword));
	}

	{
		// Clear the field's bits in the first dword, then merge in the low part of Value.
		// Bits shifted past bit 31 are dropped here; they were handled by the branch above.
		const uint Mask = ~BitFieldMaskU32(NumBits, BitOffsetInDword);
		Output.InterlockedAnd(Address, Mask);
		Output.InterlockedOr(Address, Value << BitOffsetInDword);
	}
}
|
|
|
|
// State carried by the bit stream writer between BitStreamWriter_Create_Aligned,
// BitStreamWriter_Writer and BitStreamWriter_Flush.
struct FBitStreamWriterState
{
	// First dword of the stream. Its bits are kept in StartBufferBits and merged in by Flush.
	uint	StartAlignedByteAddress;	// Byte address of the first dword
	uint	StartBitOffset;				// Bit position inside the first dword where writing began (0..31)
	uint	StartBufferBits;			// Bits accumulated for the first dword

	// Current position.
	uint	NextAlignedByteAddress;		// Byte address that BufferBits will be written to
	uint	BitOffset;					// Write position in bits, relative to StartAlignedByteAddress
	uint	BufferBits;					// Bits accumulated for the current dword once past the first one
};
|
|
|
|
/**
 * Create a writer state that starts writing BitOffset bits after AlignedBaseAddressInBytes.
 * Whole dwords contained in BitOffset are folded into the start address, so the stored bit offsets are 0..31.
 */
FBitStreamWriterState BitStreamWriter_Create_Aligned(uint AlignedBaseAddressInBytes, uint BitOffset)
{
	const uint DwordIndex	= BitOffset >> 5;
	const uint BitInDword	= BitOffset & 31u;

	FBitStreamWriterState State;

	State.StartAlignedByteAddress	= AlignedBaseAddressInBytes + (DwordIndex << 2);
	State.StartBitOffset			= BitInDword;
	State.StartBufferBits			= 0;

	State.NextAlignedByteAddress	= State.StartAlignedByteAddress + 4u;
	State.BitOffset					= BitInDword;
	State.BufferBits				= 0;

	return State;
}
|
|
|
|
/**
 * Append NumBits bits of Value to the stream.
 * Bits for the first dword are accumulated in State.StartBufferBits and bits for later dwords in
 * State.BufferBits. A completed dword other than the first is stored directly to Output; the first
 * dword stays in the state and is merged in by BitStreamWriter_Flush.
 * Value is OR-ed in unmasked, so it is expected to fit in NumBits.
 * NOTE(review): CompileTimeMaxBits is presumably a compile-time upper bound on NumBits -- confirm with callers.
 */
void BitStreamWriter_Writer(RWByteAddressBuffer Output, inout FBitStreamWriterState State, uint Value, int NumBits, int CompileTimeMaxBits)
{
	const uint CurrentBitOffset	= State.BitOffset;
	const uint BitInDword		= CurrentBitOffset & 31u;
	const bool bPastFirstDword	= CurrentBitOffset >= 32;

	// Low part of the value goes into whichever dword is currently being filled.
	const uint ShiftedValue = Value << BitInDword;	// & 31u is implicit
	if (bPastFirstDword)
	{
		State.BufferBits |= ShiftedValue;
	}
	else
	{
		State.StartBufferBits |= ShiftedValue;
	}

	const uint NextBitOffset = CurrentBitOffset + NumBits;

	// Overflow to next DWORD?
	if ((CurrentBitOffset ^ NextBitOffset) >= 32)
	{
		// Keep first DWORD in register, so we can merge it in with atomic later
		if (bPastFirstDword)
		{
			Output.Store(State.NextAlignedByteAddress, State.BufferBits);
			State.NextAlignedByteAddress += 4;
		}

		// Start the next dword with the bits of Value that did not fit.
		// When the write started on a dword boundary the shift amount below wraps to 0 and would yield
		// the whole Value again, so that case yields 0 instead. The test is skipped when CompileTimeMaxBits < 32.
		uint CarriedBits = 0u;
		if (CompileTimeMaxBits < 32 || BitInDword != 0u)
		{
			CarriedBits = Value >> ((32u - CurrentBitOffset) & 31u);
		}
		State.BufferBits = CarriedBits;
	}

	State.BitOffset = NextBitOffset;
}
|
|
|
|
/**
 * Write out the bits still held in the writer state.
 * The first dword, and the last partially filled dword if the stream went past the first one, are
 * merged into Output by clearing the written bit range and OR-ing in the buffered bits, so bits
 * outside the written range are left as they were.
 */
void BitStreamWriter_Flush(RWByteAddressBuffer Output, inout FBitStreamWriterState State)
{
	// First dword: covers the bits from StartBitOffset up to the end of the written range.
	const uint WrittenBits = State.BitOffset - State.StartBitOffset;

	uint FirstDwordMask = 0xFFFFFFFFu;
	if (WrittenBits < 32)
	{
		FirstDwordMask = BitFieldMaskU32(WrittenBits, 0);
	}
	FirstDwordMask <<= State.StartBitOffset;

	Output.InterlockedAnd(State.StartAlignedByteAddress, ~FirstDwordMask);
	Output.InterlockedOr(State.StartAlignedByteAddress, State.StartBufferBits);

	// Trailing dword: only when the stream extends past the first dword.
	if (State.BitOffset > 32)
	{
		const uint TailBits = State.BitOffset & 31u;
		const uint TailMask = BitFieldMaskU32(TailBits, 0);

		Output.InterlockedAnd(State.NextAlignedByteAddress, ~TailMask);
		Output.InterlockedOr(State.NextAlignedByteAddress, State.BufferBits);
	}
}
|
|
|
|
// Utility functions for packing bits into uints.
|
|
// When Position and NumBits can be determined at compile time this should be just as fast as manual bit packing.
|
|
/**
 * Read NumBits bits (0..32) from a 128-bit block starting at bit Position, then advance Position by NumBits.
 * Data:		Four dwords treated as one little-endian bit string (bit 0 of Data[0] is bit position 0).
 * Position:	Bit position to read from; incremented by NumBits on return.
 * NumBits:		Number of bits to read.
 * Returns the bits in the low NumBits bits of the result. The read must stay inside the 128 bits of Data.
 */
uint ReadBits(uint4 Data, inout uint Position, uint NumBits)
{
	const uint DwordIndex = Position >> 5;
	const uint BitIndex = Position & 31;

	uint Value = Data[DwordIndex] >> BitIndex;
	if (BitIndex + NumBits > 32)
	{
		// The field straddles two dwords: fetch the high part from the next one.
		// BitIndex is never 0 here, so the shift amount is in 1..31.
		Value |= Data[DwordIndex + 1] << (32 - BitIndex);
	}

	Position += NumBits;

	// Shift counts wrap at 32, so (1u << 32) - 1u would give an empty mask and a full-dword read
	// would return 0. Handle that width explicitly.
	const uint Mask = (NumBits >= 32u) ? 0xFFFFFFFFu : ((1u << NumBits) - 1u);
	return Value & Mask;
}
|
|
|
|
/**
 * OR NumBits bits of Value into a 128-bit block at bit Position, then advance Position by NumBits.
 * The destination bits are not cleared first and Value is not masked, so the target range is expected
 * to be zero and Value is expected to fit in NumBits. The write must stay inside the 128 bits of Data.
 */
void WriteBits(inout uint4 Data, inout uint Position, uint Value, uint NumBits)
{
	const uint FirstDword	= Position >> 5;
	const uint BitInDword	= Position & 31;
	const bool bSpillsOver	= (BitInDword + NumBits) > 32;

	Data[FirstDword] |= Value << BitInDword;
	if (bSpillsOver)
	{
		// High part of the value continues at bit 0 of the next dword.
		Data[FirstDword + 1] |= Value >> (32 - BitInDword);
	}

	Position += NumBits;
}
|