Files
UnrealEngine/Engine/Source/Programs/Shared/EpicGames.Serialization/CompressedBuffer.cs
2025-05-18 13:04:45 +08:00

440 lines
15 KiB
C#

// Copyright Epic Games, Inc. All Rights Reserved.
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.IO;
using System.IO.Hashing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Blake3;
using EpicGames.Compression;
using EpicGames.Core;
namespace EpicGames.Serialization
{
/// <summary>
/// In-memory representation of the fixed-size header stored at the start of a compressed buffer.
/// </summary>
class CompressedBufferHeader
{
    /// <summary>
    /// Identifies how the data that follows the header is laid out.
    /// </summary>
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Design", "CA1028:Enum Storage should be Int32", Justification = "Interop requires byte")]
    public enum CompressionMethod : byte
    {
        /// <summary>
        /// Header is followed by one uncompressed block.
        /// </summary>
        None = 0,

        /// <summary>
        /// Header is followed by an array of compressed block sizes then the compressed blocks.
        /// </summary>
        Oodle = 3,

        /// <summary>
        /// LZ4 method identifier.
        /// </summary>
        LZ4 = 4,
    }

    /// <summary>
    /// The magic number every compressed buffer starts with; reads as "&lt;dot&gt;ucb".
    /// </summary>
    public const uint ExpectedMagic = 0xb7756362;

    /// <summary>
    /// Size in bytes of the serialized header.
    /// </summary>
    public const uint HeaderLength = 64;

    /// <summary>
    /// A magic number to identify a compressed buffer. Always 0xb7756362.
    /// </summary>
    public uint Magic { get; set; }

    /// <summary>
    /// A CRC-32 used to check integrity of the buffer. Uses the polynomial 0x04c11db7.
    /// </summary>
    public uint Crc32 { get; set; }

    /// <summary>
    /// The method used to compress the buffer. Affects layout of data following the header.
    /// </summary>
    public CompressionMethod Method { get; set; }

    /// <summary>
    /// Compression level byte; the compressor in this file stores the Oodle compression level here.
    /// </summary>
    public byte CompressionLevel { get; set; }

    /// <summary>
    /// Compressor byte; the compressor in this file stores the Oodle compressor type here.
    /// </summary>
    public byte CompressionMethodUsed { get; set; }

    /// <summary>
    /// The power of two size of every uncompressed block except the last. Size is 1 &lt;&lt; BlockSizeExponent.
    /// </summary>
    public byte BlockSizeExponent { get; set; }

    /// <summary>
    /// The number of blocks that follow the header.
    /// </summary>
    public uint BlockCount { get; set; }

    /// <summary>
    /// The total size of the uncompressed data.
    /// </summary>
    public ulong TotalRawSize { get; set; }

    /// <summary>
    /// The total size of the compressed data including the header.
    /// </summary>
    public ulong TotalCompressedSize { get; set; }

    /// <summary>
    /// The hash of the uncompressed data.
    /// </summary>
    public byte[] RawHash { get; set; } = Array.Empty<byte>();

    /// <summary>
    /// Reverses the byte order of every multi-byte numeric field in place.
    /// Single-byte fields and the raw hash are left untouched.
    /// </summary>
    public void ByteSwap()
    {
        // 32-bit fields
        (Magic, Crc32, BlockCount) = (
            BinaryPrimitives.ReverseEndianness(Magic),
            BinaryPrimitives.ReverseEndianness(Crc32),
            BinaryPrimitives.ReverseEndianness(BlockCount));

        // 64-bit fields
        (TotalRawSize, TotalCompressedSize) = (
            BinaryPrimitives.ReverseEndianness(TotalRawSize),
            BinaryPrimitives.ReverseEndianness(TotalCompressedSize));
    }
}
/// <summary>
/// Utilities for working with compressed buffers
/// </summary>
public static class CompressedBuffer
{
/// <summary>
/// Reads the fixed-size header from the reader, followed by the compressed block size table when the
/// compression method uses one, and validates the magic number and CRC.
/// </summary>
/// <param name="br">Reader positioned at the first byte of the compressed buffer</param>
/// <returns>The parsed header and the compressed size of each block (empty when there is no block size table)</returns>
/// <exception cref="EndOfStreamException">The source ended before the header or block size table was complete</exception>
/// <exception cref="InvalidMagicException">The header does not start with the expected magic number</exception>
/// <exception cref="InvalidDataException">The header declares a block size table too large to be read</exception>
/// <exception cref="InvalidHashException">The CRC stored in the header does not match the calculated one</exception>
private static (CompressedBufferHeader, uint[]) ExtractHeader(BinaryReader br)
{
    const int HeaderLength = (int)CompressedBufferHeader.HeaderLength;

    byte[] headerData = br.ReadBytes(HeaderLength);
    if (headerData.Length != HeaderLength)
    {
        // ReadBytes returns fewer bytes when the stream ends early; fail here rather than on a later slice
        throw new EndOfStreamException($"Expected a {HeaderLength} byte compressed buffer header but only {headerData.Length} bytes were available");
    }

    // The header is always stored big endian. Decode each field explicitly as big endian instead of reading
    // it with a BinaryReader (which always decodes little endian) and swapping based on the host byte order,
    // so the result is the same on every platform.
    ReadOnlySpan<byte> headerSpan = headerData;
    CompressedBufferHeader header = new CompressedBufferHeader
    {
        Magic = BinaryPrimitives.ReadUInt32BigEndian(headerSpan.Slice(0, sizeof(uint))),
        Crc32 = BinaryPrimitives.ReadUInt32BigEndian(headerSpan.Slice(4, sizeof(uint))),
        Method = (CompressedBufferHeader.CompressionMethod)headerSpan[8],
        CompressionLevel = headerSpan[9],
        CompressionMethodUsed = headerSpan[10],
        BlockSizeExponent = headerSpan[11],
        BlockCount = BinaryPrimitives.ReadUInt32BigEndian(headerSpan.Slice(12, sizeof(uint))),
        TotalRawSize = BinaryPrimitives.ReadUInt64BigEndian(headerSpan.Slice(16, sizeof(ulong))),
        TotalCompressedSize = BinaryPrimitives.ReadUInt64BigEndian(headerSpan.Slice(24, sizeof(ulong))),
        // the remaining 32 bytes of the header are the raw hash field
        RawHash = headerSpan.Slice(32, 32).ToArray()
    };

    if (header.Magic != CompressedBufferHeader.ExpectedMagic)
    {
        throw new InvalidMagicException(header.Magic, CompressedBufferHeader.ExpectedMagic);
    }

    // none compressed objects have no extra blocks
    uint[] blocks = Array.Empty<uint>();
    byte[] blocksData = Array.Empty<byte>();
    if (header.Method != CompressedBufferHeader.CompressionMethod.None && header.BlockCount != 0)
    {
        // The block count has not been integrity checked yet, so size the table with 64-bit math; a 32-bit
        // multiply could wrap around and leave the table far smaller than the number of blocks.
        long blocksByteUsed = (long)header.BlockCount * sizeof(uint);
        if (blocksByteUsed > int.MaxValue - HeaderLength)
        {
            throw new InvalidDataException($"Compressed buffer header declares {header.BlockCount} blocks which is more than can be read");
        }

        blocksData = br.ReadBytes((int)blocksByteUsed);
        if (blocksData.Length != blocksByteUsed)
        {
            throw new EndOfStreamException($"Expected {blocksByteUsed} bytes of compressed block sizes but only {blocksData.Length} bytes were available");
        }

        blocks = new uint[header.BlockCount];
        for (int i = 0; i < blocks.Length; i++)
        {
            blocks[i] = BinaryPrimitives.ReadUInt32BigEndian(blocksData.AsSpan(i * sizeof(uint), sizeof(uint)));
        }
    }

    // calculate the crc from the start of the method field (skipping magic which is a constant and the crc field itself)
    // through to the end of the block size table
    const int MethodOffset = sizeof(uint) + sizeof(uint);
    byte[] crcData = new byte[HeaderLength - MethodOffset + blocksData.Length];
    Array.Copy(headerData, MethodOffset, crcData, 0, HeaderLength - MethodOffset);
    Array.Copy(blocksData, 0, crcData, HeaderLength - MethodOffset, blocksData.Length);

    uint calculatedCrc = Crc32.HashToUInt32(crcData);
    if (header.Crc32 != calculatedCrc)
    {
        throw new InvalidHashException(header.Crc32, calculatedCrc);
    }

    return (header, blocks);
}
/// <summary>
/// Serializes the header fields to the writer in the big endian layout used by compressed buffers:
/// magic, crc, four single-byte fields, block count, raw size, compressed size, 20 hash bytes and 12 reserved zero bytes.
/// </summary>
/// <param name="header">The header to serialize; it is not modified</param>
/// <param name="writer">The writer that receives the serialized header bytes</param>
private static void WriteHeader(CompressedBufferHeader header, BinaryWriter writer)
{
    const int HashLength = 20;
    const int ReservedLength = 12;

    // The header is always stored big endian. BinaryWriter always emits little endian regardless of the host,
    // so reversing each multi-byte value before writing yields big endian bytes on every platform. Reversing
    // the values on the way out (rather than swapping the header in place and swapping it back afterwards)
    // also means the caller's header can never be left byte-swapped if a write throws.
    writer.Write(BinaryPrimitives.ReverseEndianness(header.Magic));
    writer.Write(BinaryPrimitives.ReverseEndianness(header.Crc32));
    writer.Write((byte)header.Method);
    writer.Write(header.CompressionLevel);
    writer.Write(header.CompressionMethodUsed);
    writer.Write(header.BlockSizeExponent);
    writer.Write(BinaryPrimitives.ReverseEndianness(header.BlockCount));
    writer.Write(BinaryPrimitives.ReverseEndianness(header.TotalRawSize));
    writer.Write(BinaryPrimitives.ReverseEndianness(header.TotalCompressedSize));

    // write the first 20 bytes as iohashes are 20 bytes
    writer.Write(header.RawHash, 0, HashLength);

    // the last 12 bytes should be 0 as they are reserved
    writer.Write(new byte[ReservedLength]);
}
/// <summary>
/// Decompress a compressed buffer into a stream
/// </summary>
/// <remarks>
/// The source stream is wrapped in a <see cref="BinaryReader"/> that does not leave it open, so it is disposed
/// when this method completes. For buffers stored with the None method the remainder of the source stream is
/// copied to the target as-is.
/// </remarks>
/// <param name="sourceStream">Source stream containing a compressed buffer</param>
/// <param name="streamSize">The length of the compressed buffer</param>
/// <param name="targetStream">The stream to write the decompressed content to</param>
/// <param name="cancellationToken">Cancellation token</param>
/// <returns>A task that completes once all content has been written to the target stream</returns>
/// <exception cref="Exception">The stream is smaller than the header says the compressed buffer is, or a block failed to decompress</exception>
/// <exception cref="EndOfStreamException">The source stream ended in the middle of a block</exception>
/// <exception cref="OperationCanceledException">The cancellation token was signalled</exception>
public static async Task DecompressContentAsync(Stream sourceStream, ulong streamSize, Stream targetStream, CancellationToken cancellationToken = default)
{
    using BinaryReader br = new BinaryReader(sourceStream);
    (CompressedBufferHeader header, uint[] compressedBlockSizes) = ExtractHeader(br);
    if (streamSize < header.TotalCompressedSize)
    {
        throw new Exception($"Expected stream to be {header.TotalCompressedSize} but it was {streamSize}");
    }

    // without a compression method there is no block table, the payload follows the header directly
    if (header.Method == CompressedBufferHeader.CompressionMethod.None)
    {
        await sourceStream.CopyToAsync(targetStream, cancellationToken);
        return;
    }

    ulong blockSize = 1ul << header.BlockSizeExponent;
    ulong decompressedPayloadOffset = 0;
    foreach (uint compressedBlockSize in compressedBlockSizes)
    {
        // honor cancellation between blocks, the block path would otherwise ignore the token entirely
        cancellationToken.ThrowIfCancellationRequested();

        // every block is blockSize bytes when decompressed, except the last which holds whatever remains
        ulong rawBlockSize = Math.Min(header.TotalRawSize - decompressedPayloadOffset, blockSize);

        byte[] compressedPayload = br.ReadBytes((int)compressedBlockSize);
        if (compressedPayload.Length != compressedBlockSize)
        {
            // ReadBytes returns fewer bytes when the stream ends early; never pass a partial block on
            throw new EndOfStreamException($"Expected a block of {compressedBlockSize} bytes but only {compressedPayload.Length} bytes were available");
        }

        int writtenBytes;
        // if a block has the same raw and compressed size its uncompressed and we should not attempt to decompress it
        if (rawBlockSize == compressedBlockSize)
        {
            await targetStream.WriteAsync(compressedPayload, cancellationToken);
            writtenBytes = compressedPayload.Length;
        }
        else
        {
            writtenBytes = DecompressPayload(compressedPayload, header, rawBlockSize, targetStream);
        }

        decompressedPayloadOffset += (uint)writtenBytes;
    }
}
/// <summary>
/// Decodes a single block according to the compression method in the header and writes the result to the target stream.
/// </summary>
/// <param name="compressedPayload">The bytes of the block as stored in the compressed buffer</param>
/// <param name="header">Header of the compressed buffer the block belongs to</param>
/// <param name="rawBlockSize">Size of the buffer allocated to receive the decompressed block</param>
/// <param name="target">Stream that receives the decoded bytes</param>
/// <returns>The payload length for uncompressed blocks, otherwise the byte count reported by the decompressor</returns>
/// <exception cref="Exception">The Oodle decompressor reported zero bytes written</exception>
/// <exception cref="NotImplementedException">The method is LZ4 or an unknown value</exception>
private static int DecompressPayload(ReadOnlySpan<byte> compressedPayload, CompressedBufferHeader header, ulong rawBlockSize, Stream target)
{
    CompressedBufferHeader.CompressionMethod method = header.Method;

    // stored without compression, pass the bytes straight through
    if (method == CompressedBufferHeader.CompressionMethod.None)
    {
        target.Write(compressedPayload);
        return compressedPayload.Length;
    }

    if (method == CompressedBufferHeader.CompressionMethod.Oodle)
    {
        byte[] decoded = new byte[rawBlockSize];
        long decodedLength = Oodle.Decompress(compressedPayload, decoded);
        if (decodedLength == 0)
        {
            throw new Exception("Failed to run oodle decompress");
        }

        target.Write(decoded);
        return (int)decodedLength;
    }

    // LZ4 support was removed as it was not used and required an external dependency
    if (method == CompressedBufferHeader.CompressionMethod.LZ4)
    {
        throw new NotImplementedException("LZ4 is no longer supported");
    }

    throw new NotImplementedException($"Method {method} is not a support value");
}
/// <summary>
/// Compresses an in-memory buffer and writes the resulting compressed buffer to a stream
/// </summary>
/// <param name="s">The stream to write the compressed content into</param>
/// <param name="method">Which oodle compressor method to use</param>
/// <param name="compressionLevel">The oodle compression level</param>
/// <param name="rawContents">The uncompressed content you wish to compress</param>
/// <returns>The iohash of the uncompressed content</returns>
public static IoHash CompressContent(Stream s, OodleCompressorType method, OodleCompressionLevel compressionLevel, byte[] rawContents)
{
    const long BlockSize = 256 * 1024;

    long totalLength = rawContents.LongLength;
    List<byte[]> blocks = new List<byte[]>();

    // walk the content one block at a time, copying each slice into its own array; the last block may be shorter
    for (long offset = 0; offset < totalLength; offset += BlockSize)
    {
        int length = (int)Math.Min(totalLength - offset, BlockSize);
        byte[] block = rawContents.AsSpan((int)offset, length).ToArray();
        blocks.Add(block);
    }

    return CompressContent(s, method, compressionLevel, blocks, BlockSize);
}
/// <summary>
/// Compress a stream into a compressed buffer stream
/// </summary>
/// <remarks>
/// Content is read from the current position of the source stream through to its end, so the source stream
/// must support <see cref="Stream.Length"/> and <see cref="Stream.Position"/>.
/// </remarks>
/// <param name="s">The stream to write the compressed content into</param>
/// <param name="method">Which oodle compressor method to use</param>
/// <param name="compressionLevel">The oodle compression level</param>
/// <param name="sourceStream">The uncompressed content in a stream that you wish to compress</param>
/// <returns>The iohash of the uncompressed content</returns>
public static IoHash CompressContent(Stream s, OodleCompressorType method, OodleCompressionLevel compressionLevel, Stream sourceStream)
{
    const long BlockSize = 256 * 1024;

    // Only the bytes from the current position onwards can still be read. Sizing the blocks from the full
    // length would ask for more bytes than remain whenever the stream is not positioned at its start.
    long remaining = sourceStream.Length - sourceStream.Position;

    List<byte[]> blocks = new List<byte[]>();
    while (remaining > 0)
    {
        // every block is BlockSize bytes except the last, which holds whatever is left
        byte[] bufferToCompress = new byte[(int)Math.Min(remaining, BlockSize)];

        // NOTE(review): ReadFixedLengthBytes is assumed to fill the whole buffer or throw - confirm against its definition
        sourceStream.ReadFixedLengthBytes(bufferToCompress);
        blocks.Add(bufferToCompress);
        remaining -= bufferToCompress.Length;
    }

    return CompressContent(s, method, compressionLevel, blocks, BlockSize);
}
/// <summary>
/// Compresses each block with Oodle and writes a complete compressed buffer (header, block size table and
/// block payloads) to the stream.
/// </summary>
/// <param name="s">The stream to write the compressed content into; it is left open</param>
/// <param name="method">Which oodle compressor method to use</param>
/// <param name="compressionLevel">The oodle compression level</param>
/// <param name="blocks">The uncompressed content split into blocks, in order</param>
/// <param name="blockSize">The size of every uncompressed block except the last</param>
/// <returns>The iohash of the uncompressed content</returns>
/// <exception cref="Exception">The compressor reported zero bytes for a block</exception>
private static IoHash CompressContent(Stream s, OodleCompressorType method, OodleCompressionLevel compressionLevel, List<byte[]> blocks, long blockSize)
{
    long blockCount = blocks.Count;
    byte blockSizeExponent = (byte)Math.Floor(Math.Log2(blockSize));

    List<byte[]> storedBlocks = new List<byte[]>(blocks.Count);

    using Hasher hasher = Hasher.New();

    ulong uncompressedContentLength = (ulong)blocks.Sum(b => b.LongLength);

    // the header is followed by one 32-bit size per block, then the blocks themselves
    ulong compressedContentLength = CompressedBufferHeader.HeaderLength + (ulong)(sizeof(uint) * blockCount);

    foreach (byte[] bufferToCompress in blocks)
    {
        int rawBlockSize = bufferToCompress.Length;
        hasher.UpdateWithJoin(new ReadOnlySpan<byte>(bufferToCompress, 0, rawBlockSize));

        int maxSize = Oodle.MaximumOutputSize(method, rawBlockSize);
        byte[] compressedBlock = new byte[maxSize];
        long encodedSize = Oodle.Compress(method, bufferToCompress, compressedBlock, compressionLevel);
        if (encodedSize == 0)
        {
            throw new Exception("Failed to compress content");
        }

        byte[] storedBlock;
        if (encodedSize >= rawBlockSize)
        {
            // When decompressing, a block whose stored size equals its raw size is treated as uncompressed and
            // written through untouched. Compressed output that happens to be exactly as large as the input
            // would therefore be mistaken for raw data, so store the block raw whenever compression did not
            // make it smaller.
            storedBlock = bufferToCompress;
        }
        else
        {
            // the compressor was given a worst-case sized buffer, keep only the bytes it produced
            storedBlock = new byte[encodedSize];
            Array.Copy(compressedBlock, storedBlock, encodedSize);
        }

        storedBlocks.Add(storedBlock);
        compressedContentLength += (ulong)storedBlock.LongLength;
    }

    Hash blake3Hash = hasher.Finalize();
    // iohashes are the first 20 bytes of the blake3 hash
    byte[] hashData = blake3Hash.AsSpan().Slice(0, 20).ToArray();
    IoHash hash = new IoHash(hashData);

    CompressedBufferHeader header = new CompressedBufferHeader
    {
        Magic = CompressedBufferHeader.ExpectedMagic,
        // the crc is calculated and filled in when the header is serialized
        Crc32 = 0,
        Method = CompressedBufferHeader.CompressionMethod.Oodle,
        CompressionLevel = (byte)compressionLevel,
        CompressionMethodUsed = (byte)method,
        BlockSizeExponent = blockSizeExponent,
        BlockCount = (uint)blockCount,
        TotalRawSize = uncompressedContentLength,
        TotalCompressedSize = compressedContentLength,
        RawHash = hashData
    };

    byte[] headerAndBlocks = WriteHeaderToBuffer(header, storedBlocks.Select(b => (uint)b.Length).ToArray());

    using BinaryWriter writer = new BinaryWriter(s, Encoding.Default, leaveOpen: true);
    writer.Write(headerAndBlocks);
    foreach (byte[] storedBlock in storedBlocks)
    {
        writer.Write(storedBlock);
    }

    return hash;
}
/// <summary>
/// Serializes the header followed by the big endian table of compressed block sizes into a new buffer, then
/// calculates the CRC over that data and stores it in both the header object and the buffer.
/// </summary>
/// <param name="header">The header to serialize; its Crc32 property is updated with the calculated value</param>
/// <param name="compressedBlockLengths">The stored size of each block, at least BlockCount entries</param>
/// <returns>A buffer holding the serialized header immediately followed by the block size table</returns>
private static byte[] WriteHeaderToBuffer(CompressedBufferHeader header, uint[] compressedBlockLengths)
{
    const int CrcOffset = sizeof(uint);
    // the crc covers everything from the method field onwards (skipping magic which is a constant and the crc field itself)
    const int MethodOffset = sizeof(uint) + sizeof(uint);

    uint blockCount = header.BlockCount;
    uint blocksByteUsed = blockCount * sizeof(uint);
    byte[] headerBuffer = new byte[CompressedBufferHeader.HeaderLength + blocksByteUsed];

    // write the header with whatever crc it currently holds, the real value is patched in below
    using (MemoryStream ms = new MemoryStream(headerBuffer))
    using (BinaryWriter writer = new BinaryWriter(ms))
    {
        WriteHeader(header, writer);
    }

    // The block sizes are stored big endian directly after the header. Encode them explicitly as big endian
    // rather than swapping based on the host byte order and writing through a BinaryWriter, which always
    // emits little endian.
    Span<byte> blockTable = headerBuffer.AsSpan((int)CompressedBufferHeader.HeaderLength);
    for (int i = 0; i < blockCount; i++)
    {
        BinaryPrimitives.WriteUInt32BigEndian(blockTable.Slice(i * sizeof(uint), sizeof(uint)), compressedBlockLengths[i]);
    }

    uint calculatedCrc = Crc32.HashToUInt32(headerBuffer.AsSpan(MethodOffset));
    header.Crc32 = calculatedCrc;

    // the crc field is not part of the hashed range, so it can be patched in place without serializing the header again
    BinaryPrimitives.WriteUInt32BigEndian(headerBuffer.AsSpan(CrcOffset, sizeof(uint)), calculatedCrc);

    return headerBuffer;
}
}
/// <summary>
/// Thrown when the crc32 stored in a compressed buffer header does not match the crc32 calculated while reading
/// the buffer, usually indicating that the full content was not present.
/// </summary>
public class InvalidHashException : Exception
{
    /// <summary>
    /// Creates the exception with a message containing both crc values
    /// </summary>
    /// <param name="headerCrc32">The crc32 value stored in the header</param>
    /// <param name="calculatedCrc">The crc32 value that was calculated</param>
    public InvalidHashException(uint headerCrc32, uint calculatedCrc)
        : base($"Header specified crc \"{headerCrc32}\" but calculated hash was \"{calculatedCrc}\"")
    {
    }
}
/// <summary>
/// Thrown when a byte buffer is encountered that is not a compressed buffer
/// </summary>
public class InvalidMagicException : Exception
{
    /// <summary>
    /// Creates the exception with a message containing the magic number found and the one expected
    /// </summary>
    /// <param name="headerMagic">The magic number read from the start of the buffer</param>
    /// <param name="expectedMagic">The magic number a compressed buffer is expected to start with</param>
    public InvalidMagicException(uint headerMagic, uint expectedMagic)
        : base($"Header magic \"{headerMagic}\" was incorrect, expected to be {expectedMagic}")
    {
    }
}
}