// UnrealEngine/Engine/Source/Programs/UnrealCloudDDC/Jupiter/Implementation/ContentId/CompressedBufferUtils.cs
// Copyright Epic Games, Inc. All Rights Reserved.
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Blake3;
using EpicGames.Compression;
using EpicGames.Core;
using Force.Crc32;
using Jupiter.Common.Implementation;
using K4os.Compression.LZ4;
using OpenTelemetry.Trace;
namespace Jupiter.Implementation
{
public class CompressedBufferHeader
{
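// On-disk layout, as read and written by ExtractHeader/WriteHeader below
// (all multi-byte fields are big-endian, 64 bytes total):
//   offset  0: uint32   Magic
//   offset  4: uint32   Crc32
//   offset  8: byte     Method
//   offset  9: byte     CompressionLevel
//   offset 10: byte     CompressionMethodUsed
//   offset 11: byte     BlockSizeExponent
//   offset 12: uint32   BlockCount
//   offset 16: uint64   TotalRawSize
//   offset 24: uint64   TotalCompressedSize
//   offset 32: byte[32] RawHash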
[System.Diagnostics.CodeAnalysis.SuppressMessage("Design", "CA1028:Enum Storage should be Int32", Justification = "Interop requires byte")]
public enum CompressionMethod : byte
{
// Header is followed by one uncompressed block.
None = 0,
// Header is followed by an array of compressed block sizes then the compressed blocks (used by both Oodle and LZ4).
Oodle = 3,
LZ4 = 4,
}
public const uint ExpectedMagic = 0xb7756362; // <dot>ucb: 0xB7 followed by ASCII 'u', 'c', 'b'
public const uint HeaderLength = 64;
// A magic number to identify a compressed buffer. Always 0xb7756362.
public uint Magic { get; set; }
// A CRC-32 used to check integrity of the buffer. Uses the polynomial 0x04c11db7.
public uint Crc32 { get; set; }
// The method used to compress the buffer. Affects layout of data following the header.
public CompressionMethod Method { get; set; }
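// The level the buffer was compressed at (e.g. an Oodle compression level). Informational; not needed to decompress.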
public byte CompressionLevel { get; set; }
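// The specific compressor used within the method (e.g. which Oodle compressor, such as Kraken). Informational; not needed to decompress.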
public byte CompressionMethodUsed { get; set; }
// The power of two size of every uncompressed block except the last. Size is 1 << BlockSizeExponent.
public byte BlockSizeExponent { get; set; }
// The number of blocks that follow the header.
public uint BlockCount { get; set; }
// The total size of the uncompressed data.
public ulong TotalRawSize { get; set; }
// The total size of the compressed data including the header.
public ulong TotalCompressedSize { get; set; }
// The BLAKE3 hash of the uncompressed data. Only the first 20 bytes (the IoHash) are significant; the remainder is written as zero.
public byte[] RawHash { get; set; } = Array.Empty<byte>();
public void ByteSwap()
{
Magic = BinaryPrimitives.ReverseEndianness(Magic);
Crc32 = BinaryPrimitives.ReverseEndianness(Crc32);
BlockCount = BinaryPrimitives.ReverseEndianness(BlockCount);
TotalRawSize = BinaryPrimitives.ReverseEndianness(TotalRawSize);
TotalCompressedSize = BinaryPrimitives.ReverseEndianness(TotalCompressedSize);
}
}
public class CompressedBufferUtils
{
private readonly Tracer _tracer;
private readonly BufferedPayloadFactory _payloadFactory;
public CompressedBufferUtils(Tracer tracer, BufferedPayloadFactory payloadFactory)
{
_tracer = tracer;
_payloadFactory = payloadFactory;
}
private static (CompressedBufferHeader, uint[]) ExtractHeader(BinaryReader br)
{
byte[] headerData = br.ReadBytes((int)CompressedBufferHeader.HeaderLength);
using MemoryStream ms = new MemoryStream(headerData);
using BinaryReader reader = new BinaryReader(ms);
// the header is always stored big endian
bool needsByteSwap = BitConverter.IsLittleEndian;
CompressedBufferHeader header = new CompressedBufferHeader
{
Magic = reader.ReadUInt32(),
Crc32 = reader.ReadUInt32(),
Method = (CompressedBufferHeader.CompressionMethod)reader.ReadByte(),
CompressionLevel = reader.ReadByte(),
CompressionMethodUsed = reader.ReadByte(),
BlockSizeExponent = reader.ReadByte(),
BlockCount = reader.ReadUInt32(),
TotalRawSize = reader.ReadUInt64(),
TotalCompressedSize = reader.ReadUInt64()
};
byte[] hash = reader.ReadBytes(32); // a full blake3 hash
header.RawHash = hash;
if (needsByteSwap)
{
header.ByteSwap();
}
if (header.Magic != CompressedBufferHeader.ExpectedMagic)
{
throw new InvalidMagicException(header.Magic, CompressedBufferHeader.ExpectedMagic);
}
// calculate the crc from the start of the method field (skipping magic which is a constant and the crc field itself)
const int MethodOffset = sizeof(uint) + sizeof(uint);
// buffers compressed with Method None have no block-size table
uint blocksByteUsed = header.Method != CompressedBufferHeader.CompressionMethod.None ? header.BlockCount * (uint)sizeof(uint) : 0;
byte[] crcData = new byte[blocksByteUsed + headerData.Length];
Array.Copy(headerData, crcData, headerData.Length);
uint[] blocks = Array.Empty<uint>();
if (blocksByteUsed != 0)
{
byte[] blocksData = br.ReadBytes((int)blocksByteUsed);
Array.Copy(blocksData, 0, crcData, headerData.Length, blocksData.Length);
blocks = new uint[header.BlockCount];
for (int i = 0; i < header.BlockCount; i++)
{
ReadOnlySpan<byte> memory = new ReadOnlySpan<byte>(blocksData, i * sizeof(uint), sizeof(uint));
uint compressedBlockSize = BinaryPrimitives.ReadUInt32BigEndian(memory);
blocks[i] = compressedBlockSize;
}
}
uint calculatedCrc = Crc32Algorithm.Compute(crcData, MethodOffset, (int)(CompressedBufferHeader.HeaderLength - MethodOffset + blocksByteUsed));
if (header.Crc32 != calculatedCrc)
{
throw new InvalidHashException(header.Crc32, calculatedCrc);
}
return (header, blocks);
}
public static void WriteHeader(CompressedBufferHeader header, BinaryWriter writer)
{
// the header is always stored big endian
bool needsByteSwap = BitConverter.IsLittleEndian;
if (needsByteSwap)
{
header.ByteSwap();
}
writer.Write(header.Magic);
writer.Write(header.Crc32);
writer.Write((byte)header.Method);
writer.Write((byte)header.CompressionLevel);
writer.Write((byte)header.CompressionMethodUsed);
writer.Write((byte)header.BlockSizeExponent);
writer.Write(header.BlockCount);
writer.Write(header.TotalRawSize);
writer.Write(header.TotalCompressedSize);
writer.Write(header.RawHash, 0, 20); // write only the first 20 bytes, as IoHashes are 20 bytes (not the full 32-byte BLAKE3 hash)
for (int i = 0; i < 12; i++)
{
// the last 12 bytes should be 0 as they are reserved
writer.Write((byte)0);
}
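// swap back so the caller's header is left in its original byte order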
if (needsByteSwap)
{
header.ByteSwap();
}
}
public async Task<(IBufferedPayload, IoHash)> DecompressContentAsync(Stream sourceStream, ulong streamSize, CancellationToken cancellationToken = default)
{
using BinaryReader br = new BinaryReader(sourceStream);
(CompressedBufferHeader header, uint[] compressedBlockSizes) = ExtractHeader(br);
if (streamSize != header.TotalCompressedSize)
{
throw new Exception($"Expected stream to be {header.TotalCompressedSize} but it was {streamSize}");
}
using FilesystemBufferedPayloadWriter bufferedPayloadWriter = _payloadFactory.CreateFilesystemBufferedPayloadWriter("cb-decompress");
{
await using Stream targetStream = bufferedPayloadWriter.GetWritableStream();
ulong decompressedPayloadOffset = 0;
bool willHaveBlocks = header.Method != CompressedBufferHeader.CompressionMethod.None;
if (willHaveBlocks)
{
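// e.g. a BlockSizeExponent of 18 gives 256 KiB blocks (the default block size used by CompressContent below)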
ulong blockSize = 1ul << header.BlockSizeExponent;
foreach (uint compressedBlockSize in compressedBlockSizes)
{
ulong rawBlockSize = Math.Min(header.TotalRawSize - decompressedPayloadOffset, blockSize);
byte[] compressedPayload = br.ReadBytes((int)compressedBlockSize);
int writtenBytes;
// if a block has the same raw and compressed size it is stored uncompressed and we should not attempt to decompress it
if (rawBlockSize == compressedBlockSize)
{
writtenBytes = (int)rawBlockSize;
targetStream.Write(compressedPayload);
}
else
{
writtenBytes = DecompressPayload(compressedPayload, header, rawBlockSize, targetStream);
}
decompressedPayloadOffset += (uint)writtenBytes;
}
}
else
{
await sourceStream.CopyToAsync(targetStream, cancellationToken);
}
}
// no using statement on the finalized payload as ownership is transferred to the caller of this method
FilesystemBufferedPayload? finalizedBufferedPayload = null;
try
{
finalizedBufferedPayload = bufferedPayloadWriter.Done();
if (header.TotalRawSize != (ulong)finalizedBufferedPayload.Length)
{
throw new Exception("Did not decompress the full payload");
}
{
using TelemetrySpan _ = _tracer.StartActiveSpan("web.hash").SetAttribute("operation.name", "web.hash");
// only read the first 20 bytes of the hash field as IoHashes are 20 bytes and not 32 bytes
byte[] slicedHash = new byte[20];
Array.Copy(header.RawHash, 0, slicedHash, 0, 20);
BlobId headerIdentifier = new BlobId(slicedHash);
await using Stream hashStream = finalizedBufferedPayload.GetStream();
BlobId contentHash = await BlobId.FromStreamAsync(hashStream, cancellationToken);
if (!headerIdentifier.Equals(contentHash))
{
throw new Exception($"Payload was expected to be {headerIdentifier} but was {contentHash}");
}
}
return (finalizedBufferedPayload, new IoHash(header.RawHash));
}
catch
{
finalizedBufferedPayload?.Dispose();
throw;
}
}
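// A minimal usage sketch (hypothetical caller; assumes "utils" is an injected CompressedBufferUtils,
// "source" is any readable stream, and "sourceLength" is its size in bytes):
//
//   (IBufferedPayload payload, IoHash hash) = await utils.DecompressContentAsync(source, (ulong)sourceLength);
//   try
//   {
//       await using Stream decompressed = payload.GetStream();
//       // ... consume the decompressed stream ...
//   }
//   finally
//   {
//       payload.Dispose(); // ownership of the payload is transferred to the caller
//   }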
private static int DecompressPayload(ReadOnlySpan<byte> compressedPayload, CompressedBufferHeader header, ulong rawBlockSize, Stream target)
{
switch (header.Method)
{
case CompressedBufferHeader.CompressionMethod.None:
target.Write(compressedPayload);
return compressedPayload.Length;
case CompressedBufferHeader.CompressionMethod.Oodle:
{
byte[] result = new byte[rawBlockSize];
long writtenBytes = Oodle.Decompress(compressedPayload, result);
if (writtenBytes == 0)
{
throw new Exception("Failed to run oodle decompress");
}
target.Write(result);
return (int)writtenBytes;
}
case CompressedBufferHeader.CompressionMethod.LZ4:
{
byte[] result = new byte[rawBlockSize];
int writtenBytes = LZ4Codec.Decode(compressedPayload, result);
target.Write(result);
return writtenBytes;
}
default:
throw new NotImplementedException($"Method {header.Method} is not a supported value");
}
}
public IoHash CompressContent(Stream s, OoodleCompressorMethod method, OoodleCompressionLevel compressionLevel, byte[] rawContents)
{
const long DefaultBlockSize = 256 * 1024;
long blockSize = DefaultBlockSize;
long blockCount = (rawContents.LongLength + blockSize - 1) / blockSize;
Span<byte> contentsSpan = new Span<byte>(rawContents);
List<byte[]> blocks = new List<byte[]>();
for (int i = 0; i < blockCount; i++)
{
int rawBlockSize = Math.Min(rawContents.Length - (i * (int)blockSize), (int)blockSize);
Span<byte> bufferToCompress = contentsSpan.Slice((int)(i * blockSize), rawBlockSize);
blocks.Add(bufferToCompress.ToArray());
}
return CompressContent(s, method, compressionLevel, blocks, blockSize);
}
public IoHash CompressContent(Stream s, OoodleCompressorMethod method, OoodleCompressionLevel compressionLevel, List<byte[]> blocks, long blockSize)
{
OodleCompressorType oodleMethod = OodleUtils.ToOodleApiCompressor(method);
OodleCompressionLevel oodleLevel = OodleUtils.ToOodleApiCompressionLevel(compressionLevel);
long blockCount = blocks.Count;
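// blockSize is expected to be a power of two; e.g. the 256 KiB default yields an exponent of 18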
byte blockSizeExponent = (byte)Math.Floor(Math.Log2(blockSize));
List<byte[]> compressedBlocks = new List<byte[]>();
using Hasher hasher = Hasher.New();
ulong uncompressedContentLength = (ulong)blocks.Sum(b => b.LongLength);
ulong compressedContentLength = CompressedBufferHeader.HeaderLength;
for (int i = 0; i < blockCount; i++)
{
int rawBlockSize = blocks[i].Length;
byte[] bufferToCompress = blocks[i];
hasher.UpdateWithJoin(new ReadOnlySpan<byte>(bufferToCompress, 0, rawBlockSize));
int maxSize = Oodle.MaximumOutputSize(oodleMethod, rawBlockSize);
byte[] compressedBlock = new byte[maxSize];
long encodedSize = Oodle.Compress(oodleMethod, bufferToCompress, compressedBlock, oodleLevel);
if (encodedSize == 0)
{
throw new Exception("Failed to compress content");
}
byte[] actualCompressedBlock = new byte[encodedSize];
Array.Copy(compressedBlock, actualCompressedBlock, encodedSize);
compressedBlocks.Add(actualCompressedBlock);
compressedContentLength += (ulong)encodedSize;
}
compressedContentLength += (uint)(sizeof(uint) * blockCount);
Hash blake3Hash = hasher.Finalize();
byte[] hashData = blake3Hash.AsSpan().Slice(0, 20).ToArray();
IoHash hash = new IoHash(hashData);
CompressedBufferHeader header = new CompressedBufferHeader
{
Magic = CompressedBufferHeader.ExpectedMagic,
Crc32 = 0,
Method = CompressedBufferHeader.CompressionMethod.Oodle,
CompressionLevel = (byte)compressionLevel,
CompressionMethodUsed = (byte)method,
BlockSizeExponent = blockSizeExponent,
BlockCount = (uint)blockCount,
TotalRawSize = uncompressedContentLength,
TotalCompressedSize = compressedContentLength,
RawHash = hashData
};
byte[] headerAndBlocks = WriteHeaderToBuffer(header, compressedBlocks.Select(b => (uint)b.Length).ToArray());
using BinaryWriter writer = new BinaryWriter(s, Encoding.Default, leaveOpen: true);
writer.Write(headerAndBlocks);
for (int i = 0; i < blockCount; i++)
{
writer.Write(compressedBlocks[i]);
}
return hash;
}
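// A minimal usage sketch (hypothetical caller; assumes "utils" is an injected CompressedBufferUtils
// and "rawContents" is the byte[] to compress): compress with Kraken at Normal level, writing the
// compressed buffer to "output" and returning the IoHash of the raw data:
//
//   using MemoryStream output = new MemoryStream();
//   IoHash hash = utils.CompressContent(output, OoodleCompressorMethod.Kraken, OoodleCompressionLevel.Normal, rawContents);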
public static byte[] WriteHeaderToBuffer(CompressedBufferHeader header, uint[] compressedBlockLengths)
{
uint blockCount = header.BlockCount;
uint blocksByteUsed = blockCount * sizeof(uint);
byte[] headerBuffer = new byte[CompressedBufferHeader.HeaderLength + blocksByteUsed];
// write the header with a placeholder crc, which we compute and rewrite below
{
using MemoryStream ms = new MemoryStream(headerBuffer);
using BinaryWriter writer = new BinaryWriter(ms);
WriteHeader(header, writer);
for (int i = 0; i < blockCount; i++)
{
uint value = compressedBlockLengths[i];
if (BitConverter.IsLittleEndian)
{
value = BinaryPrimitives.ReverseEndianness(value);
}
writer.Write(value);
}
}
// calculate the crc from the start of the method field (skipping magic which is a constant and the crc field itself)
const int MethodOffset = sizeof(uint) + sizeof(uint);
uint calculatedCrc = Crc32Algorithm.Compute(headerBuffer, MethodOffset, (int)(CompressedBufferHeader.HeaderLength - MethodOffset + blocksByteUsed));
header.Crc32 = calculatedCrc;
// write the header again now that we have the crc
{
using MemoryStream ms = new MemoryStream(headerBuffer);
using BinaryWriter writer = new BinaryWriter(ms);
WriteHeader(header, writer);
}
return headerBuffer;
}
}
public class InvalidHashException : Exception
{
public InvalidHashException(uint headerCrc32, uint calculatedCrc) : base($"Header specified crc \"{headerCrc32}\" but the calculated crc was \"{calculatedCrc}\"")
{
}
}
public class InvalidMagicException : Exception
{
public InvalidMagicException(uint headerMagic, uint expectedMagic) : base($"Header magic \"{headerMagic:X8}\" was incorrect, expected to be \"{expectedMagic:X8}\"")
{
}
}
// From OodleDataCompression.h: we define our own enums for the Oodle compression settings used, and convert them to the ones expected by the Oodle API
#pragma warning disable CA1028 // Enum Storage should be Int32
public enum OoodleCompressorMethod : byte
{
NotSet = 0,
Selkie = 1,
Mermaid = 2,
Kraken = 3,
Leviathan = 4
}
public enum OoodleCompressionLevel : sbyte
{
HyperFast4 = -4,
HyperFast3 = -3,
HyperFast2 = -2,
HyperFast1 = -1,
None = 0,
SuperFast = 1,
VeryFast = 2,
Fast = 3,
Normal = 4,
Optimal1 = 5,
Optimal2 = 6,
Optimal3 = 7,
Optimal4 = 8,
}
#pragma warning restore CA1028 // Enum Storage should be Int32
public static class OodleUtils
{
public static OodleCompressionLevel ToOodleApiCompressionLevel(OoodleCompressionLevel compressionLevel)
{
switch (compressionLevel)
{
case OoodleCompressionLevel.HyperFast4:
return OodleCompressionLevel.HyperFast4;
case OoodleCompressionLevel.HyperFast3:
return OodleCompressionLevel.HyperFast3;
case OoodleCompressionLevel.HyperFast2:
return OodleCompressionLevel.HyperFast2;
case OoodleCompressionLevel.HyperFast1:
return OodleCompressionLevel.HyperFast1;
case OoodleCompressionLevel.None:
return OodleCompressionLevel.None;
case OoodleCompressionLevel.SuperFast:
return OodleCompressionLevel.SuperFast;
case OoodleCompressionLevel.VeryFast:
return OodleCompressionLevel.VeryFast;
case OoodleCompressionLevel.Fast:
return OodleCompressionLevel.Fast;
case OoodleCompressionLevel.Normal:
return OodleCompressionLevel.Normal;
case OoodleCompressionLevel.Optimal1:
return OodleCompressionLevel.Optimal1;
case OoodleCompressionLevel.Optimal2:
return OodleCompressionLevel.Optimal2;
case OoodleCompressionLevel.Optimal3:
return OodleCompressionLevel.Optimal3;
case OoodleCompressionLevel.Optimal4:
return OodleCompressionLevel.Optimal4;
default:
throw new ArgumentOutOfRangeException(nameof(compressionLevel), compressionLevel, null);
}
}
public static OodleCompressorType ToOodleApiCompressor(OoodleCompressorMethod compressor)
{
switch (compressor)
{
case OoodleCompressorMethod.NotSet:
return OodleCompressorType.None;
case OoodleCompressorMethod.Selkie:
return OodleCompressorType.Selkie;
case OoodleCompressorMethod.Mermaid:
return OodleCompressorType.Mermaid;
case OoodleCompressorMethod.Kraken:
return OodleCompressorType.Kraken;
case OoodleCompressorMethod.Leviathan:
return OodleCompressorType.Leviathan;
default:
throw new ArgumentOutOfRangeException(nameof(compressor), compressor, null);
}
}
}
}