Files
UnrealEngine/Engine/Source/Programs/Unsync/Private/UnsyncHash.h
2025-05-18 13:04:45 +08:00

390 lines
7.8 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "UnsyncLog.h"
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <string>
namespace unsync {
class FBuffer;
struct FBufferView;
enum class EStrongHashAlgorithmID : uint64;
template<size_t SizeBytes>
struct THashValue
{
alignas(uint32) uint8 Data[SizeBytes];
bool operator==(const THashValue& Other) const { return !memcmp(Data, Other.Data, SizeBytes); }
bool operator!=(const THashValue& Other) const { return !(*this == Other); }
struct Hasher
{
uint32 operator()(const THashValue& Hash) const
{
uint32 Result;
memcpy(&Result, Hash.Data, 4);
return Result;
}
};
static constexpr size_t Size() { return SizeBytes; }
};
using FHash128 = THashValue<16>;
using FHash160 = THashValue<20>;
using FHash256 = THashValue<32>;
enum class EHashType : uint8 {
Unknown,
Unknown_128,
Unknown_160,
Unknown_256,
MD5, // Standard 128 bit MD5
Blake3_128, // Blake3 hash, truncated to 128 bits
Blake3_160, // Blake3 hash, truncated to 160 bits, AKA Unreal Engine IoHash
Blake3_256, // Blake3 hash, full 256 bits
};
inline size_t
GetHashSize(EHashType Type)
{
switch (Type)
{
default:
case EHashType::Unknown:
return 0;
case EHashType::Unknown_128:
return 16;
case EHashType::Unknown_160:
return 20;
case EHashType::Unknown_256:
return 32;
case EHashType::MD5:
return 16;
case EHashType::Blake3_128:
return 16;
case EHashType::Blake3_160:
return 20;
case EHashType::Blake3_256:
return 32;
}
}
EHashType ToHashType(EStrongHashAlgorithmID StrongHasher);
// Generic hash type intended for runtime uses only.
// Serialized formats should always use Hash128/160/256.
struct FGenericHash
{
alignas(uint32) uint8 Data[32];
EHashType Type = EHashType::Unknown;
size_t Size() const { return GetHashSize(Type); }
bool operator==(const FGenericHash& Other) const { return !memcmp(Data, Other.Data, Size()) && Type == Other.Type; }
bool operator!=(const FGenericHash& Other) const { return !(*this == Other); }
static FGenericHash FromHash128(const FHash128& Other, EHashType Type)
{
UNSYNC_ASSERT(GetHashSize(Type) == 16); // #wip-widehash
FGenericHash Result = {};
Result.Type = Type;
memcpy(Result.Data, Other.Data, 16);
return Result;
}
static FGenericHash FromMd5(const FHash128& Other) { return FromHash128(Other, EHashType::MD5); }
static FGenericHash FromBlake3_128(const FHash128& Other) // NOLINT
{
return FromHash128(Other, EHashType::Blake3_128);
}
static FGenericHash FromBlake3_160(const FHash160& Other) // NOLINT
{
FGenericHash Result = {};
Result.Type = EHashType::Blake3_160;
memcpy(Result.Data, Other.Data, 20);
return Result;
}
static FGenericHash FromBlake3_256(const FHash256& Other) // NOLINT
{
FGenericHash Result = {};
Result.Type = EHashType::Blake3_256;
memcpy(Result.Data, Other.Data, 32);
return Result;
}
FHash128 ToHash128() const
{
FHash128 Result = {};
memcpy(Result.Data, Data, std::min(Size(), sizeof(Result)));
return Result;
}
FHash160 ToHash160() const
{
FHash160 Result = {};
memcpy(Result.Data, Data, std::min(Size(), sizeof(Result)));
return Result;
}
struct Hasher
{
uint32 operator()(const FGenericHash& Hash) const
{
uint32 Result;
memcpy(&Result, Hash.Data, sizeof(Result));
return Result;
}
};
};
bool ParseHashFromHexString(EStrongHashAlgorithmID StrongHasher, std::string_view HexString, FGenericHash& Output);
inline FHash160
ToHash160(const FHash256& Other)
{
FHash160 Result;
memcpy(Result.Data, Other.Data, Result.Size());
return Result;
}
inline FHash128
ToHash128(const FHash256& Other)
{
FHash128 Result;
memcpy(Result.Data, Other.Data, Result.Size());
return Result;
}
inline FHash128
ToHash128(const FHash160& Other)
{
FHash128 Result;
memcpy(Result.Data, Other.Data, Result.Size());
return Result;
}
inline uint32
MurmurMix(uint32 X)
{
X ^= X >> 16;
X *= 0x85ebca6b;
X ^= X >> 13;
X *= 0xc2b2ae35;
X ^= X >> 16;
return X;
}
struct FRollingChecksum
{
uint16 A = 0;
uint16 B = 0;
uint64 Count = 0;
void Update(const uint8* Data, uint64 Size)
{
for (uint64 I = 0; I < Size; ++I)
{
Add(Data[I]);
}
}
void Reset() { *this = FRollingChecksum(); }
uint32 Get() const { return uint32(A) | uint32(B) << 16; }
void Add(uint8 X)
{
uint16 X2 = X + 31;
A += X2;
B += A;
Count++;
}
void Sub(uint8 X)
{
uint16 X2 = X + 31;
A -= X2;
B -= uint16(Count * X2);
Count--;
}
};
struct FBuzHash
{
static const uint32 TABLE[256];
static inline uint32 Rol32(uint32 V, uint32 N)
{
N &= 31;
return ((V) << (N)) | ((V) >> (32 - N));
}
uint64 Count = 0;
uint32 State = 0;
void Update(const uint8* Data, uint64 Size)
{
for (uint64 I = 0; I < Size; ++I)
{
Add(Data[I]);
}
}
void Reset() { *this = FBuzHash(); }
uint32 Get() const { return State; }
void Add(uint8 X)
{
State = Rol32(State, 1) ^ TABLE[X];
Count++;
}
void Sub(uint8 X)
{
State = State ^ Rol32(TABLE[X], uint32(Count - 1));
Count--;
}
};
template<typename HashResultType>
HashResultType HashBlake3Bytes(const uint8* Data, uint64 Size);
template<typename HashResultType>
HashResultType
HashBlake3String(std::string_view Str)
{
return HashBlake3Bytes<HashResultType>((const uint8*)Str.data(), Str.length());
}
template<typename HashResultType>
HashResultType
HashBlake3String(const char* Str)
{
return HashBlake3Bytes<HashResultType>((const uint8*)Str, strlen(Str));
}
template<typename HashResultType>
HashResultType
HashBlake3String(const wchar_t* Str)
{
return HashBlake3Bytes<HashResultType>((const uint8*)Str, wcslen(Str) * sizeof(*Str));
}
template<typename HashResultType>
HashResultType
HashBlake3String(const std::string& Str)
{
return HashBlake3Bytes<HashResultType>((const uint8*)Str.c_str(), Str.length());
}
template<typename HashResultType>
HashResultType
HashBlake3String(const std::wstring& Str)
{
return HashBlake3Bytes<HashResultType>((const uint8*)Str.c_str(), Str.length() * sizeof(Str[0]));
}
class alignas(8) FBlake3Hasher
{
public:
FBlake3Hasher();
void Update(const void* Data, size_t Size);
FHash256 Finalize();
void Reset();
private:
static constexpr size_t STATE_SIZE = 1912;
uint8 State[STATE_SIZE];
};
class alignas(8) FMD5Hasher
{
public:
FMD5Hasher();
void Update(const void* Data, size_t Size);
FHash128 Finalize();
void Reset();
private:
static constexpr size_t STATE_SIZE = 88;
uint8 State[STATE_SIZE];
};
class FStrongHasher
{
public:
FStrongHasher(EStrongHashAlgorithmID InAlgorithm) : Algorithm(InAlgorithm) {}
void Reset();
void Update(const void* Data, uint64 Size);
FGenericHash Finalize();
private:
const EStrongHashAlgorithmID Algorithm;
FBlake3Hasher Blake3;
FMD5Hasher MD5;
};
FHash128 HashMd5Bytes(const uint8* Data, uint64 Size);
FGenericHash HashBytes(const uint8* Data, uint64 Size, EHashType HashType);
FHash128 ComputeHash128(const uint8* Data, uint64 Size, EStrongHashAlgorithmID Algorithm);
FGenericHash ComputeHash(const uint8* Data, uint64 Size, EStrongHashAlgorithmID Algorithm);
FGenericHash ComputeHash(const FBuffer& Buffer, EStrongHashAlgorithmID Algorithm);
FGenericHash ComputeHash(const FBufferView& Buffer, EStrongHashAlgorithmID Algorithm);
inline FHash128
ToHash128(const FHash128& X)
{
return X;
}
inline FHash128
ToHash128(const FGenericHash& X)
{
return X.ToHash128();
}
} // namespace unsync
namespace std {
template<size_t SizeBytes>
struct hash<unsync::THashValue<SizeBytes>>
{
size_t operator()(const unsync::THashValue<SizeBytes>& Hash) const
{
typename unsync::THashValue<SizeBytes>::Hasher Hasher;
return (size_t)Hasher(Hash);
}
};
template<>
struct hash<unsync::FGenericHash>
{
size_t operator()(const unsync::FGenericHash& Hash) const
{
size_t Result;
memcpy(&Result, Hash.Data, sizeof(Result));
return Result;
}
};
} // namespace std