209 lines
5.6 KiB
C++
209 lines
5.6 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#if (defined(__AUTORTFM) && __AUTORTFM)
|
|
|
|
#include "WriteLog.h"
|
|
|
|
#include "BuildMacros.h"
|
|
#include "Utils.h"
|
|
|
|
namespace AutoRTFM
|
|
{
|
|
|
|
#if defined(__x86_64__) || defined(_M_X64)
|
|
#define WRITELOG_HASH_MAYBE_HAS_AVX2 1
|
|
#include <immintrin.h>
|
|
#else
|
|
#define WRITELOG_HASH_MAYBE_HAS_AVX2 0
|
|
#endif
|
|
|
|
namespace
|
|
{
|
|
|
|
// If Data is aligned to Size, and Size is either 1, 2, 4 or 8 bytes then Hash
|
|
// is modified and SmallHash() returns true, otherwise the call is a no-op and
|
|
// SmallHash() returns false.
|
|
UE_AUTORTFM_FORCEINLINE
|
|
bool SmallHash(const std::byte* Data, size_t Size, FWriteLog::FHash& Hash)
|
|
{
|
|
switch (Size)
|
|
{
|
|
case 8:
|
|
if (AUTORTFM_LIKELY((reinterpret_cast<uintptr_t>(Data) & 7) == 0))
|
|
{
|
|
uint64_t Load = *reinterpret_cast<uint64_t const*>(Data);
|
|
Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Load);
|
|
return true;
|
|
}
|
|
break;
|
|
|
|
case 4:
|
|
if (AUTORTFM_LIKELY((reinterpret_cast<uintptr_t>(Data) & 3) == 0))
|
|
{
|
|
uint32_t Load = *reinterpret_cast<uint32_t const*>(Data);
|
|
Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Load);
|
|
return true;
|
|
}
|
|
break;
|
|
|
|
case 2:
|
|
if (AUTORTFM_LIKELY((reinterpret_cast<uintptr_t>(Data) & 1) == 0))
|
|
{
|
|
uint16_t Load = *reinterpret_cast<uint16_t const*>(Data);
|
|
Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Load);
|
|
return true;
|
|
}
|
|
break;
|
|
|
|
case 1:
|
|
Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Data[0]);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
// Computes a hash over the memory referenced by the first NumWriteEntries
// entries of the write log (or all entries, if the log holds fewer).
// Entries flagged with bNoMemoryValidation do not contribute to the hash, but
// they still count towards NumWriteEntries.
FWriteLog::FHash FWriteLog::Hash(size_t NumWriteEntries) const
{
#if WRITELOG_HASH_MAYBE_HAS_AVX2
	// Runtime detection with __builtin_cpu_supports("avx2") produces a linker
	// error. As memory validation is a debug, opt-in feature, and most modern
	// CPUs support AVX2, assume for now that we have support.
	constexpr bool bHasAVX2 = true /* __builtin_cpu_supports("avx2") */;
	if (bHasAVX2)
	{
		AUTORTFM_MUST_TAIL return HashAVX2(NumWriteEntries);
	}
#endif

	FWriteLog::FHash Result = 0;
	size_t NumVisited = 0;

	// Walk the log until it is exhausted or the requested number of entries
	// has been visited, whichever happens first.
	for (auto It = begin(); It != end() && NumVisited != NumWriteEntries; ++It, ++NumVisited)
	{
		const FWriteLogEntry& Write = *It;
		if (AUTORTFM_UNLIKELY(Write.bNoMemoryValidation))
		{
			continue; // Excluded from validation.
		}

		const std::byte* const Bytes = Write.LogicalAddress;
		const size_t NumBytes = Write.Size;

		if (SmallHash(Bytes, NumBytes, Result))
		{
			continue; // Handled by the small, aligned fast path.
		}

		// Fallback: mix the write into the hash one byte at a time.
		for (const std::byte* Cursor = Bytes; Cursor != Bytes + NumBytes; ++Cursor)
		{
			Result = (Result * 31) ^ static_cast<FWriteLog::FHash>(*Cursor);
		}
	}

	return Result;
}
|
|
|
|
#if WRITELOG_HASH_MAYBE_HAS_AVX2
|
|
__attribute__((__target__("avx2")))
|
|
__attribute__((no_sanitize("address"))) // Intentionally reading whole-vectors, which go beyond write bounds.
|
|
FWriteLog::FHash FWriteLog::HashAVX2(size_t NumWriteEntries) const
|
|
{
|
|
size_t WriteIndex = 0;
|
|
|
|
FWriteLog::FHash Hash = 0;
|
|
|
|
using i8x32 = __m256i;
|
|
const i8x32 Vec0To31 = _mm256_setr_epi8( // [0..31]
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f);
|
|
const i8x32 Vec1To32 = _mm256_setr_epi8( // [1..32]
|
|
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
|
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
|
|
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
|
0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20);
|
|
|
|
// We use streaming vector intrinsics for the hash.
|
|
// Flush once at the start to ensure all writes are visible.
|
|
_mm_sfence();
|
|
|
|
i8x32 VecHash{};
|
|
|
|
for(auto Iter = begin(); Iter != end(); ++Iter, ++WriteIndex)
|
|
{
|
|
if (WriteIndex == NumWriteEntries)
|
|
{
|
|
break;
|
|
}
|
|
|
|
const FWriteLogEntry& Entry = *Iter;
|
|
if (AUTORTFM_UNLIKELY(Entry.bNoMemoryValidation))
|
|
{
|
|
continue; // Next write
|
|
}
|
|
|
|
const std::byte* Data = Entry.LogicalAddress;
|
|
size_t Size = Entry.Size;
|
|
|
|
if (SmallHash(Data, Size, Hash))
|
|
{
|
|
continue; // Next write
|
|
}
|
|
|
|
if (const uintptr_t NumAlignmentBytes = reinterpret_cast<uintptr_t>(Data) & 31; NumAlignmentBytes != 0)
|
|
{
|
|
// Data is not 32-byte aligned.
|
|
// Perform a vector load at the aligned-down address and mask the
|
|
// bytes that we're interested in.
|
|
const char MaskStart = static_cast<char>(NumAlignmentBytes);
|
|
const char MaskEnd = static_cast<char>(std::min<size_t>(NumAlignmentBytes + Size, 127));
|
|
const i8x32 VecMaskStart = _mm256_cmpgt_epi8(Vec1To32, _mm256_set1_epi8(MaskStart));
|
|
const i8x32 VecMaskEnd = _mm256_cmpgt_epi8(_mm256_set1_epi8(MaskEnd), Vec0To31);
|
|
const i8x32 Mask = _mm256_and_si256(VecMaskStart, VecMaskEnd);
|
|
const i8x32 Load = _mm256_stream_load_si256(Data - NumAlignmentBytes);
|
|
const i8x32 MaskedLoad = _mm256_and_si256(Load, Mask);
|
|
VecHash = VecHash ^ _mm256_bslli_epi128(VecHash, 5) ^ MaskedLoad;
|
|
const size_t NumBytesConsumed = std::min(32 - NumAlignmentBytes, Size);
|
|
Data += NumBytesConsumed;
|
|
Size -= NumBytesConsumed;
|
|
}
|
|
|
|
// Data is now 32-byte aligned, so we can hash in whole vectors.
|
|
while (Size >= 32)
|
|
{
|
|
const i8x32 Load = _mm256_stream_load_si256(Data);
|
|
VecHash = VecHash ^ _mm256_bslli_epi128(VecHash, 5) ^ Load;
|
|
Data += 32;
|
|
Size -= 32;
|
|
}
|
|
|
|
// Any trailing bytes require more masking.
|
|
if (Size > 0)
|
|
{
|
|
const i8x32 VecSize = _mm256_set1_epi8(static_cast<char>(Size));
|
|
const i8x32 Mask = _mm256_cmpgt_epi8(VecSize, Vec0To31);
|
|
const i8x32 Load = _mm256_stream_load_si256(Data);
|
|
const i8x32 MaskedLoad = _mm256_and_si256(Load, Mask);
|
|
VecHash = VecHash ^ _mm256_bslli_epi128(VecHash, 5) ^ MaskedLoad;
|
|
}
|
|
}
|
|
|
|
Hash = (Hash * 31) ^ __builtin_reduce_xor(VecHash);
|
|
|
|
return Hash;
|
|
}
|
|
#endif // WRITELOG_HASH_MAYBE_HAS_AVX2
|
|
|
|
#undef WRITELOG_HASH_MAYBE_HAS_AVX2
|
|
|
|
}
|
|
|
|
#endif // (defined(__AUTORTFM) && __AUTORTFM)
|