// Copyright Epic Games, Inc. All Rights Reserved.

#if (defined(__AUTORTFM) && __AUTORTFM)

#include "WriteLog.h"
#include "BuildMacros.h"
#include "Utils.h"

namespace AutoRTFM
{

#if defined(__x86_64__) || defined(_M_X64)
#define WRITELOG_HASH_MAYBE_HAS_AVX2 1
#include <immintrin.h>
#else
#define WRITELOG_HASH_MAYBE_HAS_AVX2 0
#endif

namespace
{

// If Data is aligned to Size, and Size is either 1, 2, 4 or 8 bytes then Hash
// is modified and SmallHash() returns true, otherwise the call is a no-op and
// SmallHash() returns false.
UE_AUTORTFM_FORCEINLINE bool SmallHash(const std::byte* Data, size_t Size, FWriteLog::FHash& Hash)
{
	switch (Size)
	{
	case 8:
		if (AUTORTFM_LIKELY((reinterpret_cast<uintptr_t>(Data) & 7) == 0))
		{
			uint64_t Load = *reinterpret_cast<const uint64_t*>(Data);
			Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Load);
			return true;
		}
		break;
	case 4:
		if (AUTORTFM_LIKELY((reinterpret_cast<uintptr_t>(Data) & 3) == 0))
		{
			uint32_t Load = *reinterpret_cast<const uint32_t*>(Data);
			Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Load);
			return true;
		}
		break;
	case 2:
		if (AUTORTFM_LIKELY((reinterpret_cast<uintptr_t>(Data) & 1) == 0))
		{
			uint16_t Load = *reinterpret_cast<const uint16_t*>(Data);
			Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Load);
			return true;
		}
		break;
	case 1:
		Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Data[0]);
		return true;
	}
	return false;
}

} // anonymous namespace

FWriteLog::FHash FWriteLog::Hash(size_t NumWriteEntries) const
{
#if WRITELOG_HASH_MAYBE_HAS_AVX2
	// __builtin_cpu_supports("avx2") produces a linker error.
	// As memory validation is a debug, opt-in feature, and most modern CPUs
	// support AVX2, assume for now that we have support.
	constexpr bool bHasAVX2 = true /* __builtin_cpu_supports("avx2") */;
	if (bHasAVX2)
	{
		AUTORTFM_MUST_TAIL return HashAVX2(NumWriteEntries);
	}
#endif

	size_t WriteIndex = 0;
	FWriteLog::FHash Hash = 0;
	for (auto Iter = begin(); Iter != end(); ++Iter, ++WriteIndex)
	{
		if (WriteIndex == NumWriteEntries)
		{
			break;
		}
		const FWriteLogEntry& Entry = *Iter;
		if (AUTORTFM_UNLIKELY(Entry.bNoMemoryValidation))
		{
			continue; // Next write
		}
		const std::byte* Data = Entry.LogicalAddress;
		const size_t Size = Entry.Size;
		if (!SmallHash(Data, Size, Hash))
		{
			for (size_t I = 0; I < Size; I++)
			{
				Hash = (Hash * 31) ^ static_cast<FWriteLog::FHash>(Data[I]);
			}
		}
	}
	return Hash;
}

#if WRITELOG_HASH_MAYBE_HAS_AVX2
__attribute__((__target__("avx2")))
__attribute__((no_sanitize("address"))) // Intentionally reading whole vectors, which go beyond write bounds.
FWriteLog::FHash FWriteLog::HashAVX2(size_t NumWriteEntries) const
{
	size_t WriteIndex = 0;
	FWriteLog::FHash Hash = 0;

	using i8x32 = __m256i;

	const i8x32 Vec0To31 = _mm256_setr_epi8( // [0..31]
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f);
	const i8x32 Vec1To32 = _mm256_setr_epi8( // [1..32]
		0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
		0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
		0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
		0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20);

	// We use streaming vector intrinsics for the hash.
	// Flush once at the start to ensure all writes are visible.
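	// Note: the non-temporal hint of _mm256_stream_load_si256 (VMOVNTDQA) only
	// takes effect for write-combining memory; on ordinary write-back memory it
	// behaves like a regular 32-byte aligned vector load.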
	_mm_sfence();

	i8x32 VecHash{};

	for (auto Iter = begin(); Iter != end(); ++Iter, ++WriteIndex)
	{
		if (WriteIndex == NumWriteEntries)
		{
			break;
		}
		const FWriteLogEntry& Entry = *Iter;
		if (AUTORTFM_UNLIKELY(Entry.bNoMemoryValidation))
		{
			continue; // Next write
		}
		const std::byte* Data = Entry.LogicalAddress;
		size_t Size = Entry.Size;
		if (SmallHash(Data, Size, Hash))
		{
			continue; // Next write
		}

		if (const uintptr_t NumAlignmentBytes = reinterpret_cast<uintptr_t>(Data) & 31; NumAlignmentBytes != 0)
		{
			// Data is not 32-byte aligned.
			// Perform a vector load at the aligned-down address and mask the
			// bytes that we're interested in.
			const char MaskStart = static_cast<char>(NumAlignmentBytes);
			const char MaskEnd = static_cast<char>(std::min<size_t>(NumAlignmentBytes + Size, 127));
			const i8x32 VecMaskStart = _mm256_cmpgt_epi8(Vec1To32, _mm256_set1_epi8(MaskStart));
			const i8x32 VecMaskEnd = _mm256_cmpgt_epi8(_mm256_set1_epi8(MaskEnd), Vec0To31);
			const i8x32 Mask = _mm256_and_si256(VecMaskStart, VecMaskEnd);
			const i8x32 Load = _mm256_stream_load_si256(reinterpret_cast<const i8x32*>(Data - NumAlignmentBytes));
			const i8x32 MaskedLoad = _mm256_and_si256(Load, Mask);
			VecHash = VecHash ^ _mm256_bslli_epi128(VecHash, 5) ^ MaskedLoad;
			const size_t NumBytesConsumed = std::min<size_t>(32 - NumAlignmentBytes, Size);
			Data += NumBytesConsumed;
			Size -= NumBytesConsumed;
		}

		// Data is now 32-byte aligned, so we can hash in whole vectors.
		while (Size >= 32)
		{
			const i8x32 Load = _mm256_stream_load_si256(reinterpret_cast<const i8x32*>(Data));
			VecHash = VecHash ^ _mm256_bslli_epi128(VecHash, 5) ^ Load;
			Data += 32;
			Size -= 32;
		}

		// Any trailing bytes require more masking.
		if (Size > 0)
		{
			const i8x32 VecSize = _mm256_set1_epi8(static_cast<char>(Size));
			const i8x32 Mask = _mm256_cmpgt_epi8(VecSize, Vec0To31);
			const i8x32 Load = _mm256_stream_load_si256(reinterpret_cast<const i8x32*>(Data));
			const i8x32 MaskedLoad = _mm256_and_si256(Load, Mask);
			VecHash = VecHash ^ _mm256_bslli_epi128(VecHash, 5) ^ MaskedLoad;
		}
	}

	Hash = (Hash * 31) ^ __builtin_reduce_xor(VecHash);
	return Hash;
}
#endif // WRITELOG_HASH_MAYBE_HAS_AVX2

#undef WRITELOG_HASH_MAYBE_HAS_AVX2

} // namespace AutoRTFM

#endif // (defined(__AUTORTFM) && __AUTORTFM)