Files
UnrealEngine/Engine/Plugins/TextureGraph/Source/TextureGraphEngine/Helper/DataUtil.cpp
2025-05-18 13:04:45 +08:00

432 lines
12 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "DataUtil.h"
#include "Async/ParallelFor.h"
#include "CoreTypes.h"
#include "Data/Blobber.h"
#include "Helper/Promise.h"
#include "TextureGraphEngine.h"
#include "Util.h"
/// Provides the definition of the LogData log category.
/// NOTE(review): the matching declaration is presumably in DataUtil.h - confirm.
DEFINE_LOG_CATEGORY(LogData);
//////////////////////////////////////////////////////////////////////////
/// Hash
//////////////////////////////////////////////////////////////////////////
/// Hashes one contiguous buffer in a single pass.
///
/// Buffers longer than one HashType word are folded with MX_HASH_VAL one whole word at a
/// time, followed by any trailing bytes one byte at a time. Buffers of at most one word are
/// copied into a zero-initialised HashType and folded exactly once.
///
/// @param Data          Start of the buffer to hash. No particular alignment is required.
/// @param Length        Number of bytes to hash
/// @param InitialValue  Value the first MX_HASH_VAL step starts from
/// @param Prime         Prime forwarded to every MX_HASH_VAL step
/// @return The accumulated hash value
HashType DataUtil::Hash_One(const uint8* Data, size_t Length, HashType InitialValue /* = GFNVInit */, HashType Prime /* = GFNVPrime */)
{
	/// Most common conditions go first ...
	if (Length > sizeof(HashType))
	{
		/// Number of bytes that can be consumed as whole HashType words
		const size_t IterLength = (Length / sizeof(HashType)) * sizeof(HashType);
		HashType HashValue = InitialValue;

		for (size_t i = 0; i < IterLength; i += sizeof(HashType))
		{
			/// Load the word through memcpy instead of dereferencing a casted pointer:
			/// Data carries no alignment guarantee, and a direct HashType read from an
			/// unaligned address is undefined behaviour (and faults on some targets)
			HashType Word = 0;
			memcpy(&Word, Data + i, sizeof(HashType));
			HashValue = MX_HASH_VAL(HashValue, Prime, Word);
		}

		/// Now iterate the residual Length, one byte at a time
		for (size_t i = IterLength; i < Length; i++)
			HashValue = MX_HASH_VAL(HashValue, Prime, (HashType)(*(Data + i)));

		return HashValue;
	}

	/// Short input (Length <= sizeof(HashType)): the unused high bytes of H stay zero
	HashType H = 0;

	/// Skip the copy for empty input so that a null Data is never handed to memcpy
	if (Length != 0)
		memcpy(&H, Data, Length);

	return MX_HASH_VAL(InitialValue, Prime, H);
}
/// Hashes a string by delegating to FCrc::StrCrc32 and moving the result into the upper
/// 32 bits of the returned HashType.
///
/// @param Name         String to hash
/// @param InitialHash  Forwarded to FCrc::StrCrc32 as its starting value
/// @param Prime        Not used by this implementation
/// @return The CRC shifted left by 32 bits
HashType DataUtil::Hash_GenericString_Name(const FString& Name, HashType InitialHash /* = GFNVInit */, HashType Prime /* = GFNVPrime */)
{
	// Lean on the engine's string CRC rather than hashing the characters ourselves.
	// TODO: Create a true StrCrc64 version ?
	const HashType Crc = FCrc::StrCrc32<TCHAR>(*Name, InitialHash);

	// The CRC only yields a 32 bit key, so push it into the high half to spread the keys
	// across the full 64 bit range
	return Crc << 32;
}
/// Hashes a raw buffer, choosing between the single-pass and the chunked implementation.
///
/// @param Data         Start of the buffer to hash
/// @param Length       Number of bytes to hash
/// @param InitialHash  Starting value forwarded to the chosen implementation
/// @param Prime        Prime forwarded to the chosen implementation
/// @return The hash produced by Hash_One or Hash_Chunked
HashType DataUtil::Hash(const uint8* Data, size_t Length, HashType InitialHash /* = GFNVInit */, HashType Prime /* = GFNVPrime */)
{
	/// Chunking is only used for buffers that are larger than two chunks AND an exact
	/// multiple of the chunk size [currently we don't allow chunking anything that isn't
	/// rounded to chunk size]. It should be easy to remove this constraint later on!
	const bool bUseChunks = (Length > GMaxChunk * 2) && (Length % GMaxChunk == 0);

	return bUseChunks
		? Hash_Chunked(Data, Length, InitialHash, Prime)
		: Hash_One(Data, Length, InitialHash, Prime);
}
/// Hashes a large buffer by hashing GMaxChunk-sized chunks in parallel and then hashing the
/// array of per-chunk results.
///
/// Only the first (Length / GMaxChunk) whole chunks are read; the visible caller,
/// DataUtil::Hash, only routes here when Length is an exact multiple of GMaxChunk.
///
/// @param Data          Start of the buffer to hash
/// @param Length        Number of bytes in the buffer; must cover at least two whole chunks
/// @param InitialValue  Starting value for every per-chunk hash and for the combining hash
/// @param Prime         Prime forwarded to every Hash_One call
/// @return The hash of the per-chunk hash values
HashType DataUtil::Hash_Chunked(const uint8* Data, size_t Length, HashType InitialValue /* = GFNVInit */, HashType Prime /* = GFNVPrime */)
{
	/// Number of whole chunks contained in the buffer
	const size_t MaxChunks = Length / DataUtil::GMaxChunk;
	check(MaxChunks > 1);

	/// One slot per chunk; every task writes only its own slot, so no locking is needed
	std::vector<HashType> results(MaxChunks);

	ParallelFor(MaxChunks, [&](int32 Index)
	{
		/// Widen the index before multiplying so that the byte offset is computed in
		/// size_t and cannot overflow 32-bit arithmetic on very large buffers
		const size_t Offset = static_cast<size_t>(Index) * GMaxChunk;
		results[Index] = Hash_One(Data + Offset, GMaxChunk, InitialValue, Prime);
	});

	/// Now we combine all the HashValues together
	return Hash_One((const uint8*)results.data(), MaxChunks * sizeof(HashType), InitialValue, Prime);
}
/// Rounds a byte count up to a size that the hashing routines can process efficiently.
///
/// The size is first rounded up to a multiple of sizeof(HashType). If the result is larger
/// than GMaxChunk it is then rounded up again to a multiple of GMaxChunk.
///
/// @param Size  Unpadded size in bytes
/// @return The padded size in bytes (never smaller than Size, barring overflow)
size_t DataUtil::GetOptimalHashingSize(size_t Size)
{
	/// Pad to a whole number of HashType words
	const size_t WordRemainder = Size % sizeof(HashType);
	const size_t WordAligned = (WordRemainder == 0)
		? Size
		: Size + sizeof(HashType) - WordRemainder;

	/// Anything that fits in a single chunk needs no further padding
	if (!(WordAligned > DataUtil::GMaxChunk))
		return WordAligned;

	/// Large data is additionally aligned to GMaxChunk for efficient hashing
	const size_t ChunkRemainder = WordAligned % DataUtil::GMaxChunk;
	if (ChunkRemainder == 0)
		return WordAligned;

	return WordAligned + DataUtil::GMaxChunk - ChunkRemainder;
}
/// Combines a list of hash values into a single hash.
///
/// A single-element list is returned unchanged; longer lists are hashed as one raw byte
/// buffer through Hash_One.
///
/// @param SubHashes     Hash values to combine; must not be empty
/// @param InitialValue  Starting value forwarded to Hash_One
/// @param Prime         Prime forwarded to Hash_One
/// @return The combined hash value
HashType DataUtil::Hash(const HashTypeVec& SubHashes, HashType InitialValue /* = GFNVInit */, HashType Prime /* = GFNVPrime */)
{
	const size_t Count = SubHashes.size();
	check(Count);

	if (Count != 1)
	{
		/// Treat the vector's storage as a plain byte buffer
		const uint8* Bytes = (const uint8*)SubHashes.data();
		return Hash_One(Bytes, Count * sizeof(HashType), InitialValue, Prime);
	}

	/// Nothing to combine
	return SubHashes[0];
}
/// Combines the current values of a list of CHash objects into a single hash.
///
/// @param InSubHashes   Hash objects whose Value() is collected, in order
/// @param InitialValue  Starting value forwarded to the HashTypeVec overload
/// @param Prime         Prime forwarded to the HashTypeVec overload
/// @return The combined hash value
HashType DataUtil::Hash(const CHashPtrVec& InSubHashes, HashType InitialValue /* = GFNVInit */, HashType Prime /* = GFNVPrime */)
{
	HashTypeVec Values;
	Values.reserve(InSubHashes.size());

	for (const auto& SubHash : InSubHashes)
		Values.push_back(SubHash->Value());

	return Hash(Values, InitialValue, Prime);
}
//////////////////////////////////////////////////////////////////////////
/// Constructs a hash object from a raw value.
///
/// A final hash stores the value directly. A non-final hash instead wraps the value in an
/// inner CHash (constructed as final) that is held in TempHashValue.
///
/// @param Value       Raw hash value
/// @param bInIsFinal  Whether this hash is final
CHash::CHash(HashType Value, bool bInIsFinal) : bIsFinal(bInIsFinal), Timestamp(FDateTime::Now())
{
	if (bIsFinal)
	{
		HashValue = Value;
	}
	else
	{
		TempHashValue = std::make_shared<CHash>(Value, true);
	}
}
/// Constructs a hash object whose value is derived from a list of source hashes.
///
/// The hash is final only if every source is final. Otherwise the combined value and the
/// sources are stored on an inner temp hash held in TempHashValue.
///
/// @param Sources  Source hashes to combine; must not be empty
CHash::CHash(const CHashPtrVec& Sources) : Timestamp(FDateTime::Now())
{
	check(Sources.size() > 0);

	/// Collect the source values and work out whether all of them are final
	bIsFinal = true;
	HashTypeVec SourceValues;
	SourceValues.reserve(Sources.size());

	for (const auto& Source : Sources)
	{
		SourceValues.push_back(Source->Value());

		const bool bSourceIsFinal = Source->IsFinal();
		bIsFinal = bIsFinal && bSourceIsFinal;
	}

	const HashType CombinedHash = DataUtil::Hash(SourceValues);

	if (bIsFinal)
	{
		HashSources = Sources;
		HashValue = CombinedHash;
		return;
	}

	/// IMPORTANT: The temp hash is constructed with isFinal = true and only flagged as
	/// non-final afterwards. Constructing it as non-final would make that ctor create yet
	/// another temp hash inside it. We want TempHashValue to be THE temp hash object, with
	/// its bIsFinal set to false so that Svc_TempHash is able to resolve it later on
	TempHashValue = std::make_shared<CHash>(CombinedHash, true);
	TempHashValue->HashSources = Sources;
	TempHashValue->bIsFinal = false;
}
/// Creates a hash from a list of sources and, when the result holds a temp hash, registers
/// the new hash as a link on each of the temp hash's sources.
///
/// @param Sources  Source hashes to combine
/// @return The newly created hash
CHashPtr CHash::ConstructFromSources(const CHashPtrVec& Sources)
{
	CHashPtr Combined(new CHash(Sources));

	/// No temp hash means there is nothing to link
	const CHashPtr& Pending = Combined->TempHashValue;
	if (!Pending)
		return Combined;

	/// Ask every source to keep a link back to the new hash
	for (const CHashPtr& Source : Pending->HashSources)
		Source->AddLink(Combined);

	return Combined;
}
/// Constructs a non-final hash that wraps an existing hash as its temp value.
///
/// If Temp reports IsTemp(), its own TempHashValue is adopted; otherwise Temp itself
/// becomes the temp value.
///
/// @param Temp  Hash to wrap; must not be null, and the resulting temp hash must not hold
///              DataUtil::GNullHash
CHash::CHash(CHashPtr Temp)
	: bIsFinal(false)
	, TempHashValue(Temp->IsTemp() ? Temp->TempHashValue : Temp)
	, Timestamp(Util::Time())
{
	check(DataUtil::GNullHash != TempHashValue->HashValue);
}
/// Two hashes compare equal when their Value() results are equal.
///
/// @param RHS  Hash to compare against
/// @return true if both hashes currently report the same value
bool CHash::operator == (const CHash& RHS) const
{
	const HashType Mine = Value();
	const HashType Theirs = RHS.Value();
	return Mine == Theirs;
}
void CHash::CheckLinkCycles(std::unordered_set<CHashPtr>& Chain)
{
for (CHashPtrW link_ : Linked)
{
CHashPtr Link = link_.lock();
if (Link)
{
check(Chain.find(Link) == Chain.end());
Chain.insert(Link);
Link->CheckLinkCycles(Chain);
}
}
}
/// Registers a hash that should be notified (through UpdateLinks) when this hash changes.
///
/// Nothing is added when this hash is already final, when the link has already expired, or
/// when the same target is already registered.
///
/// @param Link  Weak reference to the hash to notify
void CHash::AddLink(CHashPtrW Link)
{
	/// If this HashValue is already final then we don't need to add any links
	/// that need resolution later on
	if (IsFinal())
		return;

	/// Resolve the target once up-front instead of on every loop iteration
	const CHashPtr Target = Link.lock();

	/// An expired link can never be notified (Linked is only ever consumed through lock()),
	/// and storing it would make it compare equal to every other expired entry
	if (!Target)
		return;

	/// Ignore duplicates
	for (const CHashPtrW& Existing : Linked)
	{
		if (Existing.lock() == Target)
			return;
	}

	Linked.push_back(Link);
}
/// Attempts to (re)compute and finalise this hash.
///
/// Two paths exist:
///  - If this hash wraps a temp hash (TempHashValue), the work is delegated to it. Once the
///    temp hash reports final, its value is adopted here and linked hashes are notified.
///  - Otherwise the value is recomputed from HashSources (or taken from FinalHash when that
///    is non-zero) if a source is newer than this hash or the finality state has changed.
///
/// @param FinalHash      Explicit value to adopt instead of recombining the sources
/// @param UpdateBlobber  Whether to remap old hash values to this object in the blobber
/// @return true if the hash value was updated by this call, false otherwise
bool CHash::TryFinalise(HashType FinalHash /* = DataUtil::s_nullHash */, bool UpdateBlobber /* = true */)
{
/// Nothing is touched while the engine is shutting down
if (TextureGraphEngine::IsDestroying())
return false;
check(IsInGameThread());
if (TempHashValue != nullptr)
{
/// If there's a temp HashValue then the current HashValue must be null
check(HashValue == DataUtil::GNullHash);
/// Save the old Value of the temp HashValue
HashType OldHash = TempHashValue->HashValue;
/// Snapshot the intermediate hashes before the temp hash is asked to finalise
IntermediateHashes = TempHashValue->GetIntermediateHashes();
/// The temp hash is told not to touch the blobber; remapping is done below against this object
bool bDidUpdate = TempHashValue->TryFinalise(FinalHash, false);
if (bDidUpdate)
{
if (TempHashValue->IsFinal())
{
/// Adopt the now-final value and notify everything linked to this hash
bIsFinal = true;
HashValue = TempHashValue->HashValue;
UpdateLinks();
}
if (HashValue != DataUtil::GNullHash && OldHash != HashValue && UpdateBlobber)
{
CHashPtr ThisHash = shared_from_this();
IntermediateHashes.push_back(OldHash);
/// Now we try to update the mapping in blobber
for (HashType IntermediateHash : IntermediateHashes)
TextureGraphEngine::GetBlobber()->UpdateHash(IntermediateHash, ThisHash);
}
return true;
}
/// bDidUpdate is always false when this line is reached
return bDidUpdate;
}
/// Cannot finalise a HashValue that's already been finalised!
/// NOTE(review): nothing below actually rejects an already-final hash - confirm intent.
bool bShouldUpdate = false;
bool bDidUpdate = false;
/// Remember the value and temp state on entry; used below to record an intermediate hash
HashType CurrentHash = HashValue;
bool bIsTemp = IsTemp();
if (!HashSources.empty())
{
HashTypeVec Sources(HashSources.size());
bool bIsFinalHash = true;
for (size_t HashIndex = 0; HashIndex < HashSources.size(); HashIndex++)
{
/// Check whether the source HashValue has been finalised or not
bIsFinalHash &= HashSources[HashIndex]->IsFinal();
Sources[HashIndex] = HashSources[HashIndex]->Value();
/// A source that changed after this hash was last stamped forces a recompute
if (HashSources[HashIndex]->GetTimestamp() > Timestamp)
bShouldUpdate = true;
}
if (bShouldUpdate || (bIsFinalHash != bIsFinal))
{
/// Save the current HashValue as we'll need to update the mapping in blobber
HashType OldHash = HashValue;
/// NOTE(review): compares against literal 0 while the rest of the function uses
/// DataUtil::GNullHash - presumably the same value; confirm.
if (FinalHash != 0)
HashValue = FinalHash;
else
HashValue = DataUtil::Hash(Sources);
if (OldHash != HashValue && UpdateBlobber)
{
/// Now we try to update the mapping in blobber
TextureGraphEngine::GetBlobber()->UpdateHash(OldHash, shared_from_this());
}
bDidUpdate = true;
Timestamp = FDateTime::Now();
if (bIsFinalHash)
{
bIsFinal = true;
TempHashValue = nullptr;
}
}
}
else
{
/// No sources to wait for: mark the hash as final. FinalHash is not consulted here.
bIsFinal = true;
TempHashValue = nullptr;
/// Explicitly set to true so that return Value doesn't change if someone changes
/// the initialisation of shouldUpdate
/// NOTE(review): bShouldUpdate is not read after this point and the function returns
/// bDidUpdate, which is still false on this path - confirm that is intended.
bShouldUpdate = true;
}
if (bDidUpdate)
{
/// Keep the previous value around as an intermediate hash (non-temp hashes only)
if (!bIsTemp && CurrentHash != HashValue)
IntermediateHashes.push_back(CurrentHash);
UpdateLinks();
}
return bDidUpdate;
}
void CHash::UpdateLinks()
{
if (TextureGraphEngine::IsDestroying())
return;
if (Linked.empty())
return;
check(IsInGameThread());
auto ThisHash = shared_from_this();
for (CHashPtrW& Iter : Linked)
{
CHashPtr Link = Iter.lock();
if (Link)
{
/// Call the linked to update itself
Link->HandleLinkUpdated(ThisHash);
}
}
/// Ok, the links should've updated over here and We can clear our linked list now
/// if the hash is final, as they'll never be used after this.
if (IsFinal())
Linked.clear();
}
/// Returns the intermediate hashes of this object followed by those stored directly on its
/// temp hash, if it has one.
///
/// @return A copy of the combined list
HashTypeVec CHash::GetIntermediateHashes() const
{
	HashTypeVec Combined = IntermediateHashes;

	if (TempHashValue)
	{
		const HashTypeVec& TempHashes = TempHashValue->IntermediateHashes;
		Combined.insert(Combined.end(), TempHashes.begin(), TempHashes.end());
	}

	return Combined;
}
/// Makes NewHash the successor of PrevHash.
///
/// NewHash inherits the intermediate hashes, links and sources of PrevHash. PrevHash is then
/// overwritten to be a final hash carrying NewHash's value and timestamp. If that changed
/// the value PrevHash reports, the blobber mappings are updated and NewHash's links notified.
///
/// @param NewHash   Hash that takes over; must differ from PrevHash
/// @param PrevHash  Hash being replaced; may be null, in which case nothing happens
/// @return NewHash
CHashPtr CHash::UpdateHash(CHashPtr NewHash, CHashPtr PrevHash)
{
	/// Nothing to take over from
	if (!PrevHash)
		return NewHash;

	check(NewHash != PrevHash);

	/// Capture the old value before PrevHash gets overwritten further down
	const HashType OldValue = PrevHash->Value();

	/// History: everything PrevHash went through, plus its last value
	NewHash->IntermediateHashes = PrevHash->GetIntermediateHashes();
	NewHash->IntermediateHashes.push_back(OldValue);

	/// Copy some Data over from the previous HashValue: its links (including the ones held
	/// by its temp hash) and its sources
	NewHash->Linked = PrevHash->Linked;
	if (PrevHash->TempHashValue)
	{
		const auto& TempLinks = PrevHash->TempHashValue->Linked;
		if (!TempLinks.empty())
			NewHash->Linked.insert(NewHash->Linked.end(), TempLinks.begin(), TempLinks.end());
	}
	NewHash->HashSources = PrevHash->Sources();

	/// Overwrite the previous HashValue so that other HashValues that hold on to that
	/// particular pointer can see the latest state as well
	PrevHash->TempHashValue = nullptr;
	PrevHash->bIsFinal = true;
	PrevHash->HashValue = NewHash->HashValue;
	PrevHash->Timestamp = NewHash->Timestamp;

	const bool bValueChanged = (OldValue != PrevHash->Value());
	if (bValueChanged && !TextureGraphEngine::IsDestroying() && TextureGraphEngine::GetBlobber())
	{
		/// Point every historic value at the new hash, then notify the linked hashes
		for (HashType IntermediateHash : NewHash->IntermediateHashes)
			TextureGraphEngine::GetBlobber()->UpdateHash(IntermediateHash, NewHash);

		NewHash->UpdateLinks();
	}

	return NewHash;
}
/// Called by a linked hash after it has changed.
///
/// @param LinkUpdated  The hash that changed (not used by this implementation)
void CHash::HandleLinkUpdated(CHashPtr LinkUpdated)
{
	/// A hash that is already final has nothing left to do. This can happen if the
	/// BlobHasher service got to a blob before this Link got updated. Otherwise recalculate
	/// the HashValue, which also propagates to the other HashValues linked to this one.
	if (!IsFinal())
		TryFinalise();
}