Files
UnrealEngine/Engine/Source/Runtime/Online/BuildPatchServices/Private/Generation/DeltaEnumeration.cpp
2025-05-18 13:04:45 +08:00

324 lines
13 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "Generation/DeltaEnumeration.h"
#include "Core/BlockStructure.h"
#include "Core/ProcessTimer.h"
#include "Generation/BuildStreamer.h"
#include "Generation/ChunkSearch.h"
#include "BuildPatchHash.h"
DECLARE_LOG_CATEGORY_EXTERN(LogDeltaEnumeration, Log, All);
DEFINE_LOG_CATEGORY(LogDeltaEnumeration);
namespace DeltaChunkEnumerationHelpers
{
/**
 * Rolling hash of a window made up entirely of one repeated byte value.
 * @param Byte        The value every byte of the window holds.
 * @param WindowSize  The number of bytes in the window.
 * @return FRollingHash::GetHashForDataSet evaluated over that window.
 */
uint64 GetPaddingChunkHash(uint8 Byte, uint32 WindowSize)
{
	// Build the padding window in one step: WindowSize copies of Byte.
	TArray<uint8> PaddingWindow;
	PaddingWindow.Init(Byte, WindowSize);
	const uint64 PaddingHash = FRollingHash::GetHashForDataSet(PaddingWindow.GetData(), WindowSize);
	return PaddingHash;
}
/**
 * SHA1 of a window made up entirely of one repeated byte value.
 * @param Byte        The value every byte of the window holds.
 * @param WindowSize  The number of bytes in the window.
 * @return The SHA1 digest of that window.
 */
FSHAHash GetPaddingChunkSha(uint8 Byte, uint32 WindowSize)
{
	// Build the padding window in one step: WindowSize copies of Byte.
	TArray<uint8> PaddingWindow;
	PaddingWindow.Init(Byte, WindowSize);
	FSHAHash PaddingSha;
	FSHA1::HashBuffer(PaddingWindow.GetData(), WindowSize, PaddingSha.Hash);
	return PaddingSha;
}
}
namespace BuildPatchServices
{
// Concrete IDeltaChunkEnumeration. Run() pulls the build data from the streamer and cuts it into
// WindowSize byte chunks, recording for each one its rolling hash, its SHA1, and which manifest
// chunk parts it was assembled from. The constructor pre-registers one padding chunk per byte value.
class FDeltaChunkEnumeration
: public IDeltaChunkEnumeration
{
public:
// Stores the collaborators and registers the 256 single-byte-value padding chunks.
FDeltaChunkEnumeration(IManifestBuildStreamer* Streamer, FStatsCollector* StatsCollector, const FBuildPatchAppManifest& Manifest, const uint32 WindowSize);
~FDeltaChunkEnumeration();
// IDeltaChunkEnumeration interface begin.
virtual void Run() override;
virtual bool IsComplete() const override;
virtual const TMap<uint64, TSet<FDeltaChunkId>>& GetChunkInventory() const override;
virtual const TMap<FDeltaChunkId, FShaId>& GetChunkShaHashes() const override;
virtual const TMap<FDeltaChunkId, FChunkBuildReference>& GetChunkBuildReferences() const override;
virtual const TMap<FShaId, TSet<FDeltaChunkId>>& GetIdenticalChunks() const override;
virtual const uint64& GetChunkHash(const FDeltaChunkId& ChunkId) const override;
virtual const FSHAHash& GetChunkShaHash(const FDeltaChunkId& ChunkId) const override;
virtual FFilenameId MakeFilenameId(const FString& Filename) override;
virtual FShaId MakeShaId(const FSHAHash& SHAHash) override;
virtual FFilenameId GetFilenameId(const FString& Filename) const override;
virtual FShaId GetShaId(const FSHAHash& SHAHash) const override;
virtual const FString& GetFilename(const FFilenameId& Filename) const override;
virtual const FSHAHash& GetSha(const FShaId& SHAHash) const override;
// IDeltaChunkEnumeration interface end.
private:
// Registers the WindowSize bytes starting at StreamBufferPosition in StreamBuffer as a new chunk,
// unless their rolling hash is 0. StreamBuildStructure describes where the buffer sits in the build.
void MakeChunk(const TArray<uint8>& StreamBuffer, const int32& StreamBufferPosition, const FBlockStructure& StreamBuildStructure);
// SHA1 of the Size bytes at Data.
FSHAHash GetShaForDataSet(const uint8* Data, uint32 Size);
private:
// NOTE: members are initialised in declaration order; UniqueWindowSizes is built from WindowSize,
// so WindowSize must stay declared before it.
// Source of the build data consumed by Run(). Never deleted by this class.
IManifestBuildStreamer* Streamer;
// Receives the chunking stats created in Run(). Never deleted by this class.
FStatsCollector* StatsCollector;
// Built from the manifest; used by MakeChunk to find the manifest chunk parts overlapping a new chunk.
FChunkSearcher ChunkSearcher;
// One fractional digit, rounding toward zero. NOTE(review): no use is visible in this file.
const FNumberFormattingOptions PercentFormat;
// Size in bytes of every enumerated chunk.
const uint32 WindowSize;
// Initialised to contain just WindowSize. NOTE(review): no use is visible in this file.
const TSet<uint32> UniqueWindowSizes;
// Filenames added via MakeFilenameId; an FFilenameId is an element id within this set.
TSet<FString> FilenameTable;
// SHA1 hashes added via MakeShaId; an FShaId is an element id within this set.
TSet<FSHAHash> ShaTable;
// Set to true as the last step of Run(); reported by IsComplete().
bool bHasRan;
// Rolling hash -> ids of all chunks with that hash.
TMap<uint64, TSet<FDeltaChunkId>> ChunkInventory;
// Chunk id -> rolling hash.
TMap<FDeltaChunkId, uint64> ChunkHashes;
// Chunk id -> id of the chunk's SHA1 within ShaTable.
TMap<FDeltaChunkId, FShaId> ChunkShaHashes;
// Chunk id -> chunk parts making up the chunk, plus the file, tags and location of its first part.
TMap<FDeltaChunkId, FChunkBuildReference> ChunkBuildReferences;
// SHA id -> ids of all chunks sharing that SHA1; filled in at the end of Run().
TMap<FShaId, TSet<FDeltaChunkId>> IdenticalChunks;
};
/**
 * Stores the collaborators and pre-registers the special padding chunks, one per possible byte value,
 * so that their hashes and SHAs are part of the enumeration before any build data is processed.
 * @param InStreamer         Provides the build data that Run() will chunk.
 * @param InStatsCollector   Receives the stats that Run() creates.
 * @param InManifest         Manifest handed to the chunk searcher.
 * @param InWindowSize       Size in bytes of each chunk.
 */
FDeltaChunkEnumeration::FDeltaChunkEnumeration(IManifestBuildStreamer* InStreamer, FStatsCollector* InStatsCollector, const FBuildPatchAppManifest& InManifest, const uint32 InWindowSize)
	: Streamer(InStreamer)
	, StatsCollector(InStatsCollector)
	, ChunkSearcher(InManifest)
	, PercentFormat(FNumberFormattingOptions().SetMaximumFractionalDigits(1).SetMinimumFractionalDigits(1).SetRoundingMode(ERoundingMode::ToZero))
	, WindowSize(InWindowSize)
	, UniqueWindowSizes({WindowSize})
	, bHasRan(false)
{
	// Records one padding chunk in every lookup table. The SHA is interned before the rolling hash is calculated.
	const auto RegisterPaddingChunk = [this](const uint8 PaddingByte, const FDeltaChunkId& ChunkId, const FChunkBuildReference& Reference)
	{
		const FShaId ShaId = MakeShaId(DeltaChunkEnumerationHelpers::GetPaddingChunkSha(PaddingByte, WindowSize));
		const uint64 RollingHash = DeltaChunkEnumerationHelpers::GetPaddingChunkHash(PaddingByte, WindowSize);
		ChunkInventory.FindOrAdd(RollingHash).Add(ChunkId);
		ChunkHashes.Add(ChunkId, RollingHash);
		ChunkShaHashes.Add(ChunkId, ShaId);
		ChunkBuildReferences.Add(ChunkId, Reference);
	};

	// Make sure we enumerate special chunk SHAs.
	// Each padding chunk is referenced as a single part covering one whole window of itself.
	FDeltaChunkId PaddingChunkId = PaddingChunk::MakePaddingGuid(0);
	FChunkBuildReference PaddingReference;
	FChunkPart& PaddingPart = PaddingReference.Get<0>().AddDefaulted_GetRef();
	PaddingPart.Guid = PaddingChunkId;
	PaddingPart.Offset = 0;
	PaddingPart.Size = WindowSize;
	PaddingReference.Get<3>() = 0;

	// Byte 0 uses the guid and reference exactly as set up above.
	RegisterPaddingChunk(0, PaddingChunkId, PaddingReference);

	// Bytes 1 through 255 reuse the same guid and reference, with the byte value written into the guid's D component.
	for (uint32 ByteValue = 1; ByteValue <= 255; ++ByteValue)
	{
		const uint8 PaddingByte = ByteValue & 0xFF;
		PaddingChunkId.D = PaddingByte;
		PaddingPart.Guid = PaddingChunkId;
		RegisterPaddingChunk(PaddingByte, PaddingChunkId, PaddingReference);
	}
}
// Nothing needs releasing by hand: the streamer and stats collector pointers are never deleted by this class,
// and every other member cleans up after itself.
FDeltaChunkEnumeration::~FDeltaChunkEnumeration() = default;
/** @return true once Run() has finished; the flag is only raised as Run()'s final step. */
bool FDeltaChunkEnumeration::IsComplete() const
{
	const bool bEnumerationFinished = bHasRan;
	return bEnumerationFinished;
}
/** @return Lookup from rolling hash to the ids of every registered chunk (padding chunks included) with that hash. */
const TMap<uint64, TSet<FDeltaChunkId>>& FDeltaChunkEnumeration::GetChunkInventory() const
{
	const TMap<uint64, TSet<FDeltaChunkId>>& Inventory = ChunkInventory;
	return Inventory;
}
/** @return Lookup from chunk id to the id of that chunk's SHA1; resolve the id to the actual hash with GetSha(). */
const TMap<FDeltaChunkId, FShaId>& FDeltaChunkEnumeration::GetChunkShaHashes() const
{
	const TMap<FDeltaChunkId, FShaId>& ShaIdsByChunk = ChunkShaHashes;
	return ShaIdsByChunk;
}
void FDeltaChunkEnumeration::Run()
{
// Setup scanning stats.
volatile int64* StatChunkingSize = StatsCollector->CreateStat(TEXT("BuildA: Chunked data size"), EStatFormat::DataSize);
volatile int64* StatChunkingTime = StatsCollector->CreateStat(TEXT("BuildA: Chunking time"), EStatFormat::Timer);
volatile int64* StatChunkingCompleted = StatsCollector->CreateStat(TEXT("BuildA: Progress"), EStatFormat::Percentage);
const EAllowShrinking AllowShrinking = EAllowShrinking::No;
const FBlockStructure& StreamerBlocks = Streamer->GetBlockStructure();
const uint64 ManifestAStreamSize = BlockStructureHelpers::CountSize(StreamerBlocks);
const uint32 StreamBufferReadSize = WindowSize * 32;
uint64 StreamStartPosition = 0;
TArray<uint8> StreamBuffer;
StreamBuffer.Reserve(StreamBufferReadSize + WindowSize);
uint64 BuildAChunkTimer;
FStatsCollector::AccumulateTimeBegin(BuildAChunkTimer);
while (Streamer->IsEndOfData() == false)
{
// Grab some data, maintaining accurate buffer size.
const uint32 StreamBufferPadding = StreamBuffer.Num();
StreamBuffer.SetNumUninitialized(StreamBufferPadding + StreamBufferReadSize, AllowShrinking);
const uint32 SizeRead = Streamer->DequeueData(StreamBuffer.GetData() + StreamBufferPadding, StreamBufferReadSize);
StreamBuffer.SetNumUninitialized(StreamBufferPadding + SizeRead, AllowShrinking);
// Calculate the buffer's build structure.
FBlockStructure StreamBuildStructure;
const uint64 StreamStructureSize = StreamerBlocks.SelectSerialBytes(StreamStartPosition, StreamBuffer.Num(), StreamBuildStructure);
check(SizeRead == StreamBufferReadSize || Streamer->IsEndOfData());
check(StreamStructureSize == StreamBuffer.Num());
// Chunk up data.
int32 StreamBufferPosition = 0;
const int32 LastStreamBufferPosition = StreamBuffer.Num() - WindowSize;
for (; StreamBufferPosition <= LastStreamBufferPosition; StreamBufferPosition += WindowSize)
{
MakeChunk(StreamBuffer, StreamBufferPosition, StreamBuildStructure);
}
// Is the remainder the final bytes we'll get?
if (Streamer->IsEndOfData())
{
StreamBufferPosition = StreamBuffer.Num();
// We lose out on matching the last bytes if there is not enough for one chunk in the buffer.
// This is something that we could fix with padding, but it is really not necessary yet.
checkSlow(WindowSize < (uint32)TNumericLimits<int32>::Max());
if (StreamBuffer.Num() >= (int32)WindowSize)
{
const int32 StreamBufferFinalChunkIdx = StreamBuffer.Num() - WindowSize;
MakeChunk(StreamBuffer, StreamBufferFinalChunkIdx, StreamBuildStructure);
}
}
check(StreamBufferPosition <= StreamBuffer.Num());
// Copy remainder bytes.
StreamStartPosition += StreamBuffer.Num();
const int32 CopySize = StreamBuffer.Num() - StreamBufferPosition;
if (CopySize > 0)
{
uint8* const CopyTo = StreamBuffer.GetData();
const uint8* const CopyFrom = &StreamBuffer[StreamBufferPosition];
FMemory::Memcpy(CopyTo, CopyFrom, CopySize);
}
StreamBuffer.SetNum(CopySize, AllowShrinking);
StreamStartPosition -= StreamBuffer.Num();
const double PercentChunked = (double)StreamStartPosition / (double)ManifestAStreamSize;
FStatsCollector::SetAsPercentage(StatChunkingCompleted, PercentChunked);
FStatsCollector::Set(StatChunkingSize, StreamStartPosition);
FStatsCollector::AccumulateTimeEnd(StatChunkingTime, BuildAChunkTimer);
FStatsCollector::AccumulateTimeBegin(BuildAChunkTimer);
}
FStatsCollector::SetAsPercentage(StatChunkingCompleted, 1.0);
FStatsCollector::Set(StatChunkingSize, ManifestAStreamSize);
// Collect up all chunks that were identical.
for (const TPair<FDeltaChunkId, FShaId>& ChunkShaHashPair : ChunkShaHashes)
{
IdenticalChunks.FindOrAdd(ChunkShaHashPair.Value).Add(ChunkShaHashPair.Key);
}
FStatsCollector::AccumulateTimeEnd(StatChunkingTime, BuildAChunkTimer);
bHasRan = true;
}
/** @return Lookup from chunk id to the build reference recorded for that chunk. */
const TMap<FDeltaChunkId, FChunkBuildReference>& FDeltaChunkEnumeration::GetChunkBuildReferences() const
{
	const TMap<FDeltaChunkId, FChunkBuildReference>& ReferencesByChunk = ChunkBuildReferences;
	return ReferencesByChunk;
}
/** @return Lookup from SHA id to every chunk id sharing that SHA. Only populated by Run(). */
const TMap<FShaId, TSet<FDeltaChunkId>>& FDeltaChunkEnumeration::GetIdenticalChunks() const
{
	const TMap<FShaId, TSet<FDeltaChunkId>>& ChunksBySha = IdenticalChunks;
	return ChunksBySha;
}
/**
 * @param ChunkId  Id of a chunk registered with this enumeration; the lookup is a checked one.
 * @return The rolling hash recorded for the chunk.
 */
const uint64& FDeltaChunkEnumeration::GetChunkHash(const FDeltaChunkId& ChunkId) const
{
	return ChunkHashes.FindChecked(ChunkId);
}
/**
 * @param ChunkId  Id of a chunk registered with this enumeration.
 * @return The SHA1 recorded for the chunk, resolved through the SHA table.
 */
const FSHAHash& FDeltaChunkEnumeration::GetChunkShaHash(const FDeltaChunkId& ChunkId) const
{
	// Two steps: chunk id -> SHA id, then SHA id -> actual hash.
	const FShaId& ChunkShaId = ChunkShaHashes[ChunkId];
	return GetSha(ChunkShaId);
}
/**
 * Adds a filename to the filename table.
 * @param Filename  The filename to add.
 * @return The id of the table element holding Filename.
 */
FFilenameId FDeltaChunkEnumeration::MakeFilenameId(const FString& Filename)
{
	const FFilenameId TableId = FilenameTable.Add(Filename);
	return TableId;
}
/**
 * Adds a SHA1 hash to the SHA table.
 * Run() groups chunks by the returned id to find identical data, so equal hashes are expected to yield equal ids.
 * @param SHAHash  The hash to add.
 * @return The id of the table element holding SHAHash.
 */
FShaId FDeltaChunkEnumeration::MakeShaId(const FSHAHash& SHAHash)
{
	return ShaTable.Add(SHAHash);
}
/**
 * Looks a filename up in the filename table without adding it.
 * @param Filename  The filename to look for.
 * @return Whatever id the table's FindId reports for Filename.
 *         NOTE(review): presumably an invalid id when the filename was never added - confirm against TSet::FindId.
 */
FFilenameId FDeltaChunkEnumeration::GetFilenameId(const FString& Filename) const
{
	const FFilenameId TableId = FilenameTable.FindId(Filename);
	return TableId;
}
/**
 * Looks a SHA1 hash up in the SHA table without adding it.
 * @param SHAHash  The hash to look for.
 * @return Whatever id the table's FindId reports for SHAHash.
 *         NOTE(review): presumably an invalid id when the hash was never added - confirm against TSet::FindId.
 */
FShaId FDeltaChunkEnumeration::GetShaId(const FSHAHash& SHAHash) const
{
	return ShaTable.FindId(SHAHash);
}
/**
 * @param FilenameId  An id previously handed out for the filename table.
 * @return The filename stored at that id.
 */
const FString& FDeltaChunkEnumeration::GetFilename(const FFilenameId& FilenameId) const
{
	const FString& StoredFilename = FilenameTable[FilenameId];
	return StoredFilename;
}
/**
 * @param ShaId  An id previously handed out for the SHA table.
 * @return The SHA1 hash stored at that id.
 */
const FSHAHash& FDeltaChunkEnumeration::GetSha(const FShaId& ShaId) const
{
	const FSHAHash& StoredSha = ShaTable[ShaId];
	return StoredSha;
}
void FDeltaChunkEnumeration::MakeChunk(const TArray<uint8>& StreamBuffer, const int32& StreamBufferPosition, const FBlockStructure& StreamBuildStructure)
{
const uint64 ChunkHash = FRollingHash::GetHashForDataSet(&StreamBuffer[StreamBufferPosition], WindowSize);
// We get major high collision issues with trying to match chunk hash 0.
// This is a weakness in the rolling hash, and is usually generated by either 0 byte padding, or short run length cyclic data.
// Both of these types of data compress very well and so downloading it is not a huge loss.
if (ChunkHash != 0)
{
FDeltaChunkId ChunkId = FGuid::NewGuid();
ChunkInventory.FindOrAdd(ChunkHash).Add(ChunkId);
ChunkHashes.Add(ChunkId, ChunkHash);
ChunkShaHashes.Add(ChunkId, MakeShaId(GetShaForDataSet(&StreamBuffer[StreamBufferPosition], WindowSize)));
FBlockStructure ChunkBuildStructure;
const uint64 ChunkStructureSize = StreamBuildStructure.SelectSerialBytes(StreamBufferPosition, WindowSize, ChunkBuildStructure);
check(ChunkStructureSize == WindowSize);
ChunkSearcher.ForEachOverlap(ChunkBuildStructure, [&](const FBlockRange& OverlapRange, FChunkSearcher::FFileDListNode* File, FChunkSearcher::FChunkDListNode* Chunk)
{
const FChunkSearcher::FChunkNode& ChunkNode = Chunk->GetValue();
FChunkBuildReference& ChunkBuildReference = ChunkBuildReferences.FindOrAdd(ChunkId);
TArray<FChunkPart>& ChunkParts = ChunkBuildReference.Get<0>();
FFilenameId& FirstFile = ChunkBuildReference.Get<1>();
TSet<FString>& FirstFileTagset = ChunkBuildReference.Get<2>();
uint64& FirstFileLocation = ChunkBuildReference.Get<3>();
const int64 ChunkLeftChop = OverlapRange.GetFirst() - ChunkNode.BuildRange.GetFirst();
const int64 ChunkRightChop = ChunkNode.BuildRange.GetLast() - OverlapRange.GetLast();
const int64 SizeChop = ChunkLeftChop + ChunkRightChop;
if (ChunkParts.Num() == 0)
{
const FChunkSearcher::FFileNode& FileNode = File->GetValue();
FirstFile = MakeFilenameId(FileNode.Manifest->Filename);
FirstFileTagset.Append(FileNode.Manifest->InstallTags);
FirstFileLocation = (ChunkNode.BuildRange.GetFirst() - FileNode.BuildRange.GetFirst()) + ChunkLeftChop;
}
ChunkParts.Emplace(ChunkNode.ChunkPart.Guid, ChunkNode.ChunkPart.Offset + ChunkLeftChop, ChunkNode.ChunkPart.Size - SizeChop);
});
}
}
/**
 * @param Data  Start of the bytes to hash.
 * @param Size  Number of bytes to hash.
 * @return The SHA1 digest of the given bytes.
 */
FSHAHash FDeltaChunkEnumeration::GetShaForDataSet(const uint8* Data, uint32 Size)
{
	FSHAHash Digest;
	FSHA1::HashBuffer(Data, Size, Digest.Hash);
	return Digest;
}
/**
 * Creates the delta chunk enumeration implementation.
 * @return A heap allocated instance. NOTE(review): presumably the caller takes ownership and must delete it - confirm at call sites.
 */
IDeltaChunkEnumeration* FDeltaChunkEnumerationFactory::Create(IManifestBuildStreamer* Streamer, FStatsCollector* StatsCollector, const FBuildPatchAppManifest& Manifest, const uint32 WindowSize)
{
	IDeltaChunkEnumeration* const Enumeration = new FDeltaChunkEnumeration(Streamer, StatsCollector, Manifest, WindowSize);
	return Enumeration;
}
}