// UnrealEngine/Engine/Source/Runtime/RHI/Public/GPUDefragAllocator.h
// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "CoreMinimal.h"
#include "HAL/ThreadSafeCounter.h"
#include "Stats/Stats.h"
#include "Misc/OutputDeviceRedirector.h"
#include "HAL/IConsoleManager.h"
#include "Containers/List.h"
#include "HAL/LowLevelMemTracker.h"
#include "ProfilingDebugging/MemoryTrace.h"
#define LOG_EVERY_ALLOCATION 0
#define DUMP_ALLOC_FREQUENCY 0 // 100
#define VALIDATE_SYNC_SIZE 0 // Validates that an allocation does not return memory that is still part of an active GPU move.
#define VALIDATE_MOVES 0 //validates that GPU moves in a given frame do not overlap destination areas.
#define TRACK_RELOCATIONS (VALIDATE_SYNC_SIZE || VALIDATE_MOVES)
#define VALIDATE_MEMORY_PROTECTION 0
#define USE_ALLOCATORFIXEDSIZEFREELIST 0
/*-----------------------------------------------------------------------------
Custom fixed size pool best fit texture memory allocator
-----------------------------------------------------------------------------*/
// Forward declaration
class FAsyncReallocationRequest;
class FScopedGPUDefragLock;
/**
* Simple best-fit allocator, splitting and coalescing whenever/wherever possible.
* NOT THREAD-SAFE.
*
* - uses a TMap to find the memory chunk for a given pointer (potentially colliding with malloc/free from the main thread)
* - uses a separate linked list for free allocations, assuming that coalescing keeps the number of free chunks relatively small
*/
class FGPUDefragAllocator
{
public:
typedef TDoubleLinkedList<FAsyncReallocationRequest*> FRequestList;
typedef TDoubleLinkedList<FAsyncReallocationRequest*>::TDoubleLinkedListNode FRequestNode;
/**
* Container for allocator settings.
*/
struct FSettings
{
FSettings()
: MaxDefragRelocations(128 * 1024)
, MaxDefragDownShift(32 * 1024)
, OverlappedBandwidthScale(1)
{
}
/** Maximum number of bytes to relocate, in total, during a partial defrag. */
int32 MaxDefragRelocations;
/** Maximum number of bytes to relocate during a partial defrag by brute-force downshifting. */
int32 MaxDefragDownShift;
/** Amount of extra bandwidth used when doing overlapped relocations */
int32 OverlappedBandwidthScale;
};
enum EMemoryElementType
{
MET_Allocated,
MET_Free,
MET_Locked,
MET_Relocating,
MET_Resizing,
MET_Resized,
MET_Max
};
struct FMemoryLayoutElement
{
FMemoryLayoutElement()
: Size(0)
, Type(MET_Allocated)
{
}
FMemoryLayoutElement(int32 InSize, EMemoryElementType InType)
: Size(InSize)
, Type(InType)
{
}
int32 Size;
EMemoryElementType Type;
friend FArchive& operator<<(FArchive& Ar, FMemoryLayoutElement& Element)
{
Ar << Element.Size;
uint32 ElementType = Element.Type;
Ar << ElementType;
Element.Type = EMemoryElementType(ElementType);
return Ar;
}
};
/**
* Container for allocator relocation stats.
*/
struct FRelocationStats
{
FRelocationStats()
: NumBytesRelocated(0)
, NumBytesDownShifted(0)
, LargestHoleSize(0)
, NumRelocations(0)
, NumHoles(0)
, NumLockedChunks(0)
{
}
/** Number of bytes relocated, in total. */
int64 NumBytesRelocated;
/** Number of bytes relocated by brute-force downshifting. */
int64 NumBytesDownShifted;
/** Size of the largest free consecutive memory region, before any relocations were made. */
int64 LargestHoleSize;
/** Number of relocations initiated. */
int32 NumRelocations;
/** Number of disjoint free memory regions, before any relocations were made. */
int32 NumHoles;
/** Number of chunks that are locked and cannot be relocated */
int32 NumLockedChunks;
};
/**
* Contains information about a single allocation or free block.
*/
class FMemoryChunk
{
public:
/**
* Constructor.
*
* @param InBase Pointer to base of chunk
* @param InSize Size of chunk
* @param InBestFitAllocator Allocator that owns this chunk
* @param ChunkToInsertAfter Chunk to insert this after
* @param InStat Stat to associate with this allocation
*/
FMemoryChunk(uint8* InBase, int64 InSize, FGPUDefragAllocator& InBestFitAllocator, FMemoryChunk*& ChunkToInsertAfter, TStatId InStat)
: Base(InBase)
, Size(InSize)
, OrigSize(0)
, bIsAvailable(false)
, LockCount(0)
, DefragCounter(0)
, BestFitAllocator(InBestFitAllocator)
, SyncIndex(0)
, SyncSize(0)
, UserPayload(0)
, Stat(InStat)
, bTail(false)
{
Link(ChunkToInsertAfter);
// This is going to change bIsAvailable.
LinkFree(ChunkToInsertAfter);
}
/**
* Unlinks/removes the chunk from the linked lists it belongs to.
*/
~FMemoryChunk()
{
// Remove from linked lists.
Unlink();
UnlinkFree();
//todo: add check for numlocks == 0
}
bool IsLocked() const
{
return (LockCount != 0);
}
/**
* Inserts this chunk after the passed in one.
*
* @param ChunkToInsertAfter Chunk to insert after
*/
void Link(FMemoryChunk*& ChunkToInsertAfter)
{
if (ChunkToInsertAfter)
{
NextChunk = ChunkToInsertAfter->NextChunk;
PreviousChunk = ChunkToInsertAfter;
ChunkToInsertAfter->NextChunk = this;
if (NextChunk)
{
NextChunk->PreviousChunk = this;
}
else
{
BestFitAllocator.LastChunk = this;
}
}
else
{
PreviousChunk = nullptr;
NextChunk = nullptr;
ChunkToInsertAfter = this;
BestFitAllocator.LastChunk = this;
}
}
/**
* Inserts this chunk at the head of the free chunk list.
*/
void LinkFree(FMemoryChunk* FirstFreeChunkToSearch);
/**
* Removes itself from the linked list.
*/
void Unlink()
{
if (PreviousChunk)
{
PreviousChunk->NextChunk = NextChunk;
}
else
{
BestFitAllocator.FirstChunk = NextChunk;
}
if (NextChunk)
{
NextChunk->PreviousChunk = PreviousChunk;
}
else
{
BestFitAllocator.LastChunk = PreviousChunk;
}
PreviousChunk = nullptr;
NextChunk = nullptr;
}
/**
* Removes itself from the linked "free" list. Maintains the free-list order.
*/
void UnlinkFree()
{
check(bIsAvailable);
bIsAvailable = false;
if (PreviousFreeChunk)
{
PreviousFreeChunk->NextFreeChunk = NextFreeChunk;
}
else
{
BestFitAllocator.FirstFreeChunk = NextFreeChunk;
}
if (NextFreeChunk)
{
NextFreeChunk->PreviousFreeChunk = PreviousFreeChunk;
}
PreviousFreeChunk = nullptr;
NextFreeChunk = nullptr;
}
/**
* Returns true if the Chunk is being asynchronously relocated due to reallocation or defrag.
*/
bool IsRelocating() const
{
return SyncIndex > BestFitAllocator.CompletedSyncIndex;
}
/**
* Returns the number of bytes that can be allocated from this chunk.
*/
int64 GetAvailableSize() const
{
if (bIsAvailable)
{
return IsRelocating() ? (Size - SyncSize) : Size;
}
else
{
return 0;
}
}
/**
* Returns the current size (in bytes), or the final size if it has a pending reallocation request.
*/
int64 GetFinalSize() const;
/**
* Sets the relocation sync index.
* @param InSyncIndex GPU synchronization identifier that can be compared with BestFitAllocator::CompletedSyncIndex
* @param InSyncSize Number of bytes that require GPU synchronization (starting from the beginning of the chunk)
*/
void SetSyncIndex(uint32 InSyncIndex, int64 InSyncSize)
{
SyncIndex = InSyncIndex;
SyncSize = InSyncSize;
}
/**
* Returns the relocation sync index.
*/
uint32 GetSyncIndex() const
{
return SyncIndex;
}
/**
* Comparison function for Sort(), etc, based on increasing base address.
*/
static uint64 Compare(const FMemoryChunk* A, const FMemoryChunk* B)
{
return B->Base - A->Base;
}
#if USE_ALLOCATORFIXEDSIZEFREELIST
/** Custom new/delete */
void* operator new(size_t Size);
void operator delete(void *RawMemory);
#endif
/** Base of chunk. */
uint8* Base;
/** Size of chunk. */
int64 Size;
int64 OrigSize;
/** Whether the chunk is available. */
bool bIsAvailable;
/** Lock count; non-zero means the chunk is locked and cannot be relocated. */
int32 LockCount;
/** Defrag counter. If this chunk failed to defrag, it won't try it again until the counter is 0. */
uint16 DefragCounter;
/** Allows access to FGPUDefragAllocator members such as FirstChunk, FirstFreeChunk and LastChunk. */
FGPUDefragAllocator& BestFitAllocator;
/** Pointer to previous chunk. */
FMemoryChunk* PreviousChunk;
/** Pointer to next chunk. */
FMemoryChunk* NextChunk;
/** Pointer to previous free chunk. */
FMemoryChunk* PreviousFreeChunk;
/** Pointer to next free chunk. */
FMemoryChunk* NextFreeChunk;
/** SyncIndex that must be exceeded before accessing the data within this chunk. */
uint32 SyncIndex;
/** Number of bytes covered by the SyncIndex (starting from the beginning of the chunk). */
int64 SyncSize;
/** User payload, e.g. a platform-specific texture pointer. Only chunks with a payload can be relocated. */
void* UserPayload;
//stat associated with this allocation
TStatId Stat;
bool bTail;
};
/** Constructor, zero initializing all member variables */
FGPUDefragAllocator()
: MemorySize(0)
, MemoryBase(nullptr)
, AllocationAlignment(0)
, FirstChunk(nullptr)
, LastChunk(nullptr)
, FirstFreeChunk(nullptr)
, TimeSpentInAllocator(0.0)
, PaddingWasteSize(0)
, AllocatedMemorySize(0)
, AvailableMemorySize(0)
, PendingMemoryAdjustment(0)
, CurrentSyncIndex(1)
, CompletedSyncIndex(0)
, NumRelocationsInProgress(0)
, PlatformSyncFence(0)
, CurrentLargestHole(0)
, CurrentNumHoles(0)
, TotalNumRelocations(0)
, TotalNumBytesRelocated(0)
, MinLargestHole(MAX_int64)
, MaxNumHoles(0)
, NumFinishedAsyncReallocations(0)
, NumFinishedAsyncAllocations(0)
, NumCanceledAsyncRequests(0)
, BlockedCycles(0)
, NumLockedChunks(0)
, bBenchmarkMode(false)
{}
virtual ~FGPUDefragAllocator()
{
}
/**
* Initialize this allocator with a preallocated block of memory.
*
* @param InMemoryBase Base address for the block of memory
* @param InMemorySize Size of the block of memory, in bytes
* @param InAllocationAlignment Alignment for all allocations, in bytes
*/
virtual void Initialize(uint8* InMemoryBase, int64 InMemorySize, int32 InAllocationAlignment)
{
// Update size, Pointer and alignment.
MemoryBase = InMemoryBase;
MemorySize = InMemorySize;
AllocationAlignment = InAllocationAlignment;
check(Align(MemoryBase, AllocationAlignment) == MemoryBase);
// Update stats in a thread safe way.
FPlatformAtomics::InterlockedExchange(&AvailableMemorySize, MemorySize);
// Allocate initial chunk.
FirstChunk = new FMemoryChunk(MemoryBase, MemorySize, *this, FirstChunk, TStatId());
LastChunk = FirstChunk;
}
virtual void Initialize(uint8* InMemoryBase, int64 InMemorySize)
{
Initialize(InMemoryBase, InMemorySize, 64);
}
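// Initialization sketch (hypothetical): FMyPlatformDefragAllocator stands in for a concrete subclass that
// implements the Platform* pure virtuals further below, and PlatformAllocateGPUPool is an assumed helper
// returning suitably aligned, GPU-visible memory.
//
//   static FMyPlatformDefragAllocator GTexturePoolAllocator;
//   uint8* PoolBase = (uint8*)PlatformAllocateGPUPool(PoolSize);
//   GTexturePoolAllocator.Initialize(PoolBase, PoolSize, /*InAllocationAlignment=*/256);
//   check(GTexturePoolAllocator.IsInitialized());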
/**
* Returns the current allocator settings.
*
* @param OutSettings [out] Current allocator settings
*/
void GetSettings(FSettings& OutSettings)
{
OutSettings = Settings;
}
/**
* Sets new allocator settings.
*
* @param InSettings New allocator settings to replace the old ones.
*/
void SetSettings(const FSettings& InSettings)
{
Settings = InSettings;
}
/**
* Returns whether allocator has been initialized.
*/
bool IsInitialized()
{
return MemoryBase != nullptr;
}
/**
* Allocate physical memory.
*
* @param AllocationSize Size of allocation, in bytes
* @param Alignment Alignment of the allocation, in bytes
* @param InStat Stat to associate with the allocation
* @param bAllowFailure Whether to allow allocation failure or not
* @return Pointer to allocated memory
*/
virtual void* Allocate(int64 AllocationSize, int32 Alignment, TStatId InStat, bool bAllowFailure);
/**
* Frees allocation associated with the specified Pointer.
*
* @param Pointer Pointer to free.
*/
virtual void Free(void* Pointer);
/**
* Locks an FMemoryChunk
*
* @param Pointer Pointer indicating which chunk to lock
*/
virtual void Lock(const void* Pointer);
/**
* Unlocks an FMemoryChunk
*
* @param Pointer Pointer indicating which chunk to unlock
*/
virtual void Unlock(const void* Pointer);
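// Typical allocate/lock/free cycle (illustrative sketch; 'Allocator' and the size/data variables are
// placeholders, not part of this API). Locked chunks are skipped by the defragmenter, so locks should be
// held only while the CPU is actively reading or writing the memory.
//
//   void* Mip = Allocator.Allocate(MipSizeInBytes, /*Alignment=*/256, TStatId(), /*bAllowFailure=*/true);
//   if (Mip)
//   {
//       Allocator.Lock(Mip);                                // pin so the chunk is not relocated mid-write
//       FMemory::Memcpy(Mip, SourceMipData, MipSizeInBytes);
//       Allocator.Unlock(Mip);                              // make it relocatable again
//       // ... use the allocation ...
//       Allocator.Free(Mip);                                // if mid-relocation, the free is deferred internally
//   }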
/**
* Sets the user payload for an FMemoryChunk
*
* @param Pointer Pointer indicating a chunk
* @param UserPayload User payload to set
*/
void SetUserPayload(const void* Pointer, void* UserPayload);
/**
* Returns the user payload for an FMemoryChunk
*
* @param Pointer Pointer indicating a chunk
* @return The chunk's user payload
*/
void* GetUserPayload(const void* Pointer);
/**
* Returns the amount of memory allocated for the specified address.
*
* @param Pointer Pointer to check.
* @return Number of bytes allocated
*/
int64 GetAllocatedSize(void* Pointer);
/**
* Tries to reallocate texture memory in-place (without relocating),
* by adjusting the base address of the allocation but keeping the end address the same.
* Note: Newly freed memory due to shrinking won't be available for allocation right away (need GPU sync).
*
* @param OldBaseAddress Pointer to the original allocation
* @param NewSize New size of the allocation, in bytes
* @return New base address if it succeeded, otherwise nullptr
**/
void* Reallocate(void* OldBaseAddress, int64 NewSize);
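// Sketch of the in-place contract described above (sizes are illustrative). Shrinking keeps the end
// address and moves the base up; a nullptr result means the caller must fall back to a full
// Allocate/copy/Free.
//
//   void* OldBase = Allocator.Allocate(2048 * 1024, 256, TStatId(), true);
//   void* NewBase = Allocator.Reallocate(OldBase, 1024 * 1024);
//   if (NewBase == nullptr)
//   {
//       // In-place resize was not possible at this time.
//   }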
bool IsValidPoolMemory(const void* Pointer) const
{
return (Pointer != nullptr) && (Pointer >= MemoryBase) && (Pointer < (MemoryBase + MemorySize));
}
#if 0
/**
* Requests an async allocation or reallocation.
* The caller must hold on to the request until it has been completed or canceled.
*
* @param ReallocationRequest The request
* @param bForceRequest If true, the request will be accepted even if there's currently not enough free space
* @return true if the request was accepted
*/
bool AsyncReallocate(FAsyncReallocationRequest* ReallocationRequest, bool bForceRequest);
#endif
/**
* Dump allocation information.
*/
void DumpAllocs(FOutputDevice& Ar = *GLog);
/**
* Retrieves allocation stats.
*
* @param OutAllocatedMemorySize [out] Size of allocated memory
* @param OutAvailableMemorySize [out] Size of available memory
* @param OutPendingMemoryAdjustment [out] Size of pending allocation change (due to async reallocation)
*/
virtual void GetMemoryStats(int64& OutAllocatedMemorySize, int64& OutAvailableMemorySize, int64& OutPendingMemoryAdjustment, int64& OutPaddingWasteSize)
{
OutAllocatedMemorySize = AllocatedMemorySize;
OutAvailableMemorySize = AvailableMemorySize;
OutPendingMemoryAdjustment = PendingMemoryAdjustment;
OutPaddingWasteSize = PaddingWasteSize;
}
int64 GetTotalSize() const
{
return MemorySize;
}
/**
* Scans the free chunks and returns the largest size you can allocate.
*
* @param OutNumFreeChunks Upon return, contains the total number of free chunks. May be nullptr.
* @return The largest size of all free chunks.
*/
int32 GetLargestAvailableAllocation(int32* OutNumFreeChunks = nullptr);
/**
* Returns the amount of time blocked by a platform fence since the beginning of the last call to Tick(), in appCycles.
*/
uint32 GetBlockedCycles() const
{
return BlockedCycles;
}
/**
* Returns whether we're in benchmark mode or not.
*/
bool InBenchmarkMode() const
{
return bBenchmarkMode;
}
/**
* Fills a texture with data to visualize the texture pool memory.
*
* @param TextureData Start address
* @param SizeX Number of pixels along X
* @param SizeY Number of pixels along Y
* @param Pitch Number of bytes between each row
* @param PixelSize Number of bytes each pixel represents
*
* @return true if successful, false otherwise
*/
bool GetTextureMemoryVisualizeData(FColor* TextureData, int32 SizeX, int32 SizeY, int32 Pitch, const int32 PixelSize);
void GetMemoryLayout(TArray<FMemoryLayoutElement>& MemoryLayout);
EMemoryElementType GetChunkType(FMemoryChunk* Chunk) const;
/**
* Fully defragments the memory and blocks until it's done.
*
* @param Stats [out] Stats
*/
void DefragmentMemory(FRelocationStats& Stats);
/**
* Partially defragments the memory and tries to process all async reallocation requests at the same time.
* Call this once per frame.
*
* @param Stats [out] Stats
* @param bPanicDefrag If true, performs a full defrag and ignores all reallocation requests
* @return Num bytes relocated.
*/
virtual int32 Tick(FRelocationStats& Stats, bool bPanicDefrag);
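// Per-frame driving sketch (hypothetical; the exact call sites differ per platform). Tick() starts async
// GPU moves bounded by FSettings::MaxDefragRelocations; FinishAllRelocations() blocks until they are done.
//
//   FGPUDefragAllocator::FRelocationStats Stats;
//   Allocator.Tick(Stats, /*bPanicDefrag=*/false);
//   // ... GPU executes the queued relocations alongside normal rendering ...
//   Allocator.FinishAllRelocations();   // before CPU code touches memory that may have been moved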
/**
* Blocks the calling thread until all relocations and reallocations that were initiated by Tick() have completed.
*
* @return true if there were any relocations in progress before this call
*/
bool FinishAllRelocations();
/**
* Blocks the calling thread until the specified request has been completed.
*
* @param Request Request to wait for. Must be a valid request.
*/
void BlockOnAsyncReallocation(FAsyncReallocationRequest* Request);
/**
* Cancels the specified reallocation request.
* Note that the allocator doesn't keep track of requests after it's been completed,
* so the user must provide the current base address. This may not match any of the
* addresses in the (old) request since the memory may have been relocated since then.
*
* @param Request Request to cancel. Must be a valid request.
* @param CurrentBaseAddress Current base address used by the allocation.
*/
void CancelAsyncReallocation(FAsyncReallocationRequest* Request, const void* CurrentBaseAddress);
/**
* Performs a benchmark of the allocator and outputs the result to the log.
*
* @param MinChunkSize Minimum number of bytes per random chunk
* @param MaxChunkSize Maximum number of bytes per random chunk
* @param FreeRatio Fraction (0.0-1.0) of the memory to free before benchmarking
* @param LockRatio Fraction (0.0-1.0) of the memory to lock before benchmarking
* @param bFullDefrag Whether to test full defrag (true) or continuous defrag (false)
* @param bSaveImages Whether to save before/after images to hard disk (TexturePoolBenchmark-*.bmp)
* @param Filename [opt] Filename to a previously saved memory layout to use for benchmarking, or nullptr
*/
void Benchmark(int32 MinChunkSize, int32 MaxChunkSize, float FreeRatio, float LockRatio, bool bFullDefrag, bool bSaveImages, const TCHAR* Filename);
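// Note: the bitmask test below assumes Alignment is a power of two.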
static FORCEINLINE bool IsAligned(const volatile void* Ptr, const uint32 Alignment)
{
return !(UPTRINT(Ptr) & (Alignment - 1));
}
int32 GetAllocationAlignment() const
{
return AllocationAlignment;
}
protected:
#if TRACK_RELOCATIONS
struct FRelocationEntry
{
FRelocationEntry(const uint8* InOldBase, const uint8* InNewBase, uint64 InSize, uint64 InSyncIndex);
const uint8* OldBase;
const uint8* NewBase;
uint64 Size;
uint64 SyncIndex;
};
void ValidateRelocations(uint8* UsedBaseAddress, uint64 Size);
TArray<FRelocationEntry> Relocations;
#endif
#if VALIDATE_MEMORY_PROTECTION
struct FMemProtectTracker
{
FMemProtectTracker(const void* InMemory, const void* InUserPayload, uint64 InBlockSize, uint32 InSyncIndex)
: Memory(InMemory)
, UserPayload(InUserPayload)
, BlockSize(InBlockSize)
, SyncIndex(InSyncIndex)
{
}
const void* Memory;
const void* UserPayload;
uint64 BlockSize;
uint32 SyncIndex;
};
TArray<FMemProtectTracker> BlocksToProtect;
TArray<FMemProtectTracker> BlocksToUnProtect;
static int32 GGPUDefragDumpRelocationsToTTY;
static FAutoConsoleVariableRef CVarGPUDefragDumpRelocationsToTTY;
/** Sets Static memory privileges on blocks that have completed relocations. */
void SetStaticMemoryPrivileges();
/** Removes all CPU and GPU read/write privileges from the given Block. Used for Free memory that is not part of a relocation. */
virtual void PlatformSetNoMemoryPrivileges(const FMemProtectTracker& Block) {};
/** Allows all CPU and GPU read/write privileges on the given Block. Used to reset privileges on allocation */
virtual void PlatformSetStandardMemoryPrivileges(const FMemProtectTracker& Block) {};
/** Allows CPU read/write and GPU read; the platform implementation can also enable GPU write if it determines that it should. Used when UserPayload is set: during defraggable registration and after waiting on the appropriate fence. */
virtual void PlatformSetStaticMemoryPrivileges(const FMemProtectTracker& BlocksToAllow) {};
/** Allows Only GPU R/W. CPU access is a bug. This is used when this memory is part of an active relocation */
virtual void PlatformSetRelocationMemoryPrivileges(const FMemProtectTracker& BlocksToAllow) {};
virtual void PlatformSetRelocationMemoryPrivileges(const TArray<FMemProtectTracker>& BlocksToRemove) {};
#endif
/**
* Copies memory from one location to another, as part of defragmentation or reallocation.
* Whether a given allocation may be relocated at all is decided by PlatformCanRelocate().
* Note: Source and destination may overlap.
*
* @param Dest Destination memory start address
* @param Source Source memory start address
* @param Size Number of bytes to copy
* @param UserPayload User payload for this allocation
*/
virtual void PlatformRelocate(void* Dest, const void* Source, int64 Size, void* UserPayload) = 0;
/**
* Inserts a fence to synchronize relocations.
* The fence can be blocked on at a later time to ensure that all relocations initiated
* so far have been fully completed.
*
* @return New fence value
*/
virtual uint64 PlatformInsertFence() = 0;
/**
* Blocks the calling thread until all relocations initiated before the fence
* was added have been fully completed.
*
* @param Fence Fence to block on
*/
virtual void PlatformBlockOnFence(uint64 Fence) = 0;
/**
* Allows each platform to decide whether an allocation can be relocated at this time.
*
* @param Source Base address of the allocation
* @param UserPayload User payload for the allocation
* @return true if the allocation can be relocated at this time
*/
virtual bool PlatformCanRelocate(const void* Source, void* UserPayload) const = 0;
/**
* Notifies the platform that an async reallocation request has been completed.
*
* @param FinishedRequest The request that got completed
* @param UserPayload User payload for the allocation
*/
virtual void PlatformNotifyReallocationFinished(FAsyncReallocationRequest* FinishedRequest, void* UserPayload) = 0;
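// Minimal subclass sketch showing how the platform hooks above fit together. Everything here is
// hypothetical: MyRHIEnqueueCopy / MyRHIInsertFence / MyRHIWaitForFence are assumed helpers, and a real
// implementation must issue actual GPU copies and fences for its RHI.
//
//   class FMyPlatformDefragAllocator : public FGPUDefragAllocator
//   {
//   protected:
//       virtual void PlatformRelocate(void* Dest, const void* Source, int64 Size, void* UserPayload) override
//       {
//           MyRHIEnqueueCopy(Dest, Source, Size);   // must handle overlapping ranges
//       }
//       virtual uint64 PlatformInsertFence() override                                    { return MyRHIInsertFence(); }
//       virtual void PlatformBlockOnFence(uint64 Fence) override                         { MyRHIWaitForFence(Fence); }
//       virtual bool PlatformCanRelocate(const void* Source, void* UserPayload) const override { return UserPayload != nullptr; }
//       virtual void PlatformNotifyReallocationFinished(FAsyncReallocationRequest* FinishedRequest, void* UserPayload) override {}
//   };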
/**
* Relocates memory from one location to another and updates the relocation stats.
* The actual copy is delegated to PlatformRelocate() (skipped in benchmark mode).
* Note: Source and destination may overlap.
*
* @param Stats [out] Stats
* @param Dest Destination memory chunk
* @param DestOffset Destination offset, counted from the base address of the destination memory chunk, in bytes
* @param Source Base address of the source memory
* @param Size Number of bytes to copy
* @param UserPayload User payload for the allocation
*/
void Relocate(FRelocationStats& Stats, FMemoryChunk* Dest, int64 DestOffset, const void* Source, int64 Size, void* UserPayload)
{
MemoryTrace_ReallocFree((uint64)Source);
MemoryTrace_ReallocAlloc((uint64)Dest->Base, Size, 4);
LLM_IF_ENABLED(FLowLevelMemTracker::Get().OnLowLevelAllocMoved(ELLMTracker::Default, Dest->Base, Source));
uint8* DestAddr = Dest->Base + DestOffset;
int64 MemDistance = (int64)(DestAddr) - (int64)(Source);
int64 AbsDistance = FMath::Abs(MemDistance);
bool bOverlappedMove = AbsDistance < Size;
if (!bBenchmarkMode)
{
#if VALIDATE_MEMORY_PROTECTION
BlocksToProtect.Emplace(DestAddr, UserPayload, Size, CurrentSyncIndex);
BlocksToProtect.Emplace(Source, UserPayload, Size, CurrentSyncIndex);
#endif
PlatformRelocate(DestAddr, Source, Size, UserPayload);
}
int64 RelocateSize = bOverlappedMove ? (Size * Settings.OverlappedBandwidthScale) : Size;
Dest->UserPayload = UserPayload;
Stats.NumBytesRelocated += RelocateSize;
Stats.NumRelocations++;
}
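// Accounting note: an overlapped move (|Dest - Source| < Size) is charged against the per-tick relocation
// budget at Size * OverlappedBandwidthScale, so with a scale of 2 a 1 MB overlapped copy counts as 2 MB
// of relocated bytes; non-overlapping moves are charged their actual size.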
/**
* Returns the sync index to be completed by the next call to FinishAllRelocations().
*/
uint32 GetCurrentSyncIndex() const
{
return CurrentSyncIndex;
}
/**
* Performs a partial defrag doing fast checks only. Adjacency and freelist walk.
*
* @param Stats [out] Stats
* @param StartTime Start time, used for limiting the Tick() time
*/
void PartialDefragmentationFast(FRelocationStats& Stats, double StartTime);
/**
* Performs a partial defrag using a slow, all-chunk search to find used chunks to move that are surrounded by other used chunks,
* which a free-chunk walk won't find.
*
* @param Stats [out] Stats
* @param StartTime Start time, used for limiting the Tick() time
*/
void PartialDefragmentationSlow(FRelocationStats& Stats, double StartTime);
/**
* Performs a partial defrag by shifting down memory to fill holes, in a brute-force manner.
* Takes async reallocations into consideration, but processes all memory in order.
*
* @param Stats [out] Stats
* @param StartTime Start time, used for limiting the Tick() time
*/
void PartialDefragmentationDownshift(FRelocationStats& Stats, double StartTime);
/**
* Performs a full defrag and ignores all reallocation requests.
*
* @param Stats [out] Stats
*/
void FullDefragmentation(FRelocationStats& Stats);
/**
* Tries to immediately grow a memory chunk by moving the base address, without relocating any memory.
*
* @param Chunk Chunk to grow
* @param GrowAmount Number of bytes to grow by
* @return nullptr if it failed, otherwise the new grown chunk
*/
FMemoryChunk* Grow(FMemoryChunk* Chunk, int64 GrowAmount);
/**
* Immediately shrinks a memory chunk by moving the base address, without relocating any memory.
* Always succeeds.
*
* @param Chunk Chunk to shrink
* @param ShrinkAmount Number of bytes to shrink by
* @return The new shrunken chunk
*/
FMemoryChunk* Shrink(FMemoryChunk* Chunk, int64 ShrinkAmount);
/**
* Checks the internal state for errors. (Slow)
*
* @param bCheckSortedFreeList If true, also checks that the freelist is sorted
*/
void CheckForErrors(bool bCheckSortedFreeList);
/**
* Returns true if the specified chunk is allowed to relocate at this time.
* Will also call PlatformCanRelocate().
*
* @param Chunk Chunk to check
* @return true if the allocation can be relocated at this time
*/
bool CanRelocate(const FMemoryChunk* Chunk) const;
/**
* Inserts a platform fence and updates the allocator sync index to match.
*/
void InsertFence();
/**
* Blocks the calling thread until the current sync fence has been completed.
*/
void BlockOnFence();
/**
* Blocks the calling thread until the specified sync index has been completed.
*
* @param SyncIndex Sync index to wait for
*/
void BlockOnSyncIndex(uint32 SyncIndex);
/**
* Split allocation into two, first chunk being used and second being available.
* Maintains the free-list order.
*
* @param BaseChunk Chunk to split
* @param FirstSize New size of first chunk
*/
void Split(FMemoryChunk* BaseChunk, int64 FirstSize)
{
check(BaseChunk);
check(FirstSize < BaseChunk->Size);
check(FirstSize > 0);
// Don't make any assumptions on the following chunk. Because Reallocate() will make the 1st chunk free
// and the 2nd chunk used, so it's ok to have the following chunk free. Note, this only happens when Reallocate()
// is splitting the very first chunk in the pool.
// Don't make any assumptions for the previous chunk either...
// check( !BaseChunk->NextChunk || !BaseChunk->NextChunk->bIsAvailable );
// check( !BaseChunk->PreviousChunk || !BaseChunk->PreviousChunk->bIsAvailable || !BaseChunk->bIsAvailable );
// Calculate size of second chunk...
int64 SecondSize = BaseChunk->Size - FirstSize;
// ... and create it.
//todo: fix stats
//ensureMsgf(BaseChunk->Stat.IsNone(), TEXT("Free chunk has stat"));
FMemoryChunk* NewFreeChunk = new FMemoryChunk(BaseChunk->Base + FirstSize, SecondSize, *this, BaseChunk, TStatId());
// Keep the original sync index for the new chunk if necessary.
if (BaseChunk->IsRelocating() && BaseChunk->SyncSize > FirstSize)
{
int64 SecondSyncSize = BaseChunk->SyncSize - FirstSize;
NewFreeChunk->SetSyncIndex(BaseChunk->SyncIndex, SecondSyncSize);
}
BaseChunk->SetSyncIndex(BaseChunk->SyncIndex, FMath::Min((int64)FirstSize, BaseChunk->SyncSize));
// Resize base chunk.
BaseChunk->Size = FirstSize;
} //-V773
/**
* Marks the specified chunk as 'allocated' and updates tracking variables.
* Splits the chunk if only a portion of it is allocated.
*
* @param FreeChunk Chunk to allocate
* @param AllocationSize Number of bytes to allocate
* @param bAsync If true, allows allocating from relocating chunks and maintains the free-list sort order.
* @return The memory chunk that was allocated (the original chunk could've been split).
*/
FMemoryChunk* AllocateChunk(FMemoryChunk* FreeChunk, int64 AllocationSize, bool bAsync, bool bDoValidation = true);
/**
* Marks the specified chunk as 'free' and updates tracking variables.
* Calls LinkFreeChunk() to coalesce adjacent free memory.
*
* @param Chunk Chunk to free
*/
void FreeChunk(FMemoryChunk* Chunk);
/**
* Frees the passed in chunk and coalesces adjacent free chunks into 'Chunk' if possible.
* Maintains the free-list order.
*
* @param Chunk Chunk to mark as available.
*/
void LinkFreeChunk(FMemoryChunk* Chunk)
{
check(Chunk);
// Mark chunk as available.
Chunk->LinkFree(nullptr);
// Kick off merge pass.
Coalesce(Chunk);
}
/**
* Merges any adjacent free chunks into the specified free chunk.
* Doesn't affect the free-list sort order.
*
* @param FreedChunk Chunk that just became available.
*/
void Coalesce(FMemoryChunk* FreedChunk);
/**
* Sorts the freelist based on increasing base address.
*
* @param NumFreeChunks [out] Number of free chunks
* @param LargestFreeChunk [out] Size of the largest free chunk, in bytes
*/
void SortFreeList(int32& NumFreeChunks, int64& LargestFreeChunk);
/**
* Defrag helper function. Checks if the specified allocation fits within
* the adjacent free chunk(s).
*
* @param UsedChunk Allocated chunk to check for a fit
* @param bAnyChunkType If false, only succeeds if 'UsedChunk' has a reallocation request and fits
* @return Returns 'UsedChunk' if it fits the criteria, otherwise nullptr
*/
FMemoryChunk* FindAdjacent(FMemoryChunk* UsedChunk, bool bAnyChunkType);
/**
* Searches for an allocated chunk that would fit within the specified free chunk.
* The allocated chunk must be adjacent to a free chunk and have a larger
* base address than 'FreeChunk'.
* Starts searching from the end of the texture pool.
*
* @param FreeChunk Free chunk we're trying to fill up
* @return Pointer to a suitable chunk, or nullptr
*/
FMemoryChunk* FindAdjacentToHole(FMemoryChunk* FreeChunk);
/**
* Searches for an allocated chunk that would fit within the specified free chunk.
* Any chunk that fits and has a larger base address than 'FreeChunk' is accepted.
* Starts searching from the end of the texture pool.
*
* @param FreeChunk Free chunk we're trying to fill up
* @return Pointer to a suitable chunk, or nullptr
*/
FMemoryChunk* FindAny(FMemoryChunk* FreeChunk);
/**
* Initiates an async relocation of an allocated chunk into a free chunk.
* Takes potential reallocation request into account.
*
* @param Stats [out] Stats
* @param FreeChunk Destination chunk (free memory)
* @param UsedChunk Source chunk (allocated memory)
* @return Next Free chunk to try to fill up
*/
FMemoryChunk* RelocateIntoFreeChunk(FRelocationStats& Stats, FMemoryChunk* FreeChunk, FMemoryChunk* UsedChunk);
FMemoryChunk* RelocateAllowed(FMemoryChunk* FreeChunk, FMemoryChunk* UsedChunk);
FCriticalSection SynchronizationObject;
/** Total size of memory pool, in bytes. */
uint64 MemorySize;
/** Base of memory pool. */
uint8* MemoryBase;
/** Allocation alignment requirements. */
int32 AllocationAlignment;
/** Head of linked list of chunks. Sorted by memory address. */
FMemoryChunk* FirstChunk;
/** Last chunk in the linked list of chunks (see FirstChunk). */
FMemoryChunk* LastChunk;
/** Head of linked list of free chunks. Unsorted. */
FMemoryChunk* FirstFreeChunk;
/** Cumulative time spent in allocator. */
double TimeSpentInAllocator;
typedef int64 memsize_t;
volatile memsize_t PaddingWasteSize;
/** Allocated memory in bytes. */
volatile memsize_t AllocatedMemorySize;
/** Available memory in bytes. */
volatile memsize_t AvailableMemorySize;
/** Adjustment to allocated memory, pending all reallocations. */
volatile memsize_t PendingMemoryAdjustment;
/** Mapping from pointer to chunk for fast removal. */
TMap<void*, FMemoryChunk*> PointerToChunkMap;
/** Allocator settings that affect its behavior. */
FSettings Settings;
/** Ever-increasing index to synchronize all relocations initiated by Tick(). */
uint64 CurrentSyncIndex;
/** Sync index that has been completed, so far. */
uint64 CompletedSyncIndex;
/** Number of async relocations that are currently in progress. */
int32 NumRelocationsInProgress;
/** Platform-specific (GPU) fence, used for synchronizing the Sync Index. */
uint64 PlatformSyncFence;
/** Chunks that couldn't be freed immediately because they were being relocated. */
TDoubleLinkedList<FMemoryChunk*> PendingFreeChunks;
uint64 CurrentLargestHole;
int32 CurrentNumHoles;
// Stats
/** Total number of relocations performed so far. */
uint64 TotalNumRelocations;
/** Total number of bytes relocated so far. */
uint64 TotalNumBytesRelocated;
/** Smallest consecutive free memory region we've had. */
int64 MinLargestHole;
/** Maximum number of disjoint free memory regions we've had. */
int32 MaxNumHoles;
/** Total number of async reallocations successfully completed so far. */
int32 NumFinishedAsyncReallocations;
/** Total number of async allocations successfully completed so far. */
int32 NumFinishedAsyncAllocations;
/** Total number of async requests that have been canceled so far. */
int32 NumCanceledAsyncRequests;
/** Amount of time blocked by a platform fence since the beginning of the last call to Tick(), in appCycles. */
uint32 BlockedCycles;
int32 NumLockedChunks;
#if VALIDATE_MEMORY_PROTECTION
double TimeInMemProtect;
#endif
/** When in benchmark mode, don't call any Platform functions. */
bool bBenchmarkMode;
friend FScopedGPUDefragLock;
};
//FScopedGPUDefragLock can't cover any scope that will add dcb commands or we might deadlock.
class FScopedGPUDefragLock
{
public:
FScopedGPUDefragLock(FGPUDefragAllocator& InDefragAllocator)
: DefragAllocator(InDefragAllocator)
{
DefragAllocator.SynchronizationObject.Lock();
}
~FScopedGPUDefragLock()
{
DefragAllocator.SynchronizationObject.Unlock();
}
private:
FGPUDefragAllocator& DefragAllocator;
};
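// Usage sketch (hypothetical call site): keep the scope as small as possible and, per the warning above,
// never around code that records command-buffer work.
//
//   {
//       FScopedGPUDefragLock DefragLock(GTexturePoolAllocator);   // GTexturePoolAllocator is an assumed instance
//       Result = GTexturePoolAllocator.GetLargestAvailableAllocation(&NumFreeChunks);
//   }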
/**
* Asynchronous reallocation request.
* Requests are created and deleted by the user, but a request must stick around until the allocator is done with it.
* Requests may be fulfilled immediately; check HasCompleted() after making the request.
*/
class FAsyncReallocationRequest
{
public:
/**
* Creates a new reallocation request.
*
* @param InCurrentBaseAddress Current base address
* @param InNewSize Requested new size, in bytes
* @param InRequestStatus Will be decremented by one when the request has been completed. Can be nullptr.
*/
FAsyncReallocationRequest(void* InCurrentBaseAddress, int32 InNewSize, FThreadSafeCounter* InRequestStatus)
: OldAddress(InCurrentBaseAddress)
, NewAddress(nullptr)
, OldSize(0) // Set by AsyncReallocate()
, NewSize(InNewSize)
, InternalRequestStatus(1)
, ExternalRequestStatus(InRequestStatus)
, bIsCanceled(false)
, MemoryChunk(nullptr)
{
}
/** Destructor. */
~FAsyncReallocationRequest()
{
check(!HasStarted() || IsCanceled() || HasCompleted());
}
/** Returns true if the request is for a new allocation. */
bool IsAllocation() const
{
return OldAddress == nullptr && OldSize == 0;
}
/** Returns true if the request is for a reallocation. */
bool IsReallocation() const
{
return OldAddress != nullptr;
}
/** Returns true if the request has been canceled. */
bool IsCanceled() const
{
return bIsCanceled;
}
/** Returns true if the request has been completed. */
bool HasCompleted() const
{
bool bHasCompleted = InternalRequestStatus.GetValue() == 0;
check(!bHasCompleted || NewAddress || bIsCanceled);
return bHasCompleted;
}
/** Returns true if the allocator has started processing the request (true for completed requests as well). */
bool HasStarted() const
{
return NewAddress ? true : false;
}
/** Returns the original base address. */
void* GetOldBaseAddress() const
{
return OldAddress;
}
/** Returns the new base address, or nullptr if the request hasn't started yet. */
void* GetNewBaseAddress() const
{
return NewAddress;
}
/** Returns the requested new memory size (in bytes). */
int32 GetNewSize() const
{
return NewSize;
}
private:
// Hidden on purpose since outside usage isn't necessarily thread-safe.
FAsyncReallocationRequest(const FAsyncReallocationRequest& Other) { FMemory::Memcpy(this, &Other, sizeof(FAsyncReallocationRequest)); }
void operator=(const FAsyncReallocationRequest& Other) { FMemory::Memcpy(this, &Other, sizeof(FAsyncReallocationRequest)); }
/**
* Marks the request as completed. Also decrements the external request status, if it wasn't nullptr.
*/
void MarkCompleted()
{
check(InternalRequestStatus.GetValue() == 1);
InternalRequestStatus.Decrement();
if (ExternalRequestStatus)
{
ExternalRequestStatus->Decrement();
}
}
/** Original base address. */
void* OldAddress;
/** New base address, or nullptr if the request hasn't started yet. */
void* NewAddress;
/** Original memory size, in bytes. Set by AsyncReallocate(). */
int32 OldSize;
/** Requested new memory size, in bytes. */
int32 NewSize;
/** Thread-safe counter that will be decremented by one when the request has been completed. */
FThreadSafeCounter InternalRequestStatus;
/** External counter that will be decremented by one when the request has been completed. */
FThreadSafeCounter* ExternalRequestStatus;
/** true if the request has been canceled. */
uint32 bIsCanceled : 1;
/**
* Corresponding memory chunk. Starts out as the chunk that contains the original memory block,
* but is changed to the destination chunk once the allocator starts processing the request.
*/
class FGPUDefragAllocator::FMemoryChunk* MemoryChunk;
friend class FGPUDefragAllocator;
};
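// Request lifecycle sketch (hypothetical; note that AsyncReallocate() is currently compiled out with
// '#if 0' above, so this only illustrates the intended flow).
//
//   FThreadSafeCounter ReallocStatus(1);
//   FAsyncReallocationRequest Request(ExistingBaseAddress, NewSizeInBytes, &ReallocStatus);
//   // ... hand the request to the allocator and keep it alive until it completes or is canceled ...
//   if (Request.HasStarted() && !Request.HasCompleted())
//   {
//       Allocator.BlockOnAsyncReallocation(&Request);
//   }
//   void* NewBase = Request.GetNewBaseAddress();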
/**
* Returns the current size (in bytes), or the final size if it has a pending reallocation request.
*/
FORCEINLINE int64 FGPUDefragAllocator::FMemoryChunk::GetFinalSize() const
{
return Size;
}
/**
* Returns true if the specified chunk is allowed to relocate at this time.
* Will also call PlatformCanRelocate().
*
* @param Chunk Chunk to check
* @return true if the allocation can be relocated at this time
*/
FORCEINLINE bool FGPUDefragAllocator::CanRelocate(const FMemoryChunk* Chunk) const
{
if (Chunk->IsLocked())
{
return false;
}
if (!bBenchmarkMode)
{
return PlatformCanRelocate(Chunk->Base, Chunk->UserPayload);
}
else
{
return true;
}
}
/**
* Blocks the calling thread until the specified request has been completed.
*
* @param Request Request to wait for. Must be a valid request.
*/
FORCEINLINE void FGPUDefragAllocator::BlockOnAsyncReallocation(FAsyncReallocationRequest* Request)
{
check(Request->HasStarted());
if (!Request->HasCompleted())
{
BlockOnSyncIndex(Request->MemoryChunk->SyncIndex);
}
}