Files
UnrealEngine/Engine/Source/ThirdParty/Windows/D3DX12/Include/d3dx12residency.h
2025-05-18 13:04:45 +08:00

1726 lines
48 KiB
C++

//*********************************************************
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
//
//*********************************************************
#pragma once
namespace D3DX12Residency
{
// Debug helpers. Flip the condition below to 1 to break into the debugger when a
// residency invariant or an HRESULT check fails.
#if 0
// Breaks into the debugger when the boolean expression x evaluates to false.
#define RESIDENCY_CHECK(x) \
if((x) == false) { DebugBreak(); }
// Breaks into the debugger when the expression x is anything other than S_OK.
#define RESIDENCY_CHECK_RESULT(x) \
if((x) != S_OK) { DebugBreak(); }
#else
// Checks disabled: RESIDENCY_CHECK expands to nothing, so its argument is NOT evaluated.
#define RESIDENCY_CHECK(x)
// Checks disabled: the argument is still evaluated (its side effects happen), but the result is ignored.
#define RESIDENCY_CHECK_RESULT(x) x
#endif
// Note: This library automatically runs in a single-threaded mode if ID3D12Device3 is supported.
// Set to 1 to compile out the creation of the async paging worker thread entirely.
#define RESIDENCY_SINGLE_THREADED 0
// Function-like min/max. NOTE: each argument may be evaluated twice, so avoid side effects.
#define RESIDENCY_MIN(x,y) ((x) < (y) ? (x) : (y))
#define RESIDENCY_MAX(x,y) ((x) > (y) ? (x) : (y))
// This size can be tuned to your app in order to save space
// It sizes the per-object CommandListsUsedOn array and the SyncManager slot table.
#define MAX_NUM_CONCURRENT_CMD_LISTS 1024
namespace Internal
{
// Owning wrapper around a Win32 CRITICAL_SECTION. The section is initialised
// (with a spin count of 8) on construction and deleted on destruction.
// Locking is done exclusively through ScopedLock, which is a friend.
class CriticalSection
{
    friend class ScopedLock;
public:
    CriticalSection()
    {
        InitializeCriticalSectionAndSpinCount(&CS, 8);
    }
    ~CriticalSection()
    {
        DeleteCriticalSection(&CS);
    }
    // A CRITICAL_SECTION must not be copied once it has been initialised, and a
    // copy would also be deleted a second time by the destructor.
    CriticalSection(const CriticalSection&) = delete;
    CriticalSection& operator=(const CriticalSection&) = delete;
private:
    CRITICAL_SECTION CS;
};
// Scope guard for a CriticalSection: enters the section on construction and
// leaves it on destruction. A null CriticalSection pointer (or the default
// constructor) yields a guard that does nothing.
class ScopedLock
{
public:
    ScopedLock() : pCS(nullptr) {}
    ScopedLock(CriticalSection* pCSIn) : pCS(pCSIn)
    {
        if (pCS)
        {
            EnterCriticalSection(&pCS->CS);
        }
    }
    ~ScopedLock()
    {
        if (pCS)
        {
            LeaveCriticalSection(&pCS->CS);
        }
    }
    // Copying a guard would leave the critical section twice for a single enter.
    ScopedLock(const ScopedLock&) = delete;
    ScopedLock& operator=(const ScopedLock&) = delete;
private:
    // The section held by this guard, or nullptr when the guard is inert
    CriticalSection* pCS;
};
// One per Residency Manager
// Shared table of command list slots. ResidencySet::Open() claims the first
// slot whose entry is false and marks it true; the slot is marked false again
// when the set's reservation is returned.
class SyncManager
{
public:
    SyncManager()
    {
        // Every slot starts out unclaimed
        for (bool& SlotClaimed : AvailableCommandLists)
        {
            SlotClaimed = false;
        }
    }

    // Guards AvailableCommandLists
    Internal::CriticalSection MaskCriticalSection;

    static const UINT32 sUnsetValue = UINT32(-1);

    // Represents which command lists are currently open for recording
    bool AvailableCommandLists[MAX_NUM_CONCURRENT_CMD_LISTS];
};
//Forward Declaration
class ResidencyManagerInternal;
}
// Used to track meta data for each object the app potentially wants
// to make resident or evict.
class ManagedObject
{
public:
enum class RESIDENCY_STATUS
{
RESIDENT,
EVICTED
};
ManagedObject() :
pUnderlying(nullptr),
Size(0),
ResidencyStatus(RESIDENCY_STATUS::RESIDENT),
LastGPUSyncPoint(0),
LastUsedTimestamp(0)
{
memset(CommandListsUsedOn, 0, sizeof(CommandListsUsedOn));
}
void Initialize(ID3D12Pageable* pUnderlyingIn, UINT64 ObjectSize, UINT64 InitialGPUSyncPoint = 0)
{
RESIDENCY_CHECK(pUnderlying == nullptr);
pUnderlying = pUnderlyingIn;
Size = ObjectSize;
LastGPUSyncPoint = InitialGPUSyncPoint;
}
inline bool IsInitialized() { return pUnderlying != nullptr; }
// Wether the object is resident or not
RESIDENCY_STATUS ResidencyStatus;
// The underlying D3D Object being tracked
ID3D12Pageable* pUnderlying;
// The size of the D3D Object in bytes
UINT64 Size;
UINT64 LastGPUSyncPoint;
UINT64 LastUsedTimestamp;
// This is used to track which open command lists this resource is currently used on.
bool CommandListsUsedOn[MAX_NUM_CONCURRENT_CMD_LISTS];
// Linked list entry
LIST_ENTRY ListEntry;
};
// This represents a set of objects which are referenced by a command list, i.e. every time a resource
// is bound for rendering, clearing, copy etc. the set must be updated to ensure that the resource is
// resident for execution.
// The set of ManagedObjects referenced by one command list. A set must be
// opened before objects are inserted and closed before it is handed to the
// residency manager for execution. While open, the set owns one slot in the
// SyncManager's table; that slot index is used to de-duplicate inserts through
// ManagedObject::CommandListsUsedOn.
class ResidencySet
{
    friend class ResidencyManager;
    friend class Internal::ResidencyManagerInternal;
public:
    static const UINT32 InvalidIndex = (UINT32)-1;

    ResidencySet() :
        CommandListIndex(InvalidIndex),
        ppSet(nullptr),
        MaxResidencySetSize(0),
        CurrentSetSize(0),
        IsOpen(false),
        OutOfMemory(false),
        pSyncManager(nullptr)
    {
    }

    ~ResidencySet()
    {
        delete[](ppSet);
    }

    // The set owns ppSet; a copy would free the same array twice.
    ResidencySet(const ResidencySet&) = delete;
    ResidencySet& operator=(const ResidencySet&) = delete;

    // Returns true if the object was inserted, false otherwise
    // (already present on this command list, or the set could not grow).
    inline bool Insert(ManagedObject* pObject)
    {
        RESIDENCY_CHECK(IsOpen);
        RESIDENCY_CHECK(CommandListIndex != InvalidIndex);

        // Already recorded on this command list: nothing to do
        if (pObject->CommandListsUsedOn[CommandListIndex])
        {
            return false;
        }

        if (ppSet == nullptr || CurrentSetSize >= MaxResidencySetSize)
        {
            Realloc();
        }

        // Realloc() leaves the storage untouched when it cannot grow, so re-check
        // the capacity instead of assuming there is room for one more entry.
        if (ppSet == nullptr || CurrentSetSize >= MaxResidencySetSize)
        {
            OutOfMemory = true;
            return false;
        }

        // Only flag the object once it is really part of the set; otherwise the
        // flag would never be cleared again by Close().
        pObject->CommandListsUsedOn[CommandListIndex] = true;
        ppSet[CurrentSetSize++] = pObject;
        return true;
    }

    // Reserves a command list slot and resets the set for recording.
    // Returns E_INVALIDARG if the set is already open or was never initialised,
    // E_OUTOFMEMORY if all MAX_NUM_CONCURRENT_CMD_LISTS slots are taken.
    HRESULT Open()
    {
        // A set that was never given a SyncManager cannot reserve a slot
        if (pSyncManager == nullptr)
        {
            return E_INVALIDARG;
        }

        Internal::ScopedLock Lock(&pSyncManager->MaskCriticalSection);

        // It's invalid to open a set that is already open
        if (IsOpen)
        {
            return E_INVALIDARG;
        }

        RESIDENCY_CHECK(CommandListIndex == InvalidIndex);

        bool CommandlistAvailable = false;
        // Find the first available command list slot
        for (UINT32 i = 0; i < ARRAYSIZE(pSyncManager->AvailableCommandLists); i++)
        {
            if (pSyncManager->AvailableCommandLists[i] == false)
            {
                CommandListIndex = i;
                pSyncManager->AvailableCommandLists[i] = true;
                CommandlistAvailable = true;
                break;
            }
        }

        if (CommandlistAvailable == false)
        {
            // There are too many open residency sets, consider using less or increasing the value of MAX_NUM_CONCURRENT_CMD_LISTS
            RESIDENCY_CHECK(false);
            return E_OUTOFMEMORY;
        }

        CurrentSetSize = 0;
        IsOpen = true;
        OutOfMemory = false;
        return S_OK;
    }

    // Ends recording: clears the per-object flags for this set's slot and
    // returns the slot to the SyncManager.
    // Returns E_INVALIDARG if the set is not open and E_OUTOFMEMORY if an
    // Insert() failed since Open().
    // NOTE(review): on E_OUTOFMEMORY the set is left open and keeps its slot,
    // exactly as before; callers cannot currently recover such a set.
    HRESULT Close()
    {
        if (IsOpen == false)
        {
            return E_INVALIDARG;
        }

        if (OutOfMemory == true)
        {
            return E_OUTOFMEMORY;
        }

        for (INT32 i = 0; i < CurrentSetSize; i++)
        {
            Remove(ppSet[i]);
        }

        ReturnCommandListReservation();

        IsOpen = false;
        return S_OK;
    }

private:
    // Clears the "used on this command list" flag of one object
    inline void Remove(ManagedObject* pObject)
    {
        pObject->CommandListsUsedOn[CommandListIndex] = false;
    }

    // Hands the command list slot back to the SyncManager and marks the set closed
    inline void ReturnCommandListReservation()
    {
        Internal::ScopedLock Lock(&pSyncManager->MaskCriticalSection);

        pSyncManager->AvailableCommandLists[CommandListIndex] = false;

        CommandListIndex = ResidencySet::InvalidIndex;

        IsOpen = false;
    }

    // Attaches the set to a SyncManager; storage is allocated lazily on first Insert()
    void Initialize(Internal::SyncManager* pSyncManagerIn)
    {
        pSyncManager = pSyncManagerIn;
    }

    // Attaches the set to a SyncManager and pre-allocates room for MaxSize objects.
    // Returns false only when the pre-allocation fails.
    bool Initialize(Internal::SyncManager* pSyncManagerIn, UINT32 MaxSize)
    {
        pSyncManager = pSyncManagerIn;
        MaxResidencySetSize = MaxSize;

        if (MaxSize)
        {
            ppSet = new ManagedObject*[MaxResidencySetSize];
            return ppSet != nullptr;
        }

        // Empty set may be indicative of errors higher up, e.g. that something doesn't have its ResidencySet initialized. Unfortunately, such errors are generally masked
        // by everything else that has it properly initialized, so it generally only hits on super tiny commandlists.
        // That is not a fatal error though, so we accept the zero case here instead of allocating; treating it as a failure would surface as a spurious E_OUTOFMEMORY from ExecuteSubset()
        // resulting in the termination of the process with a confusing "driver crash" end user message.
        return true;
    }

    // Grows the storage by 50% (4096 entries for an empty set). On failure the
    // existing storage and capacity are left untouched so the caller can detect
    // that no room was gained.
    inline void Realloc()
    {
        const INT32 LargestCapacity = 0x7FFFFFFF;
        INT32 NewCapacity = 4096;
        if (MaxResidencySetSize > 0)
        {
            // Integer growth that always adds at least one slot. The previous
            // float based formula turned a capacity of 1 into 1 again.
            const INT32 Growth = RESIDENCY_MAX(MaxResidencySetSize / 2, 1);
            if (MaxResidencySetSize > LargestCapacity - Growth)
            {
                // Cannot grow any further without overflowing
                return;
            }
            NewCapacity = MaxResidencySetSize + Growth;
        }

        ManagedObject** ppNewAlloc = new ManagedObject*[NewCapacity];
        if (ppNewAlloc == nullptr)
        {
            // Keep the old buffer; dropping it here would leak it
            return;
        }

        if (ppSet)
        {
            memcpy(ppNewAlloc, ppSet, CurrentSetSize * sizeof(ManagedObject*));
            delete[](ppSet);
        }

        ppSet = ppNewAlloc;
        MaxResidencySetSize = NewCapacity;
    }

    // Slot reserved in the SyncManager while the set is open, InvalidIndex otherwise
    UINT32 CommandListIndex;
    // Storage for the referenced objects; the first CurrentSetSize entries are valid
    ManagedObject** ppSet;
    // Capacity of ppSet in entries
    INT32 MaxResidencySetSize;
    // Number of objects currently in the set
    INT32 CurrentSetSize;
    bool IsOpen;
    // Set when an Insert() could not grow the storage
    bool OutOfMemory;
    Internal::SyncManager* pSyncManager;
};
namespace Internal
{
/* List Helpers */
// Turns pHead into an empty circular list: both links point back at the head.
inline void InitializeListHead(LIST_ENTRY* pHead)
{
    pHead->Blink = pHead;
    pHead->Flink = pHead->Blink;
}
// Links pEntry in directly after the list head, making it the first element.
inline void InsertHeadList(LIST_ENTRY* pHead, LIST_ENTRY* pEntry)
{
    LIST_ENTRY* const pFormerFirst = pHead->Flink;

    pEntry->Blink = pHead;
    pEntry->Flink = pFormerFirst;

    pFormerFirst->Blink = pEntry;
    pHead->Flink = pEntry;
}
// Links pEntry in directly before the list head, making it the last element.
inline void InsertTailList(LIST_ENTRY* pHead, LIST_ENTRY* pEntry)
{
    LIST_ENTRY* const pFormerLast = pHead->Blink;

    pEntry->Flink = pHead;
    pEntry->Blink = pFormerLast;

    pFormerLast->Flink = pEntry;
    pHead->Blink = pEntry;
}
// Unlinks pEntry by joining its two neighbours. pEntry's own links are left as they were.
inline void RemoveEntryList(LIST_ENTRY* pEntry)
{
    LIST_ENTRY* const pBefore = pEntry->Blink;
    LIST_ENTRY* const pAfter = pEntry->Flink;

    pBefore->Flink = pAfter;
    pAfter->Blink = pBefore;
}
// Unlinks and returns the first element of the list.
// On an empty list the head itself is returned.
inline LIST_ENTRY* RemoveHeadList(LIST_ENTRY* pHead)
{
    LIST_ENTRY* const pFirst = pHead->Flink;
    Internal::RemoveEntryList(pFirst);
    return pFirst;
}
// Unlinks and returns the last element of the list.
// On an empty list the head itself is returned.
inline LIST_ENTRY* RemoveTailList(LIST_ENTRY* pHead)
{
    LIST_ENTRY* const pLast = pHead->Blink;
    Internal::RemoveEntryList(pLast);
    return pLast;
}
// A list is empty when the head's forward link points back at the head itself.
inline bool IsListEmpty(LIST_ENTRY* pEntry)
{
    LIST_ENTRY* const pNext = pEntry->Flink;
    return pNext == pEntry;
}
// Pairs an ID3D12Fence with the next value to signal or wait for on it.
// The fence object is created by Initialize() and released by Destroy().
struct Fence
{
    Fence(UINT64 StartingValue) : pFence(nullptr), FenceValue(StartingValue)
    {
        Internal::InitializeListHead(&ListEntry);
    }

    // Creates the underlying D3D12 fence (initial GPU-side value 0) on pDevice
    HRESULT Initialize(ID3D12Device* pDevice)
    {
        const HRESULT CreateResult = pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pFence));
        RESIDENCY_CHECK_RESULT(CreateResult);
        return CreateResult;
    }

    // Releases the underlying fence, if there is one. Safe to call repeatedly.
    void Destroy()
    {
        if (pFence == nullptr)
        {
            return;
        }
        pFence->Release();
        pFence = nullptr;
    }

    // Makes pQueue wait on the GPU until the fence reaches FenceValue
    HRESULT GPUWait(ID3D12CommandQueue* pQueue)
    {
        const HRESULT WaitResult = pQueue->Wait(pFence, FenceValue);
        RESIDENCY_CHECK_RESULT(WaitResult);
        return WaitResult;
    }

    // Makes pQueue signal the fence with FenceValue
    HRESULT GPUSignal(ID3D12CommandQueue* pQueue)
    {
        const HRESULT SignalResult = pQueue->Signal(pFence, FenceValue);
        RESIDENCY_CHECK_RESULT(SignalResult);
        return SignalResult;
    }

    // Advances to the next fence value
    inline void Increment()
    {
        ++FenceValue;
    }

    ID3D12Fence* pFence;
    UINT64 FenceValue;
    LIST_ENTRY ListEntry;
};
// Represents a time on a particular queue that a resource was used
struct QueueSyncPoint
{
QueueSyncPoint() : pFence(nullptr), LastUsedValue(0) {};
inline bool IsCompleted() { return LastUsedValue <= pFence->pFence->GetCompletedValue(); }
inline void WaitForCompletion(HANDLE Event)
{
RESIDENCY_CHECK_RESULT(pFence->pFence->SetEventOnCompletion(LastUsedValue, Event));
RESIDENCY_CHECK_RESULT(WaitForSingleObject(Event, INFINITE));
}
Fence* pFence;
UINT64 LastUsedValue;
};
struct DeviceWideSyncPoint
{
DeviceWideSyncPoint(UINT32 NumQueues, UINT64 Generation) :
GenerationID(Generation), NumQueueSyncPoints(NumQueues) {};
// Create the whole structure in one allocation for locality
static DeviceWideSyncPoint* CreateSyncPoint(UINT32 NumQueues, UINT64 Generation)
{
DeviceWideSyncPoint* pSyncPoint = nullptr;
const SIZE_T Size = sizeof(DeviceWideSyncPoint) + (sizeof(QueueSyncPoint) * (NumQueues - 1));
BYTE* pAlloc = new BYTE[Size];
if (pAlloc && Size >= sizeof(DeviceWideSyncPoint))
{
pSyncPoint = new (pAlloc) DeviceWideSyncPoint(NumQueues, Generation);
}
return pSyncPoint;
}
// A device wide fence is completed if all of the queues that were active at that point are completed
inline bool IsCompleted()
{
for (UINT32 i = 0; i < NumQueueSyncPoints; i++)
{
if (pQueueSyncPoints[i].IsCompleted() == false)
{
return false;
}
}
return true;
}
inline void WaitForCompletion(HANDLE Event)
{
for (UINT32 i = 0; i < NumQueueSyncPoints; i++)
{
if (pQueueSyncPoints[i].IsCompleted() == false)
{
pQueueSyncPoints[i].WaitForCompletion(Event);
}
}
}
const UINT64 GenerationID;
const UINT32 NumQueueSyncPoints;
LIST_ENTRY ListEntry;
// NumQueueSyncPoints QueueSyncPoints will be placed below here
QueueSyncPoint pQueueSyncPoints[1];
};
// A Least Recently Used Cache. Tracks all of the objects requested by the app so that objects
// that aren't used frequently can get evicted to help the app stay under budget.
class LRUCache
{
public:
LRUCache() :
NumResidentObjects(0),
NumEvictedObjects(0),
ResidentSize(0)
{
Internal::InitializeListHead(&ResidentObjectListHead);
Internal::InitializeListHead(&EvictedObjectListHead);
};
void Insert(ManagedObject* pObject)
{
if (pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT)
{
Internal::InsertHeadList(&ResidentObjectListHead, &pObject->ListEntry);
NumResidentObjects++;
ResidentSize += pObject->Size;
}
else
{
Internal::InsertHeadList(&EvictedObjectListHead, &pObject->ListEntry);
NumEvictedObjects++;
}
}
void Remove(ManagedObject* pObject)
{
Internal::RemoveEntryList(&pObject->ListEntry);
if (pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT)
{
NumResidentObjects--;
ResidentSize -= pObject->Size;
}
else
{
NumEvictedObjects--;
}
}
// When an object is used by the GPU we move it to the end of the list.
// This way things closer to the head of the list are the objects which
// are stale and better candidates for eviction
void ObjectReferenced(ManagedObject* pObject)
{
RESIDENCY_CHECK(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT);
Internal::RemoveEntryList(&pObject->ListEntry);
Internal::InsertTailList(&ResidentObjectListHead, &pObject->ListEntry);
}
void MakeResident(ManagedObject* pObject)
{
RESIDENCY_CHECK(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::EVICTED);
pObject->ResidencyStatus = ManagedObject::RESIDENCY_STATUS::RESIDENT;
Internal::RemoveEntryList(&pObject->ListEntry);
Internal::InsertTailList(&ResidentObjectListHead, &pObject->ListEntry);
NumEvictedObjects--;
NumResidentObjects++;
ResidentSize += pObject->Size;
}
void Evict(ManagedObject* pObject)
{
RESIDENCY_CHECK(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT);
pObject->ResidencyStatus = ManagedObject::RESIDENCY_STATUS::EVICTED;
Internal::RemoveEntryList(&pObject->ListEntry);
Internal::InsertTailList(&EvictedObjectListHead, &pObject->ListEntry);
NumResidentObjects--;
ResidentSize -= pObject->Size;
NumEvictedObjects++;
}
// Evict all of the resident objects used in sync points up to the specficied one (inclusive)
void TrimToSyncPointInclusive(INT64 CurrentUsage, INT64 CurrentBudget, ID3D12Pageable** EvictionList, UINT32& NumObjectsToEvict, UINT64 SyncPoint)
{
NumObjectsToEvict = 0;
LIST_ENTRY* pResourceEntry = ResidentObjectListHead.Flink;
while (pResourceEntry != &ResidentObjectListHead)
{
ManagedObject* pObject = CONTAINING_RECORD(pResourceEntry, ManagedObject, ListEntry);
if (pObject->LastGPUSyncPoint > SyncPoint || CurrentUsage < CurrentBudget)
{
break;
}
RESIDENCY_CHECK(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT);
EvictionList[NumObjectsToEvict++] = pObject->pUnderlying;
Evict(pObject);
CurrentUsage -= pObject->Size;
pResourceEntry = ResidentObjectListHead.Flink;
}
}
// Trim all objects which are older than the specified time
void TrimAgedAllocations(DeviceWideSyncPoint* MaxSyncPoint, ID3D12Pageable** EvictionList, UINT32& NumObjectsToEvict, UINT64 CurrentTimeStamp, UINT64 MinDelta)
{
LIST_ENTRY* pResourceEntry = ResidentObjectListHead.Flink;
while (pResourceEntry != &ResidentObjectListHead)
{
ManagedObject* pObject = CONTAINING_RECORD(pResourceEntry, ManagedObject, ListEntry);
if ((MaxSyncPoint && pObject->LastGPUSyncPoint >= MaxSyncPoint->GenerationID) || // Only trim allocations done on the GPU
CurrentTimeStamp - pObject->LastUsedTimestamp <= MinDelta) // Don't evict things which have been used recently
{
break;
}
RESIDENCY_CHECK(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT);
EvictionList[NumObjectsToEvict++] = pObject->pUnderlying;
Evict(pObject);
pResourceEntry = ResidentObjectListHead.Flink;
}
}
ManagedObject* GetResidentListHead()
{
if (IsListEmpty(&ResidentObjectListHead))
{
return nullptr;
}
return CONTAINING_RECORD(ResidentObjectListHead.Flink, ManagedObject, ListEntry);
}
LIST_ENTRY ResidentObjectListHead;
LIST_ENTRY EvictedObjectListHead;
UINT32 NumResidentObjects;
UINT32 NumEvictedObjects;
UINT64 ResidentSize;
};
class ResidencyManagerInternal
{
public:
// Puts the manager into a known "not yet initialised" state: interface pointers
// are null, every Win32 handle is INVALID_HANDLE_VALUE and the tuning constants
// get their defaults. No D3D or OS objects are created here apart from the
// locally unique id; call Initialize() before using the manager.
//   pSyncManagerIn - slot table that is handed to the residency sets this manager creates
ResidencyManagerInternal(SyncManager* pSyncManagerIn) :
Device(nullptr),
Device3(nullptr),
#ifdef __ID3D12DeviceDownlevel_INTERFACE_DEFINED__
DeviceDownlevel(nullptr),
#endif
// Paging fence values start at 0; Initialize() bumps this to 1 when ID3D12Device3 is unavailable
AsyncThreadFence(0),
CompletionEvent(INVALID_HANDLE_VALUE),
AsyncThreadWorkCompletionEvent(INVALID_HANDLE_VALUE),
Adapter(nullptr),
AsyncWorkEvent(INVALID_HANDLE_VALUE),
AsyncWorkThread(INVALID_HANDLE_VALUE),
FinishAsyncWork(false),
cStartEvicted(false),
CurrentSyncPointGeneration(0),
NumQueuesSeen(0),
NodeIndex(0),
CurrentAsyncWorkloadHead(0),
CurrentAsyncWorkloadTail(0),
// Grace periods are in seconds; Initialize() converts them to QPC ticks
cMinEvictionGracePeriod(1.0f),
cMaxEvictionGracePeriod(60.0f),
cTrimPercentageMemoryUsageThreshold(0.7f),
AsyncWorkQueue(nullptr),
// Both values are overwritten by Initialize() from its MaxLatency argument
MaxSoftwareQueueLatency(6),
AsyncWorkQueueSize(7),
pSyncManager(pSyncManagerIn)
{
Internal::InitializeListHead(&QueueFencesListHead);
Internal::InitializeListHead(&InFlightSyncPointsHead);
// The id is mixed into the private-data GUID used by GetFence()
BOOL LuidSuccess = AllocateLocallyUniqueId(&ResidencyManagerUniqueID);
RESIDENCY_CHECK(LuidSuccess);
UNREFERENCED_PARAMETER(LuidSuccess);
};
// NOTE: DeviceNodeIndex is an index not a mask. The majority of D3D12 uses bit masks to identify a GPU node whereas DXGI uses 0 based indices.
// Creates the fence, events, work queue and (if needed) the worker thread.
//   ParentDevice    - device whose objects are managed; must not be null
//   DeviceNodeIndex - zero based GPU node index (an index, not a mask)
//   ParentAdapter   - optional adapter; queried for the interface stored in Adapter
//   MaxLatency      - maximum software queue latency; the work queue gets MaxLatency + 1 entries
// Returns S_OK on success, E_OUTOFMEMORY if the work queue cannot be allocated,
// or the HRESULT of the fence/event/thread creation that failed.
// Destroy() must be called afterwards even when this function fails.
HRESULT Initialize(ID3D12Device* ParentDevice, UINT DeviceNodeIndex, IDXGIAdapter* ParentAdapter, UINT32 MaxLatency)
{
    Device = ParentDevice;
    NodeIndex = DeviceNodeIndex;
    MaxSoftwareQueueLatency = MaxLatency;

    // Try to query for the device interface with a queued MakeResident API.
    if (FAILED(Device->QueryInterface(&Device3)))
    {
        // The queued MakeResident API is not available. Start the paging fence at 1.
        AsyncThreadFence.Increment();
    }

#ifdef __ID3D12DeviceDownlevel_INTERFACE_DEFINED__
    Device->QueryInterface(&DeviceDownlevel);
#endif

    if (ParentAdapter)
    {
        ParentAdapter->QueryInterface(&Adapter);
    }

    AsyncWorkQueueSize = MaxLatency + 1;
    AsyncWorkQueue = new AsyncWorkload[AsyncWorkQueueSize];

    if (AsyncWorkQueue == nullptr)
    {
        return E_OUTOFMEMORY;
    }

    LARGE_INTEGER Frequency;
    QueryPerformanceFrequency(&Frequency);

    // Calculate how many QPC ticks are equivalent to the given time in seconds
    MinEvictionGracePeriodTicks = UINT64(Frequency.QuadPart * cMinEvictionGracePeriod);
    MaxEvictionGracePeriodTicks = UINT64(Frequency.QuadPart * cMaxEvictionGracePeriod);

    HRESULT hr = S_OK;
    hr = AsyncThreadFence.Initialize(Device);

    // CreateEvent and CreateThread report failure by returning NULL, not
    // INVALID_HANDLE_VALUE. On failure the member is put back to
    // INVALID_HANDLE_VALUE so Destroy() never sees a NULL "valid" handle.
    if (SUCCEEDED(hr))
    {
        CompletionEvent = CreateEvent(nullptr, false, false, nullptr);
        if (CompletionEvent == nullptr)
        {
            hr = HRESULT_FROM_WIN32(GetLastError());
            CompletionEvent = INVALID_HANDLE_VALUE;
        }
    }

    if (SUCCEEDED(hr))
    {
        AsyncThreadWorkCompletionEvent = CreateEvent(nullptr, false, false, nullptr);
        if (AsyncThreadWorkCompletionEvent == nullptr)
        {
            hr = HRESULT_FROM_WIN32(GetLastError());
            AsyncThreadWorkCompletionEvent = INVALID_HANDLE_VALUE;
        }
    }

    if (SUCCEEDED(hr))
    {
        // Manual-reset: the worker thread resets it itself after waking up
        AsyncWorkEvent = CreateEvent(nullptr, true, false, nullptr);
        if (AsyncWorkEvent == nullptr)
        {
            hr = HRESULT_FROM_WIN32(GetLastError());
            AsyncWorkEvent = INVALID_HANDLE_VALUE;
        }
    }

#if !RESIDENCY_SINGLE_THREADED
    // The worker thread is only needed when the queued MakeResident API is missing
    if (SUCCEEDED(hr) && !Device3)
    {
        AsyncWorkThread = CreateThread(nullptr, 0, AsyncThreadStart, (void*) this, 0, nullptr);
        if (AsyncWorkThread == nullptr)
        {
            hr = HRESULT_FROM_WIN32(GetLastError());
            AsyncWorkThread = INVALID_HANDLE_VALUE;
        }
    }
#endif

    return hr;
}
void Destroy()
{
AsyncThreadFence.Destroy();
if (CompletionEvent != INVALID_HANDLE_VALUE)
{
CloseHandle(CompletionEvent);
CompletionEvent = INVALID_HANDLE_VALUE;
}
#if !RESIDENCY_SINGLE_THREADED
AsyncWorkload* pWork = DequeueAsyncWork();
while (pWork)
{
pWork = DequeueAsyncWork();
}
FinishAsyncWork = true;
if (SetEvent(AsyncWorkEvent) == false)
{
RESIDENCY_CHECK_RESULT(HRESULT_FROM_WIN32(GetLastError()));
}
// Make sure the async worker thread is finished to prevent dereferencing
// dangling pointers to ResidencyManagerInternal
if (AsyncWorkThread != INVALID_HANDLE_VALUE)
{
WaitForSingleObject(AsyncWorkThread, INFINITE);
CloseHandle(AsyncWorkThread);
AsyncWorkThread = INVALID_HANDLE_VALUE;
}
if (AsyncWorkEvent != INVALID_HANDLE_VALUE)
{
CloseHandle(AsyncWorkEvent);
AsyncWorkEvent = INVALID_HANDLE_VALUE;
}
#endif
if (AsyncThreadWorkCompletionEvent != INVALID_HANDLE_VALUE)
{
CloseHandle(AsyncThreadWorkCompletionEvent);
AsyncThreadWorkCompletionEvent = INVALID_HANDLE_VALUE;
}
while (Internal::IsListEmpty(&QueueFencesListHead) == false)
{
Internal::Fence* pObject =
CONTAINING_RECORD(QueueFencesListHead.Flink, Internal::Fence, ListEntry);
pObject->Destroy();
Internal::RemoveHeadList(&QueueFencesListHead);
delete(pObject);
}
while (Internal::IsListEmpty(&InFlightSyncPointsHead) == false)
{
Internal::DeviceWideSyncPoint* pPoint =
CONTAINING_RECORD(InFlightSyncPointsHead.Flink, Internal::DeviceWideSyncPoint, ListEntry);
Internal::RemoveHeadList(&InFlightSyncPointsHead);
delete pPoint;
}
delete [] AsyncWorkQueue;
if (Device3)
{
Device3->Release();
Device3 = nullptr;
}
#ifdef __ID3D12DeviceDownlevel_INTERFACE_DEFINED__
if (DeviceDownlevel)
{
DeviceDownlevel->Release();
DeviceDownlevel = nullptr;
}
#endif
if (Adapter)
{
Adapter->Release();
Adapter = nullptr;
}
}
// Adds pObject to the LRU. When the manager is configured to start objects
// evicted, the object is flagged EVICTED and evicted on the device first.
// A null pObject is ignored.
void BeginTrackingObject(ManagedObject* pObject)
{
    Internal::ScopedLock Lock(&Mutex);

    if (pObject == nullptr)
    {
        return;
    }

    RESIDENCY_CHECK(pObject->pUnderlying != nullptr);

    if (cStartEvicted)
    {
        pObject->ResidencyStatus = ManagedObject::RESIDENCY_STATUS::EVICTED;
        RESIDENCY_CHECK_RESULT(Device->Evict(1, &pObject->pUnderlying));
    }

    LRU.Insert(pObject);
}
// Removes pObject from the LRU. A null pObject is ignored, mirroring
// BeginTrackingObject(). The object must currently be tracked.
void EndTrackingObject(ManagedObject* pObject)
{
    Internal::ScopedLock Lock(&Mutex);

    if (pObject)
    {
        LRU.Remove(pObject);
    }
}
// One residency set per command-list
// Executes Count command lists on Queue after making the objects in the
// matching residency sets resident. ResidencySets[i] belongs to CommandLists[i].
// Forwards to ExecuteSubset() and returns its result.
HRESULT ExecuteCommandLists(ID3D12CommandQueue* Queue, ID3D12CommandList** CommandLists, ResidencySet** ResidencySets, UINT32 Count)
{
    const HRESULT Result = ExecuteSubset(Queue, CommandLists, ResidencySets, Count);
    return Result;
}
// BEGIN EPIC MOD
// Attempt to make a specific set of resources resident, independently of a command list execution.
// This is useful for Queue-level operations on resources, such as UpdateTileMappings().
// NOTE: code duplicated from ExecuteCommandLists() to avoid significant divergence.
// Makes the objects in MasterSet resident without executing a command list and
// makes Queue wait on the GPU until the paging work has been signalled.
//   Queue     - queue that will consume the resources
//   MasterSet - set of objects to make resident
// Returns E_INVALIDARG for a null MasterSet, otherwise the first failure from
// fetching the queue fence, enqueueing the work or the GPU wait.
HRESULT MakeResident(ID3D12CommandQueue* Queue, ResidencySet* MasterSet)
{
    // The paging work dereferences the set unconditionally
    if (MasterSet == nullptr)
    {
        return E_INVALIDARG;
    }

    HRESULT hr = S_OK;

    Internal::Fence* QueueFence = nullptr;
    hr = GetFence(Queue, QueueFence);

    if (SUCCEEDED(hr))
    {
        // The following code must be atomic so that things get ordered correctly
        Internal::ScopedLock Lock(&ExecutionCS);

        // Evict or make resident all of the objects we identified above.
        // This will run on an async thread, allowing the current to continue while still blocking the GPU if required
        // If a native async MakeResident is supported, this will run on this thread - it will only block until work referencing
        // resources which need to be evicted is completed, and does not need to wait for MakeResident to complete.
        hr = EnqueueAsyncWork(MasterSet, AsyncThreadFence.FenceValue, CurrentSyncPointGeneration);
#if !RESIDENCY_SINGLE_THREADED
        if (Device3)
#endif
        {
            // DequeueAsyncWork() can come back empty (its other callers test
            // for that); processing a null workload would dereference it.
            AsyncWorkload* pWorkload = DequeueAsyncWork();
            if (pWorkload)
            {
                ProcessPagingWork(pWorkload);
            }
        }

        // If there are some things that need to be made resident we need to make sure that the GPU
        // doesn't execute until the async thread signals that the MakeResident call has returned.
        if (SUCCEEDED(hr))
        {
            hr = AsyncThreadFence.GPUWait(Queue);

            // If we're using a queued MakeResident, then ProcessPagingWork may increment the fence multiple times instead of
            // signaling a pre-defined value.
            if (!Device3)
            {
                AsyncThreadFence.Increment();
            }
        }
    }
    return hr;
}
// Signals the residency fence of Queue. Same as GetCurrentGPUSyncPoint(),
// except that the sync point value is discarded.
HRESULT SignalFence(ID3D12CommandQueue* Queue)
{
    UINT64 DiscardedSyncPoint = 0;
    const HRESULT Result = GetCurrentGPUSyncPoint(Queue, &DiscardedSyncPoint);
    return Result;
}
// END EPIC MOD
// Writes the current fence value of Queue's residency fence to *pGPUSyncPoint
// and then signals that fence on the queue.
// *pGPUSyncPoint is only written when the fence for the queue could be obtained.
HRESULT GetCurrentGPUSyncPoint(ID3D12CommandQueue* Queue, UINT64 *pGPUSyncPoint)
{
    Internal::Fence* pQueueFence = nullptr;
    const HRESULT FenceResult = GetFence(Queue, pQueueFence);
    if (FAILED(FenceResult))
    {
        return FenceResult;
    }

    // The signal and increment need to be atomic
    Internal::ScopedLock Lock(&ExecutionCS);
    *pGPUSyncPoint = pQueueFence->FenceValue;
    return SignalFence(Queue, pQueueFence);
}
// BEGIN EPIC MOD
UINT64 LocalMemoryBudgetLimit = ~0ull;
// END EPIC MOD
private:
// Finds the residency fence attached to Queue, creating and attaching one the
// first time a queue is seen.
//   Queue      - the command queue to look up
//   QueueFence - receives the fence; nullptr if no usable fence could be created
// Returns the HRESULT of the lookup, or of the creation/attachment when the
// fence did not exist yet.
HRESULT GetFence(ID3D12CommandQueue *Queue, Internal::Fence *&QueueFence)
{
    // We have to track each object on each queue so we know when it is safe to evict them. Therefore, for every queue that we
    // see, associate a fence with it
    GUID FenceGuid = { 0xf0, 0, 0xd, { 0, 0, 0, 0, 0, 0, 0, 0 } };
    // Make the GUID unique per residency manager
    memcpy(&FenceGuid.Data4, &ResidencyManagerUniqueID, sizeof(ResidencyManagerUniqueID));

    QueueFence = nullptr;

    struct
    {
        Internal::Fence* pFence;
    } CommandQueuePrivateData = { nullptr };

    // Find or create the fence for this queue
    UINT32 Size = sizeof(CommandQueuePrivateData);
    HRESULT hr = Queue->GetPrivateData(FenceGuid, &Size, &CommandQueuePrivateData);
    if (FAILED(hr))
    {
        Internal::Fence* pNewFence = new Internal::Fence(1);
        if (pNewFence == nullptr)
        {
            return E_OUTOFMEMORY;
        }

        hr = pNewFence->Initialize(Device);
        if (FAILED(hr))
        {
            // A fence without a D3D object must not be linked or counted:
            // later sync points would dereference its null pFence.
            pNewFence->Destroy();
            delete pNewFence;
            return hr;
        }

        // The list owns the fence from here on; Destroy() frees it
        Internal::InsertTailList(&QueueFencesListHead, &pNewFence->ListEntry);
        InterlockedIncrement(&NumQueuesSeen);

        CommandQueuePrivateData.pFence = pNewFence;
        hr = Queue->SetPrivateData(FenceGuid, UINT32(sizeof(CommandQueuePrivateData)), &CommandQueuePrivateData);
        RESIDENCY_CHECK_RESULT(hr);
    }

    QueueFence = CommandQueuePrivateData.pFence;
    RESIDENCY_CHECK(QueueFence != nullptr);

    return hr;
}
// Signals QueueFence on Queue, advances the fence value, records a sync point
// if the signal succeeded, and always bumps the sync point generation.
// Returns the signal's HRESULT, or that of EnqueueSyncPoint() when the signal succeeded.
HRESULT SignalFence(ID3D12CommandQueue *Queue, Internal::Fence *QueueFence)
{
    // When this fence is passed it is safe to evict the resources used in the list just submitted
    HRESULT Result = QueueFence->GPUSignal(Queue);
    QueueFence->Increment();

    if (!FAILED(Result))
    {
        Result = EnqueueSyncPoint();
        RESIDENCY_CHECK_RESULT(Result);
    }

    ++CurrentSyncPointGeneration;
    return Result;
}
// Executes Count command lists on Queue. The objects of all residency sets are
// merged into one master set, paging work for that set is enqueued, the queue
// is made to wait for it, and then the command lists are executed and the
// queue fence is signalled. If the merged set cannot fit into the combined
// local + non-local budget and Count > 1, the lists are split in half and each
// half is executed separately.
// Returns E_INVALIDARG if any residency set is still open, E_OUTOFMEMORY if
// the master set cannot be created, or the first failure of the later steps.
HRESULT ExecuteSubset(ID3D12CommandQueue* Queue, ID3D12CommandList** CommandLists, ResidencySet** ResidencySets, UINT32 Count)
{
    HRESULT hr = S_OK;

    DXGI_QUERY_VIDEO_MEMORY_INFO LocalMemory;
    ZeroMemory(&LocalMemory, sizeof(LocalMemory));
    GetCurrentBudget(&LocalMemory, DXGI_MEMORY_SEGMENT_GROUP_LOCAL);

    DXGI_QUERY_VIDEO_MEMORY_INFO NonLocalMemory;
    ZeroMemory(&NonLocalMemory, sizeof(NonLocalMemory));
    GetCurrentBudget(&NonLocalMemory, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL);

    UINT64 TotalSizeNeeded = 0;

    // Upper bound for the master set: every set's size, duplicates included
    UINT32 MaxObjectsReferenced = 0;
    for (UINT32 i = 0; i < Count; i++)
    {
        if (ResidencySets[i])
        {
            if (ResidencySets[i]->IsOpen)
            {
                // Residency Sets must be closed before execution just like Command Lists
                return E_INVALIDARG;
            }
            MaxObjectsReferenced += ResidencySets[i]->CurrentSetSize;
        }
    }

    // Create a set to gather up all unique resources required by this call.
    // Until it has been handed to EnqueueAsyncWork() nobody else knows about it,
    // so every early return below has to free it.
    ResidencySet* pMasterSet = new ResidencySet();
    if (pMasterSet == nullptr)
    {
        return E_OUTOFMEMORY;
    }
    if (pMasterSet->Initialize(pSyncManager, MaxObjectsReferenced) == false)
    {
        delete(pMasterSet);
        return E_OUTOFMEMORY;
    }

    hr = pMasterSet->Open();
    if (FAILED(hr))
    {
        delete(pMasterSet);
        return hr;
    }

    // For each residency set
    for (UINT32 i = 0; i < Count; i++)
    {
        if (ResidencySets[i])
        {
            // For each object in this set
            for (INT32 x = 0; x < ResidencySets[i]->CurrentSetSize; x++)
            {
                // Insert() returns true only the first time an object is seen
                if (pMasterSet->Insert(ResidencySets[i]->ppSet[x]))
                {
                    TotalSizeNeeded += ResidencySets[i]->ppSet[x]->Size;
                }
            }
        }
    }

    // Close this set to free its slot up for the app
    hr = pMasterSet->Close();
    if (FAILED(hr))
    {
        delete(pMasterSet);
        return hr;
    }

    // This set of commandlists can't possibly fit within the budget, they need to be split up. If the number of command lists is 1 there is
    // nothing we can do
    if (Count > 1 && TotalSizeNeeded > LocalMemory.Budget + NonLocalMemory.Budget)
    {
        delete(pMasterSet);

        // Recursively try to find a small enough set to fit in memory
        const UINT32 Half = Count / 2;
        const HRESULT LowerHR = ExecuteSubset(Queue, CommandLists, ResidencySets, Half);
        const HRESULT UpperHR = ExecuteSubset(Queue, &CommandLists[Half], &ResidencySets[Half], Count - Half);

        // BEGIN EPIC MOD
        return LowerHR != S_OK ? LowerHR : UpperHR;
        // END EPIC MOD
    }

    Internal::Fence* QueueFence = nullptr;
    hr = GetFence(Queue, QueueFence);
    if (FAILED(hr))
    {
        // Nothing was enqueued, so the master set is still ours to free
        delete(pMasterSet);
        return hr;
    }

    {
        // The following code must be atomic so that things get ordered correctly
        Internal::ScopedLock Lock(&ExecutionCS);

        // Evict or make resident all of the objects we identified above.
        // This will run on an async thread, allowing the current to continue while still blocking the GPU if required
        // If a native async MakeResident is supported, this will run on this thread - it will only block until work referencing
        // resources which need to be evicted is completed, and does not need to wait for MakeResident to complete.
        hr = EnqueueAsyncWork(pMasterSet, AsyncThreadFence.FenceValue, CurrentSyncPointGeneration);
#if !RESIDENCY_SINGLE_THREADED
        if (Device3)
#endif
        {
            // DequeueAsyncWork() can come back empty (its other callers test
            // for that); processing a null workload would dereference it.
            AsyncWorkload* pWorkload = DequeueAsyncWork();
            if (pWorkload)
            {
                ProcessPagingWork(pWorkload);
            }
        }

        // If there are some things that need to be made resident we need to make sure that the GPU
        // doesn't execute until the async thread signals that the MakeResident call has returned.
        if (SUCCEEDED(hr))
        {
            hr = AsyncThreadFence.GPUWait(Queue);

            // If we're using a queued MakeResident, then ProcessPagingWork may increment the fence multiple times instead of
            // signaling a pre-defined value.
            if (!Device3)
            {
                AsyncThreadFence.Increment();
            }
        }

        // The command lists are submitted even when the steps above failed
        Queue->ExecuteCommandLists(Count, CommandLists);

        if (SUCCEEDED(hr))
        {
            hr = SignalFence(Queue, QueueFence);
        }
    }
    return hr;
}
// One unit of paging work: the set of objects to page in plus the fence value
// and sync point generation that belong to it.
struct AsyncWorkload
{
    // Everything starts out zeroed / null
    AsyncWorkload()
        : SyncPointGeneration(0)
        , pMasterSet(nullptr)
        , FenceValueToSignal(0)
    {
    }

    // Sync point generation that was current when the work was enqueued
    UINT64 SyncPointGeneration;

    // List of objects to make resident
    ResidencySet* pMasterSet;

    // The GPU will wait on this value so that it doesn't execute until the objects are made resident
    UINT64 FenceValueToSignal;
};
// Number of entries in AsyncWorkQueue; Initialize() sets it to MaxLatency + 1
SIZE_T AsyncWorkQueueSize;
// Array of AsyncWorkQueueSize workloads, allocated in Initialize() and freed in Destroy()
AsyncWorkload* AsyncWorkQueue;
// Manual-reset event the worker thread sleeps on; Destroy() signals it to request shutdown
HANDLE AsyncWorkEvent;
// Worker thread running AsyncThreadStart; only created when ID3D12Device3 is unavailable
HANDLE AsyncWorkThread;
// NOTE(review): presumably guards the work queue indices below; its users are not visible here - confirm
Internal::CriticalSection AsyncWorkMutex;
// Set by Destroy() and polled by the worker thread after each wake-up.
// NOTE(review): volatile by itself does not provide inter-thread ordering guarantees.
volatile bool FinishAsyncWork;
// NOTE(review): look like the read/write positions of the work queue used by
// EnqueueAsyncWork/DequeueAsyncWork (not visible here) - confirm
volatile SIZE_T CurrentAsyncWorkloadHead;
volatile SIZE_T CurrentAsyncWorkloadTail;
// Entry point of the paging worker thread. pData is the owning
// ResidencyManagerInternal. The thread drains the work queue, signals
// AsyncThreadWorkCompletionEvent after each workload, then sleeps on
// AsyncWorkEvent until it is woken; it exits with code 0 once FinishAsyncWork
// is set.
static unsigned long WINAPI AsyncThreadStart(void* pData)
{
    ResidencyManagerInternal* const pOwner = static_cast<ResidencyManagerInternal*>(pData);

    for (;;)
    {
        // Submit everything that is currently queued
        for (AsyncWorkload* pJob = pOwner->DequeueAsyncWork();
             pJob != nullptr;
             pJob = pOwner->DequeueAsyncWork())
        {
            pOwner->ProcessPagingWork(pJob);

            if (SetEvent(pOwner->AsyncThreadWorkCompletionEvent) == false)
            {
                RESIDENCY_CHECK_RESULT(HRESULT_FROM_WIN32(GetLastError()));
            }
        }

        // Wait until there is more work to be done
        WaitForSingleObject(pOwner->AsyncWorkEvent, INFINITE);

        // The event is manual-reset, so it has to be re-armed by hand
        if (ResetEvent(pOwner->AsyncWorkEvent) == false)
        {
            RESIDENCY_CHECK_RESULT(HRESULT_FROM_WIN32(GetLastError()));
        }

        if (pOwner->FinishAsyncWork)
        {
            break;
        }
    }

    return 0;
}
// This will be run from a worker thread and will emulate a software queue for making gpu resources resident or evicted.
// The GPU will be synchronized by this queue to ensure that it never executes using an evicted resource.
void ProcessPagingWork(AsyncWorkload* pWork)
{
Internal::DeviceWideSyncPoint* FirstUncompletedSyncPoint = DequeueCompletedSyncPoints();
// Use a union so that we only need 1 allocation
union ResidentScratchSpace
{
ManagedObject* pManagedObject;
ID3D12Pageable* pUnderlying;
};
ResidentScratchSpace* pMakeResidentList = nullptr;
UINT32 NumObjectsToMakeResident = 0;
ID3D12Pageable** pEvictionList = nullptr;
UINT32 NumObjectsToEvict = 0;
// the size of all the objects which will need to be made resident in order to execute this set.
UINT64 SizeToMakeResident = 0;
LARGE_INTEGER CurrentTime;
QueryPerformanceCounter(&CurrentTime);
{
// A lock must be taken here as the state of the objects will be altered
Internal::ScopedLock Lock(&Mutex);
pMakeResidentList = new ResidentScratchSpace[pWork->pMasterSet->CurrentSetSize];
pEvictionList = new ID3D12Pageable*[LRU.NumResidentObjects];
// Mark the objects used by this command list to be made resident
for (INT32 i = 0; i < pWork->pMasterSet->CurrentSetSize; i++)
{
ManagedObject*& pObject = pWork->pMasterSet->ppSet[i];
// If it's evicted we need to make it resident again
if (pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::EVICTED)
{
pMakeResidentList[NumObjectsToMakeResident++].pManagedObject = pObject;
LRU.MakeResident(pObject);
SizeToMakeResident += pObject->Size;
}
// Update the last sync point that this was used on
pObject->LastGPUSyncPoint = pWork->SyncPointGeneration;
pObject->LastUsedTimestamp = CurrentTime.QuadPart;
LRU.ObjectReferenced(pObject);
}
DXGI_QUERY_VIDEO_MEMORY_INFO LocalMemory;
ZeroMemory(&LocalMemory, sizeof(LocalMemory));
GetCurrentBudget(&LocalMemory, DXGI_MEMORY_SEGMENT_GROUP_LOCAL);
UINT64 EvictionGracePeriod = GetCurrentEvictionGracePeriod(&LocalMemory);
// BEGIN EPIC MOD
if (LocalMemoryBudgetLimit == 0)
{
EvictionGracePeriod = 0;
}
// END EPIC MOD
LRU.TrimAgedAllocations(FirstUncompletedSyncPoint, pEvictionList, NumObjectsToEvict, CurrentTime.QuadPart, EvictionGracePeriod);
if (NumObjectsToEvict)
{
RESIDENCY_CHECK_RESULT(Device->Evict(NumObjectsToEvict, pEvictionList));
NumObjectsToEvict = 0;
}
if (NumObjectsToMakeResident)
{
UINT32 ObjectsMadeResident = 0;
UINT32 MakeResidentIndex = 0;
while (true)
{
ZeroMemory(&LocalMemory, sizeof(LocalMemory));
GetCurrentBudget(&LocalMemory, DXGI_MEMORY_SEGMENT_GROUP_LOCAL);
DXGI_QUERY_VIDEO_MEMORY_INFO NonLocalMemory;
ZeroMemory(&NonLocalMemory, sizeof(NonLocalMemory));
GetCurrentBudget(&NonLocalMemory, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL);
INT64 TotalUsage = LocalMemory.CurrentUsage + NonLocalMemory.CurrentUsage;
INT64 TotalBudget = LocalMemory.Budget + NonLocalMemory.Budget;
INT64 AvailableSpace = TotalBudget - TotalUsage;
UINT64 BatchSize = 0;
UINT32 NumObjectsInBatch = 0;
UINT32 BatchStart = MakeResidentIndex;
HRESULT hr = S_OK;
if (AvailableSpace > 0)
{
for (UINT32 i = MakeResidentIndex; i < NumObjectsToMakeResident; i++)
{
// If we try to make this object resident, will we go over budget?
if (BatchSize + pMakeResidentList[i].pManagedObject->Size > UINT64(AvailableSpace))
{
// Next time we will start here
MakeResidentIndex = i;
break;
}
else
{
BatchSize += pMakeResidentList[i].pManagedObject->Size;
NumObjectsInBatch++;
ObjectsMadeResident++;
pMakeResidentList[i].pUnderlying = pMakeResidentList[i].pManagedObject->pUnderlying;
}
}
if (Device3)
{
hr = Device3->EnqueueMakeResident(D3D12_RESIDENCY_FLAG_NONE,
NumObjectsInBatch,
&pMakeResidentList[BatchStart].pUnderlying,
AsyncThreadFence.pFence,
AsyncThreadFence.FenceValue + 1);
if (SUCCEEDED(hr))
{
AsyncThreadFence.Increment();
}
}
else
{
hr = Device->MakeResident(NumObjectsInBatch, &pMakeResidentList[BatchStart].pUnderlying);
}
if (SUCCEEDED(hr))
{
SizeToMakeResident -= BatchSize;
}
}
if (FAILED(hr) || ObjectsMadeResident != NumObjectsToMakeResident)
{
ManagedObject* pResidentHead = LRU.GetResidentListHead();
// Get the next sync point to wait for
FirstUncompletedSyncPoint = DequeueCompletedSyncPoints();
// If there is nothing to trim OR the only objects 'Resident' are the ones about to be used by this execute.
if (pResidentHead == nullptr ||
pResidentHead->LastGPUSyncPoint >= pWork->SyncPointGeneration ||
FirstUncompletedSyncPoint == nullptr)
{
// Make resident the rest of the objects as there is nothing left to trim
UINT32 NumObjects = NumObjectsToMakeResident - ObjectsMadeResident;
// Gather up the remaining underlying objects
for (UINT32 i = MakeResidentIndex; i < NumObjectsToMakeResident; i++)
{
pMakeResidentList[i].pUnderlying = pMakeResidentList[i].pManagedObject->pUnderlying;
}
if (Device3)
{
hr = Device3->EnqueueMakeResident(D3D12_RESIDENCY_FLAG_NONE,
NumObjects,
&pMakeResidentList[MakeResidentIndex].pUnderlying,
AsyncThreadFence.pFence,
AsyncThreadFence.FenceValue + 1);
if (SUCCEEDED(hr))
{
AsyncThreadFence.Increment();
}
}
else
{
hr = Device->MakeResident(NumObjects, &pMakeResidentList[MakeResidentIndex].pUnderlying);
}
if (FAILED(hr))
{
// TODO: What should we do if this fails? This is a catastrophic failure in which the app is trying to use more memory
// in 1 command list than can possibly be made resident by the system.
RESIDENCY_CHECK_RESULT(hr);
}
break;
}
UINT64 GenerationToWaitFor = FirstUncompletedSyncPoint->GenerationID;
// We can't wait for the sync-point that this work is intended for
if (GenerationToWaitFor == pWork->SyncPointGeneration)
{
RESIDENCY_CHECK(GenerationToWaitFor >= 0);
GenerationToWaitFor -= 1;
}
// Wait until the GPU is done
WaitForSyncPoint(GenerationToWaitFor);
// BEGIN EPIC MOD
LRU.TrimToSyncPointInclusive(TotalUsage + INT64(SizeToMakeResident),
LocalMemoryBudgetLimit == 0 ? 0 : TotalBudget,
pEvictionList, NumObjectsToEvict, GenerationToWaitFor);
// END EPIC MOD
RESIDENCY_CHECK_RESULT(Device->Evict(NumObjectsToEvict, pEvictionList));
}
else
{
// We made everything resident, mission accomplished
break;
}
}
}
delete[](pMakeResidentList);
delete[](pEvictionList);
}
if (!Device3)
{
// Tell the GPU that it's safe to execute since we made things resident
RESIDENCY_CHECK_RESULT(AsyncThreadFence.pFence->Signal(pWork->FenceValueToSignal));
}
delete(pWork->pMasterSet);
pWork->pMasterSet = nullptr;
}
// The Enqueue and Dequeue Async Work functions are thread-safe only because there is exactly 1 producer and 1 consumer;
// if that ever changes, explicit synchronization will be required.
HRESULT EnqueueAsyncWork(ResidencySet* pMasterSet, UINT64 FenceValueToSignal, UINT64 SyncPointGeneration)
{
    // Producer side of the async work ring buffer.
    //   pMasterSet          - residency set stored in the queued workload
    //   FenceValueToSignal  - fence value stored in the queued workload
    //   SyncPointGeneration - sync point generation stored in the queued workload
    // Returns S_OK once the work event has been set, otherwise the last Win32 error converted to an HRESULT.

    // Throttle: block while the number of queued-but-unconsumed workloads has reached the latency cap,
    // otherwise we get too far ahead of the worker thread and huge hitches occur.
    while ((CurrentAsyncWorkloadTail - CurrentAsyncWorkloadHead) >= MaxSoftwareQueueLatency)
    {
        WaitForSingleObject(AsyncThreadWorkCompletionEvent, INFINITE);
    }
    RESIDENCY_CHECK(CurrentAsyncWorkloadTail >= CurrentAsyncWorkloadHead);

    // Fill in the slot completely before publishing it by advancing the tail.
    const SIZE_T SlotIndex = CurrentAsyncWorkloadTail % AsyncWorkQueueSize;
    auto& Slot = AsyncWorkQueue[SlotIndex];
    Slot.pMasterSet = pMasterSet;
    Slot.FenceValueToSignal = FenceValueToSignal;
    Slot.SyncPointGeneration = SyncPointGeneration;
    CurrentAsyncWorkloadTail++;

    // Signal that a new workload is available.
    if (SetEvent(AsyncWorkEvent))
    {
        return S_OK;
    }
    return HRESULT_FROM_WIN32(GetLastError());
}
// Consumer side of the async work ring buffer.
// Returns a pointer to the oldest queued workload and advances the head past it, or nullptr when
// the head has caught up with the tail (nothing queued). The returned pointer refers to the slot
// inside AsyncWorkQueue itself; no copy is made.
AsyncWorkload* DequeueAsyncWork()
{
    if (CurrentAsyncWorkloadHead != CurrentAsyncWorkloadTail)
    {
        const SIZE_T HeadSlot = CurrentAsyncWorkloadHead % AsyncWorkQueueSize;
        AsyncWorkload* pNextWork = &AsyncWorkQueue[HeadSlot];
        CurrentAsyncWorkloadHead++;
        return pNextWork;
    }
    return nullptr;
}
// Queries video memory info for the given segment group on NodeIndex and writes it to InfoOut.
// The adapter is used when present; otherwise, if the downlevel device interface is compiled in and
// present, it is used instead. If neither is available InfoOut is left as the caller passed it.
// For the local segment group the reported Budget is additionally capped at LocalMemoryBudgetLimit.
void GetCurrentBudget(DXGI_QUERY_VIDEO_MEMORY_INFO* InfoOut, DXGI_MEMORY_SEGMENT_GROUP Segment)
{
    if (Adapter)
    {
        RESIDENCY_CHECK_RESULT(Adapter->QueryVideoMemoryInfo(NodeIndex, Segment, InfoOut));
    }
#ifdef __ID3D12DeviceDownlevel_INTERFACE_DEFINED__
    else if (DeviceDownlevel)
    {
        RESIDENCY_CHECK_RESULT(DeviceDownlevel->QueryVideoMemoryInfo(NodeIndex, Segment, InfoOut));
    }
#endif
    // BEGIN EPIC MOD
    // Only the local segment is subject to the user-supplied limit.
    if (Segment != DXGI_MEMORY_SEGMENT_GROUP_LOCAL)
    {
        return;
    }
    if (LocalMemoryBudgetLimit < InfoOut->Budget)
    {
        InfoOut->Budget = LocalMemoryBudgetLimit;
    }
    // END EPIC MOD
}
// Creates a new device-wide sync point for CurrentSyncPointGeneration, snapshots the last submitted
// fence value of every tracked queue fence into it, and appends it to the in-flight list.
// Runs under AsyncWorkMutex. Returns E_OUTOFMEMORY if the sync point cannot be created, else S_OK.
HRESULT EnqueueSyncPoint()
{
    Internal::ScopedLock Lock(&AsyncWorkMutex);

    Internal::DeviceWideSyncPoint* pNewPoint = Internal::DeviceWideSyncPoint::CreateSyncPoint(NumQueuesSeen, CurrentSyncPointGeneration);
    if (!pNewPoint)
    {
        return E_OUTOFMEMORY;
    }

    // Walk the queue fence list, recording one entry per fence in list order.
    UINT32 QueueSlot = 0;
    for (LIST_ENTRY* pEntry = QueueFencesListHead.Flink; pEntry != &QueueFencesListHead; pEntry = pEntry->Flink)
    {
        Internal::Fence* pQueueFence = CONTAINING_RECORD(pEntry, Internal::Fence, ListEntry);
        auto& QueuePoint = pNewPoint->pQueueSyncPoints[QueueSlot];
        QueuePoint.pFence = pQueueFence;
        // FenceValue is the next value to be used, so the last submitted one is one less.
        QueuePoint.LastUsedValue = pQueueFence->FenceValue - 1;
        QueueSlot++;
    }

    Internal::InsertTailList(&InFlightSyncPointsHead, &pNewPoint->ListEntry);
    return S_OK;
}
// Returns a pointer to the first sync point which has not completed yet
// Pops and frees every completed sync point at the front of the in-flight list, under AsyncWorkMutex.
// Stops at the first entry that reports itself as not completed and returns it (it stays in the list);
// returns nullptr when the list has been drained entirely.
Internal::DeviceWideSyncPoint* DequeueCompletedSyncPoints()
{
    Internal::ScopedLock Lock(&AsyncWorkMutex);

    while (!Internal::IsListEmpty(&InFlightSyncPointsHead))
    {
        Internal::DeviceWideSyncPoint* pOldest =
            CONTAINING_RECORD(InFlightSyncPointsHead.Flink, Internal::DeviceWideSyncPoint, ListEntry);

        if (!pOldest->IsCompleted())
        {
            // Still in flight: this is the one the caller cares about.
            return pOldest;
        }

        Internal::RemoveHeadList(&InFlightSyncPointsHead);
        delete pOldest;
    }

    return nullptr;
}
// Waits for the in-flight sync point whose GenerationID equals SyncPointID, under AsyncWorkMutex.
//   - Entries at the head with an older generation are removed and freed without waiting.
//   - If the head already has a newer generation, the requested point is no longer in the list
//     and the function returns immediately.
//   - On a match, WaitForCompletion(CompletionEvent) is called on the entry, which is then removed
//     and freed.
//   - If the list runs empty before a match is found, the function simply returns.
void WaitForSyncPoint(UINT64 SyncPointID)
{
    Internal::ScopedLock Lock(&AsyncWorkMutex);

    // Test the live list state on every iteration. The body always works on the current head and
    // pops it, so a cached entry pointer would go stale after the first removal; once the last
    // entry was popped, the head sentinel itself would be treated as a sync point and dereferenced.
    while (Internal::IsListEmpty(&InFlightSyncPointsHead) == false)
    {
        Internal::DeviceWideSyncPoint* pPoint =
            CONTAINING_RECORD(InFlightSyncPointsHead.Flink, Internal::DeviceWideSyncPoint, ListEntry);
        if (pPoint->GenerationID > SyncPointID)
        {
            // this point is already done
            return;
        }
        else if (pPoint->GenerationID < SyncPointID)
        {
            // Keep popping off until we find the one to wait on
            Internal::RemoveHeadList(&InFlightSyncPointsHead);
            delete(pPoint);
        }
        else
        {
            pPoint->WaitForCompletion(CompletionEvent);
            Internal::RemoveHeadList(&InFlightSyncPointsHead);
            delete(pPoint);
            return;
        }
    }
}
// Generate a result between the minimum period and the maximum period based on the current
// local memory pressure. I.e. when memory pressure is low, objects will persist longer before
// being evicted.
// LocalMemoryState supplies the CurrentUsage and Budget used to compute the pressure.
// Returns MAXUINT64 (effectively "never trim") while usage/budget is at or below
// cTrimPercentageMemoryUsageThreshold; above it, returns a tick count that falls linearly from
// MaxEvictionGracePeriodTicks (just over the threshold) to MinEvictionGracePeriodTicks (at or over budget).
UINT64 GetCurrentEvictionGracePeriod(DXGI_QUERY_VIDEO_MEMORY_INFO* LocalMemoryState)
{
    // 1 == full pressure, 0 == no pressure
    // A zero budget (GetCurrentBudget produces one when LocalMemoryBudgetLimit is 0) is treated as
    // full pressure explicitly rather than relying on the outcome of a floating-point divide by zero.
    double Pressure = 1.0;
    if (LocalMemoryState->Budget != 0)
    {
        Pressure = (double(LocalMemoryState->CurrentUsage) / double(LocalMemoryState->Budget));
        Pressure = RESIDENCY_MIN(Pressure, 1.0);
    }
    if (Pressure > cTrimPercentageMemoryUsageThreshold)
    {
        // Re-map the pressure from the range [cTrimPercentageMemoryUsageThreshold, 1] onto [0, 1]
        Pressure = (Pressure - cTrimPercentageMemoryUsageThreshold) / (1.0 - cTrimPercentageMemoryUsageThreshold);
        // Linearly interpolate between the min period and the max period based on the pressure
        return UINT64((MaxEvictionGracePeriodTicks - MinEvictionGracePeriodTicks) * (1.0 - Pressure)) + MinEvictionGracePeriodTicks;
    }
    else
    {
        // Essentially don't trim at all
        return MAXUINT64;
    }
}
// Head of the intrusive list of Internal::Fence objects (linked through Fence::ListEntry) that
// EnqueueSyncPoint walks to snapshot each queue's fence.
LIST_ENTRY QueueFencesListHead;
// Passed to CreateSyncPoint when a sync point is created; presumably the number of entries in
// QueueFencesListHead - confirm where it is incremented.
UINT32 NumQueuesSeen;
// Fence handed to EnqueueMakeResident when Device3 is available, and signalled directly otherwise.
Internal::Fence AsyncThreadFence;
// Head of the list of in-flight Internal::DeviceWideSyncPoint objects: appended by EnqueueSyncPoint,
// popped by DequeueCompletedSyncPoints and WaitForSyncPoint.
LIST_ENTRY InFlightSyncPointsHead;
// Generation value given to the next sync point created by EnqueueSyncPoint.
UINT64 CurrentSyncPointGeneration;
// Event passed to DeviceWideSyncPoint::WaitForCompletion in WaitForSyncPoint.
HANDLE CompletionEvent;
// Event EnqueueAsyncWork waits on while the work queue is at MaxSoftwareQueueLatency.
HANDLE AsyncThreadWorkCompletionEvent;
// Device used for Evict and for MakeResident when Device3 is not available.
ID3D12Device* Device;
// When non-null, EnqueueMakeResident on this interface is used instead of Device->MakeResident.
ID3D12Device3* Device3;
#ifdef __ID3D12DeviceDownlevel_INTERFACE_DEFINED__
// Fallback source for QueryVideoMemoryInfo when Adapter is null.
ID3D12DeviceDownlevel* DeviceDownlevel;
#endif
// NOTE: This is an index not a mask. The majority of D3D12 uses bit masks to identify a GPU node whereas DXGI uses 0 based indices.
UINT NodeIndex;
// Preferred source for QueryVideoMemoryInfo in GetCurrentBudget.
IDXGIAdapter3* Adapter;
// Tracks managed objects for make-resident / trim decisions.
Internal::LRUCache LRU;
// NOTE(review): the two critical sections below are not used in the visible part of this class;
// see their use sites for what each one guards.
Internal::CriticalSection Mutex;
Internal::CriticalSection ExecutionCS;
const bool cStartEvicted;
// Lower bound of the eviction grace period; the *Ticks value is the one used by
// GetCurrentEvictionGracePeriod (presumably the float converted to timer ticks - confirm in Initialize).
const float cMinEvictionGracePeriod;
UINT64 MinEvictionGracePeriodTicks;
// Upper bound of the eviction grace period; same float/ticks pairing as the minimum above.
const float cMaxEvictionGracePeriod;
UINT64 MaxEvictionGracePeriodTicks;
// When the app is using more than this % of its budgeted local VidMem trimming will occur
// (valid between 0.0 - 1.0)
const float cTrimPercentageMemoryUsageThreshold;
// Maximum number of queued-but-unconsumed async workloads before EnqueueAsyncWork blocks.
UINT32 MaxSoftwareQueueLatency;
LUID ResidencyManagerUniqueID;
// NOTE(review): presumably the SyncManager pointer supplied by the owning ResidencyManager - confirm in the constructor.
SyncManager* pSyncManager;
};
}
// Public entry point of the residency library. Owns the internal manager together with the
// SyncManager it shares with the residency sets it creates, and forwards every call to the
// internal manager.
class ResidencyManager
{
public:
    // Gives the internal manager the address of this object's SyncManager member. Manager is declared
    // before SyncManager and is therefore constructed first, so only the address (not the object) is
    // usable at this point.
    ResidencyManager()
        : Manager(&SyncManager)
    {
    }

    // NOTE: DeviceNodeIndex is an index not a mask. The majority of D3D12 uses bit masks to identify a GPU node whereas DXGI uses 0 based indices.
    FORCEINLINE HRESULT Initialize(ID3D12Device* ParentDevice, UINT DeviceNodeIndex, IDXGIAdapter* ParentAdapter, UINT32 MaxLatency)
    {
        return Manager.Initialize(ParentDevice, DeviceNodeIndex, ParentAdapter, MaxLatency);
    }

    // Forwards to the internal manager's Destroy.
    FORCEINLINE void Destroy()
    {
        Manager.Destroy();
    }

    // Registers pObject with the internal manager.
    FORCEINLINE void BeginTrackingObject(ManagedObject* pObject)
    {
        Manager.BeginTrackingObject(pObject);
    }

    // Unregisters pObject from the internal manager.
    FORCEINLINE void EndTrackingObject(ManagedObject* pObject)
    {
        Manager.EndTrackingObject(pObject);
    }

    // Forwards to the internal manager; the result is written to *pCurrentGPUSyncPoint.
    HRESULT GetCurrentGPUSyncPoint(ID3D12CommandQueue* Queue, UINT64 *pCurrentGPUSyncPoint)
    {
        return Manager.GetCurrentGPUSyncPoint(Queue, pCurrentGPUSyncPoint);
    }

    // One residency set per command-list
    FORCEINLINE HRESULT ExecuteCommandLists(ID3D12CommandQueue* Queue, ID3D12CommandList** CommandLists, ResidencySet** ResidencySets, UINT32 Count)
    {
        return Manager.ExecuteCommandLists(Queue, CommandLists, ResidencySets, Count);
    }

    // BEGIN EPIC MOD
    // Hands MasterSet over to the internal manager and clears the caller's pointer afterwards,
    // regardless of the returned HRESULT.
    HRESULT MakeResident(ID3D12CommandQueue* Queue, ResidencySet*&& MasterSet)
    {
        const HRESULT Result = Manager.MakeResident(Queue, MasterSet);
        // Ownership is taken over by the manager, which will destroy the set later
        MasterSet = nullptr;
        return Result;
    }

    // Forwards to the internal manager's SignalFence for the given queue.
    HRESULT SignalFence(ID3D12CommandQueue* Queue)
    {
        return Manager.SignalFence(Queue);
    }
    // END EPIC MOD

    // Allocates a residency set and initializes it against this manager's SyncManager.
    // Returns nullptr if the allocation yields no object. Release with DestroyResidencySet.
    FORCEINLINE ResidencySet* CreateResidencySet()
    {
        ResidencySet* pNewSet = new ResidencySet();
        if (pNewSet == nullptr)
        {
            return nullptr;
        }
        pNewSet->Initialize(&SyncManager);
        return pNewSet;
    }

    // Frees a residency set obtained from CreateResidencySet.
    FORCEINLINE void DestroyResidencySet(ResidencySet* pSet)
    {
        delete(pSet);
    }

    // BEGIN EPIC MOD
    // Stores the cap that the internal manager applies to the local memory budget.
    void SetLocalMemoryBudgetLimit(UINT64 InLocalMemoryBudgetLimit)
    {
        Manager.LocalMemoryBudgetLimit = InLocalMemoryBudgetLimit;
    }
    // END EPIC MOD

private:
    // Declaration order matters: Manager is constructed before SyncManager (see the constructor).
    Internal::ResidencyManagerInternal Manager;
    Internal::SyncManager SyncManager;
};
};