// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	VirtualShadowMapPhysicalPageManagement.usf:
=============================================================================*/

#include "../Common.ush"
#include "../WaveOpUtil.ush"
#include "../ReductionCommon.ush"
#include "../GPUMessaging.ush"
#include "../ShaderPrint.ush"
#include "/Engine/Shared/VirtualShadowMapDefinitions.h"
#include "VirtualShadowMapProjectionStructs.ush"
#include "VirtualShadowMapProjectionCommon.ush"
#include "VirtualShadowMapPageAccessCommon.ush"
#include "VirtualShadowMapStats.ush"
#include "VirtualShadowMapPerPageDispatch.ush"

#ifndef HAS_CACHE_DATA
#define HAS_CACHE_DATA 1
#endif //HAS_CACHE_DATA

// Page flags generated by page allocation to indicate state to rendering passes (i.e., present / invalid)
Texture2D<uint> PageRequestFlags;
RWTexture2D<uint> OutPageFlags;
RWTexture2D<uint> OutPageTable;
RWStructuredBuffer<FPhysicalPageMetaData> OutPhysicalPageMetaData;

// A series of lists used to track various page states (free, used)
// Each list is MaxPhysicalPages entries followed by one uint counter
RWStructuredBuffer<int> OutPhysicalPageLists;

// Stores available pages (i.e. ones not used this frame) for allocation, in LRU order
#define PHYSICAL_PAGE_LIST_LRU 0
// Packed available list
// Pages invalidated this frame will be added to the end. Allocations come from the end.
#define PHYSICAL_PAGE_LIST_AVAILABLE 1
// Stores invalidated/empty pages temporarily before they are re-added to the AVAILABLE list
#define PHYSICAL_PAGE_LIST_EMPTY 2
// Stores pages requested/used this frame, not available for allocation
#define PHYSICAL_PAGE_LIST_REQUESTED 3
// Number of page lists
#define PHYSICAL_PAGE_LIST_COUNT 4
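
// Layout sketch (values illustrative): with MaxPhysicalPages == 4096, each list occupies
// 4097 ints, so list N starts at N * 4097 and its counter lives in the final slot at
// N * 4097 + 4096. The EMPTY list (2), for example, spans [8194, 12290] with its counter at 12290.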

int GetPhysicalPageListStart(int PageList)
{
	return PageList * (VirtualShadowMap.MaxPhysicalPages + 1);
}

int GetPhysicalPageListItem(uint PageList, int Index)
{
	return OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + Index];
}

void SetPhysicalPageListItem(uint PageList, int Index, int Value)
{
	//check(Index < VirtualShadowMap.MaxPhysicalPages);
	OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + Index] = Value;
}

int GetPhysicalPageListCount(int PageList)
{
	return OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + VirtualShadowMap.MaxPhysicalPages];
}

void SetPhysicalPageListCount(int PageList, int NewCount)
{
	OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + VirtualShadowMap.MaxPhysicalPages] = NewCount;
}

bool PushPhysicalPageList(uint PageList, int PhysicalPageIndex)
{
	uint PageListStart = GetPhysicalPageListStart(PageList);
	// NOTE: Counter is the final element of the list
	int Offset = 0;
	WaveInterlockedAddScalar_(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], 1, Offset);
	// We have to guard against overflow as it would overwrite the counter and potentially spill into other lists
	if (Offset < VirtualShadowMap.MaxPhysicalPages)
	{
		OutPhysicalPageLists[PageListStart + Offset] = PhysicalPageIndex;
		return true;
	}
	else
	{
		return false;
	}
}

// Returns <0 if none available, otherwise returns the actual value
int PopPhysicalPageList(uint PageList)
{
	uint PageListStart = GetPhysicalPageListStart(PageList);
	int Offset = 0;

#if 1
	WaveInterlockedAddScalar_(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], -1, Offset);
#else
	// Need negative numbers here...
	InterlockedAdd(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], -1, Offset);
#endif

	// We want the value *after* decrement in this case
	--Offset;
	return Offset < 0 ? INDEX_NONE : OutPhysicalPageLists[PageListStart + Offset];
}
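
// Note on concurrent pops: the counter is decremented unconditionally, so when more threads
// pop than there are items it can go negative; every such thread sees Offset < 0 and returns
// INDEX_NONE. E.g., a count of 1 hit by three concurrent pops yields post-decrement offsets
// 0, -1 and -2: one thread gets the item, the other two get INDEX_NONE.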

StructuredBuffer<int> PrevPhysicalPageLists;

RWStructuredBuffer<uint4> OutUncachedPageRectBounds;
RWStructuredBuffer<uint4> OutAllocatedPageRectBounds;
uint NumPageRectsToClear;

// This is admittedly a weird fusion of several initializations, but it is the first thing
// we run in a given analysis phase, so it's more efficient to do it all here rather than
// have several small passes later.
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void InitPageRectBounds(uint3 Index : SV_DispatchThreadID)
{
	// The X thread id maps to the range NumPageRectsToClear, which is (GetNumFullShadowMaps() + GetNumSinglePageShadowMaps()) * FVirtualShadowMap::MaxMipLevels;
	// this avoids clearing unused slots.
	if (Index.x < NumPageRectsToClear)
	{
		uint RectOffset = Index.x;
		// The full shadow maps are offset to a distant part of the ID range
		if (Index.x >= VirtualShadowMap.NumSinglePageShadowMaps * VSM_MAX_MIP_LEVELS)
		{
			RectOffset += VSM_MAX_SINGLE_PAGE_SHADOW_MAPS * VSM_MAX_MIP_LEVELS - VirtualShadowMap.NumSinglePageShadowMaps * VSM_MAX_MIP_LEVELS;
		}
		uint4 Empty = uint4(VSM_LEVEL0_DIM_PAGES_XY, VSM_LEVEL0_DIM_PAGES_XY, 0, 0);
		OutUncachedPageRectBounds[RectOffset] = Empty;
		OutAllocatedPageRectBounds[RectOffset] = Empty;
	}

	// Clear the various list counters
	if (Index.x == 0)
	{
		// These lists are going to start "full" before packing
		SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_LRU, VirtualShadowMap.MaxPhysicalPages);
		// These start empty and are added to as elements are removed from the LRU one
		SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE, 0);
		SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_EMPTY, 0);
		SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_REQUESTED, 0);
	}
}

// Mapping of previous frame/update data to current frame
StructuredBuffer<FNextVirtualShadowMapData> NextVirtualShadowMapData;
uint NextVirtualShadowMapDataCount;

// To propagate any invalidation flags to the physical page flags
Texture2D<uint> PrevPageRequestFlags;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdatePhysicalPageAddresses(uint3 Index : SV_DispatchThreadID)
{
	// TODO: Make this a loose constant to remove this shader's dependency on the VSM UB.
	// Still needs the VSM defines for IsVirtualShadowMapPageAddressValid addressing math though!
	if (Index.x >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}

	// Use identity mapping by default
	int PhysicalPageIndex = Index.x;
	checkStructuredBufferAccessSlow(OutPhysicalPageMetaData, PhysicalPageIndex);

	FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];

	FVSMPageOffset PrevGlobalPageOffset = FVSMPageOffset::Unpack(0u);
	bool bKeepPage = false;
	if (PrevMetaData.Flags != 0)
	{
		// Update the virtual shadow map ID to the equivalent one this frame, if present.
		// NOTE: We need a range check as we only add elements to this mapping if they exist this frame.
		FVirtualShadowMapHandle PrevVirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(PrevMetaData.VirtualShadowMapId);
		if (PrevVirtualShadowMapHandle.IsValid() && PrevVirtualShadowMapHandle.GetDataIndex() < NextVirtualShadowMapDataCount)
		{
			PrevGlobalPageOffset = CalcPageOffset(PrevVirtualShadowMapHandle, PrevMetaData.MipLevel, PrevMetaData.PageAddress);

			FNextVirtualShadowMapData NextData = NextVirtualShadowMapData[PrevVirtualShadowMapHandle.GetDataIndex()];
			FVirtualShadowMapHandle VirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(NextData.NextVirtualShadowMapId);

			// Check if it maps to a valid virtual shadow map this frame
			if (VirtualShadowMapHandle.IsValid())
			{
				// Clipmap panning; zeroed otherwise so safe
				int2 TestPageAddress = int2(PrevMetaData.PageAddress) + NextData.PageAddressOffset;
				if (IsVirtualShadowMapPageAddressValid(TestPageAddress, PrevMetaData.MipLevel))
				{
					// Valid physical page in the cache!
					// It may still be invalidated by flags or overwritten by new requests this frame, but for now we will maintain it.
					OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapHandle.Id;
					OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = uint2(TestPageAddress);
					// No changes to other fields
					bKeepPage = true;
				}
			}
		}
	}

	if (bKeepPage)
	{
#if HAS_CACHE_DATA
		// Propagate any invalidation flags from the previous page requests to the physical page
		const uint PrevFlags = PrevPageRequestFlags[PrevGlobalPageOffset.GetResourceAddress()];
		const uint InvalidationFlags = PrevFlags & VSM_EXTENDED_FLAG_ANY_INVALIDATED;
		if (InvalidationFlags != 0)
		{
			// Add them to any previous flags
			OutPhysicalPageMetaData[PhysicalPageIndex].Flags = PrevMetaData.Flags | InvalidationFlags;
		}
#endif
	}
	else
	{
		// Only need to zero out flags for it to be considered invalid
		OutPhysicalPageMetaData[PhysicalPageIndex].Flags = 0;
	}
}
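
// Remap sketch: NextVirtualShadowMapData acts as an old-ID -> new-ID table. If, say, VSM 7
// from the previous frame became VSM 3 this frame with a clipmap pan of (+1, 0), a cached
// physical page holding (Id=7, PageAddress=(4,5)) is rewritten to (Id=3, PageAddress=(5,5)),
// provided the panned address is still valid for its mip level. (IDs and offsets here are
// illustrative only.)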

int bDynamicPageInvalidation;
int bAllocateViaLRU;
int MaxPageAgeSinceLastRequest;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdatePhysicalPages(uint3 Index : SV_DispatchThreadID)
{
	// Because of launch size rounding we might get here.
	if (Index.x >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}

	// This is the index in the PhysicalPageList
	const uint PhysicalPageListIndex = Index.x;
	bool bRemovedPageFromList = false;

	// Use identity mapping by default
	int PhysicalPageIndex = PhysicalPageListIndex;

#if HAS_CACHE_DATA
	if (bAllocateViaLRU)
	{
		// If available, use last frame's LRU ordering as the input here so we can maintain that order.
		// NOTE: These end up sorted into the PHYSICAL_PAGE_LIST_REQUESTED list at the end of the frame.
		// LastFrameLRUList[PhysicalPageListIndex]
		const int PrevPageListStart = GetPhysicalPageListStart(PHYSICAL_PAGE_LIST_REQUESTED);
		PhysicalPageIndex = PrevPhysicalPageLists[PrevPageListStart + PhysicalPageListIndex];
		checkSlow(PhysicalPageIndex >= INDEX_NONE);
		checkSlow(PhysicalPageIndex < VirtualShadowMap.MaxPhysicalPages);
	}
#endif

	checkStructuredBufferAccessSlow(OutPhysicalPageMetaData, PhysicalPageIndex);

	// 1:1 read modify write is safe
	uint NextPhysicalFlags = 0;

#if HAS_CACHE_DATA
	{
		FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
		uint MipLevel = PrevMetaData.MipLevel;

		if (PrevMetaData.Flags != 0)
		{
			// Convenience
			const FVirtualShadowMapHandle VirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(PrevMetaData.VirtualShadowMapId);
			const uint2 PageAddress = PrevMetaData.PageAddress;

			// Look up the request flags for this frame to see if this page was requested again
			const FVSMPageOffset GlobalPageOffset = CalcPageOffset(VirtualShadowMapHandle, MipLevel, PageAddress);
			const uint RequestFlags = PageRequestFlags[GlobalPageOffset.GetResourceAddress()];
			const bool bRequestedThisFrame = RequestFlags != 0;

			const int PhysicalPageRequestedAge = int(VirtualShadowMap.SceneFrameNumber - PrevMetaData.LastRequestedSceneFrameNumber);

			// If the light is unreferenced we also allow its pages to live (unless reallocated) regardless of age for now,
			// since we won't be rendering into them, so they don't do a lot of harm being present.
			// TODO: Revisit this... probably make it just age-based now
			const FVirtualShadowMapProjectionShaderData Projection = GetVirtualShadowMapProjectionData(VirtualShadowMapHandle);
			if (bRequestedThisFrame || Projection.bUnreferenced || PhysicalPageRequestedAge <= MaxPageAgeSinceLastRequest)
			{
				const uint PrevPhysicalFlags = PrevMetaData.Flags;

				// Update the mapping data for any valid cached pages so we don't lose it
				OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapHandle.Id;
				OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = PageAddress;

				if (!bRequestedThisFrame || Projection.bUnreferenced)
				{
					// If the page is unreferenced (i.e. we are not going to render to it this frame) we want to leave the physical
					// metadata alone, *specifically* the invalidation flags. Since an unreferenced page will not get
					// rendered to this frame, we can't clear these flags and instead want to maintain them until a potential
					// future frame when this page might be referenced again.
					// Tag the page so we can skip it in rendering-related tasks like clearing and merging.
					NextPhysicalFlags = PrevPhysicalFlags | VSM_EXTENDED_FLAG_UNREFERENCED;

					// NOTE: This should be unused during this render, but may be used by invalidation between frames/renders.
					// We only want to set ALLOCATED so that it gets picked up by invalidation, but not by
					// any rendering this frame. Any invalidation flags already on the physical page remain there,
					// so this is just for new ones generated this frame to ensure we invalidate any cached-but-currently-unused
					// pages still in the pool.
					OutPageFlags[GlobalPageOffset.GetResourceAddress()] = VSM_FLAG_ALLOCATED;
				}
				else
				{
					uint NextPageFlags = VSM_FLAG_ALLOCATED;

					// Distant lights ignore invalidations as they are round-robin invalidated
					if (bDynamicPageInvalidation && !VirtualShadowMapHandle.IsSinglePage())
					{
						if ((PrevPhysicalFlags & VSM_EXTENDED_FLAG_ANY_INVALIDATED) != 0)
						{
							if ((PrevPhysicalFlags & VSM_EXTENDED_FLAG_INVALIDATE_STATIC) == 0)
							{
								// ONLY dynamic is invalidated, static can remain cached
								NextPageFlags |= VSM_FLAG_DYNAMIC_UNCACHED;
							}
							else
							{
								// Invalidate both
								NextPageFlags |= VSM_FLAG_ANY_UNCACHED;
							}
						}

						// Always invalidate dynamic when using receiver mask, as the page may be incomplete
						if (Projection.bUseReceiverMask)
						{
							NextPageFlags |= VSM_FLAG_DYNAMIC_UNCACHED;
						}
					}

					uint PhysicalPageDetailGeometryFlag = (PrevPhysicalFlags & VSM_FLAG_DETAIL_GEOMETRY);
					if (bRequestedThisFrame)
					{
						StatsBufferInterlockedInc(VSM_STAT_REQUESTED_THIS_FRAME_PAGES);
						// Remove from LRU list and add to requested list
						PushPhysicalPageList(PHYSICAL_PAGE_LIST_REQUESTED, PhysicalPageIndex);
						OutPhysicalPageMetaData[PhysicalPageIndex].LastRequestedSceneFrameNumber = VirtualShadowMap.SceneFrameNumber;
						bRemovedPageFromList = true;

						// If the detail geometry flag doesn't match the cached page we treat it as a full invalidation.
						// TODO: This could potentially be a problem for interleaved multiview rendering;
						// if the flag differs in the two views it will cause cache thrashing.
						const uint RequestDetailGeometryFlag = (RequestFlags & VSM_FLAG_DETAIL_GEOMETRY);
						if (RequestDetailGeometryFlag != PhysicalPageDetailGeometryFlag)
						{
							NextPageFlags |= (VSM_FLAG_STATIC_UNCACHED | VSM_FLAG_DYNAMIC_UNCACHED);
							PhysicalPageDetailGeometryFlag = RequestDetailGeometryFlag;
						}

						// Only increment the stats for pages requested this render, otherwise it gets confusing
						if (NextPageFlags & VSM_FLAG_STATIC_UNCACHED)
						{
							StatsBufferInterlockedInc(VSM_STAT_STATIC_INVALIDATED_PAGES);
						}
						else
						{
							StatsBufferInterlockedInc(VSM_STAT_STATIC_CACHED_PAGES);
						}
						if (NextPageFlags & VSM_FLAG_DYNAMIC_UNCACHED)
						{
							StatsBufferInterlockedInc(VSM_STAT_DYNAMIC_INVALIDATED_PAGES);
						}
						else
						{
							StatsBufferInterlockedInc(VSM_STAT_DYNAMIC_CACHED_PAGES);
						}
					}
					NextPageFlags |= PhysicalPageDetailGeometryFlag;

					const uint PhysicalFlags = (Projection.bUnCached ? VSM_EXTENDED_FLAG_VIEW_UNCACHED : 0U);
					NextPhysicalFlags = NextPageFlags | PhysicalFlags;

					// If the page is going to be fully cached, but the VSM_EXTENDED_FLAG_FORCE_CACHED flag is on,
					// we want to do something special here. We want to consider this page for any rendering in case
					// WPO distance disable has changed (which we want to start causing invalidations), but not actually
					// invalidate or render anything into it. Currently we accomplish this by setting the DYNAMIC_UNCACHED
					// flag in the hierarchical page flags (WPO only ever gets rendered into dynamic cache -
					// see ShouldCacheInstanceAsStatic), but NOT on the physical page (which would indicate a real invalidation).
					// This can get significantly cleaned up if/when we free up an additional hierarchical page flag bit.
					// We do NOT want these details showing up in debug visualizations or cache stats though.
					bool bPageValidForRendering = (NextPageFlags & VSM_FLAG_ANY_UNCACHED) != 0;
					if (PrevPhysicalFlags & VSM_EXTENDED_FLAG_FORCE_CACHED)
					{
						// NOTE: WPO can only ever be dynamic cached
						NextPageFlags |= VSM_FLAG_DYNAMIC_UNCACHED;
						StatsBufferInterlockedInc(VSM_STAT_WPO_CONSIDERED_PAGES);
					}

					// Map the page to the physical page.
					// If we later allocate over top of this page (for one requested this frame), we will zero this out again. See AllocateNewPageMappings.
					OutPageTable[GlobalPageOffset.GetResourceAddress()] = ShadowEncodePageTable(VSMPhysicalIndexToPageAddress(PhysicalPageIndex), bPageValidForRendering);
					OutPageFlags[GlobalPageOffset.GetResourceAddress()] = NextPageFlags;
				} // Unreferenced
			}
		}
	}
#endif

	// If the page is invalidated/empty, remove it from the LRU list and add it to the empty list.
	// It will be re-added after packing to the end of the AVAILABLE list.
	if (NextPhysicalFlags == 0)
	{
		StatsBufferInterlockedInc(VSM_STAT_EMPTY_PAGES);
		PushPhysicalPageList(PHYSICAL_PAGE_LIST_EMPTY, PhysicalPageIndex);
		bRemovedPageFromList = true;
	}

	OutPhysicalPageMetaData[PhysicalPageIndex].Flags = NextPhysicalFlags;

	// Write out the LRU list while maintaining order, with anything we removed marked as INDEX_NONE
	SetPhysicalPageListItem(PHYSICAL_PAGE_LIST_LRU, PhysicalPageListIndex, bRemovedPageFromList ? INDEX_NONE : PhysicalPageIndex);
}
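
// Per-frame list flow (summary): every physical page starts the update in LRU; pages
// requested this frame move to REQUESTED, invalidated/empty pages move to EMPTY, and
// survivors stay in LRU with holes marked INDEX_NONE. PackAvailablePages then compacts LRU
// into AVAILABLE, AppendPhysicalPageLists appends EMPTY onto AVAILABLE, allocation pops
// from AVAILABLE, and finally AVAILABLE is appended onto REQUESTED to become next frame's
// LRU input.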

void AllocateNewPageMappings(FVirtualShadowMapHandle VirtualShadowMapHandle, FVSMPageOffset GlobalPageOffset, uint MipLevel, uint2 PageAddress)
{
	const uint RequestFlags = PageRequestFlags[GlobalPageOffset.GetResourceAddress()];
	if (RequestFlags != 0)
	{
		// See if we already hooked this up to a mapped page
		const uint PageFlags = (OutPageFlags[GlobalPageOffset.GetResourceAddress()] & VSM_PAGE_FLAGS_BITS_MASK);

		if (PageFlags == 0u)
		{
			StatsBufferInterlockedInc(VSM_STAT_REQUESTED_THIS_FRAME_PAGES);

			int PhysicalPageIndex = PopPhysicalPageList(PHYSICAL_PAGE_LIST_AVAILABLE);
			if (PhysicalPageIndex >= 0)
			{
				StatsBufferInterlockedInc(VSM_STAT_ALLOCATED_NEW);

				// Add back to the end of the requested list
				PushPhysicalPageList(PHYSICAL_PAGE_LIST_REQUESTED, PhysicalPageIndex);

				uint2 PhysicalPageAddress = VSMPhysicalIndexToPageAddress(PhysicalPageIndex);

				// FIRST, check if there's a valid page already mapped to this physical page.
				// If so, we must go back and clear out its page table entry before we reallocate this page.
				{
					FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
					if (PrevMetaData.Flags != 0)
					{
						FVSMPageOffset PrevGlobalPageOffset = CalcPageOffset(FVirtualShadowMapHandle::MakeFromId(PrevMetaData.VirtualShadowMapId), PrevMetaData.MipLevel, PrevMetaData.PageAddress);
						OutPageTable[PrevGlobalPageOffset.GetResourceAddress()] = 0;
						OutPageFlags[PrevGlobalPageOffset.GetResourceAddress()] = 0;
					}
				}

				uint RequestDetailGeometryFlag = RequestFlags & VSM_FLAG_DETAIL_GEOMETRY;
				uint Flags = VSM_FLAG_ALLOCATED | VSM_FLAG_DYNAMIC_UNCACHED | VSM_FLAG_STATIC_UNCACHED | RequestDetailGeometryFlag;

				// Mark this page as allocated and not cached (always valid for rendering)
				OutPageTable[GlobalPageOffset.GetResourceAddress()] = ShadowEncodePageTable(PhysicalPageAddress, true);
				OutPageFlags[GlobalPageOffset.GetResourceAddress()] = Flags;

				const FVirtualShadowMapProjectionShaderData Projection = GetVirtualShadowMapProjectionData(VirtualShadowMapHandle);
				const uint PhysicalFlags = (Projection.bUnCached ? VSM_EXTENDED_FLAG_VIEW_UNCACHED : 0U);

				OutPhysicalPageMetaData[PhysicalPageIndex].Flags = Flags | PhysicalFlags;
				OutPhysicalPageMetaData[PhysicalPageIndex].LastRequestedSceneFrameNumber = VirtualShadowMap.SceneFrameNumber;
				OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapHandle.Id;
				OutPhysicalPageMetaData[PhysicalPageIndex].MipLevel = MipLevel;
				OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = PageAddress;
			}
			else
			{
				// We end up here if we're out of physical pages; this means some parts get no physical backing.
				// Post this error condition back to the host somehow!
				// Probably want to know even if we're just getting close.
				//OutPageTable[GlobalPageOffset] = 0;
				//OutPageFlags[GlobalPageOffset] = 0;
			}
		}
	}
}

#ifdef ClearPageTableCS

RWTexture2D<uint> OutDestBuffer;
uint ClearValue;
uint SampleStride;

#define HAS_MIP_LEVELS (NUM_MIP_LEVELS > 1)

#if HAS_MIP_LEVELS
RWTexture2D<uint> OutDestBufferMips_0;
RWTexture2D<uint> OutDestBufferMips_1;
RWTexture2D<uint> OutDestBufferMips_2;
RWTexture2D<uint> OutDestBufferMips_3;
RWTexture2D<uint> OutDestBufferMips_4;
RWTexture2D<uint> OutDestBufferMips_5;
RWTexture2D<uint> OutDestBufferMips_6;

void ClearMip(RWTexture2D<uint> OutDestBufferMip, uint HMipLevel, FVirtualSMLevelOffset LevelOffset, uint2 PageCoord, uint SampleStrideLocal, uint MipLevel)
{
	uint LevelDim = CalcLevelDimsPages(MipLevel + HMipLevel) * SampleStrideLocal;
	uint2 HMipOffset = (LevelOffset.LevelTexelOffset * SampleStrideLocal) >> HMipLevel;
	if (all(PageCoord < LevelDim))
	{
		OutDestBufferMip[HMipOffset + PageCoord] = ClearValue;
	}
}

#endif

struct FClearPageTableWorker
{
	void Run(FPerPageDispatchSetup Setup)
	{
		for (uint MipLevel = Setup.MipLevelStart; MipLevel < Setup.MipLevelEnd; ++MipLevel)
		{
			FVirtualSMLevelOffset LevelOffset = CalcPageTableLevelOffset(Setup.VirtualShadowMapHandle, MipLevel);

			uint LoopEndXY = Setup.GetLoopEnd(MipLevel);
			for (uint PageY = Setup.LoopStart.y; PageY < LoopEndXY; PageY += Setup.LoopStride)
			{
				for (uint PageX = Setup.LoopStart.x; PageX < LoopEndXY; PageX += Setup.LoopStride)
				{
					const FVSMPageOffset PageOffset = CalcPageOffset(LevelOffset, MipLevel, uint2(PageX, PageY));
					BRANCH
					if (SampleStride == 2u)
					{
						OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(0u, 0u)] = ClearValue;
						OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(1u, 0u)] = ClearValue;
						OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(0u, 1u)] = ClearValue;
						OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(1u, 1u)] = ClearValue;
#if HAS_MIP_LEVELS

#define DO_MIP_LEVEL(_HLevel_) ClearMip(OutDestBufferMips_##_HLevel_, _HLevel_ + 1, LevelOffset, uint2(PageX, PageY), 2u, MipLevel)
						DO_MIP_LEVEL(0);
						DO_MIP_LEVEL(1);
						DO_MIP_LEVEL(2);
						DO_MIP_LEVEL(3);
						DO_MIP_LEVEL(4);
						DO_MIP_LEVEL(5);
						// Note: NUM_MIP_LEVELS includes the base level, which is not included in the UAV array
#if NUM_MIP_LEVELS > 7
						DO_MIP_LEVEL(6);
#endif
#undef DO_MIP_LEVEL

#endif
					}
					else
					{
						OutDestBuffer[PageOffset.GetResourceAddress()] = ClearValue;

#if HAS_MIP_LEVELS
#define DO_MIP_LEVEL(_HLevel_) ClearMip(OutDestBufferMips_##_HLevel_, _HLevel_ + 1, LevelOffset, uint2(PageX, PageY), 1u, MipLevel)
						DO_MIP_LEVEL(0);
						DO_MIP_LEVEL(1);
						DO_MIP_LEVEL(2);
						DO_MIP_LEVEL(3);
						DO_MIP_LEVEL(4);
						DO_MIP_LEVEL(5);
						// Note: NUM_MIP_LEVELS includes the base level, which is not included in the UAV array
#if NUM_MIP_LEVELS > 7
						DO_MIP_LEVEL(6);
#endif
#undef DO_MIP_LEVEL
#endif
					}
				}
			}
		}
	}
};

/**
 * Clears the page table (and any hierarchical mip levels) for every page of the dispatched shadow maps.
 */
[numthreads(PER_PAGE_THREAD_GROUP_SIZE_XY, PER_PAGE_THREAD_GROUP_SIZE_XY, 1)]
void ClearPageTableCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	FClearPageTableWorker ClearPageTableWorker;
	FPerPageDispatchSetup Setup;
	Setup.Execute(DispatchThreadId, ClearPageTableWorker);
}

#endif // ClearPageTableCS

#ifdef AllocateNewPageMappingsCS

struct FAllocateNewPageMappingsWorker
{
	void Run(FPerPageDispatchSetup Setup)
	{
		for (uint MipLevel = Setup.MipLevelStart; MipLevel < Setup.MipLevelEnd; ++MipLevel)
		{
			uint LoopEndXY = Setup.GetLoopEnd(MipLevel);
			for (uint PageY = Setup.LoopStart.y; PageY < LoopEndXY; PageY += Setup.LoopStride)
			{
				for (uint PageX = Setup.LoopStart.x; PageX < LoopEndXY; PageX += Setup.LoopStride)
				{
					const FVSMPageOffset PageOffset = CalcPageOffset(Setup.VirtualShadowMapHandle, MipLevel, uint2(PageX, PageY));
					AllocateNewPageMappings(Setup.VirtualShadowMapHandle, PageOffset, MipLevel, uint2(PageX, PageY));
				}
			}
		}
	}
};

/**
 * Allocates physical pages for any page table entries that were requested this frame but not yet mapped.
 */
[numthreads(PER_PAGE_THREAD_GROUP_SIZE_XY, PER_PAGE_THREAD_GROUP_SIZE_XY, 1)]
void AllocateNewPageMappingsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	FAllocateNewPageMappingsWorker AllocateNewPageMappingsWorker;
	FPerPageDispatchSetup Setup;
	Setup.Execute(DispatchThreadId, AllocateNewPageMappingsWorker);
}

#endif // AllocateNewPageMappingsCS

// NOTE: We only launch a single group here for now to avoid multi-pass, so we really want it as large as possible.
// Can optimize this later if needed for larger physical page counts.
#define NUM_THREADS_PER_GROUP 1024
#include "../ThreadGroupPrefixSum.ush"

[numthreads(NUM_THREADS_PER_GROUP, 1, 1)]
void PackAvailablePages(uint GroupIndex : SV_GroupIndex)
{
	int TotalCount = 0;

	// Must be a uniform loop
	for (int GroupStart = 0; GroupStart < VirtualShadowMap.MaxPhysicalPages; GroupStart += NUM_THREADS_PER_GROUP)
	{
		int ListIndex = GroupStart + GroupIndex;

		int PhysicalPageIndex = ListIndex < VirtualShadowMap.MaxPhysicalPages ?
			GetPhysicalPageListItem(PHYSICAL_PAGE_LIST_LRU, ListIndex) :
			INDEX_NONE;

		bool bListItemValid = PhysicalPageIndex != INDEX_NONE;

		int SumValue = bListItemValid ? 1 : 0;
		// NOTE: Cannot be under any divergent branching!
		int GroupCount = 0;
		int Offset = ThreadGroupPrefixSum(SumValue, GroupIndex, GroupCount);

		if (bListItemValid)
		{
			SetPhysicalPageListItem(PHYSICAL_PAGE_LIST_AVAILABLE, TotalCount + Offset, PhysicalPageIndex);
		}
		TotalCount += GroupCount;

		// This should already be accounted for internally by ThreadGroupPrefixSum, but putting one here
		// to be absolutely sure.
		GroupMemoryBarrierWithGroupSync();
	}

	// Set total number
	if (GroupIndex == 0)
	{
		SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE, TotalCount);
	}
}
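
// Compaction sketch: given an LRU window [5, INDEX_NONE, 9, INDEX_NONE, 2, ...], the
// per-thread values (1, 0, 1, 0, 1, ...) prefix-sum to offsets (0, -, 1, -, 2, ...), so the
// valid entries land contiguously as AVAILABLE = [5, 9, 2, ...], preserving the LRU order.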

#undef NUM_THREADS_PER_GROUP

uint bAppendEmptyToAvailable;
// If true, simply updates the counts instead of copying items.
// This should be run with the same parameters right after the copy pass, with a single group.
uint bUpdateCounts;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void AppendPhysicalPageLists(uint ThreadId : SV_DispatchThreadID)
{
	// We only need two variants currently, EMPTY->AVAILABLE and AVAILABLE->REQUESTED
	int InputList = bAppendEmptyToAvailable ? PHYSICAL_PAGE_LIST_EMPTY : PHYSICAL_PAGE_LIST_AVAILABLE;
	int OutputList = bAppendEmptyToAvailable ? PHYSICAL_PAGE_LIST_AVAILABLE : PHYSICAL_PAGE_LIST_REQUESTED;

	// NOTE: This needs to maintain order!
	// It also needs to be robust against physical page pool overflows, ensuring that we never "lose" any
	// items in the final LRU list for the next frame.
	int InputCount = GetPhysicalPageListCount(InputList);
	int OutputCount = GetPhysicalPageListCount(OutputList);
	int CopyCount = max(0, min(InputCount, int(VirtualShadowMap.MaxPhysicalPages) - OutputCount));

	if (bUpdateCounts)
	{
		// Update pass (after copy pass)
		if (ThreadId == 0)
		{
			int NewOutputCount = OutputCount + CopyCount;
			SetPhysicalPageListCount(OutputList, NewOutputCount);
			SetPhysicalPageListCount(InputList, 0);

			// The REQUESTED list needs to specifically end up with a single unique copy of each index as
			// this becomes the LRU list for the next update. If we were to lose any indices or list entries
			// then we would also (permanently) lose actual physical pages. Thus we assert that at least the
			// list must end up as the right size after the final append!
			if (!bAppendEmptyToAvailable)
			{
				/*
				PLATFORM_ASSERT4(
					NewOutputCount == VirtualShadowMap.MaxPhysicalPages,
					0xCECC,
					__LINE__,
					InputCount,
					OutputCount,
					NewOutputCount);
				*/
				checkSlow(NewOutputCount == VirtualShadowMap.MaxPhysicalPages);
			}
			else
			{
				/*
				// All pages should now be in PHYSICAL_PAGE_LIST_AVAILABLE or REQUESTED
				int AvailableCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE);
				int RequestedCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_REQUESTED);
				int EmptyCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_EMPTY);
				int TotalPages = AvailableCount + RequestedCount;

				PLATFORM_ASSERT4(
					TotalPages == VirtualShadowMap.MaxPhysicalPages,
					0xCECC,
					__LINE__,
					AvailableCount,
					RequestedCount,
					EmptyCount);
				*/
			}
		}
	}
	else
	{
		if (ThreadId < CopyCount)
		{
			int InputItem = GetPhysicalPageListItem(InputList, ThreadId);
			SetPhysicalPageListItem(OutputList, OutputCount + ThreadId, InputItem);
		}
	}
}
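
// Design note: the append runs as two dispatches with identical parameters - a wide copy
// pass (bUpdateCounts == 0) that moves up to CopyCount items, then a single-group pass
// (bUpdateCounts == 1) that updates the counters - because the copy threads all need the
// *original* output count as their write base; bumping the counter in the same pass would
// race with the copies.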

StructuredBuffer<FPhysicalPageMetaData> PhysicalPageMetaData;
RWTexture2DArray<uint> OutPhysicalPagePool;

// Helper function to merge static and dynamic depth.
void MergePhysicalPixel(uint2 PixelCoord)
{
	// 1:1 pixels so this is safe RMW
	OutPhysicalPagePool[uint3(PixelCoord, 0)] = max(
		OutPhysicalPagePool[uint3(PixelCoord, 0)],
		OutPhysicalPagePool[uint3(PixelCoord, GetVirtualShadowMapStaticArrayIndex())]);
}

// Log2 2D dimension of the thread group size: 2^4 == 16.
#define LOG2_TILE_THREAD_GROUP_SIZE_XY 4u
#define TILE_THREAD_GROUP_SIZE_XY (1u << LOG2_TILE_THREAD_GROUP_SIZE_XY)

// Each thread takes 2x2 samples to work with, so the tile size is 2x the thread group size
#define LOG2_TILE_SIZE_XY (LOG2_TILE_THREAD_GROUP_SIZE_XY + 1u)

#if VSM_LOG2_PAGE_SIZE < LOG2_TILE_SIZE_XY
#error "VSM_LOG2_PAGE_SIZE must be at least LOG2_TILE_SIZE_XY; either increase one or reduce the other"
#endif

// Log2 number of tiles (thread groups) in each dimension to cover the page
#define LOG2_TILES_PER_PAGE_XY ( VSM_LOG2_PAGE_SIZE - LOG2_TILE_SIZE_XY )
// Log2 1D tile count to cover the page (TILES_PER_PAGE_XY squared, i.e. 2 * LOG2_TILES_PER_PAGE_XY)
#define LOG2_TILES_PER_PAGE_1D ( 2U * LOG2_TILES_PER_PAGE_XY )
// 1D tile count to cover the page
#define TILES_PER_PAGE_1D ( 1U << LOG2_TILES_PER_PAGE_1D )

#define TILES_PER_PAGE_XY_MASK ( ( 1U << LOG2_TILES_PER_PAGE_XY ) - 1U )
#define TILES_PER_PAGE_1D_MASK ( ( 1U << LOG2_TILES_PER_PAGE_1D ) - 1U )
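
// Worked example (assuming VSM_LOG2_PAGE_SIZE == 7, i.e. 128x128 texel pages):
// LOG2_TILE_SIZE_XY == 5 (32x32 texel tiles), so LOG2_TILES_PER_PAGE_XY == 7 - 5 == 2,
// giving 4x4 tiles per page and TILES_PER_PAGE_1D == 16 thread groups per page.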

RWBuffer<uint> OutInitializePagesIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesToInitialize;

void EmitPageToProcess(RWBuffer<uint> OutIndirectArgsBuffer, RWStructuredBuffer<uint> OutSelectedPhysicalIndexBuffer, uint PhysicalPageIndex)
{
	int GroupCount = 0;
	// Each page needs TILES_PER_PAGE_1D groups launched
	WaveInterlockedAddScalar_(OutIndirectArgsBuffer[0], TILES_PER_PAGE_1D, GroupCount);
	OutSelectedPhysicalIndexBuffer[GroupCount >> LOG2_TILES_PER_PAGE_1D] = PhysicalPageIndex;
}
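
// Because every selected page adds exactly TILES_PER_PAGE_1D groups, the pre-add group count
// is always a multiple of TILES_PER_PAGE_1D, and (GroupCount >> LOG2_TILES_PER_PAGE_1D) is
// this page's slot in the selection buffer. Consumers invert the mapping the same way; see
// GetTileOffset, which uses (GroupIndex >> LOG2_TILES_PER_PAGE_1D).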

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesToInitializeCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
	if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}

	FPhysicalPageMetaData MetaData = PhysicalPageMetaData[PhysicalPageIndex];

	bool bUnreferenced = (MetaData.Flags & VSM_EXTENDED_FLAG_UNREFERENCED) != 0;
	bool bFullyCached = (MetaData.Flags & VSM_FLAG_ANY_UNCACHED) == 0;
	bool bStaticUncached = (MetaData.Flags & VSM_FLAG_STATIC_UNCACHED) != 0;
	bool bForceCached = (MetaData.Flags & VSM_EXTENDED_FLAG_FORCE_CACHED) != 0;

	if ((MetaData.Flags & VSM_FLAG_ALLOCATED) == 0)
	{
		// Page not used, we're done
	}
	else if (bUnreferenced || bFullyCached || bForceCached)
	{
		// Page fully cached or unreferenced. Leave the data alone.
	}
	else
	{
		// At least one of the pages is uncached.
		// NOTE: Dynamic cached/static uncached is currently an invalid state.
		// Since we merge the static stuff over the dynamic stuff after rendering, we can't
		// actually maintain separate dynamic cached pages when "only" the (theoretically)
		// static moved. Thus if not fully cached, we always regenerate the dynamic page.
		EmitPageToProcess(OutInitializePagesIndirectArgsBuffer, OutPhysicalPagesToInitialize, PhysicalPageIndex);
		StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_CLEAR);

		if (bStaticUncached &&
			(MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) == 0U)
		{
			// The static page is also uncached; emit a second work item, offset by MaxPhysicalPages to address the static slice
			EmitPageToProcess(OutInitializePagesIndirectArgsBuffer, OutPhysicalPagesToInitialize, PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages);
			StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_CLEAR);
		}
	}
}

uint3 GetTileOffset(uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer, inout FPhysicalPageMetaData OutMetaData)
{
	const uint PageInputIndex = GroupIndex >> LOG2_TILES_PER_PAGE_1D;
	uint PageIndex = PageIndexBuffer[PageInputIndex];
	int ArrayIndex = 0;

	if (PageIndex >= VirtualShadowMap.MaxPhysicalPages)
	{
		// Request to clear the static page
		PageIndex -= VirtualShadowMap.MaxPhysicalPages;
		ArrayIndex = 1;
	}

	OutMetaData = PhysicalPageMetaData[PageIndex];

	// Each page has TILES_PER_PAGE_1D groups (aka tiles) assigned to work on it.
	const uint LocalTileIndex = GroupIndex & TILES_PER_PAGE_1D_MASK;
	// Unwrap to a 2D tile coordinate
	const uint2 LocalTile = uint2(LocalTileIndex & TILES_PER_PAGE_XY_MASK, LocalTileIndex >> LOG2_TILES_PER_PAGE_XY);

	uint2 PhysPageAddress = VSMPhysicalIndexToPageAddress(PageIndex);
	// Pixel address of the tile region for this thread group.
	const uint2 TileOffset = (PhysPageAddress << uint2(VSM_LOG2_PAGE_SIZE, VSM_LOG2_PAGE_SIZE)) + (LocalTile << uint2(LOG2_TILE_SIZE_XY, LOG2_TILE_SIZE_XY));

	return uint3(TileOffset, ArrayIndex);
}

uint3 GetTileBasePos(uint2 TileThreadID, uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer, inout FPhysicalPageMetaData OutMetaData)
{
	// Pixel address of the tile region for this thread group.
	const uint3 TileOffset = GetTileOffset(GroupIndex, PageIndexBuffer, OutMetaData);
	// Pixel address of the 2x2 region to sample for this thread.
	const uint2 BasePos = TileOffset.xy + (TileThreadID.xy << 1u);

	return uint3(BasePos, TileOffset.z);
}

uint3 GetTileBasePos(uint2 TileThreadID, uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer)
{
	FPhysicalPageMetaData TmpMetaData;
	return GetTileBasePos(TileThreadID, GroupIndex, PageIndexBuffer, TmpMetaData);
}

StructuredBuffer<uint> PhysicalPagesToInitialize;

[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void InitializePhysicalPagesIndirectCS(uint2 TileThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
	FPhysicalPageMetaData MetaData;
	uint3 BasePos = GetTileBasePos(TileThreadID, GroupIndex, PhysicalPagesToInitialize, MetaData);
	bool bStaticCached = (MetaData.Flags & VSM_FLAG_STATIC_UNCACHED) == 0U;

	if (bStaticCached && (MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) == 0U)
	{
		// Initialize from the static page data
		checkSlow(BasePos.z == 0U);
		OutPhysicalPagePool[BasePos + uint3(0U, 0U, 0U)] = OutPhysicalPagePool[BasePos + uint3(0U, 0U, 1U)];
		OutPhysicalPagePool[BasePos + uint3(1U, 0U, 0U)] = OutPhysicalPagePool[BasePos + uint3(1U, 0U, 1U)];
		OutPhysicalPagePool[BasePos + uint3(0U, 1U, 0U)] = OutPhysicalPagePool[BasePos + uint3(0U, 1U, 1U)];
		OutPhysicalPagePool[BasePos + uint3(1U, 1U, 0U)] = OutPhysicalPagePool[BasePos + uint3(1U, 1U, 1U)];
	}
	else
	{
		// Clear the page to zero
		OutPhysicalPagePool[BasePos + uint3(0U, 0U, 0U)] = 0U;
		OutPhysicalPagePool[BasePos + uint3(1U, 0U, 0U)] = 0U;
		OutPhysicalPagePool[BasePos + uint3(0U, 1U, 0U)] = 0U;
		OutPhysicalPagePool[BasePos + uint3(1U, 1U, 0U)] = 0U;
	}
}

RWBuffer<uint> OutMergePagesIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesToMerge;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesToMergeCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
	if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}

	FPhysicalPageMetaData MetaData = PhysicalPageMetaData[PhysicalPageIndex];

	// An uncached view always renders exclusively into the dynamic pages, and thus requires no merging.
	if ((MetaData.Flags & VSM_FLAG_ALLOCATED) != 0U &&
		(MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) == 0U &&
		(MetaData.Flags & VSM_EXTENDED_FLAG_DIRTY) != 0U &&
		(MetaData.Flags & VSM_EXTENDED_FLAG_UNREFERENCED) == 0U)
	{
		StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_MERGE);
		EmitPageToProcess(OutMergePagesIndirectArgsBuffer, OutPhysicalPagesToMerge, PhysicalPageIndex);
	}
}

StructuredBuffer<uint> PhysicalPagesToMerge;

[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void MergeStaticPhysicalPagesIndirectCS(uint2 TileThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
	uint2 BasePos = GetTileBasePos(TileThreadID, GroupIndex, PhysicalPagesToMerge).xy;

	// 1:1 pixels so this is safe RMW
	MergePhysicalPixel(BasePos + uint2(0U, 0U));
	MergePhysicalPixel(BasePos + uint2(1U, 0U));
	MergePhysicalPixel(BasePos + uint2(0U, 1U));
	MergePhysicalPixel(BasePos + uint2(1U, 1U));
}

// Indirect HZB building:
RWStructuredBuffer<uint> DirtyPageFlagsInOut;

// Returns updated physical page flags
uint UpdateAndClearDirtyFlags(uint PhysicalPageIndex)
{
	bool bPageDirty = DirtyPageFlagsInOut[PhysicalPageIndex] != 0U;
	bool bInvalidatesDynamic = DirtyPageFlagsInOut[PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages] != 0U;
	bool bInvalidatesStatic = DirtyPageFlagsInOut[PhysicalPageIndex + 2U * VirtualShadowMap.MaxPhysicalPages] != 0U;
	bool bWPOAllowed = DirtyPageFlagsInOut[PhysicalPageIndex + 3U * VirtualShadowMap.MaxPhysicalPages] != 0U;

	// Clear the dirty/invalidation flags
	DirtyPageFlagsInOut[PhysicalPageIndex] = 0U;
	DirtyPageFlagsInOut[PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages] = 0U;
	DirtyPageFlagsInOut[PhysicalPageIndex + 2U * VirtualShadowMap.MaxPhysicalPages] = 0U;
	DirtyPageFlagsInOut[PhysicalPageIndex + 3U * VirtualShadowMap.MaxPhysicalPages] = 0U;

	uint Flags = OutPhysicalPageMetaData[PhysicalPageIndex].Flags;
	if (Flags != 0)
	{
		Flags |=
			(bPageDirty ? VSM_EXTENDED_FLAG_DIRTY : 0U) |
			(bInvalidatesStatic ? VSM_EXTENDED_FLAG_INVALIDATE_STATIC : 0U) |
			(bInvalidatesDynamic ? VSM_EXTENDED_FLAG_INVALIDATE_DYNAMIC : 0U) |
			(bWPOAllowed ? VSM_EXTENDED_FLAG_FORCE_CACHED : 0U);

		// Update the metadata on the page
		OutPhysicalPageMetaData[PhysicalPageIndex].Flags = Flags;
	}

	return Flags;
}
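
// DirtyPageFlagsInOut layout: four MaxPhysicalPages-sized planes packed into one buffer -
// plane 0 = dirty, plane 1 = dynamic invalidation, plane 2 = static invalidation,
// plane 3 = WPO allowed - so the flag in plane N for page P lives at [P + N * MaxPhysicalPages].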

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdateAndClearDirtyFlagsCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
	if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}
	UpdateAndClearDirtyFlags(PhysicalPageIndex);
}

RWBuffer<uint> OutPagesForHZBIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesForHZB;
uint bFirstBuildThisFrame;
uint bForceFullHZBUpdate;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesForHZBAndUpdateDirtyFlagsCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
	if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}

	uint PhysicalPageFlags = UpdateAndClearDirtyFlags(PhysicalPageIndex);

	if ((PhysicalPageFlags & VSM_FLAG_ALLOCATED) != 0)
	{
		bool bRebuildHZB = false;

		const bool bHasSeparateDynamicHZB = VirtualShadowMap.StaticHZBArrayIndex != 0;

		const bool bPageDirty = (PhysicalPageFlags & VSM_EXTENDED_FLAG_DIRTY) != 0u;
		const bool bStaticUncached = (PhysicalPageFlags & VSM_FLAG_STATIC_UNCACHED) != 0u;
		const bool bUnreferenced = (PhysicalPageFlags & VSM_EXTENDED_FLAG_UNREFERENCED) != 0u;
		const bool bNeedDynamicBuild = bHasSeparateDynamicHZB
			&& (PhysicalPageFlags & VSM_FLAG_DYNAMIC_UNCACHED) != 0u;

		// Skip it if it's not referenced; this is usually because we have already done
		// the HZB rebuild but haven't yet cleared these flags, which happens the next time
		// a page gets rendered.
		// TODO: We should clear the relevant page flags immediately after doing HZB generation
		bRebuildHZB = (bPageDirty || bStaticUncached || bNeedDynamicBuild) && !bUnreferenced;

		if (bForceFullHZBUpdate || bRebuildHZB)
		{
			StatsBufferInterlockedInc(VSM_STAT_NUM_HZB_PAGES_BUILT);

			int GroupCount = 0;
			// Each page needs TILES_PER_PAGE_1D groups launched
			WaveInterlockedAddScalar_(OutPagesForHZBIndirectArgsBuffer[0], TILES_PER_PAGE_1D, GroupCount);
			OutPhysicalPagesForHZB[GroupCount >> LOG2_TILES_PER_PAGE_1D] = PhysicalPageIndex;

			// Each top-reduction needs only one group launched
			WaveInterlockedAddScalar_(OutPagesForHZBIndirectArgsBuffer[0 + 4], 1U, GroupCount);
		}
	}
}

SamplerState PhysicalPagePoolSampler;
Texture2DArray<uint> PhysicalPagePool;

float4 Gather4VisZ(uint2 PixelCoord, uint ArrayIndex)
{
#if COMPILER_SUPPORTS_GATHER_UINT
	// Offset to the 2x2 footprint center and scale to UV space
	float2 UV = float2(PixelCoord + uint2(1U, 1U)) * VirtualShadowMap.RecPhysicalPoolSize.xy;
	return asfloat(PhysicalPagePool.Gather(PhysicalPagePoolSampler, float3(UV, ArrayIndex), 0));
#else
	uint4 PixelRect = uint4(PixelCoord.xy, PixelCoord.xy + uint2(1U, 1U));
	uint4 UintDepths = uint4(
		PhysicalPagePool[uint3(PixelRect.xw, ArrayIndex)].r, // (-, +)
		PhysicalPagePool[uint3(PixelRect.zw, ArrayIndex)].r, // (+, +)
		PhysicalPagePool[uint3(PixelRect.zy, ArrayIndex)].r, // (+, -)
		PhysicalPagePool[uint3(PixelRect.xy, ArrayIndex)].r  // (-, -)
	);
	return asfloat(UintDepths);
#endif
}

StructuredBuffer<uint> PhysicalPagesForHzb;

//                                                     out     input                         output
RWTexture2DArray<float> FurthestHZBArrayOutput_0;  //  64  //  1 Group: 32 (16 threads x2)   16
RWTexture2DArray<float> FurthestHZBArrayOutput_1;  //  32  //  1          16                  8
RWTexture2DArray<float> FurthestHZBArrayOutput_2;  //  16              8                      4
RWTexture2DArray<float> FurthestHZBArrayOutput_3;  //   8              4                      2
RWTexture2DArray<float> FurthestHZBArrayOutput_4;  //   4              2                      1

groupshared float SharedMinDeviceZ[TILE_THREAD_GROUP_SIZE_XY * TILE_THREAD_GROUP_SIZE_XY];
groupshared float SharedMaxDeviceZ[TILE_THREAD_GROUP_SIZE_XY * TILE_THREAD_GROUP_SIZE_XY];

#define DIM_FURTHEST 1
#define DIM_CLOSEST 0

void OutputMipLevel(uint MipLevel, uint2 OutputPixelPos, int ArrayIndex, float FurthestDeviceZ, float ClosestDeviceZ)
{
#if DIM_FURTHEST
	#define COND_OUTPUT_LEVEL(_level_) \
		if (MipLevel == _level_) \
		{ \
			FurthestHZBArrayOutput_##_level_[uint3(OutputPixelPos, ArrayIndex)] = FurthestDeviceZ; \
			return; \
		}
#endif
#if DIM_CLOSEST
	ClosestHZBOutput_1[uint3(OutputPixelPos, ArrayIndex)] = ClosestDeviceZ;
#endif

	COND_OUTPUT_LEVEL(1)
	COND_OUTPUT_LEVEL(2)
	COND_OUTPUT_LEVEL(3)
	COND_OUTPUT_LEVEL(4)

#undef COND_OUTPUT_LEVEL
}

void BuildHZBPerPage(uint2 SrcPos, uint GroupThreadIndex, uint HZBArrayIndex, uint SrcArrayIndex, inout float4 InOutDeviceZ)
{
	// Sample 2x2 footprint - thread group covers a 32x32 area
	// Merge with static (represented in the InOutDeviceZ)
	float4 DeviceZ = max(InOutDeviceZ, Gather4VisZ(SrcPos, SrcArrayIndex));
	// Return the merged result
	InOutDeviceZ = DeviceZ;
	float MinDeviceZ = min(min3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);

	float MaxDeviceZ = 0.0f;//max(max3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);
	//uint LinearGroupThreadID = RemappedGroupThreadIndex.y << LOG2_TILE_THREAD_GROUP_SIZE_XY + RemappedGroupThreadIndex.x;

	// Broadcast to all threads (16x16).
	SharedMinDeviceZ[GroupThreadIndex] = MinDeviceZ;
	// Write base HZB level (half physical page size, e.g., 64x64)
	uint2 OutPixelPos = SrcPos >> 1U;
	FurthestHZBArrayOutput_0[uint3(OutPixelPos, HZBArrayIndex)] = MinDeviceZ;

#if FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
	const uint LaneCount = WaveGetLaneCount();
#else
	// Actual wave size is unknown, assume the worst
	const uint LaneCount = 0u;
#endif

	// Build the next 4 levels: 32, 16, 8, 4
	UNROLL
	for (uint MipLevel = 1U; MipLevel < LOG2_TILE_SIZE_XY; ++MipLevel)
	{
		// 8x8, 4x4, 2x2, 1x1
		const uint OutTileDim = uint(TILE_THREAD_GROUP_SIZE_XY) >> MipLevel;
		const uint ReduceBankSize = OutTileDim * OutTileDim;

		// More than one wave wrote to LDS; need to sync.
		if ((ReduceBankSize << 2u) > LaneCount)
		{
			GroupMemoryBarrierWithGroupSync();
		}

		BRANCH
		if (GroupThreadIndex < ReduceBankSize)
		{
			float4 ParentMinDeviceZ;
			//float4 ParentMaxDeviceZ;
			ParentMinDeviceZ[0] = MinDeviceZ;
			//ParentMaxDeviceZ[0] = MaxDeviceZ;

			UNROLL
			for (uint i = 1; i < 4; i++)
			{
				uint LDSIndex = GroupThreadIndex + i * ReduceBankSize;
				ParentMinDeviceZ[i] = SharedMinDeviceZ[LDSIndex];
				//ParentMaxDeviceZ[i] = SharedMaxDeviceZ[LDSIndex];
			}

			MinDeviceZ = min(min3(ParentMinDeviceZ.x, ParentMinDeviceZ.y, ParentMinDeviceZ.z), ParentMinDeviceZ.w);
			//MaxDeviceZ = max(max3(ParentMaxDeviceZ.x, ParentMaxDeviceZ.y, ParentMaxDeviceZ.z), ParentMaxDeviceZ.w);

			OutPixelPos = OutPixelPos >> 1;
			OutputMipLevel(MipLevel, OutPixelPos, HZBArrayIndex, MinDeviceZ, MaxDeviceZ);

			SharedMinDeviceZ[GroupThreadIndex] = MinDeviceZ;
			//SharedMaxDeviceZ[GroupThreadIndex] = MaxDeviceZ;
		}
	}
}
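
// Reduction shape: a 16x16 group reads a 32x32 texel tile (2x2 texels per thread via gather)
// and writes the 16x16 base level directly. Each loop iteration then folds four LDS banks of
// ReduceBankSize values into one: 64 active threads write the 8x8 level, 16 write 4x4,
// 4 write 2x2, and a single thread writes the final 1x1 for this tile.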

[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void BuildHZBPerPageCS(uint GroupThreadIndex : SV_GroupIndex, uint GroupIndex : SV_GroupID)
{
	FPhysicalPageMetaData MetaData;
	uint2 SrcTileOffset = GetTileOffset(GroupIndex, PhysicalPagesForHzb, MetaData).xy;
	uint2 RemappedGroupThreadIndex = InitialTilePixelPositionForReduction2x2(LOG2_TILE_SIZE_XY - 1U, GroupThreadIndex);
	uint2 SrcPos = SrcTileOffset + (RemappedGroupThreadIndex << uint2(1U, 1U));

	// 1. Build the static HZB slice (src slice 1 if enabled)
	uint FirstSrcArrayIndex = GetVirtualShadowMapStaticArrayIndex();
	uint FirstHZBArrayIndex = VirtualShadowMap.StaticHZBArrayIndex;

	bool bViewUncached = (MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) != 0U;
	// Uncachable views always draw to the dynamic slice (slice 0), and thus there is no reason to build for the static slices
	if (bViewUncached)
	{
		FirstSrcArrayIndex = 0u;
		FirstHZBArrayIndex = 0u;
	}
	// Build for the static pages (or the only slice, if there is just one) & keep the 2x2 device Z to pass to the dynamic build (merge depths in case of both being rebuilt)
	float4 StaticDeviceZ = (float4)0.0f;
	BuildHZBPerPage(SrcPos, GroupThreadIndex, FirstHZBArrayIndex, FirstSrcArrayIndex, StaticDeviceZ);

	// 2. If we have not built the 0th slice, we need to do that also (it must then be the dynamic one).
	// This also covers the case where the HZB only has one slice (and we thus do not build a dynamic one at all).
	if (FirstHZBArrayIndex > 0u)
	{
		BuildHZBPerPage(SrcPos, GroupThreadIndex, 0u, 0u, StaticDeviceZ);
	}
}

float4 Gather4(Texture2DArray Texture, SamplerState TextureSampler, uint2 SrcPos, uint ArrayIndex, float2 InvSize)
{
	float2 SrcUV = float2(SrcPos) * InvSize;
	return Texture.GatherRed(TextureSampler, float3(SrcUV, ArrayIndex), 0);
}

Texture2DArray ParentTextureArrayMip;
SamplerState ParentTextureMipSampler;

float2 InvHzbInputSize;

#define TOP_MIP_TILE_SIZE_XY 4
// Each thread fetches 2x2 using gather
#define TOP_MIP_TILE_THREAD_GROUP_SIZE_XY (TOP_MIP_TILE_SIZE_XY/2)

void BuildHZBPerPageTop(uint2 SrcPos, uint2 GroupThreadId, uint ArrayIndex)
{
	// Sample 2x2 footprint - thread group covers a 4x4 area
	float4 DeviceZ = Gather4(ParentTextureArrayMip, ParentTextureMipSampler, SrcPos + uint2(1U, 1U), ArrayIndex, InvHzbInputSize);
	float MinDeviceZ = min(min3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);

	float MaxDeviceZ = 0.0f;//max(max3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);

	// Broadcast to all threads.
	SharedMinDeviceZ[GroupThreadId.y * TOP_MIP_TILE_THREAD_GROUP_SIZE_XY + GroupThreadId.x] = MinDeviceZ;
	// Write first HZB output level (half size)
	uint2 OutPixelPos = SrcPos >> 1U;
	FurthestHZBArrayOutput_0[uint3(OutPixelPos, ArrayIndex)] = MinDeviceZ;

	// Build last level
	GroupMemoryBarrierWithGroupSync();

	BRANCH
	if (all(GroupThreadId.xy == uint2(0U, 0U)))
	{
		float4 ParentMinDeviceZ;
		//float4 ParentMaxDeviceZ;
		ParentMinDeviceZ[0] = MinDeviceZ;
		//ParentMaxDeviceZ[0] = MaxDeviceZ;

		UNROLL
		for (uint Index = 1; Index < 4; ++Index)
		{
			ParentMinDeviceZ[Index] = SharedMinDeviceZ[Index];
			//ParentMaxDeviceZ[Index] = SharedMaxDeviceZ[Index];
		}

		MinDeviceZ = min(min3(ParentMinDeviceZ.x, ParentMinDeviceZ.y, ParentMinDeviceZ.z), ParentMinDeviceZ.w);
		//MaxDeviceZ = max(max3(ParentMaxDeviceZ.x, ParentMaxDeviceZ.y, ParentMaxDeviceZ.z), ParentMaxDeviceZ.w);

		OutPixelPos = OutPixelPos >> 1;
		FurthestHZBArrayOutput_1[uint3(OutPixelPos, ArrayIndex)] = MinDeviceZ;
	}
}

[numthreads(TOP_MIP_TILE_THREAD_GROUP_SIZE_XY, TOP_MIP_TILE_THREAD_GROUP_SIZE_XY, 1)]
void BuildHZBPerPageTopCS(uint2 GroupThreadId : SV_GroupThreadID, uint PageInputIndex : SV_GroupID)
{
	const uint PageIndex = PhysicalPagesForHzb[PageInputIndex];
	uint2 PhysPageAddress = VSMPhysicalIndexToPageAddress(PageIndex);

	// Pixel address of the tile region for this thread group.
	const uint2 SrcTileOffset = PhysPageAddress * uint2(TOP_MIP_TILE_SIZE_XY, TOP_MIP_TILE_SIZE_XY);

	uint2 SrcPos = SrcTileOffset + (GroupThreadId << uint2(1U, 1U));
	BuildHZBPerPageTop(SrcPos, GroupThreadId, 0u);

	const bool bHasSeparateDynamicHZB = VirtualShadowMap.StaticHZBArrayIndex != 0;
	if (bHasSeparateDynamicHZB)
	{
		GroupMemoryBarrierWithGroupSync();
		BuildHZBPerPageTop(SrcPos, GroupThreadId, 1u);
	}
}
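
// Two-stage build: BuildHZBPerPageCS reduces each selected page down to its 4x4 mip tile,
// then BuildHZBPerPageTopCS (one group per page; see the second indirect-args slot written
// in SelectPagesForHZBAndUpdateDirtyFlagsCS) reads that result back via ParentTextureArrayMip
// to produce the last two levels (2x2 and 1x1) for the page.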

uint StatusMessageId;
StructuredBuffer<int> PhysicalPageLists;

[numthreads(1, 1, 1)]
void FeedbackStatusCS()
{
	FGPUMessageWriter Mw = GPUMessageBegin(StatusMessageId, 3U);

	GPUMessageWriteItem(Mw, VSM_STATUS_MSG_PAGE_MANAGEMENT);

	// Write out how many pages are still available
	int CountIndex = GetPhysicalPageListStart(PHYSICAL_PAGE_LIST_AVAILABLE) + VirtualShadowMap.MaxPhysicalPages;
	GPUMessageWriteItem(Mw, PhysicalPageLists[CountIndex]);

	// Write out the resolution lod bias from this frame
	GPUMessageWriteItem(Mw, VirtualShadowMap.GlobalResolutionLodBias);
}

int PageListStatsRow;

[numthreads(1, 1, 1)]
void LogPageListStatsCS()
{
	float TopMargin = 0.5f;
	float ItemX = 0.05f;
	FShaderPrintContext Ctx = InitShaderPrintContext(true, float2(ItemX, TopMargin));

	Ctx.Pos.y += PageListStatsRow * 0.02f;

	Print(Ctx, GetPhysicalPageListCount(0));
	for (int i = 1; i < PHYSICAL_PAGE_LIST_COUNT; ++i)
	{
		Print(Ctx, TEXT(", "));
		Print(Ctx, GetPhysicalPageListCount(i));
	}
}