// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VirtualShadowMapPhysicalPageManagement.usf: compute shaders for virtual shadow
map physical page management: physical page allocation and caching, page list
maintenance, page table updates, and per-page HZB builds.
=============================================================================*/
#include "../Common.ush"
#include "../WaveOpUtil.ush"
#include "../ReductionCommon.ush"
#include "../GPUMessaging.ush"
#include "../ShaderPrint.ush"
#include "/Engine/Shared/VirtualShadowMapDefinitions.h"
#include "VirtualShadowMapProjectionStructs.ush"
#include "VirtualShadowMapProjectionCommon.ush"
#include "VirtualShadowMapPageAccessCommon.ush"
#include "VirtualShadowMapStats.ush"
#include "VirtualShadowMapPerPageDispatch.ush"
#ifndef HAS_CACHE_DATA
#define HAS_CACHE_DATA 1
#endif //HAS_CACHE_DATA
// Page flags generated by page allocation to indicate state to rendering passes (i.e., present / invalid)
Texture2D<uint> PageRequestFlags;
RWTexture2D<uint> OutPageFlags;
RWTexture2D<uint> OutPageTable;
RWStructuredBuffer<FPhysicalPageMetaData> OutPhysicalPageMetaData;
// A series of lists used to track various page states (free, used)
// Each list is MaxPhysicalPages entries plus one trailing uint counter
RWStructuredBuffer<int> OutPhysicalPageLists;
// Stores available pages (i.e. ones not used this frame) for allocation in LRU order
#define PHYSICAL_PAGE_LIST_LRU 0
// Packed available list
// Pages invalidated this frame will be added to the end. Allocations come from the end.
#define PHYSICAL_PAGE_LIST_AVAILABLE 1
// Stores invalidated/empty pages temporarily before they are re-added to the AVAILABLE list
#define PHYSICAL_PAGE_LIST_EMPTY 2
// Stores pages requested/used this frame, not available for allocation
#define PHYSICAL_PAGE_LIST_REQUESTED 3
// Number of page lists
#define PHYSICAL_PAGE_LIST_COUNT 4
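// Example layout (illustrative): with MaxPhysicalPages == N, list L occupies
// elements [L * (N + 1), L * (N + 1) + N), and its counter lives at index
// L * (N + 1) + N, i.e., the final element of each list's range.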
int GetPhysicalPageListStart(int PageList)
{
return PageList * (VirtualShadowMap.MaxPhysicalPages + 1);
}
int GetPhysicalPageListItem(uint PageList, int Index)
{
return OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + Index];
}
void SetPhysicalPageListItem(uint PageList, int Index, int Value)
{
//check(Index < VirtualShadowMap.MaxPhysicalPages);
OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + Index] = Value;
}
int GetPhysicalPageListCount(int PageList)
{
return OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + VirtualShadowMap.MaxPhysicalPages];
}
void SetPhysicalPageListCount(int PageList, int NewCount)
{
OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + VirtualShadowMap.MaxPhysicalPages] = NewCount;
}
bool PushPhysicalPageList(uint PageList, int PhysicalPageIndex)
{
uint PageListStart = GetPhysicalPageListStart(PageList);
// NOTE: Counter is the final element of the list
int Offset = 0;
WaveInterlockedAddScalar_(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], 1, Offset);
// We have to guard against overflow as it would overwrite the counter and potentially spill into other lists
if (Offset < VirtualShadowMap.MaxPhysicalPages)
{
OutPhysicalPageLists[PageListStart + Offset] = PhysicalPageIndex;
return true;
}
else
{
return false;
}
}
// Returns <0 if none available, otherwise returns the popped physical page index
int PopPhysicalPageList(uint PageList)
{
uint PageListStart = GetPhysicalPageListStart(PageList);
int Offset = 0;
#if 1
WaveInterlockedAddScalar_(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], -1, Offset);
#else
// Need negative numbers here...
InterlockedAdd(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], -1, Offset);
#endif
// We want the value *after* decrement in this case
--Offset;
return Offset < 0 ? INDEX_NONE : OutPhysicalPageLists[PageListStart + Offset];
}
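// NOTE: The counter is decremented even when the list is empty, so it can go
// negative; callers must treat a negative result (INDEX_NONE) as "no page
// available". The counts are re-established each phase (see InitPageRectBounds).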
StructuredBuffer<int> PrevPhysicalPageLists;
RWStructuredBuffer<uint4> OutUncachedPageRectBounds;
RWStructuredBuffer<uint4> OutAllocatedPageRectBounds;
uint NumPageRectsToClear;
// This is admittedly a weird fusion of several initializations, but it is the first thing
// we run in a given analysis phase so it's more efficient to do it all here rather than
// have several small passes later.
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void InitPageRectBounds(uint3 Index : SV_DispatchThreadID)
{
// The X thread id maps to the range NumPageRectsToClear, which is (GetNumFullShadowMaps() + GetNumSinglePageShadowMaps()) * FVirtualShadowMap::MaxMipLevels;
// this avoids clearing unused slots.
if (Index.x < NumPageRectsToClear)
{
uint RectOffset = Index.x;
// The full shadow maps are offset to a distant part of the ID range
if (Index.x >= VirtualShadowMap.NumSinglePageShadowMaps * VSM_MAX_MIP_LEVELS )
{
RectOffset += VSM_MAX_SINGLE_PAGE_SHADOW_MAPS * VSM_MAX_MIP_LEVELS - VirtualShadowMap.NumSinglePageShadowMaps * VSM_MAX_MIP_LEVELS;
}
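		// Illustrative example (assumed values): with NumSinglePageShadowMaps == 8 and
		// VSM_MAX_SINGLE_PAGE_SHADOW_MAPS == 16, thread 8 * VSM_MAX_MIP_LEVELS (the first
		// full shadow map) is shifted forward by 8 * VSM_MAX_MIP_LEVELS to land at
		// 16 * VSM_MAX_MIP_LEVELS, skipping the unused single-page slots.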
uint4 Empty = uint4(VSM_LEVEL0_DIM_PAGES_XY, VSM_LEVEL0_DIM_PAGES_XY, 0, 0);
OutUncachedPageRectBounds[RectOffset] = Empty;
OutAllocatedPageRectBounds[RectOffset] = Empty;
}
// Clear the various list counters
if (Index.x == 0)
{
// These lists are going to start "full" before packing
SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_LRU, VirtualShadowMap.MaxPhysicalPages);
// These start empty and are added to as elements are removed from the LRU one
SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE, 0);
SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_EMPTY, 0);
SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_REQUESTED, 0);
}
}
// Mapping of previous frame/update data to current frame
StructuredBuffer<FNextVirtualShadowMapData> NextVirtualShadowMapData;
uint NextVirtualShadowMapDataCount;
// To propagate any invalidation flags to the physical page flags
Texture2D<uint> PrevPageRequestFlags;
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdatePhysicalPageAddresses(uint3 Index : SV_DispatchThreadID)
{
// TODO: Make this a loose constant probably to remove dependency of this shader on VSM UB
// Still needs the VSM defines for IsVirtualShadowMapPageAddressValid addressing math though!
if (Index.x >= VirtualShadowMap.MaxPhysicalPages)
{
return;
}
// Use identity mapping by default
int PhysicalPageIndex = Index.x;
checkStructuredBufferAccessSlow(OutPhysicalPageMetaData, PhysicalPageIndex);
FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
FVSMPageOffset PrevGlobalPageOffset = FVSMPageOffset::Unpack(0u);
bool bKeepPage = false;
if (PrevMetaData.Flags != 0)
{
// Update virtual shadow map ID to the equivalent one this frame if present
// NOTE: We need a range check as we only add elements to this mapping if they exist this frame
FVirtualShadowMapHandle PrevVirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(PrevMetaData.VirtualShadowMapId);
if (PrevVirtualShadowMapHandle.IsValid() && PrevVirtualShadowMapHandle.GetDataIndex() < NextVirtualShadowMapDataCount)
{
PrevGlobalPageOffset = CalcPageOffset(PrevVirtualShadowMapHandle, PrevMetaData.MipLevel, PrevMetaData.PageAddress);
FNextVirtualShadowMapData NextData = NextVirtualShadowMapData[PrevVirtualShadowMapHandle.GetDataIndex()];
FVirtualShadowMapHandle VirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(NextData.NextVirtualShadowMapId);
// Check if it maps to a valid virtual shadow map this frame
if (VirtualShadowMapHandle.IsValid())
{
// Clipmap panning offset; zero for non-clipmaps, so this is safe
int2 TestPageAddress = int2(PrevMetaData.PageAddress) + NextData.PageAddressOffset;
if (IsVirtualShadowMapPageAddressValid(TestPageAddress, PrevMetaData.MipLevel))
{
// Valid physical page in the cache!
// It may still be invalidated by flags or over-written by new requests this frame, but for now we will maintain it
OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapHandle.Id;
OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = uint2(TestPageAddress);
// No changes to other fields
bKeepPage = true;
}
}
}
}
if (bKeepPage)
{
#if HAS_CACHE_DATA
// Propagate any invalidation flags from the previous page requests to the physical page
const uint PrevFlags = PrevPageRequestFlags[PrevGlobalPageOffset.GetResourceAddress()];
const uint InvalidationFlags = PrevFlags & VSM_EXTENDED_FLAG_ANY_INVALIDATED;
if (InvalidationFlags != 0)
{
// Add them to any previous flags
OutPhysicalPageMetaData[PhysicalPageIndex].Flags = PrevMetaData.Flags | InvalidationFlags;
}
#endif
}
else
{
// Only need to zero out flags for it to be considered invalid
OutPhysicalPageMetaData[PhysicalPageIndex].Flags = 0;
}
}
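// In summary: this pass remaps each cached physical page from last frame's
// virtual shadow map ID to the matching ID this frame (applying any clipmap
// panning offset), propagates invalidation flags from the previous requests,
// and releases pages whose shadow map no longer exists by zeroing their flags.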
int bDynamicPageInvalidation;
int bAllocateViaLRU;
int MaxPageAgeSinceLastRequest;
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdatePhysicalPages(uint3 Index : SV_DispatchThreadID)
{
// Because of launch size rounding we might get here.
if (Index.x >= VirtualShadowMap.MaxPhysicalPages)
{
return;
}
// This is the index in the PhysicalPageList
const uint PhysicalPageListIndex = Index.x;
bool bRemovedPageFromList = false;
// Use identity mapping by default
int PhysicalPageIndex = PhysicalPageListIndex;
#if HAS_CACHE_DATA
if (bAllocateViaLRU)
{
// If available, use last frame's LRU ordering as the input here so we can maintain that order
// NOTE: These end up sorted into the PHYSICAL_PAGE_LIST_REQUESTED list at the end of the frame
// Conceptually: PhysicalPageIndex = LastFrameLRUList[PhysicalPageListIndex]
const int PrevPageListStart = GetPhysicalPageListStart(PHYSICAL_PAGE_LIST_REQUESTED);
PhysicalPageIndex = PrevPhysicalPageLists[PrevPageListStart + PhysicalPageListIndex];
checkSlow(PhysicalPageIndex >= INDEX_NONE);
checkSlow(PhysicalPageIndex < VirtualShadowMap.MaxPhysicalPages);
}
#endif
checkStructuredBufferAccessSlow(OutPhysicalPageMetaData, PhysicalPageIndex);
// 1:1 read modify write is safe
uint NextPhysicalFlags = 0;
#if HAS_CACHE_DATA
{
FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
uint MipLevel = PrevMetaData.MipLevel;
if (PrevMetaData.Flags != 0)
{
// Convenience
const FVirtualShadowMapHandle VirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(PrevMetaData.VirtualShadowMapId);
const uint2 PageAddress = PrevMetaData.PageAddress;
// Look up the request flags for this frame to see if this page was requested again
const FVSMPageOffset GlobalPageOffset = CalcPageOffset(VirtualShadowMapHandle, MipLevel, PageAddress);
const uint RequestFlags = PageRequestFlags[GlobalPageOffset.GetResourceAddress()];
const bool bRequestedThisFrame = RequestFlags != 0;
const int PhysicalPageRequestedAge = int(VirtualShadowMap.SceneFrameNumber - PrevMetaData.LastRequestedSceneFrameNumber);
// If the light is unreferenced we also allow its pages to live (unless reallocated) regardless of age for now;
// since we won't be rendering into them, they do little harm by remaining present.
// TODO: Revisit this... probably make it just age-based now
const FVirtualShadowMapProjectionShaderData Projection = GetVirtualShadowMapProjectionData(VirtualShadowMapHandle);
if (bRequestedThisFrame || Projection.bUnreferenced || PhysicalPageRequestedAge <= MaxPageAgeSinceLastRequest)
{
const uint PrevPhysicalFlags = PrevMetaData.Flags;
// Update the mapping data for any valid cached pages so we don't lose it
OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapHandle.Id;
OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = PageAddress;
if (!bRequestedThisFrame || Projection.bUnreferenced)
{
// If the page is unreferenced (i.e. we are not going to render to it this frame) we want to leave the physical
// metadata alone, *specifically* the invalidation flags. Since an unreferenced page will not get
// rendered to this frame, we can't clear these flags and instead want to maintain them until a potential
// future frame when this page might be referenced again.
// Tag the page so we can skip it in rendering-related tasks like clearing and merging
NextPhysicalFlags = PrevPhysicalFlags | VSM_EXTENDED_FLAG_UNREFERENCED;
// NOTE: This should be unused during this render, but may be used by invalidation between frames/renders
// We only want to set ALLOCATED so that it gets picked up by invalidation, but not by
// any rendering this frame. Any invalidation flags already on the physical page remain there,
// so this is just for new ones generated this frame to ensure we invalidate any cached-but-currently-unused
// pages still in the pool
OutPageFlags[GlobalPageOffset.GetResourceAddress()] = VSM_FLAG_ALLOCATED;
}
else
{
uint NextPageFlags = VSM_FLAG_ALLOCATED;
// Distant lights ignore invalidations as they are round-robin invalidated
if (bDynamicPageInvalidation && !VirtualShadowMapHandle.IsSinglePage())
{
if ((PrevPhysicalFlags & VSM_EXTENDED_FLAG_ANY_INVALIDATED) != 0)
{
if ((PrevPhysicalFlags & VSM_EXTENDED_FLAG_INVALIDATE_STATIC) == 0)
{
// ONLY dynamic is invalidated, static can remain cached
NextPageFlags |= VSM_FLAG_DYNAMIC_UNCACHED;
}
else
{
// Invalidate both
NextPageFlags |= VSM_FLAG_ANY_UNCACHED;
}
}
// Always invalidate dynamic when using receiver mask, as the page may be incomplete
if (Projection.bUseReceiverMask)
{
NextPageFlags |= VSM_FLAG_DYNAMIC_UNCACHED;
}
}
uint PhysicalPageDetailGeometryFlag = (PrevPhysicalFlags & VSM_FLAG_DETAIL_GEOMETRY);
if (bRequestedThisFrame)
{
StatsBufferInterlockedInc(VSM_STAT_REQUESTED_THIS_FRAME_PAGES);
// Remove from LRU list and add to requested list
PushPhysicalPageList(PHYSICAL_PAGE_LIST_REQUESTED, PhysicalPageIndex);
OutPhysicalPageMetaData[PhysicalPageIndex].LastRequestedSceneFrameNumber = VirtualShadowMap.SceneFrameNumber;
bRemovedPageFromList = true;
// If the detail geometry flag doesn't match the cached page we treat it as a full invalidation.
// TODO: This could potentially be a problem for interleaved multiview rendering;
// If the flag differs in the two views it will cause cache thrashing.
const uint RequestDetailGeometryFlag = (RequestFlags & VSM_FLAG_DETAIL_GEOMETRY);
if (RequestDetailGeometryFlag != PhysicalPageDetailGeometryFlag)
{
NextPageFlags |= (VSM_FLAG_STATIC_UNCACHED | VSM_FLAG_DYNAMIC_UNCACHED);
PhysicalPageDetailGeometryFlag = RequestDetailGeometryFlag;
}
// Only increment the stats for pages requested this render, otherwise it gets confusing
if (NextPageFlags & VSM_FLAG_STATIC_UNCACHED)
{
StatsBufferInterlockedInc(VSM_STAT_STATIC_INVALIDATED_PAGES);
}
else
{
StatsBufferInterlockedInc(VSM_STAT_STATIC_CACHED_PAGES);
}
if (NextPageFlags & VSM_FLAG_DYNAMIC_UNCACHED)
{
StatsBufferInterlockedInc(VSM_STAT_DYNAMIC_INVALIDATED_PAGES);
}
else
{
StatsBufferInterlockedInc(VSM_STAT_DYNAMIC_CACHED_PAGES);
}
}
NextPageFlags |= PhysicalPageDetailGeometryFlag;
const uint PhysicalFlags = (Projection.bUnCached ? VSM_EXTENDED_FLAG_VIEW_UNCACHED : 0U);
NextPhysicalFlags = NextPageFlags | PhysicalFlags;
// If the page is going to be fully cached, but the VSM_EXTENDED_FLAG_FORCE_CACHED flag is on,
// we want to do something special here. We want to consider this page for any rendering in case
// WPO distance disable has changed (which we want to start causing invalidations), but not actually
// invalidate or render anything into it. Currently we accomplish this by setting the DYNAMIC_UNCACHED
// flag in the hierarchical page flags (WPO only ever gets rendered into dynamic cache -
// see ShouldCacheInstanceAsStatic), but NOT on the physical page (which would indicate a real invalidation).
// This can get significantly cleaned up if/when we free up an additional hierarchical page flag bit.
// We do NOT want these details showing up in debug visualizations or cache stats though
bool bPageValidForRendering = (NextPageFlags & VSM_FLAG_ANY_UNCACHED) != 0;
if (PrevPhysicalFlags & VSM_EXTENDED_FLAG_FORCE_CACHED)
{
// NOTE: WPO can only ever be dynamic cached
NextPageFlags |= VSM_FLAG_DYNAMIC_UNCACHED;
StatsBufferInterlockedInc(VSM_STAT_WPO_CONSIDERED_PAGES);
}
// Map the page to the physical page
// If we later allocate over top of this page (for one requested this frame), we will zero this out again. See AllocateNewPageMappings
OutPageTable[GlobalPageOffset.GetResourceAddress()] = ShadowEncodePageTable(VSMPhysicalIndexToPageAddress(PhysicalPageIndex), bPageValidForRendering);
OutPageFlags[GlobalPageOffset.GetResourceAddress()] = NextPageFlags;
} // Unreferenced
}
}
}
#endif
// If page is invalidated/empty, remove it from the LRU list and add it to the empty list
// It will be re-added after packing to the end of the AVAILABLE list
if (NextPhysicalFlags == 0)
{
StatsBufferInterlockedInc(VSM_STAT_EMPTY_PAGES);
PushPhysicalPageList(PHYSICAL_PAGE_LIST_EMPTY, PhysicalPageIndex);
bRemovedPageFromList = true;
}
OutPhysicalPageMetaData[PhysicalPageIndex].Flags = NextPhysicalFlags;
// Write out the LRU list while maintaining order, with anything we removed marked as INDEX_NONE
SetPhysicalPageListItem(PHYSICAL_PAGE_LIST_LRU, PhysicalPageListIndex, bRemovedPageFromList ? INDEX_NONE : PhysicalPageIndex);
}
void AllocateNewPageMappings(FVirtualShadowMapHandle VirtualShadowMapHandle, FVSMPageOffset GlobalPageOffset, uint MipLevel, uint2 PageAddress)
{
const uint RequestFlags = PageRequestFlags[GlobalPageOffset.GetResourceAddress()];
if (RequestFlags != 0)
{
// See if we already hooked this up to a mapped page
const uint PageFlags = (OutPageFlags[GlobalPageOffset.GetResourceAddress()] & VSM_PAGE_FLAGS_BITS_MASK);
if (PageFlags == 0u)
{
StatsBufferInterlockedInc(VSM_STAT_REQUESTED_THIS_FRAME_PAGES);
int PhysicalPageIndex = PopPhysicalPageList(PHYSICAL_PAGE_LIST_AVAILABLE);
if (PhysicalPageIndex >= 0)
{
StatsBufferInterlockedInc(VSM_STAT_ALLOCATED_NEW);
// Add back to the end of the requested list
PushPhysicalPageList(PHYSICAL_PAGE_LIST_REQUESTED, PhysicalPageIndex);
uint2 PhysicalPageAddress = VSMPhysicalIndexToPageAddress(PhysicalPageIndex);
// FIRST, check if there's a valid page already mapped to this physical page
// If so, we must go back and clear out its page table entry before we reallocate this page
{
FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
if (PrevMetaData.Flags != 0)
{
FVSMPageOffset PrevGlobalPageOffset = CalcPageOffset(FVirtualShadowMapHandle::MakeFromId(PrevMetaData.VirtualShadowMapId), PrevMetaData.MipLevel, PrevMetaData.PageAddress);
OutPageTable[PrevGlobalPageOffset.GetResourceAddress()] = 0;
OutPageFlags[PrevGlobalPageOffset.GetResourceAddress()] = 0;
}
}
uint RequestDetailGeometryFlag = RequestFlags & VSM_FLAG_DETAIL_GEOMETRY;
uint Flags = VSM_FLAG_ALLOCATED | VSM_FLAG_DYNAMIC_UNCACHED | VSM_FLAG_STATIC_UNCACHED | RequestDetailGeometryFlag;
// Mark this page as allocated and not cached (always valid for rendering)
OutPageTable[GlobalPageOffset.GetResourceAddress()] = ShadowEncodePageTable(PhysicalPageAddress, true);
OutPageFlags[GlobalPageOffset.GetResourceAddress()] = Flags;
const FVirtualShadowMapProjectionShaderData Projection = GetVirtualShadowMapProjectionData(VirtualShadowMapHandle);
const uint PhysicalFlags = (Projection.bUnCached ? VSM_EXTENDED_FLAG_VIEW_UNCACHED : 0U);
OutPhysicalPageMetaData[PhysicalPageIndex].Flags = Flags | PhysicalFlags;
OutPhysicalPageMetaData[PhysicalPageIndex].LastRequestedSceneFrameNumber = VirtualShadowMap.SceneFrameNumber;
OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapHandle.Id;
OutPhysicalPageMetaData[PhysicalPageIndex].MipLevel = MipLevel;
OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = PageAddress;
}
else
{
// We end up here if we're out of physical pages; this means some parts get no physical backing.
// Post this error condition back to the host somehow!
// Probably we want to know even when we're getting close.
//OutPageTable[GlobalPageOffset] = 0;
//OutPageFlags[GlobalPageOffset] = 0;
}
}
}
}
#ifdef ClearPageTableCS
RWTexture2D<uint> OutDestBuffer;
uint ClearValue;
uint SampleStride;
#define HAS_MIP_LEVELS (NUM_MIP_LEVELS > 1)
#if HAS_MIP_LEVELS
RWTexture2D<uint> OutDestBufferMips_0;
RWTexture2D<uint> OutDestBufferMips_1;
RWTexture2D<uint> OutDestBufferMips_2;
RWTexture2D<uint> OutDestBufferMips_3;
RWTexture2D<uint> OutDestBufferMips_4;
RWTexture2D<uint> OutDestBufferMips_5;
RWTexture2D<uint> OutDestBufferMips_6;
void ClearMip(RWTexture2D<uint> OutDestBufferMip, uint HMipLevel, FVirtualSMLevelOffset LevelOffset, uint2 PageCoord, uint SampleStrideLocal, uint MipLevel)
{
uint LevelDim = CalcLevelDimsPages(MipLevel + HMipLevel) * SampleStrideLocal;
uint2 HMipOffset = (LevelOffset.LevelTexelOffset * SampleStrideLocal) >> HMipLevel;
if (all(PageCoord < LevelDim))
{
OutDestBufferMip[HMipOffset + PageCoord] = ClearValue;
}
}
#endif
struct FClearPageTableWorker
{
void Run(FPerPageDispatchSetup Setup)
{
for (uint MipLevel = Setup.MipLevelStart; MipLevel < Setup.MipLevelEnd; ++MipLevel)
{
FVirtualSMLevelOffset LevelOffset = CalcPageTableLevelOffset(Setup.VirtualShadowMapHandle, MipLevel);
uint LoopEndXY = Setup.GetLoopEnd(MipLevel);
for (uint PageY = Setup.LoopStart.y; PageY < LoopEndXY; PageY += Setup.LoopStride)
{
for (uint PageX = Setup.LoopStart.x; PageX < LoopEndXY; PageX += Setup.LoopStride)
{
const FVSMPageOffset PageOffset = CalcPageOffset(LevelOffset, MipLevel, uint2(PageX, PageY));
BRANCH
if (SampleStride == 2u)
{
OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(0u, 0u)] = ClearValue;
OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(1u, 0u)] = ClearValue;
OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(0u, 1u)] = ClearValue;
OutDestBuffer[PageOffset.GetResourceAddress() * 2u + uint2(1u, 1u)] = ClearValue;
#if HAS_MIP_LEVELS
#define DO_MIP_LEVEL(_HLevel_) ClearMip(OutDestBufferMips_##_HLevel_,_HLevel_ + 1, LevelOffset, uint2(PageX, PageY), 2u, MipLevel)
DO_MIP_LEVEL(0);
DO_MIP_LEVEL(1);
DO_MIP_LEVEL(2);
DO_MIP_LEVEL(3);
DO_MIP_LEVEL(4);
DO_MIP_LEVEL(5);
// Note: the NUM_MIP_LEVELS includes the base level, which is not included in the UAV array
#if NUM_MIP_LEVELS > 7
DO_MIP_LEVEL(6);
#endif
#undef DO_MIP_LEVEL
#endif
}
else
{
OutDestBuffer[PageOffset.GetResourceAddress()] = ClearValue;
#if HAS_MIP_LEVELS
#define DO_MIP_LEVEL(_HLevel_) ClearMip(OutDestBufferMips_##_HLevel_,_HLevel_ + 1, LevelOffset, uint2(PageX, PageY), 1u, MipLevel)
DO_MIP_LEVEL(0);
DO_MIP_LEVEL(1);
DO_MIP_LEVEL(2);
DO_MIP_LEVEL(3);
DO_MIP_LEVEL(4);
DO_MIP_LEVEL(5);
// Note: the NUM_MIP_LEVELS includes the base level, which is not included in the UAV array
#if NUM_MIP_LEVELS > 7
DO_MIP_LEVEL(6);
#endif
#undef DO_MIP_LEVEL
#endif
}
}
}
}
}
};
/**
 * Clears the destination page table / page flag entries (and any HMip levels) to ClearValue for each page in the dispatch.
 */
[numthreads(PER_PAGE_THREAD_GROUP_SIZE_XY, PER_PAGE_THREAD_GROUP_SIZE_XY, 1)]
void ClearPageTableCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
FClearPageTableWorker ClearPageTableWorker;
FPerPageDispatchSetup Setup;
Setup.Execute(DispatchThreadId, ClearPageTableWorker);
}
#endif // ClearPageTableCS
#ifdef AllocateNewPageMappingsCS
struct FAllocateNewPageMappingsWorker
{
void Run(FPerPageDispatchSetup Setup)
{
for (uint MipLevel = Setup.MipLevelStart; MipLevel < Setup.MipLevelEnd; ++MipLevel)
{
uint LoopEndXY = Setup.GetLoopEnd(MipLevel);
for (uint PageY = Setup.LoopStart.y; PageY < LoopEndXY; PageY += Setup.LoopStride)
{
for (uint PageX = Setup.LoopStart.x; PageX < LoopEndXY; PageX += Setup.LoopStride)
{
const FVSMPageOffset PageOffset = CalcPageOffset(Setup.VirtualShadowMapHandle, MipLevel, uint2(PageX, PageY));
AllocateNewPageMappings(Setup.VirtualShadowMapHandle, PageOffset, MipLevel, uint2(PageX, PageY));
}
}
}
}
};
/**
 * Allocates physical pages for any requested-but-unmapped pages of each shadow map in the dispatch.
 */
[numthreads(PER_PAGE_THREAD_GROUP_SIZE_XY, PER_PAGE_THREAD_GROUP_SIZE_XY, 1)]
void AllocateNewPageMappingsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
FAllocateNewPageMappingsWorker AllocateNewPageMappingsWorker;
FPerPageDispatchSetup Setup;
Setup.Execute(DispatchThreadId, AllocateNewPageMappingsWorker);
}
#endif // AllocateNewPageMappingsCS
// NOTE: We only launch a single group here for now to avoid multi-pass so we really want it as large as possible
// Can optimize this later if needed for larger physical page counts
#define NUM_THREADS_PER_GROUP 1024
#include "../ThreadGroupPrefixSum.ush"
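// PackAvailablePages stream-compacts the LRU list (which now contains
// INDEX_NONE holes where pages moved to the REQUESTED/EMPTY lists) into the
// AVAILABLE list, preserving the original LRU order via a group prefix sum.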
[numthreads(NUM_THREADS_PER_GROUP, 1, 1)]
void PackAvailablePages(uint GroupIndex : SV_GroupIndex)
{
int TotalCount = 0;
// Must be a uniform loop
for (int GroupStart = 0; GroupStart < VirtualShadowMap.MaxPhysicalPages; GroupStart += NUM_THREADS_PER_GROUP)
{
int ListIndex = GroupStart + GroupIndex;
int PhysicalPageIndex = ListIndex < VirtualShadowMap.MaxPhysicalPages ?
GetPhysicalPageListItem(PHYSICAL_PAGE_LIST_LRU, ListIndex) :
INDEX_NONE;
bool bListItemValid = PhysicalPageIndex != INDEX_NONE;
int SumValue = bListItemValid ? 1 : 0;
// NOTE: Cannot be under any divergent branching!
int GroupCount = 0;
int Offset = ThreadGroupPrefixSum(SumValue, GroupIndex, GroupCount);
if (bListItemValid)
{
SetPhysicalPageListItem(PHYSICAL_PAGE_LIST_AVAILABLE, TotalCount + Offset, PhysicalPageIndex);
}
TotalCount += GroupCount;
// This should already be accounted for internally by ThreadGroupPrefixSum, but putting one here
// to be absolutely sure.
GroupMemoryBarrierWithGroupSync();
}
// Set total number
if (GroupIndex == 0)
{
SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE, TotalCount);
}
}
#undef NUM_THREADS_PER_GROUP
uint bAppendEmptyToAvailable;
// If true, simply updates the counts instead of copying items
// This should be run with the same parameters right after the copy pass, with a single group
uint bUpdateCounts;
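// Assumed usage per the comments above: dispatch once with bUpdateCounts == 0
// to copy items, then again with identical parameters and bUpdateCounts == 1
// (a single group) to fold CopyCount into the output list's counter and zero
// the input list's counter.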
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void AppendPhysicalPageLists(uint ThreadId : SV_DispatchThreadID)
{
// We only need two variants currently, EMPTY->AVAILABLE and AVAILABLE->REQUESTED
int InputList = bAppendEmptyToAvailable ? PHYSICAL_PAGE_LIST_EMPTY : PHYSICAL_PAGE_LIST_AVAILABLE;
int OutputList = bAppendEmptyToAvailable ? PHYSICAL_PAGE_LIST_AVAILABLE : PHYSICAL_PAGE_LIST_REQUESTED;
// NOTE: This needs to maintain order!
// It also needs to be robust against physical page pool overflows, ensuring that we never "lose" any
// items in the final LRU list for the next frame.
int InputCount = GetPhysicalPageListCount(InputList);
int OutputCount = GetPhysicalPageListCount(OutputList);
int CopyCount = max(0, min(InputCount, int(VirtualShadowMap.MaxPhysicalPages) - OutputCount));
if (bUpdateCounts)
{
// Update pass (after copy pass)
if (ThreadId == 0)
{
int NewOutputCount = OutputCount + CopyCount;
SetPhysicalPageListCount(OutputList, NewOutputCount);
SetPhysicalPageListCount(InputList, 0);
// The REQUESTED list needs to specifically end up with a single unique copy of each index, as
// this becomes the LRU list for the next update. If we were to lose any indices or list entries
// then we would also (permanently) lose actual physical pages. Thus we assert that the
// list ends up the right size after the final append!
if (!bAppendEmptyToAvailable)
{
/*
PLATFORM_ASSERT4(
NewOutputCount == VirtualShadowMap.MaxPhysicalPages,
0xCECC,
__LINE__,
InputCount,
OutputCount,
NewOutputCount);
*/
checkSlow(NewOutputCount == VirtualShadowMap.MaxPhysicalPages);
}
else
{
/*
// All pages should now be in PHYSICAL_PAGE_LIST_AVAILABLE or REQUESTED
int AvailableCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE);
int RequestedCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_REQUESTED);
int EmptyCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_EMPTY);
int TotalPages = AvailableCount + RequestedCount;
PLATFORM_ASSERT4(
TotalPages == VirtualShadowMap.MaxPhysicalPages,
0xCECC,
__LINE__,
AvailableCount,
RequestedCount,
EmptyCount);
*/
}
}
}
else
{
if (ThreadId < CopyCount)
{
int InputItem = GetPhysicalPageListItem(InputList, ThreadId);
SetPhysicalPageListItem(OutputList, OutputCount + ThreadId, InputItem);
}
}
}
StructuredBuffer<FPhysicalPageMetaData> PhysicalPageMetaData;
RWTexture2DArray<uint> OutPhysicalPagePool;
// Helper function to merge static and dynamic depth.
void MergePhysicalPixel(uint2 PixelCoord)
{
// 1:1 pixels so this is safe RMW
OutPhysicalPagePool[uint3(PixelCoord, 0)] = max(
OutPhysicalPagePool[uint3(PixelCoord, 0)],
OutPhysicalPagePool[uint3(PixelCoord, GetVirtualShadowMapStaticArrayIndex())]);
}
// Log2 of the 2D thread group dimension: 2^4 == 16
#define LOG2_TILE_THREAD_GROUP_SIZE_XY 4u
#define TILE_THREAD_GROUP_SIZE_XY (1u << LOG2_TILE_THREAD_GROUP_SIZE_XY)
// Each thread takes 2x2 samples to work with, so tile size is 2x thread group size
#define LOG2_TILE_SIZE_XY (LOG2_TILE_THREAD_GROUP_SIZE_XY + 1u)
#if VSM_LOG2_PAGE_SIZE < LOG2_TILE_SIZE_XY
#error "VSM_LOG2_PAGE_SIZE must be at least LOG2_TILE_SIZE_XY; either increase the page size or reduce the tile size"
#endif
// Number of tiles (thread groups) in each dimension to cover the page
#define LOG2_TILES_PER_PAGE_XY ( VSM_LOG2_PAGE_SIZE - LOG2_TILE_SIZE_XY )
// Log2 of the 1D tile count to cover the page: 2 * LOG2_TILES_PER_PAGE_XY
#define LOG2_TILES_PER_PAGE_1D ( 2U * LOG2_TILES_PER_PAGE_XY )
// 1D tile count to cover the page
#define TILES_PER_PAGE_1D ( 1U << LOG2_TILES_PER_PAGE_1D )
#define TILES_PER_PAGE_XY_MASK ( ( 1U << LOG2_TILES_PER_PAGE_XY ) - 1U )
#define TILES_PER_PAGE_1D_MASK ( ( 1U << LOG2_TILES_PER_PAGE_1D ) - 1U )
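// Worked example, assuming VSM_LOG2_PAGE_SIZE == 7 (128x128 texel pages):
// LOG2_TILE_SIZE_XY == 5 (32x32 texel tiles), LOG2_TILES_PER_PAGE_XY == 2,
// so each page is covered by 4x4 == 16 tiles (TILES_PER_PAGE_1D == 16).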
RWBuffer<uint> OutInitializePagesIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesToInitialize;
void EmitPageToProcess(RWBuffer<uint> OutIndirectArgsBuffer, RWStructuredBuffer<uint> OutSelectedPhysicalIndexBuffer, uint PhysicalPageIndex)
{
int GroupCount = 0;
// Each page needs TILES_PER_PAGE_1D groups launched
WaveInterlockedAddScalar_(OutIndirectArgsBuffer[0], TILES_PER_PAGE_1D, GroupCount);
OutSelectedPhysicalIndexBuffer[GroupCount >> LOG2_TILES_PER_PAGE_1D] = PhysicalPageIndex;
}
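// NOTE: WaveInterlockedAddScalar_ returns the pre-add value in GroupCount.
// Since every page adds exactly TILES_PER_PAGE_1D, dividing by it (the shift
// above) recovers a dense, unique slot for this page in the output index buffer.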
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesToInitializeCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
{
return;
}
FPhysicalPageMetaData MetaData = PhysicalPageMetaData[PhysicalPageIndex];
bool bUnreferenced = (MetaData.Flags & VSM_EXTENDED_FLAG_UNREFERENCED) != 0;
bool bFullyCached = (MetaData.Flags & VSM_FLAG_ANY_UNCACHED) == 0;
bool bStaticUncached = (MetaData.Flags & VSM_FLAG_STATIC_UNCACHED) != 0;
bool bForceCached = (MetaData.Flags & VSM_EXTENDED_FLAG_FORCE_CACHED) != 0;
if ((MetaData.Flags & VSM_FLAG_ALLOCATED) == 0)
{
// Page not used, we're done
}
else if (bUnreferenced || bFullyCached || bForceCached)
{
// Page fully cached or unreferenced. Leave the data alone.
}
else
{
// At least one of the pages is uncached
// NOTE: Dynamic cached/static uncached is currently an invalid state
// Since we merge the static stuff over the dynamic stuff after rendering we can't
// actually maintain separate dynamic cached pages when "only" the (theoretically)
// static moved. Thus if not fully cached, we always regenerate the dynamic page.
EmitPageToProcess(OutInitializePagesIndirectArgsBuffer, OutPhysicalPagesToInitialize, PhysicalPageIndex);
StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_CLEAR);
if (bStaticUncached &&
(MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) == 0U)
{
EmitPageToProcess(OutInitializePagesIndirectArgsBuffer, OutPhysicalPagesToInitialize, PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages);
StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_CLEAR);
}
}
}
uint3 GetTileOffset(uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer, inout FPhysicalPageMetaData OutMetaData)
{
const uint PageInputIndex = GroupIndex >> LOG2_TILES_PER_PAGE_1D;
uint PageIndex = PageIndexBuffer[PageInputIndex];
int ArrayIndex = 0;
if (PageIndex >= VirtualShadowMap.MaxPhysicalPages)
{
// Request to clear the static page
PageIndex -= VirtualShadowMap.MaxPhysicalPages;
ArrayIndex = 1;
}
OutMetaData = PhysicalPageMetaData[PageIndex];
// Each page has TILES_PER_PAGE_1D (1U << LOG2_TILES_PER_PAGE_1D) groups (aka tiles) assigned to work on it.
const uint LocalTileIndex = GroupIndex & TILES_PER_PAGE_1D_MASK;
// Unpack the 1D tile index to a 2D tile coord
const uint2 LocalTile = uint2(LocalTileIndex & TILES_PER_PAGE_XY_MASK, LocalTileIndex >> LOG2_TILES_PER_PAGE_XY);
uint2 PhysPageAddress = VSMPhysicalIndexToPageAddress(PageIndex);
// Pixel address of tile region for this thread group.
const uint2 TileOffset = (PhysPageAddress << uint2(VSM_LOG2_PAGE_SIZE, VSM_LOG2_PAGE_SIZE)) + (LocalTile << uint2(LOG2_TILE_SIZE_XY, LOG2_TILE_SIZE_XY));
return uint3(TileOffset, ArrayIndex);
}
uint3 GetTileBasePos(uint2 TileThreadID, uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer, inout FPhysicalPageMetaData OutMetaData)
{
// Pixel address of tile region for this thread group.
const uint3 TileOffset = GetTileOffset(GroupIndex, PageIndexBuffer, OutMetaData);
// Pixel address of 2x2 region to sample for this thread.
const uint2 BasePos = TileOffset.xy + (TileThreadID.xy << 1u);
return uint3(BasePos, TileOffset.z);
}
uint3 GetTileBasePos(uint2 TileThreadID, uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer)
{
FPhysicalPageMetaData TmpMetaData;
return GetTileBasePos(TileThreadID, GroupIndex, PageIndexBuffer, TmpMetaData);
}
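// Net effect: a 16x16 thread group covers one 32x32-texel tile, with each
// thread owning the 2x2 pixel quad at BasePos.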
StructuredBuffer<uint> PhysicalPagesToInitialize;
[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void InitializePhysicalPagesIndirectCS(uint2 TileThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
FPhysicalPageMetaData MetaData;
uint3 BasePos = GetTileBasePos(TileThreadID, GroupIndex, PhysicalPagesToInitialize, MetaData);
bool bStaticCached = (MetaData.Flags & VSM_FLAG_STATIC_UNCACHED) == 0U;
if (bStaticCached && (MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) == 0U)
{
// Initialize from the static page data
checkSlow(BasePos.z == 0U);
OutPhysicalPagePool[BasePos + uint3(0U, 0U, 0U)] = OutPhysicalPagePool[BasePos + uint3(0U, 0U, 1U)];
OutPhysicalPagePool[BasePos + uint3(1U, 0U, 0U)] = OutPhysicalPagePool[BasePos + uint3(1U, 0U, 1U)];
OutPhysicalPagePool[BasePos + uint3(0U, 1U, 0U)] = OutPhysicalPagePool[BasePos + uint3(0U, 1U, 1U)];
OutPhysicalPagePool[BasePos + uint3(1U, 1U, 0U)] = OutPhysicalPagePool[BasePos + uint3(1U, 1U, 1U)];
}
else
{
// Clear the page to zero
OutPhysicalPagePool[BasePos + uint3(0U, 0U, 0U)] = 0U;
OutPhysicalPagePool[BasePos + uint3(1U, 0U, 0U)] = 0U;
OutPhysicalPagePool[BasePos + uint3(0U, 1U, 0U)] = 0U;
OutPhysicalPagePool[BasePos + uint3(1U, 1U, 0U)] = 0U;
}
}
RWBuffer<uint> OutMergePagesIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesToMerge;
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesToMergeCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
{
return;
}
FPhysicalPageMetaData MetaData = PhysicalPageMetaData[PhysicalPageIndex];
// An uncached view always renders exclusively into the dynamic pages, and thus requires no merging.
if ((MetaData.Flags & VSM_FLAG_ALLOCATED) != 0U &&
(MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) == 0U &&
(MetaData.Flags & VSM_EXTENDED_FLAG_DIRTY) != 0U &&
(MetaData.Flags & VSM_EXTENDED_FLAG_UNREFERENCED) == 0U)
{
StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_MERGE);
EmitPageToProcess(OutMergePagesIndirectArgsBuffer, OutPhysicalPagesToMerge, PhysicalPageIndex);
}
}
StructuredBuffer<uint> PhysicalPagesToMerge;
[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void MergeStaticPhysicalPagesIndirectCS(uint2 TileThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
uint2 BasePos = GetTileBasePos(TileThreadID, GroupIndex, PhysicalPagesToMerge).xy;
// 1:1 pixels so this is safe RMW
MergePhysicalPixel(BasePos + uint2(0U, 0U));
MergePhysicalPixel(BasePos + uint2(1U, 0U));
MergePhysicalPixel(BasePos + uint2(0U, 1U));
MergePhysicalPixel(BasePos + uint2(1U, 1U));
}
// Indirect HZB building:
RWStructuredBuffer<uint> DirtyPageFlagsInOut;
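// Layout (derived from the reads below): four arrays of MaxPhysicalPages (N)
// entries each: [0,N) page dirty, [N,2N) dynamic invalidations,
// [2N,3N) static invalidations, [3N,4N) WPO allowed.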
// Returns updated physical page flags
uint UpdateAndClearDirtyFlags(uint PhysicalPageIndex)
{
bool bPageDirty = DirtyPageFlagsInOut[PhysicalPageIndex] != 0U;
bool bInvalidatesDynamic = DirtyPageFlagsInOut[PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages] != 0U;
bool bInvalidatesStatic = DirtyPageFlagsInOut[PhysicalPageIndex + 2U * VirtualShadowMap.MaxPhysicalPages] != 0U;
bool bWPOAllowed = DirtyPageFlagsInOut[PhysicalPageIndex + 3U * VirtualShadowMap.MaxPhysicalPages] != 0U;
// clear the dirty/invalidation flags
DirtyPageFlagsInOut[PhysicalPageIndex] = 0U;
DirtyPageFlagsInOut[PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages] = 0U;
DirtyPageFlagsInOut[PhysicalPageIndex + 2U * VirtualShadowMap.MaxPhysicalPages] = 0U;
DirtyPageFlagsInOut[PhysicalPageIndex + 3U * VirtualShadowMap.MaxPhysicalPages] = 0U;
uint Flags = OutPhysicalPageMetaData[PhysicalPageIndex].Flags;
if (Flags != 0)
{
Flags |=
(bPageDirty ? VSM_EXTENDED_FLAG_DIRTY : 0U) |
(bInvalidatesStatic ? VSM_EXTENDED_FLAG_INVALIDATE_STATIC : 0U) |
(bInvalidatesDynamic ? VSM_EXTENDED_FLAG_INVALIDATE_DYNAMIC : 0U) |
(bWPOAllowed ? VSM_EXTENDED_FLAG_FORCE_CACHED : 0U);
// Update the metadata on the page
OutPhysicalPageMetaData[PhysicalPageIndex].Flags = Flags;
}
return Flags;
}
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdateAndClearDirtyFlagsCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
{
return;
}
FPhysicalPageMetaData MetaData;
UpdateAndClearDirtyFlags(PhysicalPageIndex);
}
RWBuffer<uint> OutPagesForHZBIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesForHZB;
uint bFirstBuildThisFrame;
uint bForceFullHZBUpdate;
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesForHZBAndUpdateDirtyFlagsCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
{
return;
}
uint PhysicalPageFlags = UpdateAndClearDirtyFlags(PhysicalPageIndex);
if ((PhysicalPageFlags & VSM_FLAG_ALLOCATED) != 0)
{
bool bRebuildHZB = false;
const bool bHasSeparateDynamicHZB = VirtualShadowMap.StaticHZBArrayIndex != 0;
const bool bPageDirty = (PhysicalPageFlags & VSM_EXTENDED_FLAG_DIRTY) != 0u;
const bool bStaticUncached = (PhysicalPageFlags & VSM_FLAG_STATIC_UNCACHED) != 0u;
const bool bUnreferenced = (PhysicalPageFlags & VSM_EXTENDED_FLAG_UNREFERENCED) != 0u;
const bool bNeedDynamicBuild = bHasSeparateDynamicHZB
&& (PhysicalPageFlags & VSM_FLAG_DYNAMIC_UNCACHED) != 0u;
// Skip it if it's not referenced; this is usually because we have already done
// the HZB rebuild but haven't yet cleared these flags, which happens the next time
// a page gets rendered.
// TODO: We should clear the relevant page flags immediately after doing HZB generation
bRebuildHZB = (bPageDirty || bStaticUncached || bNeedDynamicBuild) && !bUnreferenced;
if (bForceFullHZBUpdate || bRebuildHZB)
{
StatsBufferInterlockedInc(VSM_STAT_NUM_HZB_PAGES_BUILT);
int GroupCount = 0;
// Each page needs TILES_PER_PAGE_1D groups launched
WaveInterlockedAddScalar_(OutPagesForHZBIndirectArgsBuffer[0], TILES_PER_PAGE_1D, GroupCount);
OutPhysicalPagesForHZB[GroupCount >> LOG2_TILES_PER_PAGE_1D] = PhysicalPageIndex;
// Each top-reduction needs only one group launched
WaveInterlockedAddScalar_(OutPagesForHZBIndirectArgsBuffer[0 + 4], 1U, GroupCount);
}
}
}
SamplerState PhysicalPagePoolSampler;
Texture2DArray<uint> PhysicalPagePool;
float4 Gather4VisZ(uint2 PixelCoord, uint ArrayIndex)
{
#if COMPILER_SUPPORTS_GATHER_UINT
// Offset to 2x2 footprint center and scale to UV space
float2 UV = float2(PixelCoord + uint2(1U, 1U)) * VirtualShadowMap.RecPhysicalPoolSize.xy;
return asfloat(PhysicalPagePool.Gather(PhysicalPagePoolSampler, float3(UV, ArrayIndex), 0));
#else
uint4 PixelRect = uint4(PixelCoord.xy, PixelCoord.xy + uint2(1U, 1U));
uint4 UintDepths = uint4(
PhysicalPagePool[uint3(PixelRect.xw, ArrayIndex)].r, // (-, +)
PhysicalPagePool[uint3(PixelRect.zw, ArrayIndex)].r, // (+, +)
PhysicalPagePool[uint3(PixelRect.zy, ArrayIndex)].r, // (+, -)
PhysicalPagePool[uint3(PixelRect.xy, ArrayIndex)].r // (-, -)
);
return asfloat(UintDepths);
#endif
}
StructuredBuffer<uint> PhysicalPagesForHzb;
// Furthest HZB mip outputs; per-page dimensions for a 128x128 physical page:
RWTexture2DArray<float> FurthestHZBArrayOutput_0; // 64x64
RWTexture2DArray<float> FurthestHZBArrayOutput_1; // 32x32
RWTexture2DArray<float> FurthestHZBArrayOutput_2; // 16x16
RWTexture2DArray<float> FurthestHZBArrayOutput_3; // 8x8
RWTexture2DArray<float> FurthestHZBArrayOutput_4; // 4x4
groupshared float SharedMinDeviceZ[TILE_THREAD_GROUP_SIZE_XY * TILE_THREAD_GROUP_SIZE_XY];
groupshared float SharedMaxDeviceZ[TILE_THREAD_GROUP_SIZE_XY * TILE_THREAD_GROUP_SIZE_XY];
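// NOTE: Only the min (furthest) reduction is currently active; the max
// (closest) path is stubbed out below, so SharedMaxDeviceZ is unused.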
#define DIM_FURTHEST 1
#define DIM_CLOSEST 0
void OutputMipLevel(uint MipLevel, uint2 OutputPixelPos, int ArrayIndex, float FurthestDeviceZ, float ClosestDeviceZ)
{
#if DIM_FURTHEST
#define COND_OUTPUT_LEVEL(_level_) \
if (MipLevel == _level_) \
{ \
FurthestHZBArrayOutput_##_level_[uint3(OutputPixelPos, ArrayIndex)] = FurthestDeviceZ; \
return; \
}
#endif
#if DIM_CLOSEST
ClosestHZBOutput_1[uint3(OutputPixelPos, ArrayIndex)] = ClosestDeviceZ;
#endif
COND_OUTPUT_LEVEL(1)
COND_OUTPUT_LEVEL(2)
COND_OUTPUT_LEVEL(3)
COND_OUTPUT_LEVEL(4)
#undef COND_OUTPUT_LEVEL
}
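// Mip 0 of the per-page HZB is written directly in BuildHZBPerPage below;
// OutputMipLevel handles the in-group reduction outputs for mips 1 through 4.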
void BuildHZBPerPage(uint2 SrcPos, uint GroupThreadIndex, uint HZBArrayIndex, uint SrcArrayIndex, inout float4 InOutDeviceZ)
{
// Sample 2x2 footprint - thread group covers 32x32 area
// Merge with static (represented in the InOutDeviceZ)
float4 DeviceZ = max(InOutDeviceZ, Gather4VisZ(SrcPos, SrcArrayIndex));
// return the merged result
InOutDeviceZ = DeviceZ;
float MinDeviceZ = min(min3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);
float MaxDeviceZ = 0.0f;//max(max3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);
//uint LinearGroupThreadID = RemappedGroupThreadIndex.y << LOG2_TILE_THREAD_GROUP_SIZE_XY + RemappedGroupThreadIndex.x;
// Broadcast to all threads (16x16).
SharedMinDeviceZ[GroupThreadIndex] = MinDeviceZ;
// Write base HZB level (half physical page size, e.g., 64x64)
uint2 OutPixelPos = SrcPos >> 1U;
FurthestHZBArrayOutput_0[uint3(OutPixelPos, HZBArrayIndex)] = MinDeviceZ;
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
const uint LaneCount = WaveGetLaneCount();
#else
// Actual wave size is unknown, assume the worst
const uint LaneCount = 0u;
#endif
// Build next 4 levels: 32, 16, 8, 4
UNROLL
for (uint MipLevel = 1U; MipLevel < LOG2_TILE_SIZE_XY; ++MipLevel)
{
// 8x8, 4x4, 2x2, 1x1
const uint OutTileDim = uint(TILE_THREAD_GROUP_SIZE_XY) >> MipLevel;
const uint ReduceBankSize = OutTileDim * OutTileDim;
// More waves than one wrote to LDS, need to sync.
if ((ReduceBankSize << 2u) > LaneCount)
{
GroupMemoryBarrierWithGroupSync();
}
BRANCH
if (GroupThreadIndex < ReduceBankSize)
{
float4 ParentMinDeviceZ;
//float4 ParentMaxDeviceZ;
ParentMinDeviceZ[0] = MinDeviceZ;
//ParentMaxDeviceZ[0] = MaxDeviceZ;
UNROLL
for (uint i = 1; i < 4; i++)
{
uint LDSIndex = GroupThreadIndex + i * ReduceBankSize;
ParentMinDeviceZ[i] = SharedMinDeviceZ[LDSIndex];
//ParentMaxDeviceZ[i] = SharedMaxDeviceZ[LDSIndex];
}
MinDeviceZ = min(min3(ParentMinDeviceZ.x, ParentMinDeviceZ.y, ParentMinDeviceZ.z), ParentMinDeviceZ.w);
//MaxDeviceZ = max(max3(ParentMaxDeviceZ.x, ParentMaxDeviceZ.y, ParentMaxDeviceZ.z), ParentMaxDeviceZ.w);
OutPixelPos = OutPixelPos >> 1;
OutputMipLevel(MipLevel, OutPixelPos, HZBArrayIndex, MinDeviceZ, MaxDeviceZ);
SharedMinDeviceZ[GroupThreadIndex] = MinDeviceZ;
//SharedMaxDeviceZ[GroupThreadIndex] = MaxDeviceZ;
}
}
}
[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void BuildHZBPerPageCS(uint GroupThreadIndex : SV_GroupIndex, uint GroupIndex : SV_GroupID)
{
FPhysicalPageMetaData MetaData;
uint2 SrcTileOffset = GetTileOffset(GroupIndex, PhysicalPagesForHzb, MetaData).xy;
uint2 RemappedGroupThreadIndex = InitialTilePixelPositionForReduction2x2(LOG2_TILE_SIZE_XY - 1U, GroupThreadIndex);
uint2 SrcPos = SrcTileOffset + (RemappedGroupThreadIndex << uint2(1U, 1U));
// Select the static slice as the first build source (src slice 1 if a separate static layer is enabled)
uint FirstSrcArrayIndex = GetVirtualShadowMapStaticArrayIndex();
uint FirstHZBArrayIndex = VirtualShadowMap.StaticHZBArrayIndex;
bool bViewUncached = (MetaData.Flags & VSM_EXTENDED_FLAG_VIEW_UNCACHED) != 0U;
// Uncacheable views always draw to the dynamic slice (slice 0), so there is no reason to build the static slice
if (bViewUncached)
{
FirstSrcArrayIndex = 0u;
FirstHZBArrayIndex = 0u;
}
// 1. Build for the static pages (or for the only slice if there is just one) and keep the 2x2 device Z to pass to the dynamic build (merging depths when both are rebuilt)
float4 StaticDeviceZ = (float4)0.0f;
BuildHZBPerPage(SrcPos, GroupThreadIndex, FirstHZBArrayIndex, FirstSrcArrayIndex, StaticDeviceZ);
// 2. If we have not built the 0th slice, we need to do that also (it must then be the dynamic slice)
// This also covers the case where the HZB only has one slice (and we thus do not build a dynamic one at all).
if (FirstHZBArrayIndex > 0u)
{
BuildHZBPerPage(SrcPos, GroupThreadIndex, 0u, 0u, StaticDeviceZ);
}
}
float4 Gather4(Texture2DArray Texture, SamplerState TextureSampler, uint2 SrcPos, uint ArrayIndex, float2 InvSize)
{
float2 SrcUV = float2(SrcPos) * InvSize;
return Texture.GatherRed(TextureSampler, float3(SrcUV, ArrayIndex), 0);
}
Texture2DArray ParentTextureArrayMip;
SamplerState ParentTextureMipSampler;
float2 InvHzbInputSize;
#define TOP_MIP_TILE_SIZE_XY 4
// Each fetches 2x2 using gather
#define TOP_MIP_TILE_THREAD_GROUP_SIZE_XY (TOP_MIP_TILE_SIZE_XY/2)
void BuildHZBPerPageTop(uint2 SrcPos, uint2 GroupThreadId, uint ArrayIndex)
{
// Sample 2x2 footprint - the 2x2 thread group covers a 4x4 area
float4 DeviceZ = Gather4(ParentTextureArrayMip, ParentTextureMipSampler, SrcPos + uint2(1U, 1U), ArrayIndex, InvHzbInputSize);
float MinDeviceZ = min(min3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);
float MaxDeviceZ = 0.0f;//max(max3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);
//uint LinearGroupThreadID = RemappedGroupThreadIndex.y << LOG2_TILE_THREAD_GROUP_SIZE_XY + RemappedGroupThreadIndex.x;
// Broadcast to all threads.
SharedMinDeviceZ[GroupThreadId.y * TOP_MIP_TILE_THREAD_GROUP_SIZE_XY + GroupThreadId.x] = MinDeviceZ;
// Write first HZB output level (half size)
uint2 OutPixelPos = SrcPos >> 1U;
FurthestHZBArrayOutput_0[uint3(OutPixelPos, ArrayIndex)] = MinDeviceZ;
// Build last level
GroupMemoryBarrierWithGroupSync();
BRANCH
if (all(GroupThreadId.xy == uint2(0U, 0U)))
{
float4 ParentMinDeviceZ;
//float4 ParentMaxDeviceZ;
ParentMinDeviceZ[0] = MinDeviceZ;
//ParentMaxDeviceZ[0] = MaxDeviceZ;
UNROLL
for (uint Index = 1; Index < 4; ++Index)
{
ParentMinDeviceZ[Index] = SharedMinDeviceZ[Index];
//ParentMaxDeviceZ[i] = SharedMaxDeviceZ[LDSIndex];
}
MinDeviceZ = min(min3(ParentMinDeviceZ.x, ParentMinDeviceZ.y, ParentMinDeviceZ.z), ParentMinDeviceZ.w);
//MaxDeviceZ = max(max3(ParentMaxDeviceZ.x, ParentMaxDeviceZ.y, ParentMaxDeviceZ.z), ParentMaxDeviceZ.w);
OutPixelPos = OutPixelPos >> 1;
FurthestHZBArrayOutput_1[uint3(OutPixelPos, ArrayIndex)] = MinDeviceZ;
}
}
[numthreads(TOP_MIP_TILE_THREAD_GROUP_SIZE_XY, TOP_MIP_TILE_THREAD_GROUP_SIZE_XY, 1)]
void BuildHZBPerPageTopCS(uint2 GroupThreadId : SV_GroupThreadID, uint PageInputIndex : SV_GroupID)
{
const uint PageIndex = PhysicalPagesForHzb[PageInputIndex];
uint2 PhysPageAddress = VSMPhysicalIndexToPageAddress(PageIndex);
// Pixel address of tile region for this thread group.
const uint2 SrcTileOffset = PhysPageAddress * uint2(TOP_MIP_TILE_SIZE_XY, TOP_MIP_TILE_SIZE_XY);
uint2 SrcPos = SrcTileOffset + (GroupThreadId << uint2(1U, 1U));
BuildHZBPerPageTop(SrcPos, GroupThreadId, 0u);
const bool bHasSeparateDynamicHZB = VirtualShadowMap.StaticHZBArrayIndex != 0;
if (bHasSeparateDynamicHZB)
{
GroupMemoryBarrierWithGroupSync();
BuildHZBPerPageTop(SrcPos, GroupThreadId, 1u);
}
}
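// Assumed overall flow: BuildHZBPerPageCS reduces each physical page down to a
// 4x4 per-page mip (mips 0..4), then BuildHZBPerPageTopCS consumes that result
// via ParentTextureArrayMip (presumably bound to that 4x4 mip) to produce the
// final two per-page mip levels with a single small group per page.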
uint StatusMessageId;
StructuredBuffer<int> PhysicalPageLists;
[numthreads(1, 1, 1)]
void FeedbackStatusCS()
{
FGPUMessageWriter Mw = GPUMessageBegin(StatusMessageId, 3U);
GPUMessageWriteItem(Mw, VSM_STATUS_MSG_PAGE_MANAGEMENT);
// Write out how many pages are still available
int CountIndex = GetPhysicalPageListStart(PHYSICAL_PAGE_LIST_AVAILABLE) + VirtualShadowMap.MaxPhysicalPages;
GPUMessageWriteItem(Mw, PhysicalPageLists[CountIndex]);
// Write out the resolution lod bias from this frame
GPUMessageWriteItem(Mw, VirtualShadowMap.GlobalResolutionLodBias);
}
int PageListStatsRow;
[numthreads(1, 1, 1)]
void LogPageListStatsCS()
{
float TopMargin = 0.5f;
float ItemX = 0.05f;
FShaderPrintContext Ctx = InitShaderPrintContext(true, float2(ItemX, TopMargin));
Ctx.Pos.y += PageListStatsRow * 0.02f;
Print(Ctx, GetPhysicalPageListCount(0));
for (int i = 1; i < PHYSICAL_PAGE_LIST_COUNT; ++i)
{
Print(Ctx, TEXT(", "));
Print(Ctx, GetPhysicalPageListCount(i));
}
}