// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
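/*=============================================================================
	VirtualShadowMapPageManagement.usf:
	Hierarchical page flag / receiver mask generation and mapped-mip propagation
	for virtual shadow map page tables.
=============================================================================*/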
|
|
|
|
#include "../Common.ush"
|
|
#include "../WaveOpUtil.ush"
|
|
#include "VirtualShadowMapProjectionStructs.ush"
|
|
#include "VirtualShadowMapProjectionCommon.ush"
|
|
#include "VirtualShadowMapPageAccessCommon.ush"
|
|
#include "VirtualShadowMapStats.ush"
|
|
#include "VirtualShadowMapPerPageDispatch.ush"
|
|
|
|
// Per-physical-page metadata, indexed by physical page.
// This file reads Flags, VirtualShadowMapId, MipLevel and PageAddress from each entry.
StructuredBuffer<FPhysicalPageMetaData> PhysicalPageMetaData;

// Encoded page table. PropagateMappedMips both reads it and patches unmapped entries in place.
RWTexture2D<uint> OutPageTable;

// Page rect bounds, one uint4 per (VSM data index * VSM_MAX_MIP_LEVELS + mip level).
// .xy accumulates the minimum page address and .zw the maximum (via InterlockedMin/Max).
// "Uncached" only receives pages whose flags include VSM_FLAG_ANY_UNCACHED;
// "Allocated" receives every page with non-zero page flags.
// NOTE(review): both are presumably cleared to an empty rect before the dispatch - confirm at the call site.
RWStructuredBuffer<uint4> OutUncachedPageRectBounds;
RWStructuredBuffer<uint4> OutAllocatedPageRectBounds;
|
|
|
|
#ifdef GenerateHierarchicalPageFlags
|
|
|
|
// Inputs of GenerateHierarchicalPageFlags.
// Declared as loose shader parameters because they are not yet set up in the UB.
Texture2D<uint> InPageFlags;
Texture2D<uint> InPageReceiverMasks;

// Hierarchical page flag mips.
// OutPageFlagMips_N is written at (page table texel address >> (N + 1)).
RWTexture2D<uint> OutPageFlagMips_0;
RWTexture2D<uint> OutPageFlagMips_1;
RWTexture2D<uint> OutPageFlagMips_2;
RWTexture2D<uint> OutPageFlagMips_3;
RWTexture2D<uint> OutPageFlagMips_4;
RWTexture2D<uint> OutPageFlagMips_5;
// Intentionally disabled: GenerateHierarchicalPageFlags only propagates page flags through levels 0..5.
//RWTexture2D<uint> OutPageFlagMips_6;

// Hierarchical receiver mask mips; each texel holds a 4x4 bit mask in its low 16 bits.
// Level 0 is addressed like the page table and every following level halves the address.
RWTexture2D<uint> OutPageReceiverMaskMips_0;
RWTexture2D<uint> OutPageReceiverMaskMips_1;
RWTexture2D<uint> OutPageReceiverMaskMips_2;
RWTexture2D<uint> OutPageReceiverMaskMips_3;
RWTexture2D<uint> OutPageReceiverMaskMips_4;
RWTexture2D<uint> OutPageReceiverMaskMips_5;
RWTexture2D<uint> OutPageReceiverMaskMips_6;
|
|
|
|
|
|
/**
 * Atomically ORs Flag into the hierarchical flag texel that covers PageOffset at the given H-mip level.
 *
 * @param OutPageFlagMip	Destination H-mip texture.
 * @param Flag				Page flag bits to merge in.
 * @param PageOffset		Page table offset of the source page; its TexelAddress is shifted down by HMipLevel.
 * @param HMipLevel			Number of levels above the source page table texel.
 * @return true when the texel already held exactly Flag before the OR. In that case whichever thread
 *         wrote that value is responsible for continuing up the hierarchy, so the caller can stop.
 */
bool ProcessMipLevel(RWTexture2D<uint> OutPageFlagMip, uint Flag, FVSMPageOffset PageOffset, uint HMipLevel)
{
	// Texel of this H-mip that contains the source page.
	const uint2 HMipTexel = PageOffset.TexelAddress >> HMipLevel;

	uint FlagsBeforeOr = 0;
	InterlockedOr(OutPageFlagMip[HMipTexel], Flag, FlagsBeforeOr);

	return FlagsBeforeOr == Flag;
}
|
|
|
|
|
|
/**
 * Mips a 2x1 pair of 4x4 bit masks (i.e., 8x4 bits) down by 2x in each dimension.
 *
 * Each input mask uses its low 16 bits, 4 bits per row. Every 2x2 block of input bits is OR-ed
 * into one output bit. The result occupies the low 8 bits (two rows of four bits, i.e. half of a
 * 4x4 mask): Mask2x1.x supplies columns 0-1 and Mask2x1.y supplies columns 2-3 of each row.
 *
 * In the diagrams below each letter is the OR of one 2x2 input block:
 *   A,B = .x rows 0-1    C,D = .x rows 2-3    E,F = .y rows 0-1    G,H = .y rows 2-3
 */
uint MipMask8x4(uint2 Mask2x1)
{
	// Process both 16-bit masks at once: .x in the low half, .y in the high half.
	const uint BothMasks = (Mask2x1.y << 16u) | Mask2x1.x;

	// OR each row with the row above it, then each column with its right-hand neighbour.
	const uint RowsMerged = BothMasks | (BothMasks >> 4u);
	const uint BlocksMerged = RowsMerged | (RowsMerged >> 1u);

	// Keep only the bits that hold a complete 2x2 OR; they are still interleaved with gaps:
	// [0000 0H0G 0000 0F0E 0000 0D0C 0000 0B0A]
	const uint Interleaved = BlocksMerged & 0x5050505u;

	// Close the one-bit gaps:
	// [0000 00HG 0000 00FE 0000 00DC 0000 00BA]
	const uint Pairs = (Interleaved | (Interleaved >> 1u)) & 0x3030303u;

	// Fold the second mipped row of each mask next to the first:
	// [0000 00HG 00HG 00FE 00FE 00DC 00DC 00BA]
	const uint Folded = Pairs | (Pairs >> 4u);

	// Interleave the .y result with the .x result; only the low byte is meaningful afterwards:
	// [.... .... .... .... .... .... HGDC FEBA]
	const uint Combined = Folded | (Folded >> 14u);

	return Combined & 0xFFu;
}
|
|
|
|
/**
 * Mips a 2x2 footprint of 4x4 bit masks (i.e., 8x8 bits) into a single 4x4 mask.
 *
 * The component layout is dictated by gather:
 *   x: (-, +)   y: (+, +)   z: (+, -)   w: (-, -)
 * so (w, z) form the low-Y half and (x, y) the high-Y half of the footprint.
 */
uint MipMask8x8(uint4 Mask2x2)
{
	// Rows 0-1 of the result come from the low-Y pair, rows 2-3 from the high-Y pair.
	const uint LowerRows = MipMask8x4(Mask2x2.wz);
	const uint UpperRows = MipMask8x4(Mask2x2.xy);

	return LowerRows | (UpperRows << 8u);
}
|
|
|
|
/**
 * Mips one 4x4 mask down to 2x2 and places it in the quadrant of a 4x4 mask selected by FracPos.
 *
 * @param Mask4x4	Source mask (low 16 bits, 4 bits per row).
 * @param FracPos	Quadrant inside the parent mask; callers pass (address & 1) per axis.
 * @return 4x4 mask in which only the selected 2x2 quadrant can be non-zero.
 */
uint MipMask4x4(uint Mask4x4, uint2 FracPos)
{
	// The second input mask is empty, so only bits 0-1 (row 0) and 4-5 (row 1) can be set;
	// the 0x33 mask is kept as a defensive measure.
	const uint Quadrant = MipMask8x4(uint2(Mask4x4, 0u)) & 0x33u;

	// Two columns (2 bits) per X step, two rows (8 bits) per Y step.
	const uint QuadrantShift = FracPos.x * 2u + FracPos.y * 8u;

	return Quadrant << QuadrantShift;
}
|
|
|
|
/**
 * Tests whether any set bit of an 8x8 mask (stored as a gathered 2x2 of 4x4 masks) lies inside
 * the inclusive rect [Min, Max].
 *
 * Assumes Min/Max are already clamped to the 8x8 range (0..7) and Min <= Max.
 * Component layout matches gather: x: (-, +)  y: (+, +)  z: (+, -)  w: (-, -)
 *
 * NOTE(review): relies on BitFieldMaskU32(NumBits, Offset) producing NumBits set bits starting at
 * bit Offset - confirm against its definition.
 */
bool IntersectMask8x8(uint4 Mask2x2, uint2 Min, uint2 Max)
{
	// Build one 8-bit wide column mask and split it, rather than deriving a range per 4x4 sub-mask.
	const uint ColumnBits = BitFieldMaskU32(Max.x - Min.x + 1u, Min.x);
	const uint LowColumns = ColumnBits & 0xFu;
	const uint HighColumns = ColumnBits >> 4u;

	// Row selection: one nibble per row, so row indices are scaled by 4.
	// The bit count is (span + 1) rather than (span + 4) to avoid overflow; only bit 0 of each
	// nibble is sampled below, so the trailing bits of the last row are not needed.
	const uint FirstRowBit = Min.y << 2u;
	const uint LastRowBit = Max.y << 2u;
	const uint RowBits = BitFieldMaskU32(LastRowBit - FirstRowBit + 1u, FirstRowBit);
	const uint LowRowSelect = RowBits & 0x1111u;
	const uint HighRowSelect = (RowBits >> 16u) & 0x1111u;

	// Multiplying the 4-bit column mask by the row selector replicates it into every selected row.
	const uint4 Coverage = uint4(
		LowColumns * HighRowSelect,		// x: (-, +)
		HighColumns * HighRowSelect,	// y: (+, +)
		HighColumns * LowRowSelect,		// z: (+, -)
		LowColumns * LowRowSelect);		// w: (-, -)

	return any((Mask2x2 & Coverage) != 0u);
}
|
|
|
|
|
|
/**
 * Performs one step of the receiver mask mip chain: shrinks Mask4x4 into the quadrant selected by
 * the low bit of MaskAddress, halves MaskAddress, and ORs the result into OutPageReceiverMaskMip.
 *
 * Mask4x4 and MaskAddress are updated in place so the call can be chained for the next level.
 */
void ProcessMaskMip4x4(RWTexture2D<uint> OutPageReceiverMaskMip, inout uint Mask4x4, inout uint2 MaskAddress)
{
	// Position of this texel inside its 2x2 parent footprint.
	const uint2 QuadrantInParent = MaskAddress & 1u;
	Mask4x4 = MipMask4x4(Mask4x4, QuadrantInParent);

	MaskAddress = MaskAddress >> 1u;

	// The previous texel value is not used.
	uint IgnoredPreviousMask = 0u;
	InterlockedOr(OutPageReceiverMaskMip[MaskAddress], Mask4x4, IgnoredPreviousMask);
}
|
|
|
|
/**
 * Gathers a 2x2 footprint of receiver masks from InPageReceiverMasks, given the upper-left coordinate.
 * NOTE(review): the trailing arguments (0u, 0.5f) are forwarded verbatim to GatherPageTable; they are
 * presumably the mip level and a texel-centre offset - confirm against GatherPageTable's signature.
 */
uint4 GatherReceiverMask(uint2 TexelCoord)
{
	const uint4 Footprint = GatherPageTable(InPageReceiverMasks, TexelCoord, 0u, 0.5f);
	return Footprint;
}
|
|
|
|
/**
 * One thread per physical page metadata entry.
 *
 * For every physical page whose page table flags are non-zero this:
 *  1. grows the allocated (and, if anything is uncached, the uncached) page rect bounds,
 *  2. ORs the page flags into the hierarchical page flag mips,
 *  3. optionally builds the receiver mask mip chain.
 */
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void GenerateHierarchicalPageFlags(uint ThreadId : SV_DispatchThreadID)
{
	// Early out any overflowing threads.
	if (ThreadId >= VirtualShadowMap.MaxPhysicalPages)
	{
		return;
	}

	FPhysicalPageMetaData MetaData = PhysicalPageMetaData[ThreadId];
	if (MetaData.Flags == 0U)
	{
		return;
	}

	FVirtualShadowMapHandle VirtualShadowMapHandle = FVirtualShadowMapHandle::MakeFromId(MetaData.VirtualShadowMapId);
	FVSMPageOffset GlobalPageTableEntryIndex = CalcPageOffset(VirtualShadowMapHandle, MetaData.MipLevel, MetaData.PageAddress);

	const uint Flag = InPageFlags[GlobalPageTableEntryIndex.GetResourceAddress()] & VSM_PAGE_FLAGS_BITS_MASK;
	if (Flag == 0u)
	{
		return;
	}

	const bool bSinglePage = VirtualShadowMapHandle.IsSinglePage();
	const uint2 PageAddress = MetaData.PageAddress;

	uint MipLevel = MetaData.MipLevel;
	if (bSinglePage)
	{
		// Note: we need to set the page rect bounds for the last mip level, since that is the only one that is valid,
		// logically, for a single-page VSM. This is important since this is what filters all the rendering that would
		// otherwise try to draw stuff to the other levels.
		MipLevel = VSM_MAX_MIP_LEVELS - 1U;
	}

	// Compute the min/max rect of active pages
	const uint PageBoundIndex = VirtualShadowMapHandle.GetDataIndex() * VSM_MAX_MIP_LEVELS + MipLevel;
	InterlockedMin(OutAllocatedPageRectBounds[PageBoundIndex].x, PageAddress.x);
	InterlockedMin(OutAllocatedPageRectBounds[PageBoundIndex].y, PageAddress.y);
	InterlockedMax(OutAllocatedPageRectBounds[PageBoundIndex].z, PageAddress.x);
	InterlockedMax(OutAllocatedPageRectBounds[PageBoundIndex].w, PageAddress.y);

	// Only add to the rendering page rect bounds if there is anything uncached
	if ((Flag & VSM_FLAG_ANY_UNCACHED) != 0)
	{
		InterlockedMin(OutUncachedPageRectBounds[PageBoundIndex].x, PageAddress.x);
		InterlockedMin(OutUncachedPageRectBounds[PageBoundIndex].y, PageAddress.y);
		InterlockedMax(OutUncachedPageRectBounds[PageBoundIndex].z, PageAddress.x);
		InterlockedMax(OutUncachedPageRectBounds[PageBoundIndex].w, PageAddress.y);
	}

	// No hierarchy to propagate to for single-page pages.
	if (bSinglePage)
	{
		return;
	}

	// Loop over H flag levels, this builds a mip pyramid over _each_ mip level in the page table;
	// the 0-th level in this hierarchy is the page table mip level itself.
	const uint MaxHLevel = VSM_MAX_MIP_LEVELS - MipLevel;

	// Manually unrolled to work with the "array" of textures.
	// Propagation stops as soon as ProcessMipLevel reports that another thread already covers the rest.
	bool bDone = false;
#define PROCESS_LEVEL(_Level_) \
	if (!bDone && _Level_ < MaxHLevel) { \
		bDone = ProcessMipLevel(OutPageFlagMips_##_Level_, Flag, GlobalPageTableEntryIndex, _Level_ + 1); \
	}

	PROCESS_LEVEL(0);
	PROCESS_LEVEL(1);
	PROCESS_LEVEL(2);
	PROCESS_LEVEL(3);
	PROCESS_LEVEL(4);
	PROCESS_LEVEL(5);

#undef PROCESS_LEVEL

	BRANCH
	if (VirtualShadowMap.bEnableReceiverMasks)
	{
		// The level 0 receiver masks are stored as 2x2 masks per page table entry.
		uint2 MaskAddress = GlobalPageTableEntryIndex.GetResourceAddress() * 2u;

		// 1. Load the 2x2 masks from level 0 and reduce them to a single 4x4 mask.
		const uint4 Mask8x8 = GatherReceiverMask(MaskAddress);
		uint Mask4x4 = MipMask8x8(Mask8x8);

		// Store back at page table resolution.
		MaskAddress = MaskAddress >> 1u;
		uint PreviousMask4x4 = 0u;
		InterlockedOr(OutPageReceiverMaskMips_0[MaskAddress], Mask4x4, PreviousMask4x4);
		// early out if PreviousMask4x4 == Mask4x4? Will it ever?

		// 2. Propagate upwards; from here on it is all self-similar as we shrink a 4x4 -> 2x2 word and store back.
		ProcessMaskMip4x4(OutPageReceiverMaskMips_1, Mask4x4, MaskAddress);
		ProcessMaskMip4x4(OutPageReceiverMaskMips_2, Mask4x4, MaskAddress);
		ProcessMaskMip4x4(OutPageReceiverMaskMips_3, Mask4x4, MaskAddress);
		ProcessMaskMip4x4(OutPageReceiverMaskMips_4, Mask4x4, MaskAddress);
		ProcessMaskMip4x4(OutPageReceiverMaskMips_5, Mask4x4, MaskAddress);
		ProcessMaskMip4x4(OutPageReceiverMaskMips_6, Mask4x4, MaskAddress);
	}
}
|
|
|
|
#endif // GenerateHierarchicalPageFlags
|
|
|
|
#ifdef PropagateMappedMips
|
|
|
|
|
|
/**
 * Per-page dispatch functor: for every page table entry that is not mapped at its own level,
 * writes a reference to the nearest coarser mapped page (physical address + level offset).
 *
 *  - Directional lights: each clipmap level is a separate VSM, so coarser clipmap levels are searched.
 *  - Local lights: coarser mips of the same VSM are searched.
 *
 * Single-page VSMs are skipped entirely.
 */
struct FMappedMipPropagator
{
	void Run(FPerPageDispatchSetup Setup)
	{
		if (Setup.VirtualShadowMapHandle.IsSinglePage())
		{
			return;
		}

		FVirtualShadowMapProjectionShaderData ProjectionData = GetVirtualShadowMapProjectionData(Setup.VirtualShadowMapHandle);

		if (ProjectionData.LightType == LIGHT_TYPE_DIRECTIONAL)
		{
			// Scale applied to ClipmapCornerRelativeOffset to obtain a page offset (loop invariant).
			const int OffsetScale = (VSM_LEVEL0_DIM_PAGES_XY >> 2);

			const uint LoopEndXY = Setup.GetLoopEnd(0);
			for (uint PageY = Setup.LoopStart.y; PageY < LoopEndXY; PageY += Setup.LoopStride)
			{
				for (uint PageX = Setup.LoopStart.x; PageX < LoopEndXY; PageX += Setup.LoopStride)
				{
					const uint2 Level0Page = uint2(PageX, PageY);

					// Directional lights propagate pages to their coarser/larger clipmap levels (and only use mip0 pages)
					// Each clipmap level is a separate VSM, so we gather any mapped coarser pages as necessary and write only our own page output
					// There's also technically a race similar to below with other threads writing the PT data we are reading,
					// but it's still deterministic as long as we only look at pages with "bThisLODValid".
					// There's some redundant work of course, but this shader is pretty cheap overall

					FVSMPageOffset Page0Offset = CalcPageOffset(Setup.VirtualShadowMapHandle, 0, Level0Page);
					FShadowPhysicalPage pPage0 = ShadowDecodePageTable(OutPageTable[Page0Offset.GetResourceAddress()]);

					BRANCH
					if (!pPage0.bThisLODValid)
					{
						int2 BaseOffset = OffsetScale * ProjectionData.ClipmapCornerRelativeOffset;
						int2 BasePage = int2(Level0Page) - BaseOffset;

						// Search for first mapped page past this one
						uint RemainingLevels = ProjectionData.ClipmapLevelCountRemaining;
						for (uint ClipmapOffset = 1; ClipmapOffset < RemainingLevels; ++ClipmapOffset)
						{
							FVirtualShadowMapHandle ClipmapLevelHandle = Setup.VirtualShadowMapHandle.MakeOffset(int(ClipmapOffset));
							FVirtualShadowMapProjectionShaderData LevelProjectionData = GetVirtualShadowMapProjectionData(ClipmapLevelHandle);
							int2 LevelOffset = OffsetScale * LevelProjectionData.ClipmapCornerRelativeOffset;

							// Page in the coarser clipmap level that covers this level-0 page.
							int2 LevelPage = (BasePage + (LevelOffset << ClipmapOffset)) >> ClipmapOffset;

							// TODO: When the address is invalid we're off the edge... can this ever even happen in practice given the construction?
							if (IsVirtualShadowMapPageAddressValid(LevelPage, 0))
							{
								FVSMPageOffset LevelPageOffset = CalcPageOffset(ClipmapLevelHandle, 0, uint2(LevelPage));
								FShadowPhysicalPage pPage = ShadowDecodePageTable(OutPageTable[LevelPageOffset.GetResourceAddress()]);
								if (pPage.bThisLODValid)
								{
									OutPageTable[Page0Offset.GetResourceAddress()] = ShadowEncodePageTable(pPage.PhysicalAddress, ClipmapOffset);
									break;
								}
							}
						}
					}
				}
			}
		}
		else
		{
			int MinLevel = Setup.MipLevelStart;
			const uint LoopEndXY = Setup.GetLoopEnd(MinLevel);
			for (uint PageY = Setup.LoopStart.y; PageY < LoopEndXY; PageY += Setup.LoopStride)
			{
				for (uint PageX = Setup.LoopStart.x; PageX < LoopEndXY; PageX += Setup.LoopStride)
				{
					const uint2 MinLevelPage = uint2(PageX, PageY);

					// Local lights propagate pages to their coarser mips.
					// Walk from the coarsest mip down to MinLevel, remembering the most recent mapped page.
					int MappedPageLevel = -1;
					uint2 MappedPhysicalAddress = 0;

					for (int Level = (VSM_MAX_MIP_LEVELS - 1); Level >= MinLevel; --Level)
					{
						int LevelDelta = Level - MinLevel;
						uint2 vPage = MinLevelPage >> LevelDelta;
						FVSMPageOffset PageOffset = CalcPageOffset(Setup.VirtualShadowMapHandle, Level, vPage);
						FShadowPhysicalPage pPage = ShadowDecodePageTable(OutPageTable[PageOffset.GetResourceAddress()]);

						BRANCH
						if (pPage.bThisLODValid)
						{
							// This page is mapped, so leave it alone and propagate downwards
							MappedPageLevel = Level;
							MappedPhysicalAddress = pPage.PhysicalAddress;
						}
						else if (MappedPageLevel >= 0)
						{
							// This page is not mapped; replace it with our suitably offset parent mapped page
							// Ensure only one thread writes each value to avoid races, but we read on all threads as the broadcast
							// Note that this can race with the other threads reading this value, but since bThisLODValid will
							// always be false on these updated pages the values will be ignored. As long as the writes to the page
							// table are atomic (currently a single DWORD), this is safe.
							if (all((vPage << LevelDelta) == MinLevelPage))
							{
								uint MipOffset = MappedPageLevel - Level;
								OutPageTable[PageOffset.GetResourceAddress()] = ShadowEncodePageTable(MappedPhysicalAddress, MipOffset);
							}
						}
					}
				}
			}
		}
	}
};
|
|
|
|
/**
 * Entry point for mapped-mip propagation.
 * Launched as 2D thread groups of PER_PAGE_THREAD_GROUP_SIZE_XY x PER_PAGE_THREAD_GROUP_SIZE_XY threads;
 * the mapping from dispatch thread to VSM and page range is handled by FPerPageDispatchSetup, which is
 * handed an FMappedMipPropagator to do the per-page work (presumably by calling its Run method - see
 * VirtualShadowMapPerPageDispatch.ush).
 *
 * This is effectively just a big broadcast operation. There are more efficient ways to do this with
 * fewer threads and wave ops, but given the page counts just relying on memory coalescing is
 * good enough for now.
 */
[numthreads(PER_PAGE_THREAD_GROUP_SIZE_XY, PER_PAGE_THREAD_GROUP_SIZE_XY, 1)]
void PropagateMappedMips(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	FMappedMipPropagator Propagator;
	FPerPageDispatchSetup Setup;
	Setup.Execute(DispatchThreadId, Propagator);
}
|
|
|
|
#endif // PropagateMappedMips
|