// Copyright Epic Games, Inc. All Rights Reserved. #include "VirtualShadowMapCacheManager.h" #include "VirtualShadowMapClipmap.h" #include "VirtualShadowMapShaders.h" #include "RendererModule.h" #include "RenderGraphUtils.h" #include "RHIGPUReadback.h" #include "ScenePrivate.h" #include "HAL/FileManager.h" #include "NaniteDefinitions.h" #include "DataDrivenShaderPlatformInfo.h" #include "PrimitiveSceneInfo.h" #include "ShaderPrint.h" #include "RendererOnScreenNotification.h" #include "SystemTextures.h" #include "Shadows/ShadowScene.h" #include "ProfilingDebugging/CountersTrace.h" #define LOCTEXT_NAMESPACE "VirtualShadowMapCacheManager" CSV_DECLARE_CATEGORY_EXTERN(VSM); UE_TRACE_CHANNEL_EXTERN(VSMChannel); static int32 GVSMAccumulateStats = 0; static FAutoConsoleVariableRef CVarAccumulateStats( TEXT("r.Shadow.Virtual.AccumulateStats"), GVSMAccumulateStats, TEXT("When nonzero, VSM stats will be collected over multiple frames and written to a CSV file output to the Saved/Profiling directory.\n") TEXT(" If set to a number N > 0 it will auto disable and write the result after N frames, if < 0 it must be manually turned off by setting back to 0."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarCacheVirtualSMs( TEXT("r.Shadow.Virtual.Cache"), 1, TEXT("Turn on to enable caching"), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarDrawInvalidatingBounds( TEXT("r.Shadow.Virtual.Cache.DrawInvalidatingBounds"), 0, TEXT("Turn on debug render cache invalidating instance bounds, heat mapped by number of pages invalidated.\n") TEXT(" 1 = Draw all bounds.\n") TEXT(" 2 = Draw those invalidating static cached pages only\n") TEXT(" 3 = Draw those invalidating dynamic cached pages only"), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarCacheVsmUseHzb( TEXT("r.Shadow.Virtual.Cache.InvalidateUseHZB"), 1, TEXT(" When enabled, instances invalidations are tested against the HZB. Instances that are fully occluded will not cause page invalidations."), ECVF_RenderThreadSafe); int32 GClipmapPanning = 1; FAutoConsoleVariableRef CVarEnableClipmapPanning( TEXT("r.Shadow.Virtual.Cache.ClipmapPanning"), GClipmapPanning, TEXT("Enable support for panning cached clipmap pages for directional lights, allowing re-use of cached data when the camera moves. Keep this enabled outside of debugging."), ECVF_RenderThreadSafe ); static int32 GVSMCacheDeformableMeshesInvalidate = 1; FAutoConsoleVariableRef CVarCacheDeformableMeshesInvalidate( TEXT("r.Shadow.Virtual.Cache.DeformableMeshesInvalidate"), GVSMCacheDeformableMeshesInvalidate, TEXT("If enabled, Primitive Proxies that are marked as having deformable meshes (HasDeformableMesh() == true) cause invalidations regardless of whether their transforms are updated."), ECVF_RenderThreadSafe); static int32 GVSMCacheDebugSkipRevealedPrimitivesInvalidate = 0; FAutoConsoleVariableRef CVarCacheDebugSkipRevealedPrimitivesInvalidate( TEXT("r.Shadow.Virtual.Cache.DebugSkipRevealedPrimitivesInvalidation"), GVSMCacheDebugSkipRevealedPrimitivesInvalidate, TEXT("Debug skip invalidation of revealed Non-Nanite primitives, i.e. they go from being culled on the CPU to unculled."), ECVF_RenderThreadSafe); // NOTE: At this point it should be fairly safe and minimal performance impact to have this // "functionally unlimited", but we'll leave the default somewhat lower as a small mitigation // for unforeseen issues. int32 GVSMMaxPageAgeSinceLastRequest = 1000; FAutoConsoleVariableRef CVarVSMMaxPageAgeSinceLastRequest( TEXT("r.Shadow.Virtual.Cache.MaxPageAgeSinceLastRequest"), GVSMMaxPageAgeSinceLastRequest, TEXT("The maximum number of frames to allow cached pages that aren't requested in the current frame to live. 0=disabled."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarMaxLightAgeSinceLastRequest( TEXT("r.Shadow.Virtual.Cache.MaxLightAgeSinceLastRequest"), 10, TEXT("The maximum number of frames to allow lights (and their associated pages) that aren't present in the current frame to live in the cache.\n") TEXT("Larger values can allow pages from offscreen local lights to live longer, but can also increase various page table management overheads."), ECVF_RenderThreadSafe); static TAutoConsoleVariable CVarFramesStaticThreshold( TEXT("r.Shadow.Virtual.Cache.FramesStaticThreshold"), 100, TEXT("Number of frames without an invalidation before an object will transition to static caching."), ECVF_RenderThreadSafe); static TAutoConsoleVariable CVarVSMReservedResource( TEXT("r.Shadow.Virtual.AllocatePagePoolAsReservedResource"), 1, TEXT("Allocate VSM page pool as a reserved/virtual texture, backed by N small physical memory allocations to reduce fragmentation."), ECVF_RenderThreadSafe ); TAutoConsoleVariable CVarVSMDynamicResolutionMaxLodBias( TEXT("r.Shadow.Virtual.DynamicRes.MaxResolutionLodBias"), 2.0f, TEXT("As memory or compute-time cost limits are approached, VSM resolution ramps down by biasing the LOD up, similar to 'ResolutionLodBiasDirectional'.\n") TEXT("Memory pressure is based on page allocation compared to pool capacity.\n") TEXT("This is the maximum LOD bias to clamp to for global dynamic shadow resolution reduction. 0 = disabled"), ECVF_RenderThreadSafe ); TAutoConsoleVariable CVarVSMDynamicResolutionMaxLodBiasDirectional( TEXT("r.Shadow.Virtual.DynamicRes.MaxComputeResolutionLodBiasDirectional"), 99999.0f, TEXT("As compute-time cost limits are approached, VSM resolution ramps down by biasing the LOD up.\n") TEXT("The maximum LOD bias that is applied is min(this variable, MaxResolutionLodBias).\n") TEXT("This variable applies to directional lights only."), ECVF_RenderThreadSafe ); TAutoConsoleVariable CVarVSMDynamicResolutionMaxLodBiasLocal( TEXT("r.Shadow.Virtual.DynamicRes.MaxComputeResolutionLodBiasLocal"), 99999.0f, TEXT("As compute-time cost limits are approached, VSM resolution ramps down by biasing the LOD up.\n") TEXT("The maximum LOD bias that is applied is min(this variable, MaxResolutionLodBias).\n") TEXT("This variable applies to local lights only."), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarVSMDynamicResolutionMaxPagePoolLoadFactor( TEXT("r.Shadow.Virtual.DynamicRes.MaxPagePoolLoadFactor"), 0.85f, TEXT("If allocation exceeds this factor of total page pool capacity, shadow resolution will be biased downwards. 0 = disabled"), ECVF_RenderThreadSafe ); static TAutoConsoleVariable CVarClipmapWPODisableDistanceInvalidate( TEXT("r.Shadow.Virtual.Clipmap.WPODisableDistance.InvalidateOnScaleChange"), 0, TEXT("When enabled, forces an invalidation of clipmap levels when the distance disable clipmap level shifts due to resolution or FOV changes.\n") TEXT("This can sometimes make things visually more consistent but can also introduce unpredictable performance spikes, so it is disabled by default."), ECVF_Scalability | ECVF_RenderThreadSafe ); int32 GVSMLightRadiusInvalidationCulling = 1; FAutoConsoleVariableRef CVarVSMLightRadiusCulling( TEXT("r.Shadow.Virtual.Cache.CPUCullInvalidationsOutsideLightRadius"), GVSMLightRadiusInvalidationCulling, TEXT("CPU culls invalidations that are outside a local light's radius."), ECVF_RenderThreadSafe ); int32 GVSMAllowScreenOverflowMessages = 1; FAutoConsoleVariableRef CVarVSMAllowScreenOverflowMessages( TEXT("r.Shadow.Virtual.AllowScreenOverflowMessages"), GVSMAllowScreenOverflowMessages, TEXT("Can be used to disable on-screen VSM-related overflow messages. Even with the messages disabled, overflows will still be logged and can result in visual corruption."), ECVF_RenderThreadSafe ); static const char *VirtualShadowMap_StatNames[] = { "REQUESTED_THIS_FRAME_PAGES", "STATIC_CACHED_PAGES", "STATIC_INVALIDATED_PAGES", "DYNAMIC_CACHED_PAGES", "DYNAMIC_INVALIDATED_PAGES", "EMPTY_PAGES", "NON_NANITE_INSTANCES_TOTAL", "NON_NANITE_INSTANCES_DRAWN", "NON_NANITE_INSTANCES_HZB_CULLED", "NON_NANITE_INSTANCES_PAGE_MASK_CULLED", "NON_NANITE_INSTANCES_EMPTY_RECT_CULLED", "NON_NANITE_INSTANCES_FRUSTUM_CULLED", "NUM_PAGES_TO_MERGE", "NUM_PAGES_TO_CLEAR", "NUM_HZB_PAGES_BUILT", "ALLOCATED_NEW", "NANITE_CLUSTERS_HW", "NANITE_CLUSTERS_SW", "NANITE_TRIANGLES", "NANITE_INSTANCES_MAIN", "NANITE_INSTANCES_POST", "WPO_CONSIDERED_PAGES", "OVERFLOW_FLAGS", "TMP_1", "TMP_2", "TMP_3", }; static_assert(UE_ARRAY_COUNT(VirtualShadowMap_StatNames) == VSM_STAT_NUM, "Stat text name array length mismatch!"); namespace Nanite { extern bool IsStatFilterActive(const FString& FilterName); } void FVirtualShadowMapCacheEntry::UpdateClipmapLevel( FVirtualShadowMapArray& VirtualShadowMapArray, const FVirtualShadowMapPerLightCacheEntry& PerLightEntry, int32 VirtualShadowMapId, FInt64Point PageSpaceLocation, double LevelRadius, double ViewCenterZ, double ViewRadiusZ, double WPODistanceDisableThresholdSquared) { int32 PrevVirtualShadowMapId = CurrentVirtualShadowMapId; FInt64Point PrevPageSpaceLocation = Clipmap.PageSpaceLocation; UpdatePrevHZBMetadata(); bool bCacheValid = (PrevVirtualShadowMapId != INDEX_NONE); if (bCacheValid && GClipmapPanning == 0) { if (PageSpaceLocation.X != PrevPageSpaceLocation.X || PageSpaceLocation.Y != PrevPageSpaceLocation.Y) { bCacheValid = false; //UE_LOG(LogRenderer, Display, TEXT("Invalidated clipmap level (VSM %d) with page space location %d,%d (Prev %d, %d)"), // VirtualShadowMapId, PageSpaceLocation.X, PageSpaceLocation.Y, PrevPageSpaceLocation.X, PrevPageSpaceLocation.Y); } } // Invalidate if the new Z radius strayed too close/outside the guardband of the cached shadow map if (bCacheValid) { double DeltaZ = FMath::Abs(ViewCenterZ - Clipmap.ViewCenterZ); if ((DeltaZ + LevelRadius) > 0.9 * Clipmap.ViewRadiusZ) { bCacheValid = false; //UE_LOG(LogRenderer, Display, TEXT("Invalidated clipmap level (VSM %d) due to depth range movement"), VirtualShadowMapId); } } // Not valid if it was never rendered bCacheValid = bCacheValid && (PerLightEntry.Prev.RenderedFrameNumber >= 0); // Not valid if radius has changed bCacheValid = bCacheValid && (ViewRadiusZ == Clipmap.ViewRadiusZ); // Not valid if WPO threshold has changed if (bCacheValid && CVarClipmapWPODisableDistanceInvalidate.GetValueOnRenderThread() != 0 && (WPODistanceDisableThresholdSquared != Clipmap.WPODistanceDisableThresholdSquared)) { bCacheValid = false; // Only warn once per change... when this changes it will hit all of them if (PerLightEntry.ShadowMapEntries.Num() > 0 && PerLightEntry.ShadowMapEntries[0].CurrentVirtualShadowMapId == VirtualShadowMapId) { UE_LOG(LogRenderer, Display, TEXT("Invalidated clipmap due to WPO threshold change. This can occur due to resolution or FOV changes."), VirtualShadowMapId); } } if (!bCacheValid) { Clipmap.ViewCenterZ = ViewCenterZ; Clipmap.ViewRadiusZ = ViewRadiusZ; Clipmap.WPODistanceDisableThresholdSquared = WPODistanceDisableThresholdSquared; } else { // NOTE: Leave the view center and radius where they were previously for the cached page FInt64Point CurrentToPreviousPageOffset(PageSpaceLocation - PrevPageSpaceLocation); VirtualShadowMapArray.UpdateNextData(PrevVirtualShadowMapId, VirtualShadowMapId, FInt32Point(CurrentToPreviousPageOffset)); } CurrentVirtualShadowMapId = VirtualShadowMapId; Clipmap.PageSpaceLocation = PageSpaceLocation; } void FVirtualShadowMapCacheEntry::Update( FVirtualShadowMapArray& VirtualShadowMapArray, const FVirtualShadowMapPerLightCacheEntry& PerLightEntry, int32 VirtualShadowMapId) { // Swap previous frame data over. int32 PrevVirtualShadowMapId = CurrentVirtualShadowMapId; UpdatePrevHZBMetadata(); bool bCacheValid = (PrevVirtualShadowMapId != INDEX_NONE); // Not valid if it was never rendered bCacheValid = bCacheValid && (PerLightEntry.Prev.RenderedFrameNumber >= 0); if (bCacheValid) { // Invalidate on transition between single page and full bool bPrevSinglePage = FVirtualShadowMapArray::IsSinglePage(PrevVirtualShadowMapId); bool bCurrentSinglePage = FVirtualShadowMapArray::IsSinglePage(VirtualShadowMapId); if (bPrevSinglePage != bCurrentSinglePage) { bCacheValid = false; } } if (bCacheValid) { // Update previous/next frame mapping if we have a valid cached shadow map VirtualShadowMapArray.UpdateNextData(PrevVirtualShadowMapId, VirtualShadowMapId); } CurrentVirtualShadowMapId = VirtualShadowMapId; // Current HZB metadata gets updated during rendering } void FVirtualShadowMapCacheEntry::SetHZBViewParams(Nanite::FPackedViewParams& OutParams) { OutParams.PrevTargetLayerIndex = PrevHZBMetadata.TargetLayerIndex; OutParams.PrevViewMatrices = PrevHZBMetadata.ViewMatrices; OutParams.Flags |= NANITE_VIEW_FLAG_HZBTEST; } void FVirtualShadowMapCacheEntry::UpdateHZBMetadata(const FViewMatrices& ViewMatrices, const FIntRect& ViewRect, uint32 TargetLayerIndex) { CurrentHZBMetadata.ViewRect = ViewRect; CurrentHZBMetadata.TargetLayerIndex = TargetLayerIndex; // As this structure has grown a lot, we try to avoid updating it if we don't have to // For shadows, this should be a reasonable test of the relevant parameters that could change // TODO: This test is very similar to the tests we do for whether caching is valid; could probably share the two better with some refactoring CurrentHZBMetadata.bMatricesDirty = ViewMatrices.GetPreViewTranslation() != CurrentHZBMetadata.ViewMatrices.GetPreViewTranslation() || ViewMatrices.GetTranslatedViewProjectionMatrix() != CurrentHZBMetadata.ViewMatrices.GetTranslatedViewProjectionMatrix(); if (CurrentHZBMetadata.bMatricesDirty) { CurrentHZBMetadata.ViewMatrices = ViewMatrices; } } void FVirtualShadowMapPerLightCacheEntry::UpdateClipmap( const FVector& LightDirection, int FirstLevel, bool bForceInvalidate, bool bInUseReceiverMask) { Prev.RenderedFrameNumber = FMath::Max(Prev.RenderedFrameNumber, Current.RenderedFrameNumber); Current.RenderedFrameNumber = -1; if (bForceInvalidate || LightDirection != ClipmapCacheKey.LightDirection || FirstLevel != ClipmapCacheKey.FirstLevel) { Prev.RenderedFrameNumber = -1; } ClipmapCacheKey.LightDirection = LightDirection; ClipmapCacheKey.FirstLevel = FirstLevel; // If we swap receiver mask on/off we also may need to invalidate due to incomplete pages if (bInUseReceiverMask != bUseReceiverMask) { Prev.RenderedFrameNumber = -1; bUseReceiverMask = bInUseReceiverMask; } // If the cache was invalidated for any reason (light movement, etc), we render the next frame as // uncached as this is more efficient. Thus continuously moving lights will automatically take the // uncached path always without needing to explicitly set ForceInvalidateDirectional. After one static // frame though we will swap back so that we can begin establishing static cache data. Thus it is still // useful to explicitly set ForceInvalidateDirectional in cases where the light is invalidating frequently // but not every single frame to keep the performance consistent. bool bNewIsUncached = Prev.RenderedFrameNumber < 0; // On transition between uncached <-> cached we must invalidate since the static pages may not be initialized if (bNewIsUncached != bIsUncached) { Prev.RenderedFrameNumber = -1; bIsUncached = bNewIsUncached; } LightOrigin = FVector(0, 0, 0); LightRadius = -1.0f; } void FVirtualShadowMapPerLightCacheEntry::UpdateLocal( const FProjectedShadowInitializer& InCacheKey, const FVector& NewLightOrigin, const float NewLightRadius, bool bNewIsDistantLight, bool bForceInvalidate, bool bAllowInvalidation, bool bInUseReceiverMask) { // TODO: The logic in this function is needlessly convoluted... clean up Prev.RenderedFrameNumber = FMath::Max(Prev.RenderedFrameNumber, Current.RenderedFrameNumber); Prev.ScheduledFrameNumber = FMath::Max(Prev.ScheduledFrameNumber, Current.ScheduledFrameNumber); const bool bLightMoved = LocalCacheKey.PreShadowTranslation != InCacheKey.PreShadowTranslation || LocalCacheKey.WorldToLight != InCacheKey.WorldToLight; LocalCacheKey.PreShadowTranslation = InCacheKey.PreShadowTranslation; LocalCacheKey.WorldToLight = InCacheKey.WorldToLight; // Check cache validity based of shadow setup // If it is a distant light, we want to let the time-share perform the invalidation. if (bForceInvalidate || (bAllowInvalidation && bLightMoved)) { Prev.RenderedFrameNumber = -1; //UE_LOG(LogRenderer, Display, TEXT("Invalidated!")); } // If we swap receiver mask on/off we also may need to invalidate due to incomplete pages if (bInUseReceiverMask != bUseReceiverMask) { Prev.RenderedFrameNumber = -1; bUseReceiverMask = bInUseReceiverMask; } // On transition between uncached <-> cached we must invalidate since the static pages may not be initialized bool bNewIsUncached = Prev.RenderedFrameNumber < 0; if (bNewIsUncached != bIsUncached) { Prev.RenderedFrameNumber = -1; } // On transition between distant <-> regular we must invalidate if (bNewIsDistantLight != bIsDistantLight) { Prev.RenderedFrameNumber = -1; } Current.RenderedFrameNumber = -1; Current.ScheduledFrameNumber = -1; bIsDistantLight = bNewIsDistantLight; bIsUncached = bNewIsUncached; LightOrigin = NewLightOrigin; LightRadius = NewLightRadius; } void FVirtualShadowMapArrayCacheManager::FShadowInvalidatingInstancesImplementation::AddPrimitive(const FPrimitiveSceneInfo* PrimitiveSceneInfo) { AddInstanceRange(PrimitiveSceneInfo->GetPersistentIndex(), PrimitiveSceneInfo->GetInstanceSceneDataOffset(), PrimitiveSceneInfo->GetNumInstanceSceneDataEntries()); } void FVirtualShadowMapArrayCacheManager::FShadowInvalidatingInstancesImplementation::AddInstanceRange(FPersistentPrimitiveIndex PersistentPrimitiveIndex, uint32 InstanceSceneDataOffset, uint32 NumInstanceSceneDataEntries) { PrimitiveInstancesToInvalidate.Add(FVirtualShadowMapInstanceRange{ PersistentPrimitiveIndex, int32(InstanceSceneDataOffset), int32(NumInstanceSceneDataEntries), true}); } static uint32 EncodeInstanceInvalidationPayload(int32 VirtualShadowMapId, uint32 Flags = VSM_INVALIDATION_PAYLOAD_FLAG_NONE) { check(VirtualShadowMapId >= 0); // Should not be INDEX_NONE by this point uint32 Payload = Flags; Payload = Payload | (uint32(VirtualShadowMapId) << VSM_INVALIDATION_PAYLOAD_FLAG_BITS); return Payload; } FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector::FInvalidatingPrimitiveCollector( FVirtualShadowMapArrayCacheManager* InCacheManager) : Scene(InCacheManager->Scene) , Manager(*InCacheManager) { uint32 Num = Manager.CachePrimitiveAsDynamic.Num(); InvalidatedPrimitives.SetNum(Num, false); RemovedPrimitives.SetNum(Num, false); } void FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector::AddPrimitivesToInvalidate() { const TBitArray<>& ManagerCachePrimitiveAsDynamic = Manager.CachePrimitiveAsDynamic; for (auto& CacheEntryIt : Manager.CacheEntries) { FVirtualShadowMapPerLightCacheEntry& CacheEntry = *CacheEntryIt.Value; // Global invalidations for (const FVirtualShadowMapInstanceRange& Range : Manager.ShadowInvalidatingInstancesImplementation.PrimitiveInstancesToInvalidate) { // If something doesn't have a valid persistent primitive index, it is treated as a dynamic primitive // See GetCachePrimitiveAsDynamic bool bCachePrimativeAsDynamic = true; if (Range.PersistentPrimitiveIndex.IsValid()) { bCachePrimativeAsDynamic = ManagerCachePrimitiveAsDynamic[Range.PersistentPrimitiveIndex.Index]; if (Range.bMarkAsDynamic) { InvalidatedPrimitives[Range.PersistentPrimitiveIndex.Index] = true; } } AddInvalidation( CacheEntry, Range.InstanceSceneDataOffset, Range.NumInstanceSceneDataEntries, bCachePrimativeAsDynamic); } // Per-light invalidations for (const FVirtualShadowMapInstanceRange& Range : CacheEntry.PrimitiveInstancesToInvalidate) { check(Range.PersistentPrimitiveIndex.IsValid()); // Should always be valid currently in this path bool bCachePrimativeAsDynamic = true; if (Range.PersistentPrimitiveIndex.IsValid()) { bCachePrimativeAsDynamic = ManagerCachePrimitiveAsDynamic[Range.PersistentPrimitiveIndex.Index]; if (Range.bMarkAsDynamic) { InvalidatedPrimitives[Range.PersistentPrimitiveIndex.Index] = true; } } AddInvalidation( CacheEntry, Range.InstanceSceneDataOffset, Range.NumInstanceSceneDataEntries, bCachePrimativeAsDynamic); } CacheEntry.PrimitiveInstancesToInvalidate.Reset(); } Manager.ShadowInvalidatingInstancesImplementation.PrimitiveInstancesToInvalidate.Reset(); } void FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector::AddInvalidation( FPrimitiveSceneInfo * PrimitiveSceneInfo, EInvalidationCause InvalidationCause) { const int32 PrimitiveID = PrimitiveSceneInfo->GetIndex(); const int32 InstanceSceneDataOffset = PrimitiveSceneInfo->GetInstanceSceneDataOffset(); if (PrimitiveID < 0 || InstanceSceneDataOffset == INDEX_NONE) { return; } const FPrimitiveFlagsCompact PrimitiveFlagsCompact = Scene.PrimitiveFlagsCompact[PrimitiveID]; if (!PrimitiveFlagsCompact.bCastDynamicShadow) { return; } const FPersistentPrimitiveIndex PersistentPrimitiveIndex = PrimitiveSceneInfo->GetPersistentIndex(); if (InvalidationCause == EInvalidationCause::Removed) { RemovedPrimitives[PersistentPrimitiveIndex.Index] = true; InvalidatedPrimitives[PersistentPrimitiveIndex.Index] = true; } else if (InvalidationCause == EInvalidationCause::Updated) { // Suppress invalidations from moved primitives that are marked to behave as if they were static. if (PrimitiveSceneInfo->Proxy->GetShadowCacheInvalidationBehavior() == EShadowCacheInvalidationBehavior::Static) { return; } InvalidatedPrimitives[PersistentPrimitiveIndex.Index] = true; } else if (InvalidationCause == EInvalidationCause::Added) { // Skip marking as dynamic if it is a static mesh (mobility is static & no WPO) or it is forced to behave as static // this avoids needing to re-cache all static meshes. if (PrimitiveSceneInfo->Proxy->IsMeshShapeOftenMoving() && PrimitiveSceneInfo->Proxy->GetShadowCacheInvalidationBehavior() != EShadowCacheInvalidationBehavior::Static) { InvalidatedPrimitives[PersistentPrimitiveIndex.Index] = true; } } const int32 NumInstanceSceneDataEntries = PrimitiveSceneInfo->GetNumInstanceSceneDataEntries(); const FBoxSphereBounds PrimitiveBounds = PrimitiveSceneInfo->Proxy->GetBounds(); const bool bCachePrimativeAsDynamic = Manager.CachePrimitiveAsDynamic[PersistentPrimitiveIndex.Index]; for (auto& CacheEntry : Manager.CacheEntries) { AddInvalidation( *CacheEntry.Value, InstanceSceneDataOffset, NumInstanceSceneDataEntries, bCachePrimativeAsDynamic, (GVSMLightRadiusInvalidationCulling != 0), PrimitiveBounds); } } void FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector::AddInvalidation( const FVirtualShadowMapPerLightCacheEntry& CacheEntry, int32 InstanceSceneDataOffset, int32 NumInstanceSceneDataEntries, bool bCachePrimitiveAsDynamic, bool bLightRadiusCulling, const FBoxSphereBounds& PrimitiveBounds) { // We don't need explicit invalidations for force invalidated/uncached lights if (CacheEntry.IsUncached()) { return; } // We don't need explicit dynamic invalidations when using receiver mask if (bCachePrimitiveAsDynamic && CacheEntry.ShouldUseReceiverMask()) { return; } // Quick bounds overlap check to eliminate stuff that is too far away to affect a light if (bLightRadiusCulling && !CacheEntry.AffectsBounds(PrimitiveBounds)) { return; } // Add item for each shadow map explicitly, inflates host data but improves load balancing, for (const auto& SmCacheEntry : CacheEntry.ShadowMapEntries) { Instances.Add(InstanceSceneDataOffset, NumInstanceSceneDataEntries, EncodeInstanceInvalidationPayload(SmCacheEntry.CurrentVirtualShadowMapId)); } } FVirtualShadowMapFeedback::FVirtualShadowMapFeedback() { for (int32 i = 0; i < MaxBuffers; ++i) { Buffers[i].Buffer = new FRHIGPUBufferReadback(TEXT("Shadow.Virtual.Readback")); Buffers[i].Size = 0; } } FVirtualShadowMapFeedback::~FVirtualShadowMapFeedback() { for (int32 i = 0; i < MaxBuffers; ++i) { delete Buffers[i].Buffer; Buffers[i].Buffer = nullptr; Buffers[i].Size = 0; } } void FVirtualShadowMapFeedback::SubmitFeedbackBuffer( FRDGBuilder& GraphBuilder, FRDGBufferRef FeedbackBuffer) { // Source copy usage is required for readback check((FeedbackBuffer->Desc.Usage & EBufferUsageFlags::SourceCopy) == EBufferUsageFlags::SourceCopy); if (NumPending == MaxBuffers) { return; } FRHIGPUBufferReadback* ReadbackBuffer = Buffers[WriteIndex].Buffer; Buffers[WriteIndex].Size = FeedbackBuffer->Desc.GetSize(); AddReadbackBufferPass(GraphBuilder, RDG_EVENT_NAME("Readback"), FeedbackBuffer, [ReadbackBuffer, FeedbackBuffer](FRDGAsyncTask, FRHICommandList& RHICmdList) { ReadbackBuffer->EnqueueCopy(RHICmdList, FeedbackBuffer->GetRHI(), 0u); }); WriteIndex = (WriteIndex + 1) % MaxBuffers; NumPending = FMath::Min(NumPending + 1, MaxBuffers); } FVirtualShadowMapFeedback::FReadbackInfo FVirtualShadowMapFeedback::GetLatestReadbackBuffer() { int32 LatestBufferIndex = -1; // Find latest buffer that is ready while (NumPending > 0) { uint32 Index = (WriteIndex + MaxBuffers - NumPending) % MaxBuffers; if (Buffers[Index].Buffer->IsReady()) { --NumPending; LatestBufferIndex = Index; } else { break; } } return LatestBufferIndex >= 0 ? Buffers[LatestBufferIndex] : FReadbackInfo(); } IMPLEMENT_SCENE_EXTENSION(FVirtualShadowMapArrayCacheManager); bool FVirtualShadowMapArrayCacheManager::ShouldCreateExtension(FScene& Scene) { return DoesPlatformSupportVirtualShadowMaps(GetFeatureLevelShaderPlatform(Scene.GetFeatureLevel())); } ISceneExtensionUpdater* FVirtualShadowMapArrayCacheManager::CreateUpdater() { // NOTE: We need this check because shader platform can change during scene destruction so we need to ensure we // don't try and run shaders on a new platform that doesn't support VSMs... if (UseVirtualShadowMaps(Scene.GetShaderPlatform(), Scene.GetFeatureLevel())) { return new FVirtualShadowMapInvalidationSceneUpdater(*this); } return nullptr; } FVirtualShadowMapArrayCacheManager::FVirtualShadowMapArrayCacheManager(FScene& InScene) : ISceneExtension(InScene) , ShadowInvalidatingInstancesImplementation(*this) { #if !UE_BUILD_SHIPPING LastOverflowTimes.Init(-10.0f, VSM_STAT_OVERFLOW_FLAG_NUM); #endif } void FVirtualShadowMapArrayCacheManager::InitExtension(FScene& InScene) { // Handle message with status sent back from GPU StatusFeedbackSocket = GPUMessage::RegisterHandler(TEXT("Shadow.Virtual.StatusFeedback"), [this](GPUMessage::FReader Message) { int32 MessageType = Message.Read(); if(MessageType == VSM_STATUS_MSG_PAGE_MANAGEMENT) { // Goes negative on underflow int32 LastFreePhysicalPages = Message.Read(0); const float LastGlobalResolutionLodBias = FMath::AsFloat(Message.Read(0U)); CSV_CUSTOM_STAT(VSM, FreePages, LastFreePhysicalPages, ECsvCustomStatOp::Set); // Dynamic resolution { // Could be cvars if needed, but not clearly something that needs to be tweaked currently // NOTE: Should react more quickly when reducing resolution than when increasing again // TODO: Possibly something smarter/PID-like rather than simple exponential decay const float ResolutionDownExpLerpFactor = 0.5f; const float ResolutionUpExpLerpFactor = 0.1f; const uint32 FramesBeforeResolutionUp = 10; const float MaxPageAllocation = CVarVSMDynamicResolutionMaxPagePoolLoadFactor.GetValueOnRenderThread(); const float MaxLodBias = CVarVSMDynamicResolutionMaxLodBias.GetValueOnRenderThread(); if (MaxPageAllocation > 0.0f) { const uint32 SceneFrameNumber = Scene.GetFrameNumberRenderThread(); // Dynamically bias shadow resolution when we get too near the maximum pool capacity // NB: In a perfect world each +1 of resolution bias will drop the allocation in half float CurrentAllocation = 1.0f - (LastFreePhysicalPages / static_cast(MaxPhysicalPages)); float AllocationRatio = CurrentAllocation / MaxPageAllocation; float TargetLodBias = FMath::Max(0.0f, LastGlobalResolutionLodBias + FMath::Log2(AllocationRatio)); if (CurrentAllocation <= MaxPageAllocation && (SceneFrameNumber - LastFrameOverPageAllocationBudget) > FramesBeforeResolutionUp) { GlobalResolutionLodBias = FMath::Lerp(GlobalResolutionLodBias, TargetLodBias, ResolutionUpExpLerpFactor); } else if (CurrentAllocation > MaxPageAllocation) { LastFrameOverPageAllocationBudget = SceneFrameNumber; GlobalResolutionLodBias = FMath::Lerp(GlobalResolutionLodBias, TargetLodBias, ResolutionDownExpLerpFactor); } } GlobalResolutionLodBias = FMath::Clamp(GlobalResolutionLodBias, 0.0f, MaxLodBias); } #if !UE_BUILD_SHIPPING if (LastFreePhysicalPages < 0) { uint32 PagePoolOverflowTypeIndex = (uint32)FMath::Log2((double)VSM_STAT_OVERFLOW_FLAG_PAGE_POOL); LastOverflowTimes[PagePoolOverflowTypeIndex] = float(FGameTime::GetTimeSinceAppStart().GetRealTimeSeconds()); if ((LoggedOverflowFlags & VSM_STAT_OVERFLOW_FLAG_PAGE_POOL) == 0) { static const auto* CVarResolutionLodBiasLocalPtr = IConsoleManager::Get().FindTConsoleVariableDataFloat(TEXT("r.Shadow.Virtual.ResolutionLodBiasLocal")); static const auto* CVarResolutionLodBiasDirectionalPtr = IConsoleManager::Get().FindTConsoleVariableDataFloat(TEXT("r.Shadow.Virtual.ResolutionLodBiasDirectional")); UE_LOG(LogRenderer, Warning, TEXT("Virtual Shadow Map Page Pool overflow (%d page allocations were not served), this will produce visual artifacts (missing shadow), increase the page pool limit or reduce resolution bias to avoid.\n") TEXT(" See r.Shadow.Virtual.MaxPhysicalPages (%d), r.Shadow.Virtual.ResolutionLodBiasLocal (%.2f), r.Shadow.Virtual.ResolutionLodBiasDirectional (%.2f), Global Resolution Lod Bias (%.2f)"), -LastFreePhysicalPages, MaxPhysicalPages, CVarResolutionLodBiasLocalPtr->GetValueOnRenderThread(), CVarResolutionLodBiasDirectionalPtr->GetValueOnRenderThread(), GlobalResolutionLodBias); LoggedOverflowFlags |= VSM_STAT_OVERFLOW_FLAG_PAGE_POOL; } } else { LoggedOverflowFlags &= ~VSM_STAT_OVERFLOW_FLAG_PAGE_POOL; } #endif } else if(MessageType == VSM_STATUS_MSG_OVERFLOW) { #if !UE_BUILD_SHIPPING uint32 OverflowFlags = Message.Read(); if (OverflowFlags) { float CurrentTime = float(FGameTime::GetTimeSinceAppStart().GetRealTimeSeconds()); for (uint32 OverflowTypeIndex = 0; OverflowTypeIndex < VSM_STAT_OVERFLOW_FLAG_NUM; OverflowTypeIndex++) { uint32 OverflowTypeFlag = 1 << OverflowTypeIndex; if (OverflowFlags & OverflowTypeFlag) { LastOverflowTimes[OverflowTypeIndex] = CurrentTime; if ((LoggedOverflowFlags & OverflowTypeFlag) == 0) { UE_LOG(LogRenderer, Warning, TEXT("%s"), *(GetOverflowMessage(OverflowTypeIndex).ToString())); LoggedOverflowFlags |= OverflowTypeFlag; } } } } #endif } }); #if !UE_BUILD_SHIPPING // Handle message with stats sent back from GPU whenever stats are enabled StatsFeedbackSocket = GPUMessage::RegisterHandler(TEXT("Shadow.Virtual.StatsFeedback"), [this](GPUMessage::FReader Message) { // Culling stats int32 NaniteNumTris = Message.Read(0); int32 NanitePostCullNodeCount = Message.Read(0); TConstArrayView Stats = Message.ReadCount(VSM_STAT_NUM); const bool bInsightsVSMChannelEnabled = UE_TRACE_CHANNELEXPR_IS_ENABLED(VSMChannel); if (bInsightsVSMChannelEnabled) { // requires 'trace.enable counters,vsm' to see this in ue insights TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesRequested"), Stats[VSM_STAT_REQUESTED_THIS_FRAME_PAGES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesCachedStatic"), Stats[VSM_STAT_STATIC_CACHED_PAGES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesInvalidatedStatic"), Stats[VSM_STAT_STATIC_INVALIDATED_PAGES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesCachedDynamic"), Stats[VSM_STAT_DYNAMIC_CACHED_PAGES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesInvalidatedDynamic"), Stats[VSM_STAT_DYNAMIC_INVALIDATED_PAGES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesEmpty"), Stats[VSM_STAT_EMPTY_PAGES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.NonNanite.InstancesTotal"), Stats[VSM_STAT_NON_NANITE_INSTANCES_TOTAL]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.NonNanite.InstancesDrawn"), Stats[VSM_STAT_NON_NANITE_INSTANCES_DRAWN]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.NonNanite.InstancesHZBCulled"), Stats[VSM_STAT_NON_NANITE_INSTANCES_HZB_CULLED]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.NonNanite.InstancesPageMaskCulled"), Stats[VSM_STAT_NON_NANITE_INSTANCES_PAGE_MASK_CULLED]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.NonNanite.InstancesEmptyRectCulled"), Stats[VSM_STAT_NON_NANITE_INSTANCES_EMPTY_RECT_CULLED]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.NonNanite.InstancesFrustumCulled"), Stats[VSM_STAT_NON_NANITE_INSTANCES_FRUSTUM_CULLED]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesToMerge"), Stats[VSM_STAT_NUM_PAGES_TO_MERGE]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesToClear"), Stats[VSM_STAT_NUM_PAGES_TO_CLEAR]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.HZBPagesBuilt"), Stats[VSM_STAT_NUM_HZB_PAGES_BUILT]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesAllocatedNew"), Stats[VSM_STAT_ALLOCATED_NEW]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Nanite.ClustersHW"), Stats[VSM_STAT_NANITE_CLUSTERS_HW]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Nanite.ClustersSW"), Stats[VSM_STAT_NANITE_CLUSTERS_SW]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Nanite.Triangles"), Stats[VSM_STAT_NANITE_TRIANGLES]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Nanite.InstancesMain"), Stats[VSM_STAT_NANITE_INSTANCES_MAIN]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Nanite.InstancesPost"), Stats[VSM_STAT_NANITE_INSTANCES_POST]); TRACE_INT_VALUE(TEXT("Shadow.Virtual.PagesWPOConsidered"), Stats[VSM_STAT_WPO_CONSIDERED_PAGES]); } CSV_CUSTOM_STAT(VSM, NaniteNumTris, NaniteNumTris, ECsvCustomStatOp::Set); CSV_CUSTOM_STAT(VSM, NanitePostCullNodeCount, NanitePostCullNodeCount, ECsvCustomStatOp::Set); #if CSV_PROFILER CSV_CUSTOM_STAT(VSM, NonNanitePostCullInstanceCount, int32(Stats[VSM_STAT_NON_NANITE_INSTANCES_DRAWN]), ECsvCustomStatOp::Set); if (FCsvProfiler::Get()->IsCapturing_Renderthread()) { static bool bRegisteredInlineStats = false; auto StatCatIndex = CSV_CATEGORY_INDEX(VSM); if (FCsvProfiler::Get()->IsCategoryEnabled(StatCatIndex)) { for (int32 StatIndex = 0; StatIndex < UE_ARRAY_COUNT(VirtualShadowMap_StatNames); ++StatIndex) { const char *StatName = VirtualShadowMap_StatNames[StatIndex]; #if CSVPROFILERTRACE_ENABLED if (!bRegisteredInlineStats) { FCsvProfilerTrace::OutputInlineStat(StatName, StatCatIndex); } #endif FCsvProfiler::RecordCustomStat(StatName, StatCatIndex, int32(Stats[StatIndex]), ECsvCustomStatOp::Set); } bRegisteredInlineStats = true; } } #endif // Large page area items LastLoggedPageOverlapAppTime.SetNumZeroed(Scene.GetMaxPersistentPrimitiveIndex()); float RealTimeSeconds = float(FGameTime::GetTimeSinceAppStart().GetRealTimeSeconds()); TConstArrayView PageAreaDiags = Message.ReadCount(FVirtualShadowMapArray::MaxPageAreaDiagnosticSlots * 2); for (int32 Index = 0; Index < PageAreaDiags.Num(); Index += 2) { uint32 Overlap = PageAreaDiags[Index]; uint32 PersistentPrimitiveId = PageAreaDiags[Index + 1]; int32 PrimtiveIndex = Scene.GetPrimitiveIndex(FPersistentPrimitiveIndex{ int32(PersistentPrimitiveId) }); if (Overlap > 0 && PrimtiveIndex != INDEX_NONE) { if (RealTimeSeconds - LastLoggedPageOverlapAppTime[PersistentPrimitiveId] > 5.0f) { LastLoggedPageOverlapAppTime[PersistentPrimitiveId] = RealTimeSeconds; UE_LOG(LogRenderer, Warning, TEXT("Non-Nanite VSM page overlap performance Warning, %d, %s, %s"), Overlap, *Scene.Primitives[PrimtiveIndex]->GetOwnerActorNameOrLabelForDebuggingOnly(), *Scene.Primitives[PrimtiveIndex]->GetFullnameForDebuggingOnly()); } LargePageAreaItems.Add(PersistentPrimitiveId, FLargePageAreaItem{ Overlap, RealTimeSeconds }); } } }); #endif #if !UE_BUILD_SHIPPING ScreenMessageDelegate = FRendererOnScreenNotification::Get().AddLambda([this](TMultiMap& OutMessages) { float RealTimeSeconds = float(FGameTime::GetTimeSinceAppStart().GetRealTimeSeconds()); if (GVSMAllowScreenOverflowMessages != 0) { for (uint32 OverflowTypeIndex = 0; OverflowTypeIndex < VSM_STAT_OVERFLOW_FLAG_NUM; OverflowTypeIndex++) { // Show for ~10s after last overflow float LastOverflowTime = LastOverflowTimes[OverflowTypeIndex]; if (LastOverflowTime >= 0.0f && RealTimeSeconds - LastOverflowTime < 10.0f) { FText OverflowMessage = GetOverflowMessage(OverflowTypeIndex); OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("%s (%0.0f seconds ago). See r.Shadow.Virtual.AllowScreenOverflowMessages."), *(OverflowMessage.ToString()), RealTimeSeconds - LastOverflowTime))); } } } for (const auto& Item : LargePageAreaItems) { int32 PrimtiveIndex = Scene.GetPrimitiveIndex(FPersistentPrimitiveIndex{ int32(Item.Key) }); uint32 Overlap = Item.Value.PageArea; if (PrimtiveIndex != INDEX_NONE && RealTimeSeconds - Item.Value.LastTimeSeen < 2.5f) { OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("Non-Nanite VSM page overlap performance Warning: Primitive '%s' overlapped %d Pages"), *Scene.Primitives[PrimtiveIndex]->GetOwnerActorNameOrLabelForDebuggingOnly(), Overlap))); } } TrimLoggingInfo(); if (GVSMAccumulateStats > 0) { OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("Virtual Shadow Map Stats Accumulation (%d frames left)"), GVSMAccumulateStats))); } else if (GVSMAccumulateStats < 0) { OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("Virtual Shadow Map Stats Accumulation Active. Set r.Shadow.Virtual.AccumulateStats to 0 to stop.")))); } }); #endif } FVirtualShadowMapArrayCacheManager::~FVirtualShadowMapArrayCacheManager() { #if !UE_BUILD_SHIPPING FRendererOnScreenNotification::Get().Remove(ScreenMessageDelegate); #endif } #if !UE_BUILD_SHIPPING FText FVirtualShadowMapArrayCacheManager::GetOverflowMessage(uint32 OverflowTypeIndex) const { uint32 OverflowTypeFlag = 1 << OverflowTypeIndex; switch (OverflowTypeFlag) { case VSM_STAT_OVERFLOW_FLAG_MARKING_JOB_QUEUE: return LOCTEXT("VSM_MarkingJobQueueOverflow", "[VSM] Non-Nanite Marking Job Queue overflow. Performance may be affected. This occurs when many non-nanite meshes cover a large area of the shadow map."); case VSM_STAT_OVERFLOW_FLAG_OPP_MAX_LIGHTS: return LOCTEXT("VSM_OPPMaxLightsOverflow", "[VSM] One Pass Projection max lights overflow. If you see shadow artifacts, decrease the amount of local lights per pixel, or increase r.Shadow.Virtual.OnePassProjection.MaxLightsPerPixel."); case VSM_STAT_OVERFLOW_FLAG_PAGE_POOL: return LOCTEXT("VSM_PagePoolOverflow", "[VSM] Page Pool overflow detected, this will produce visual artifacts (missing shadow). Increase the page pool limit or reduce resolution bias to avoid."); case VSM_STAT_OVERFLOW_FLAG_VISIBLE_INSTANCES: return LOCTEXT("VSM_VisibleInstancesOverflow", "[VSM] Non-Nanite visible instances buffer overflow detected, this will produce visual artifacts (missing shadow)."); default: return LOCTEXT("VSM_UnknownOverflow", "[VSM] Unknown overflow"); } } #endif void FVirtualShadowMapArrayCacheManager::SetPhysicalPoolSize(FRDGBuilder& GraphBuilder, FIntPoint RequestedSize, int RequestedArraySize, uint32 RequestedMaxPhysicalPages) { bool bInvalidateCache = false; // Using ReservedResource|ImmediateCommit flags hint to the RHI that the resource can be allocated using N small physical memory allocations, // instead of a single large contighous allocation. This helps Windows video memory manager page allocations in and out of local memory more efficiently. ETextureCreateFlags RequestedCreateFlags = (CVarVSMReservedResource.GetValueOnRenderThread() && GRHIGlobals.ReservedResources.Supported) ? (TexCreate_ReservedResource | TexCreate_ImmediateCommit) : TexCreate_None; if (!PhysicalPagePool || PhysicalPagePool->GetDesc().Extent != RequestedSize || PhysicalPagePool->GetDesc().ArraySize != RequestedArraySize || RequestedMaxPhysicalPages != MaxPhysicalPages || PhysicalPagePoolCreateFlags != RequestedCreateFlags) { if (PhysicalPagePool) { UE_LOG(LogRenderer, Display, TEXT("Recreating Shadow.Virtual.PhysicalPagePool due to size or flags change. This will also drop any cached pages.")); } // Track changes to these ourselves instead of from the GetDesc() since that may get manipulated internally PhysicalPagePoolCreateFlags = RequestedCreateFlags; const ETextureCreateFlags PoolTexCreateFlags = TexCreate_ShaderResource | TexCreate_UAV | TexCreate_AtomicCompatible; FPooledRenderTargetDesc Desc2D = FPooledRenderTargetDesc::Create2DArrayDesc( RequestedSize, PF_R32_UINT, FClearValueBinding::None, PhysicalPagePoolCreateFlags, PoolTexCreateFlags, false, RequestedArraySize ); GRenderTargetPool.FindFreeElement(GraphBuilder.RHICmdList, Desc2D, PhysicalPagePool, TEXT("Shadow.Virtual.PhysicalPagePool")); MaxPhysicalPages = RequestedMaxPhysicalPages; // Allocate page metadata alongside FRDGBufferRef PhysicalPageMetaDataRDG = GraphBuilder.CreateBuffer( FRDGBufferDesc::CreateStructuredDesc(sizeof(FPhysicalPageMetaData), MaxPhysicalPages), TEXT("Shadow.Virtual.PhysicalPageMetaData")); // Persistent, so we extract it immediately PhysicalPageMetaData = GraphBuilder.ConvertToExternalBuffer(PhysicalPageMetaDataRDG); bInvalidateCache = true; } if (bInvalidateCache) { Invalidate(GraphBuilder); } } void FVirtualShadowMapArrayCacheManager::FreePhysicalPool(FRDGBuilder& GraphBuilder) { if (PhysicalPagePool) { PhysicalPagePool = nullptr; PhysicalPageMetaData = nullptr; Invalidate(GraphBuilder); } } TRefCountPtr FVirtualShadowMapArrayCacheManager::SetHZBPhysicalPoolSize(FRDGBuilder& GraphBuilder, FIntPoint RequestedHZBSize, int32 RequestedArraySize, const EPixelFormat Format) { if (!HZBPhysicalPagePoolArray || HZBPhysicalPagePoolArray->GetDesc().Extent != RequestedHZBSize || HZBPhysicalPagePoolArray->GetDesc().Format != Format || HZBPhysicalPagePoolArray->GetDesc().ArraySize != RequestedArraySize ) { FPooledRenderTargetDesc Desc = FPooledRenderTargetDesc::Create2DArrayDesc( RequestedHZBSize, Format, FClearValueBinding::None, GFastVRamConfig.HZB, TexCreate_ShaderResource | TexCreate_UAV, false, RequestedArraySize, FVirtualShadowMap::NumHZBLevels); GRenderTargetPool.FindFreeElement(GraphBuilder.RHICmdList, Desc, HZBPhysicalPagePoolArray, TEXT("Shadow.Virtual.HZBPhysicalPagePool")); // TODO: Clear to black? Invalidate(GraphBuilder); } return HZBPhysicalPagePoolArray; } void FVirtualShadowMapArrayCacheManager::FreeHZBPhysicalPool(FRDGBuilder& GraphBuilder) { if (HZBPhysicalPagePoolArray) { HZBPhysicalPagePoolArray = nullptr; Invalidate(GraphBuilder); } } void FVirtualShadowMapArrayCacheManager::Invalidate(FRDGBuilder& GraphBuilder) { // Clear the cache CacheEntries.Reset(); PrevBuffers = FVirtualShadowMapArrayFrameData(); //UE_LOG(LogRenderer, Display, TEXT("Virtual shadow map cache invalidated.")); // Clear the physical page metadata (on all GPUs) if (PhysicalPageMetaData) { RDG_GPU_MASK_SCOPE(GraphBuilder, FRHIGPUMask::All()); FRDGBufferRef PhysicalPageMetaDataRDG = GraphBuilder.RegisterExternalBuffer(PhysicalPageMetaData); AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(PhysicalPageMetaDataRDG), 0); } } bool FVirtualShadowMapArrayCacheManager::IsCacheEnabled() { return CVarCacheVirtualSMs.GetValueOnRenderThread() != 0; } bool FVirtualShadowMapArrayCacheManager::IsCacheDataAvailable() { return IsCacheEnabled() && PhysicalPagePool && PhysicalPageMetaData && PrevBuffers.PageTable && PrevBuffers.PageFlags && PrevBuffers.UncachedPageRectBounds && PrevBuffers.AllocatedPageRectBounds && PrevBuffers.ProjectionData && PrevBuffers.PhysicalPageLists && PrevBuffers.PageRequestFlags; } bool FVirtualShadowMapArrayCacheManager::IsHZBDataAvailable() { // NOTE: HZB can be used/valid even when physical page caching is disabled return HZBPhysicalPagePoolArray && PrevBuffers.PageTable && PrevBuffers.PageFlags; } FRDGBufferRef FVirtualShadowMapArrayCacheManager::UploadCachePrimitiveAsDynamic(FRDGBuilder& GraphBuilder) const { const uint32 NumElements = FMath::Max(1, FMath::DivideAndRoundUp(CachePrimitiveAsDynamic.Num(), 32)); //GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), BufferElements), FRDGBufferRef CachePrimitiveAsDynamicRDG = CreateStructuredBuffer( GraphBuilder, TEXT("CachePrimitiveAsDynamic"), sizeof(uint32), NumElements, CachePrimitiveAsDynamic.GetData(), FMath::DivideAndRoundUp(CachePrimitiveAsDynamic.Num(), 8) // Size in bytes of initial data ); return CachePrimitiveAsDynamicRDG; } TSharedPtr FVirtualShadowMapArrayCacheManager::FindCreateLightCacheEntry( int32 LightSceneId, uint32 ViewUniqueID, uint32 NumShadowMaps, uint32 TypeIdTag) { const FVirtualShadowMapCacheKey CacheKey = { ViewUniqueID, LightSceneId, TypeIdTag }; TSharedPtr* LightEntryKey = CacheEntries.Find(CacheKey); if (LightEntryKey) { TSharedPtr LightEntry = *LightEntryKey; if (LightEntry->ShadowMapEntries.Num() == NumShadowMaps) { LightEntry->bReferencedThisRender = true; LightEntry->LastReferencedFrameNumber = Scene.GetFrameNumberRenderThread(); return LightEntry; } else { // Remove this entry and create a new one below // NOTE: This should only happen for clipmaps currently on cvar changes UE_LOG(LogRenderer, Display, TEXT("Virtual shadow map cache invalidated for light due to clipmap level count change")); CacheEntries.Remove(CacheKey); } } // Make new entry for this light TSharedPtr LightEntry = MakeShared(Scene.GetMaxPersistentPrimitiveIndex(), NumShadowMaps); LightEntry->bReferencedThisRender = true; LightEntry->LastReferencedFrameNumber = Scene.GetFrameNumberRenderThread(); CacheEntries.Add(CacheKey, LightEntry); return LightEntry; } void FVirtualShadowMapPerLightCacheEntry::OnPrimitiveRendered(const FPrimitiveSceneInfo* PrimitiveSceneInfo, bool bPrimitiveRevealed) { bool bInvalidate = false; bool bMarkAsDynamic = true; // Deformable mesh primitives need to trigger invalidation (even if they did not move) or we get artifacts, for example skinned meshes that are animating but not currently moving. // Skip if the invalidation mode is NOT auto (because Always will do it elsewhere & the others should prevent this). if (GVSMCacheDeformableMeshesInvalidate != 0 && PrimitiveSceneInfo->Proxy->HasDeformableMesh() && PrimitiveSceneInfo->Proxy->GetShadowCacheInvalidationBehavior() == EShadowCacheInvalidationBehavior::Auto) { bInvalidate = true; } // With new invalidations on, we need to invalidate any time a (non-nanite) primitive is "revealed", i.e. stopped being culled. // Note that this invalidation will be a frame late - similar to WPO starting - as it will get picked up by the next scene update. else if (bPrimitiveRevealed && GVSMCacheDebugSkipRevealedPrimitivesInvalidate == 0) { bInvalidate = true; bMarkAsDynamic = false; // Don't mark primitives as dynamic just because they were revealed //UE_LOG(LogRenderer, Display, TEXT("VirtualShadowMapCacheManager: Primitive revealed %d!"), PrimitiveSceneInfo->GetPersistentIndex().Index); } if (bInvalidate) { PrimitiveInstancesToInvalidate.Add(FVirtualShadowMapInstanceRange{ PrimitiveSceneInfo->GetPersistentIndex(), PrimitiveSceneInfo->GetInstanceSceneDataOffset(), PrimitiveSceneInfo->GetNumInstanceSceneDataEntries(), bMarkAsDynamic }); } } void FVirtualShadowMapArrayCacheManager::UpdateUnreferencedCacheEntries( FVirtualShadowMapArray& VirtualShadowMapArray) { const uint32 SceneFrameNumber = Scene.GetFrameNumberRenderThread(); const int32 MaxLightAge = CVarMaxLightAgeSinceLastRequest.GetValueOnRenderThread(); for (FEntryMap::TIterator It = CacheEntries.CreateIterator(); It; ++It) { TSharedPtr CacheEntry = (*It).Value; // For this test we care if it is active *this render*, not just this scene frame number (which can include multiple renders) if (CacheEntry->bReferencedThisRender) { // Active this render, leave it alone check(CacheEntry->ShadowMapEntries.Last().CurrentVirtualShadowMapId < VirtualShadowMapArray.GetNumShadowMapSlots()); } else if (int32(SceneFrameNumber - CacheEntry->LastReferencedFrameNumber) <= MaxLightAge) { // Not active this render, but still recent enough to keep it and its pages alive int PrevBaseVirtualShadowMapId = CacheEntry->ShadowMapEntries[0].CurrentVirtualShadowMapId; bool bIsSinglePage = FVirtualShadowMapArray::IsSinglePage(PrevBaseVirtualShadowMapId); // Keep the entry, reallocate new VSM IDs int32 NumMaps = CacheEntry->ShadowMapEntries.Num(); int32 VirtualShadowMapId = VirtualShadowMapArray.Allocate(bIsSinglePage, NumMaps); for (int32 Map = 0; Map < NumMaps; ++Map) { CacheEntry->ShadowMapEntries[Map].Update(VirtualShadowMapArray, *CacheEntry, VirtualShadowMapId + Map); // Mark it as inactive for this frame/render // NOTE: We currently recompute/overwrite the whole ProjectionData structure for referenced lights, but if that changes we // will need to clear this flag again when they become referenced. CacheEntry->ShadowMapEntries[Map].ProjectionData.Flags |= VSM_PROJ_FLAG_UNREFERENCED; } } else { It.RemoveCurrent(); } } } class FVirtualSmCopyStatsCS : public FGlobalShader { DECLARE_GLOBAL_SHADER(FVirtualSmCopyStatsCS); SHADER_USE_PARAMETER_STRUCT(FVirtualSmCopyStatsCS, FGlobalShader) public: BEGIN_SHADER_PARAMETER_STRUCT(FParameters, ) SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer, InStatsBuffer) SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer, NaniteStatsBuffer) SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer, AccumulatedStatsBufferOut) END_SHADER_PARAMETER_STRUCT() static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters) { return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) && DoesPlatformSupportVirtualShadowMaps(Parameters.Platform); } static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment) { FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); OutEnvironment.SetDefine(TEXT("MAX_STAT_FRAMES"), FVirtualShadowMapArrayCacheManager::MaxStatFrames); } }; IMPLEMENT_GLOBAL_SHADER(FVirtualSmCopyStatsCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCopyStats.usf", "CopyStatsCS", SF_Compute); void FVirtualShadowMapArrayCacheManager::ExtractFrameData( FRDGBuilder& GraphBuilder, FVirtualShadowMapArray &VirtualShadowMapArray, const FSceneRenderer& SceneRenderer, bool bAllowPersistentData) { TrimLoggingInfo(); const bool bNewShadowData = VirtualShadowMapArray.IsAllocated(); const bool bDropAll = !bAllowPersistentData; const bool bDropPrevBuffers = bDropAll || bNewShadowData; if (bDropPrevBuffers) { PrevBuffers = FVirtualShadowMapArrayFrameData(); PrevUniformParameters.NumFullShadowMaps = 0; PrevUniformParameters.NumSinglePageShadowMaps = 0; PrevUniformParameters.NumShadowMapSlots = 0; } if (bDropAll) { // We drop the physical page pool here as well to ensure that it disappears in the case where // thumbnail rendering or similar creates multiple FSceneRenderers that never get deleted. // Caching is disabled on these contexts intentionally to avoid these issues. FreePhysicalPool(GraphBuilder); FreeHZBPhysicalPool(GraphBuilder); } else if (bNewShadowData) { // Page table and associated data are needed by HZB next frame even when VSM physical page caching is disabled GraphBuilder.QueueTextureExtraction(VirtualShadowMapArray.PageTableRDG, &PrevBuffers.PageTable); GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.UncachedPageRectBoundsRDG, &PrevBuffers.UncachedPageRectBounds); GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.AllocatedPageRectBoundsRDG, &PrevBuffers.AllocatedPageRectBounds); GraphBuilder.QueueTextureExtraction(VirtualShadowMapArray.PageFlagsRDG, &PrevBuffers.PageFlags); GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.NanitePerformanceFeedbackRDG, &PrevBuffers.NanitePerformanceFeedback); GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.ThrottleBufferRDG, &PrevBuffers.ThrottleBuffer); GraphBuilder.QueueTextureExtraction(VirtualShadowMapArray.PageReceiverMasksRDG, &PrevBuffers.PageReceiverMasks); if (IsCacheEnabled()) { GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.ProjectionDataRDG, &PrevBuffers.ProjectionData); GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.PhysicalPageListsRDG, &PrevBuffers.PhysicalPageLists); GraphBuilder.QueueTextureExtraction(VirtualShadowMapArray.PageRequestFlagsRDG, &PrevBuffers.PageRequestFlags); // Store but drop any temp references embedded in the uniform parameters this frame PrevUniformParameters = VirtualShadowMapArray.UniformParameters; PrevUniformParameters.ProjectionData = nullptr; PrevUniformParameters.PageTable = nullptr; PrevUniformParameters.UncachedPageRectBounds = nullptr; PrevUniformParameters.AllocatedPageRectBounds = nullptr; PrevUniformParameters.PageFlags = nullptr; PrevUniformParameters.PerViewData.LightGridData = nullptr; PrevUniformParameters.PerViewData.NumCulledLightsGrid = nullptr; PrevUniformParameters.CachePrimitiveAsDynamic = nullptr; } // propagate current-frame primitive state to cache entry for (const auto& LightInfo : SceneRenderer.VisibleLightInfos) { for (const TSharedPtr &Clipmap : LightInfo.VirtualShadowMapClipmaps) { // Push data to cache entry Clipmap->UpdateCachedFrameData(); } } ExtractStats(GraphBuilder, VirtualShadowMapArray); } // Clear out the referenced light flags since this render is finishing for (auto& LightEntry : CacheEntries) { LightEntry.Value->bReferencedThisRender = false; } } void FVirtualShadowMapArrayCacheManager::ExtractStats(FRDGBuilder& GraphBuilder, FVirtualShadowMapArray &VirtualShadowMapArray) { FRDGBufferRef AccumulatedStatsBufferRDG = nullptr; // Note: stats accumulation thing is here because it needs to persist over frames. if (AccumulatedStatsBuffer.IsValid()) { AccumulatedStatsBufferRDG = GraphBuilder.RegisterExternalBuffer(AccumulatedStatsBuffer, TEXT("Shadow.Virtual.AccumulatedStatsBuffer")); } // Auto stop at zero, use -1 to record indefinitely if (GVSMAccumulateStats > 0) { --GVSMAccumulateStats; } if (IsAccumulatingStats()) { if (!AccumulatedStatsBuffer.IsValid()) { FRDGBufferDesc Desc = FRDGBufferDesc::CreateBufferDesc(4, 1 + VSM_STAT_NUM * MaxStatFrames); Desc.Usage = EBufferUsageFlags(Desc.Usage | BUF_SourceCopy); AccumulatedStatsBufferRDG = GraphBuilder.CreateBuffer(Desc, TEXT("Shadow.Virtual.AccumulatedStatsBuffer")); // TODO: Can't be a structured buffer as EnqueueCopy is only defined for vertex buffers AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(AccumulatedStatsBufferRDG, PF_R32_UINT), 0); AccumulatedStatsBuffer = GraphBuilder.ConvertToExternalBuffer(AccumulatedStatsBufferRDG); } // Initialize/clear if (!bAccumulatingStats) { AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(AccumulatedStatsBufferRDG, PF_R32_UINT), 0); bAccumulatingStats = true; } FVirtualSmCopyStatsCS::FParameters* PassParameters = GraphBuilder.AllocParameters(); PassParameters->InStatsBuffer = GraphBuilder.CreateSRV(VirtualShadowMapArray.StatsBufferRDG, PF_R32_UINT); PassParameters->AccumulatedStatsBufferOut = GraphBuilder.CreateUAV(AccumulatedStatsBufferRDG, PF_R32_UINT); // Dummy data PassParameters->NaniteStatsBuffer = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder)); // Optionally pull in some nanite stats too // NOTE: This only works if nanite is set to gather stats from the VSM pass! // i.e. run "NaniteStats VirtualShadowMaps" before starting accumulation if (Nanite::IsStatFilterActive(TEXT("VirtualShadowMaps"))) { TRefCountPtr NaniteStatsBuffer = Nanite::GGlobalResources.GetStatsBufferRef(); if (NaniteStatsBuffer) { PassParameters->NaniteStatsBuffer = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(NaniteStatsBuffer)); } } auto ComputeShader = GetGlobalShaderMap(Scene.GetFeatureLevel())->GetShader(); FComputeShaderUtils::AddPass( GraphBuilder, RDG_EVENT_NAME("Copy Stats"), ComputeShader, PassParameters, FIntVector(1, 1, 1) ); } else if (bAccumulatingStats) { bAccumulatingStats = false; GPUBufferReadback = new FRHIGPUBufferReadback(TEXT("Shadow.Virtual.AccumulatedStatsBufferReadback")); AddEnqueueCopyPass(GraphBuilder, GPUBufferReadback, AccumulatedStatsBufferRDG, 0u); } else if (AccumulatedStatsBuffer.IsValid()) { AccumulatedStatsBuffer.SafeRelease(); } if (GPUBufferReadback && GPUBufferReadback->IsReady()) { TArray Tmp; Tmp.AddDefaulted(1 + VSM_STAT_NUM * MaxStatFrames); { const uint32* BufferPtr = (const uint32*)GPUBufferReadback->Lock((1 + VSM_STAT_NUM * MaxStatFrames) * sizeof(uint32)); FPlatformMemory::Memcpy(Tmp.GetData(), BufferPtr, Tmp.Num() * Tmp.GetTypeSize()); GPUBufferReadback->Unlock(); delete GPUBufferReadback; GPUBufferReadback = nullptr; } FString FileName = FPaths::ProfilingDir() + FString::Printf(TEXT("VSMStats(%s).csv"), *FDateTime::Now().ToString(TEXT("%Y%m%d_%H%M%S"))); const uint32 NumRows = Tmp[0]; UE_LOG(LogRenderer, Log, TEXT("Writing VSM accumulated stats (%d frames) to file '%s'"), NumRows, *FileName); FArchive * FileToLogTo = IFileManager::Get().CreateFileWriter(*FileName, false); ensure(FileToLogTo); if (FileToLogTo) { // Print header FString StringToPrint; for (int32 Index = 0; Index < VSM_STAT_NUM; ++Index) { if (!StringToPrint.IsEmpty()) { StringToPrint += TEXT(","); } StringToPrint.Append(VirtualShadowMap_StatNames[Index]); } StringToPrint += TEXT("\n"); FileToLogTo->Serialize(TCHAR_TO_ANSI(*StringToPrint), StringToPrint.Len()); for (uint32 Ind = 0; Ind < NumRows; ++Ind) { StringToPrint.Empty(); for (uint32 StatInd = 0; StatInd < VSM_STAT_NUM; ++StatInd) { if (!StringToPrint.IsEmpty()) { StringToPrint += TEXT(","); } StringToPrint += FString::Printf(TEXT("%d"), Tmp[1 + Ind * VSM_STAT_NUM + StatInd]); } StringToPrint += TEXT("\n"); FileToLogTo->Serialize(TCHAR_TO_ANSI(*StringToPrint), StringToPrint.Len()); } FileToLogTo->Close(); } } } bool FVirtualShadowMapArrayCacheManager::IsAccumulatingStats() { return GVSMAccumulateStats != 0; } static uint32 GetPrimFlagsBufferSizeInDwords(int32 MaxPersistentPrimitiveIndex) { return FMath::RoundUpToPowerOfTwo(FMath::DivideAndRoundUp(MaxPersistentPrimitiveIndex, 32)); } void FVirtualShadowMapArrayCacheManager::ReallocatePersistentPrimitiveIndices() { const int32 MaxPersistentPrimitiveIndex = FMath::Max(1, Scene.GetMaxPersistentPrimitiveIndex()); for (auto& CacheEntry : CacheEntries) { CacheEntry.Value->RenderedPrimitives.SetNum(MaxPersistentPrimitiveIndex, false); } // TODO: Initialize new primitives based on their mobility; need a way to know which ones are newly created though CachePrimitiveAsDynamic.SetNum(MaxPersistentPrimitiveIndex, false); if (MaxPersistentPrimitiveIndex > LastPrimitiveInvalidatedFrame.Num()) { const uint32 OldSize = LastPrimitiveInvalidatedFrame.Num(); LastPrimitiveInvalidatedFrame.SetNumUninitialized(MaxPersistentPrimitiveIndex); for (int32 It = OldSize; It < MaxPersistentPrimitiveIndex; ++It) { // Unknown last invalidation LastPrimitiveInvalidatedFrame[It] = 0xFFFFFFFF; } } // Do instance-based GPU allocations here too? For now we do them lazily each frame when the FVirtualShadowMapArray gets constructed } uint32 FVirtualShadowMapArrayCacheManager::GetPhysicalMaxWidth() { return GetMax2DTextureDimension(); //return 2048u; } BEGIN_SHADER_PARAMETER_STRUCT(FInvalidatePagesParameters, ) SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FVirtualShadowMapUniformParameters, VirtualShadowMap) SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FSceneUniformParameters, Scene) SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer, PhysicalPageMetaDataOut) SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, OutPageRequestFlags) // When USE_HZB_OCCLUSION SHADER_PARAMETER_RDG_TEXTURE(Texture2D, HZBPageTable) SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer, HZBPageRectBounds) SHADER_PARAMETER_RDG_TEXTURE(Texture2DArray, HZBTextureArray) SHADER_PARAMETER_SAMPLER(SamplerState, HZBSampler) SHADER_PARAMETER(FVector2f, HZBSize) END_SHADER_PARAMETER_STRUCT() class FInvalidateInstancePagesLoadBalancerCS : public FGlobalShader { DECLARE_GLOBAL_SHADER(FInvalidateInstancePagesLoadBalancerCS); SHADER_USE_PARAMETER_STRUCT(FInvalidateInstancePagesLoadBalancerCS, FGlobalShader) class FUseHzbDim : SHADER_PERMUTATION_BOOL("USE_HZB_OCCLUSION"); using FPermutationDomain = TShaderPermutationDomain; public: BEGIN_SHADER_PARAMETER_STRUCT(FParameters, ) SHADER_PARAMETER_STRUCT_INCLUDE(FInvalidatePagesParameters, InvalidatePagesParameters) SHADER_PARAMETER_STRUCT_INCLUDE(FGPUScene::FInstanceGPULoadBalancer::FShaderParameters, LoadBalancerParameters) END_SHADER_PARAMETER_STRUCT() // This is probably fine even in instance list mode static constexpr int Cs1dGroupSizeX = FVirtualShadowMapArrayCacheManager::FInstanceGPULoadBalancer::ThreadGroupSize; static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters) { return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) && DoesPlatformSupportVirtualShadowMaps(Parameters.Platform); } static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment) { FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); FVirtualShadowMapArray::SetShaderDefines(OutEnvironment); OutEnvironment.SetDefine(TEXT("CS_1D_GROUP_SIZE_X"), Cs1dGroupSizeX); OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1); FGPUScene::FInstanceGPULoadBalancer::SetShaderDefines(OutEnvironment); } }; IMPLEMENT_GLOBAL_SHADER(FInvalidateInstancePagesLoadBalancerCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCacheLoadBalancer.usf", "InvalidateInstancePagesLoadBalancerCS", SF_Compute); class FProcessInvalidationQueueGPUCS : public FGlobalShader { DECLARE_GLOBAL_SHADER(FProcessInvalidationQueueGPUCS); SHADER_USE_PARAMETER_STRUCT(FProcessInvalidationQueueGPUCS, FGlobalShader) class FUseHzbDim : SHADER_PERMUTATION_BOOL("USE_HZB_OCCLUSION"); using FPermutationDomain = TShaderPermutationDomain; public: BEGIN_SHADER_PARAMETER_STRUCT(FParameters, ) SHADER_PARAMETER_STRUCT_INCLUDE(FInvalidatePagesParameters, InvalidatePagesParameters) SHADER_PARAMETER_RDG_BUFFER_SRV(Buffer, InvalidationArgs) SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer, InvalidationQueue) SHADER_PARAMETER(int32, InvalidationQueueMax) RDG_BUFFER_ACCESS(IndirectArgsBuffer, ERHIAccess::IndirectArgs) END_SHADER_PARAMETER_STRUCT() static constexpr int ThreadGroupSize = 64; static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters) { return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) && DoesPlatformSupportVirtualShadowMaps(Parameters.Platform); } static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment) { FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); FVirtualShadowMapArray::SetShaderDefines(OutEnvironment); OutEnvironment.SetDefine(TEXT("THREAD_GROUP_SIZE"), ThreadGroupSize); OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1); // OutEnvironment.CompilerFlags.Add(CFLAG_SkipOptimizations); } }; IMPLEMENT_GLOBAL_SHADER(FProcessInvalidationQueueGPUCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCacheGPUInvalidation.usf", "ProcessInvalidationQueueGPUCS", SF_Compute); void FVirtualShadowMapArrayCacheManager::UpdateCachePrimitiveAsDynamic(FInvalidatingPrimitiveCollector& InvalidatingPrimitiveCollector) { //UE_LOG(LogRenderer, Display, TEXT("VirtualShadowMapCacheManager: UpdateCachePrimitiveAsDynamic")); const uint32 SceneFrameNumber = Scene.GetFrameNumberRenderThread(); const uint32 FramesStaticThreshold = CVarFramesStaticThreshold.GetValueOnRenderThread(); // Update the cache states of things that are being invalidated for (TConstSetBitIterator<> BitIt(InvalidatingPrimitiveCollector.InvalidatedPrimitives); BitIt; ++BitIt) { int32 PersistentPrimitiveIndex = BitIt.GetIndex(); // Any invalidations mean we set this primitive to dynamic. We already added an invalidation otherwise // we wouldn't be here, so no need to add another. CachePrimitiveAsDynamic[PersistentPrimitiveIndex] = true; LastPrimitiveInvalidatedFrame[PersistentPrimitiveIndex] = SceneFrameNumber; } // Zero out anything that was being removed // NOTE: This will be redundant with the invalidated stuff, but shouldn't be a big deal for (TConstSetBitIterator<> BitIt(InvalidatingPrimitiveCollector.RemovedPrimitives); BitIt; ++BitIt) { int32 PersistentPrimitiveIndex = BitIt.GetIndex(); // TODO: We probably want to start new primitives as dynamic by default instead, but we don't want to have // to loop over all of them and try and get their PrimitiveSceneInfo every frame for invalid ones CachePrimitiveAsDynamic[PersistentPrimitiveIndex] = false; LastPrimitiveInvalidatedFrame[PersistentPrimitiveIndex] = 0xFFFFFFFF; //UE_LOG(LogRenderer, Display, TEXT("VirtualShadowMapCacheManager: Removing primitive %d!"), PersistentPrimitiveIndex); } // Finally check anything that is currently dynamic to see if it has not invalidated for long enough that // we should move it back to static for (TConstSetBitIterator<> BitIt(CachePrimitiveAsDynamic); BitIt; ++BitIt) { int32 PersistentPrimitiveIndex = BitIt.GetIndex(); const uint32 LastInvalidationFrame = LastPrimitiveInvalidatedFrame[PersistentPrimitiveIndex]; // Note: cleared to MAX_uint32; treated as "unknown/no invalidations" const uint32 InvalidationAge = SceneFrameNumber >= LastInvalidationFrame ? (SceneFrameNumber - LastInvalidationFrame) : 0xFFFFFFFF; const bool bWantStatic = InvalidationAge > FramesStaticThreshold; if (bWantStatic) { // Add invalidation and swap it to static FPersistentPrimitiveIndex WrappedIndex; WrappedIndex.Index = PersistentPrimitiveIndex; FPrimitiveSceneInfo* PrimitiveSceneInfo = Scene.GetPrimitiveSceneInfo(WrappedIndex); if (PrimitiveSceneInfo) { // Add an invalidation for every light for (auto& CacheEntry : CacheEntries) { for (const auto& SmCacheEntry : CacheEntry.Value->ShadowMapEntries) { const uint32 PayloadForceStatic = EncodeInstanceInvalidationPayload(SmCacheEntry.CurrentVirtualShadowMapId, VSM_INVALIDATION_PAYLOAD_FLAG_FORCE_STATIC); InvalidatingPrimitiveCollector.Instances.Add( PrimitiveSceneInfo->GetInstanceSceneDataOffset(), PrimitiveSceneInfo->GetNumInstanceSceneDataEntries(), PayloadForceStatic); } } } else { // This seems to still happen very occasionally... presumably a remove gets "missed" somehow and thus we try and transition // something that is no longer valid back to static. This could also potentially mean we incorrect transition a new thing that // grabbed this slot back to static, but that is less likely as the addition would trigger a separate invalidation. // Not much we can do here currently other than ignore it and move on // Disabling log due to build automation spam // UE_LOG(LogRenderer, Display, TEXT("VirtualShadowMapCacheManager: Invalid persistent primitive index %d, age %u!"), PersistentPrimitiveIndex, InvalidationAge); LastPrimitiveInvalidatedFrame[PersistentPrimitiveIndex] = 0xFFFFFFFF; } // NOTE: This is safe with the current set bit iterator, but should maybe use a temp array for future safety? CachePrimitiveAsDynamic[PersistentPrimitiveIndex] = false; } } } void FVirtualShadowMapArrayCacheManager::ProcessInvalidations(FRDGBuilder& GraphBuilder, FSceneUniformBuffer &SceneUniformBuffer, FInvalidatingPrimitiveCollector& InvalidatingPrimitiveCollector) { if (IsCacheDataAvailable() && PrevUniformParameters.NumFullShadowMaps > 0) { RDG_EVENT_SCOPE(GraphBuilder, "Shadow.Virtual.ProcessInvalidations"); // TODO: Some of this stuff probably needs to move out of this function as we don't need to evaluate it twice // (before/after GPUScene update). That said, we clear the lists so in practice we are just going to do it // before, just could use some refactoring for clarity. // NOTE: Important that we get some of these parameters (ex. CachePrimitiveAsDynamic) before // we update them as the shader needs to know the previous cache states for invalidation. FInvalidationPassCommon InvalidationPassCommon = GetUniformParametersForInvalidation(GraphBuilder, SceneUniformBuffer); // Add invalidations for skeletal meshes, CPU culling changes, dynamic primitives, etc. InvalidatingPrimitiveCollector.AddPrimitivesToInvalidate(); // Check whether we want to swap any cache states and add any invalidations to that end as well UpdateCachePrimitiveAsDynamic(InvalidatingPrimitiveCollector); InvalidatingPrimitiveCollector.Instances.FinalizeBatches(); if (!InvalidatingPrimitiveCollector.Instances.IsEmpty()) { ProcessInvalidations(GraphBuilder, InvalidationPassCommon, InvalidatingPrimitiveCollector.Instances); } } else { // Clear any queued-up invalidations ShadowInvalidatingInstancesImplementation.PrimitiveInstancesToInvalidate.Reset(); for (auto& CacheEntry : CacheEntries) { CacheEntry.Value->PrimitiveInstancesToInvalidate.Reset(); } } } void FVirtualShadowMapArrayCacheManager::ProcessRemovedLights(const TBitArray& RemovedLightMask) { for (auto It = CacheEntries.CreateIterator(); It; ++It) { int32 LightSceneId = It->Key.LightSceneId; if (RemovedLightMask.IsValidIndex(LightSceneId) && RemovedLightMask[LightSceneId]) { It.RemoveCurrent(); } } } FVirtualShadowMapArrayCacheManager::FInvalidationPassCommon FVirtualShadowMapArrayCacheManager::GetUniformParametersForInvalidation( FRDGBuilder& GraphBuilder, FSceneUniformBuffer &SceneUniformBuffer) const { // Construct a uniform buffer based on the previous frame data, reimported into this graph builder FVirtualShadowMapUniformParameters* UniformParameters = GraphBuilder.AllocParameters(); *UniformParameters = PrevUniformParameters; { auto RegExtCreateSrv = [&GraphBuilder](const TRefCountPtr& Buffer, const TCHAR* Name) -> FRDGBufferSRVRef { return GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(Buffer, Name)); }; UniformParameters->PageTableSampler = TStaticSamplerState::GetRHI(); UniformParameters->ProjectionData = RegExtCreateSrv(PrevBuffers.ProjectionData, TEXT("Shadow.Virtual.PrevProjectionData")); UniformParameters->PageTable = GraphBuilder.RegisterExternalTexture(PrevBuffers.PageTable, TEXT("Shadow.Virtual.PrevPageTable")); UniformParameters->PageFlags = GraphBuilder.RegisterExternalTexture(PrevBuffers.PageFlags, TEXT("Shadow.Virtual.PrevPageFlags")); UniformParameters->PageReceiverMasks = GraphBuilder.RegisterExternalTexture(PrevBuffers.PageReceiverMasks, TEXT("Shadow.Virtual.PrevPageReceiverMasks")); UniformParameters->UncachedPageRectBounds = RegExtCreateSrv(PrevBuffers.UncachedPageRectBounds, TEXT("Shadow.Virtual.PrevUncachedPageRectBounds")); UniformParameters->AllocatedPageRectBounds = RegExtCreateSrv(PrevBuffers.AllocatedPageRectBounds, TEXT("Shadow.Virtual.PrevAllocatedPageRectBounds")); UniformParameters->CachePrimitiveAsDynamic = GraphBuilder.CreateSRV(UploadCachePrimitiveAsDynamic(GraphBuilder)); // Unused in this path... may be a better way to handle this UniformParameters->PhysicalPagePool = GSystemTextures.GetZeroUIntArrayAtomicCompatDummy(GraphBuilder); FRDGBufferSRVRef Uint32SRVDummy = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder, sizeof(uint32))); UniformParameters->PerViewData.LightGridData = Uint32SRVDummy; UniformParameters->PerViewData.NumCulledLightsGrid = Uint32SRVDummy; } FInvalidationPassCommon Result; Result.UniformParameters = UniformParameters; Result.VirtualShadowMapUniformBuffer = GraphBuilder.CreateUniformBuffer(UniformParameters); Result.SceneUniformBuffer = SceneUniformBuffer.GetBuffer(GraphBuilder); return Result; } void FVirtualShadowMapArrayCacheManager::SetInvalidateInstancePagesParameters( FRDGBuilder& GraphBuilder, const FInvalidationPassCommon& InvalidationPassCommon, FInvalidatePagesParameters* PassParameters) const { // TODO: We should make this UBO once and reuse it for all the passes PassParameters->VirtualShadowMap = InvalidationPassCommon.VirtualShadowMapUniformBuffer; PassParameters->Scene = InvalidationPassCommon.SceneUniformBuffer; PassParameters->PhysicalPageMetaDataOut = GraphBuilder.CreateUAV(GraphBuilder.RegisterExternalBuffer(PhysicalPageMetaData)); PassParameters->OutPageRequestFlags = GraphBuilder.CreateUAV(GraphBuilder.RegisterExternalTexture(PrevBuffers.PageRequestFlags)); const bool bUseHZB = (CVarCacheVsmUseHzb.GetValueOnRenderThread() != 0); const TRefCountPtr HZBPhysical = (bUseHZB && HZBPhysicalPagePoolArray) ? HZBPhysicalPagePoolArray : nullptr; if (HZBPhysical) { // Same, since we are not producing a new frame just yet PassParameters->HZBPageTable = InvalidationPassCommon.UniformParameters->PageTable; PassParameters->HZBPageRectBounds = InvalidationPassCommon.UniformParameters->AllocatedPageRectBounds; // TODO: Uncached? PassParameters->HZBTextureArray = GraphBuilder.RegisterExternalTexture(HZBPhysical); PassParameters->HZBSize = HZBPhysical->GetDesc().Extent; PassParameters->HZBSampler = TStaticSamplerState< SF_Point, AM_Clamp, AM_Clamp, AM_Clamp >::GetRHI(); } } void FVirtualShadowMapArrayCacheManager::ProcessInvalidations( FRDGBuilder& GraphBuilder, const FInvalidationPassCommon& InvalidationPassCommon, const FInstanceGPULoadBalancer& Instances) const { RDG_GPU_MASK_SCOPE(GraphBuilder, CacheValidGPUMask); check(InvalidationPassCommon.UniformParameters->NumFullShadowMaps > 0); check(!Instances.IsEmpty()); FInvalidateInstancePagesLoadBalancerCS::FParameters* PassParameters = GraphBuilder.AllocParameters(); SetInvalidateInstancePagesParameters(GraphBuilder, InvalidationPassCommon, &PassParameters->InvalidatePagesParameters); Instances.UploadFinalized(GraphBuilder).GetShaderParameters(GraphBuilder, PassParameters->LoadBalancerParameters); FInvalidateInstancePagesLoadBalancerCS::FPermutationDomain PermutationVector; PermutationVector.Set(PassParameters->InvalidatePagesParameters.HZBTextureArray != nullptr); auto ComputeShader = GetGlobalShaderMap(Scene.GetFeatureLevel())->GetShader(PermutationVector); //UE_LOG(LogRenderer, Display, TEXT("VirtualShadowMapCacheManager: Invalidation %lu instances!"), Instances.GetTotalNumInstances()); FComputeShaderUtils::AddPass( GraphBuilder, RDG_EVENT_NAME("InvalidateInstancePagesLoadBalancerCS (%d batches)", Instances.GetBatches().Num()), ComputeShader, PassParameters, Instances.GetWrappedCsGroupCount() ); } // Remove old info used to track logging. void FVirtualShadowMapArrayCacheManager::TrimLoggingInfo() { #if !UE_BUILD_SHIPPING // Remove old items float RealTimeSeconds = float(FGameTime::GetTimeSinceAppStart().GetRealTimeSeconds()); LargePageAreaItems = LargePageAreaItems.FilterByPredicate([RealTimeSeconds](const TMap::ElementType& Element) { return RealTimeSeconds - Element.Value.LastTimeSeen < 5.0f; }); #endif } FVirtualShadowMapArrayCacheManager::FViewData::FViewData() : InstanceState(1024, TEXT("Shadow.Virtual.CacheManager.ViewState")) { } FVirtualShadowMapInvalidationSceneUpdater::FVirtualShadowMapInvalidationSceneUpdater(FVirtualShadowMapArrayCacheManager& InCacheManager) : CacheManager(InCacheManager) {} void FVirtualShadowMapInvalidationSceneUpdater::PreLightsUpdate(FRDGBuilder& GraphBuilder, const FLightSceneChangeSet& LightSceneChangeSet) { if (LightSceneChangeSet.RemovedLightIds.Num() > 0) { CacheManager.ProcessRemovedLights(LightSceneChangeSet.RemovedLightsMask); } } void FVirtualShadowMapInvalidationSceneUpdater::PreSceneUpdate(FRDGBuilder& GraphBuilder, const FScenePreUpdateChangeSet& ChangeSet, FSceneUniformBuffer& SceneUniforms) { SCOPED_NAMED_EVENT(FScene_VirtualShadowCacheUpdate, FColor::Orange); if (ChangeSet.ViewUpdateChangeSet) { for (FPersistentViewId ViewId : ChangeSet.ViewUpdateChangeSet->RemovedViewIds) { // If VSM was disabled then the data may not contain the view being removed (if VSM is enabled & the view removed on the same frame). if (CacheManager.ViewData.IsValidIndex(ViewId.Index)) { CacheManager.ViewData.RemoveAt(ViewId.Index); } } } FShadowScene& ShadowScene = CacheManager.Scene.GetExtension(); // Needs to be called before the first time we start adding invalidations. // There may be a way to avoid doing this both in pre and post, but it is pretty light if there is nothing to do anyways. CacheManager.ReallocatePersistentPrimitiveIndices(); if (CacheManager.IsCacheDataAvailable()) { FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector InvalidatingPrimitiveCollector(&CacheManager); // TODO: Note that there is an ordering dependency here on ShadowScene, which may not have updated the AlwaysInvalidatingPrimitives list at this point. // Probably harmless (as they will get invalidated next frame etc) but something to think about. // Primitives that are tracked as always invalidating shadows, pipe through as transform updates for (FPrimitiveSceneInfo* PrimitiveSceneInfo : ShadowScene.GetAlwaysInvalidatingPrimitives()) { InvalidatingPrimitiveCollector.UpdatedTransform(PrimitiveSceneInfo); } // Note: skips added as they are not fully defined at this point (not primitive ID allocated, ChangeSet.PrimitiveUpdates.ForEachUpdateCommand(ESceneUpdateCommandFilter::Updated | ESceneUpdateCommandFilter::Deleted, EPrimitiveUpdateDirtyFlags::AllCulling, [&](const FPrimitiveUpdateCommand& Cmd) { if (Cmd.IsDelete()) { // All removed primitives must invalidate their footprints in the VSM before leaving. InvalidatingPrimitiveCollector.Removed(Cmd.GetSceneInfo()); } else { InvalidatingPrimitiveCollector.UpdatedTransform(Cmd.GetSceneInfo()); } }); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Cache.PreInvalidationInstances"), InvalidatingPrimitiveCollector.Instances.GetTotalNumInstances()); CacheManager.ProcessInvalidations(GraphBuilder, SceneUniforms, InvalidatingPrimitiveCollector); } } using FLoadBalancer = TInstanceCullingLoadBalancer; class FVSMResetInstanceStateCS : public FGlobalShader { DECLARE_GLOBAL_SHADER(FVSMResetInstanceStateCS); SHADER_USE_PARAMETER_STRUCT(FVSMResetInstanceStateCS, FGlobalShader) public: BEGIN_SHADER_PARAMETER_STRUCT(FParameters, ) SHADER_PARAMETER_STRUCT_INCLUDE(FLoadBalancer::FShaderParameters, LoadBalancerParameters) SHADER_PARAMETER_STRUCT_INCLUDE(FGPUSceneResourceParameters, GPUScene) SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer, InOutViewInstanceState) SHADER_PARAMETER(int32, StateWordStride) END_SHADER_PARAMETER_STRUCT() // This is probably fine even in instance list mode static constexpr int ThreadGroupSize = FLoadBalancer::ThreadGroupSize; static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters) { return DoesPlatformSupportVirtualShadowMaps(Parameters.Platform); } static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment) { FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1); OutEnvironment.SetDefine(TEXT("USE_GLOBAL_GPU_SCENE_DATA"), 1); FGPUScene::FInstanceGPULoadBalancer::SetShaderDefines(OutEnvironment); } }; IMPLEMENT_GLOBAL_SHADER(FVSMResetInstanceStateCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCacheGPUInvalidation.usf", "VSMResetInstanceStateCS", SF_Compute); void FVirtualShadowMapInvalidationSceneUpdater::PostSceneUpdate(FRDGBuilder& GraphBuilder, const FScenePostUpdateChangeSet& ChangeSet) { // Loop over the current state, rather than the change set to handle that VSM can be enabled/disabled for (auto It = CacheManager.Scene.PersistentViewStateUniqueIDs.CreateConstIterator(); It; ++It) { FPersistentViewId ViewId { It.GetIndex() }; if (!CacheManager.ViewData.IsValidIndex(ViewId.Index)) { CacheManager.ViewData.EmplaceAt(ViewId.Index); } } CacheManager.ReallocatePersistentPrimitiveIndices(); // Grab a reference, but we currently do all the work in PostGPUSceneUpdate PostUpdateChangeSet = &ChangeSet; } void FVirtualShadowMapInvalidationSceneUpdater::PostGPUSceneUpdate(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUniforms) { // TODO: Separate scope for post-update pass? SCOPED_NAMED_EVENT(FScene_VirtualShadowCacheUpdate, FColor::Orange); // Update the allocations for storing a bit per view to track state for each view (dynamic/static) // This depends on GPU-Scene having correct instance ranges, so must happen post GPU scene update. CacheManager.InstanceStateMaskWordStride = FMath::DivideAndRoundUp(CacheManager.Scene.GPUScene.GetNumInstances(), 32); const int32 NumInstanceWordsNeeded = CacheManager.InstanceStateMaskWordStride * FVirtualShadowMapArrayCacheManager::FViewData::NumBitsPerInstance; if (NumInstanceWordsNeeded > 0 && !CacheManager.ViewData.IsEmpty()) { TArray > InstanceStatesRDG; InstanceStatesRDG.Reserve(CacheManager.ViewData.Num()); for (FVirtualShadowMapArrayCacheManager::FViewData& ViewData : CacheManager.ViewData) { InstanceStatesRDG.Emplace(ViewData.InstanceState.ResizeAndClearBufferIfNeeded(GraphBuilder, NumInstanceWordsNeeded)); } FLoadBalancer LoadBalancer; for (FPrimitiveSceneInfo* PrimitiveSceneInfo : PostUpdateChangeSet->AddedPrimitiveSceneInfos) { int32 WordOffset = PrimitiveSceneInfo->GetInstanceSceneDataOffset() / 32; int32 NumWords = FMath::DivideAndRoundUp(PrimitiveSceneInfo->GetNumInstanceSceneDataEntries(), 32); LoadBalancer.Add(WordOffset, NumWords, PrimitiveSceneInfo->GetPersistentIndex().Index); } if (!LoadBalancer.IsEmpty()) { FLoadBalancer::FGPUData LoadBalancerGpuData = LoadBalancer.Upload(GraphBuilder); auto ComputeShader = GetGlobalShaderMap(CacheManager.Scene.GetFeatureLevel())->GetShader(); FGPUSceneResourceParameters GPUSceneParams = CacheManager.Scene.GPUScene.GetShaderParameters(GraphBuilder); for (FRDGBuffer* InstanceStateRDG : InstanceStatesRDG) { FVSMResetInstanceStateCS::FParameters* PassParameters = GraphBuilder.AllocParameters(); PassParameters->InOutViewInstanceState = GraphBuilder.CreateUAV(InstanceStateRDG); PassParameters->GPUScene = GPUSceneParams; PassParameters->StateWordStride = CacheManager.InstanceStateMaskWordStride; LoadBalancerGpuData.AddPass(GraphBuilder, RDG_EVENT_NAME("ResetInstanceState (%d batches)", LoadBalancerGpuData.NumBatches), ComputeShader, PassParameters); } } } if (CacheManager.IsCacheDataAvailable()) { FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector InvalidatingPrimitiveCollector(&CacheManager); // Filter out all updates that are either "add" or has dirty flags to say they affect the bounds. PostUpdateChangeSet->PrimitiveUpdates.ForEachUpdateCommand(ESceneUpdateCommandFilter::AddedUpdated, EPrimitiveUpdateDirtyFlags::AllCulling, [&](const FPrimitiveUpdateCommand& Cmd) { if (Cmd.IsAdd()) { InvalidatingPrimitiveCollector.Added(Cmd.GetSceneInfo()); } else { InvalidatingPrimitiveCollector.UpdatedTransform(Cmd.GetSceneInfo()); } }); TRACE_INT_VALUE(TEXT("Shadow.Virtual.Cache.PostInvalidationInstances"), InvalidatingPrimitiveCollector.Instances.GetTotalNumInstances()); CacheManager.ProcessInvalidations(GraphBuilder, SceneUniforms, InvalidatingPrimitiveCollector); } PostUpdateChangeSet = nullptr; } class FVSMUpdateViewInstanceStateCS : public FGlobalShader { DECLARE_GLOBAL_SHADER(FVSMUpdateViewInstanceStateCS); SHADER_USE_PARAMETER_STRUCT(FVSMUpdateViewInstanceStateCS, FGlobalShader) public: BEGIN_SHADER_PARAMETER_STRUCT(FParameters, ) // SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FVirtualShadowMapUniformParameters, VirtualShadowMap) SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FSceneUniformParameters, Scene) SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer, OutCacheInstanceAsDynamic) SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer, InOutViewInstanceState) SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer, OutInvalidationArgs) SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer, OutInvalidationQueue) SHADER_PARAMETER(int32, SceneRendererViewId) SHADER_PARAMETER(int32, StateWordStride) SHADER_PARAMETER(int32, MaxValidInstanceIndex) SHADER_PARAMETER(int32, InvalidationQueueMax) END_SHADER_PARAMETER_STRUCT() static constexpr int ThreadGroupSize = 64; static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters) { return DoesPlatformSupportVirtualShadowMaps(Parameters.Platform); } static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment) { FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); FVirtualShadowMapArray::SetShaderDefines(OutEnvironment); OutEnvironment.SetDefine(TEXT("THREAD_GROUP_SIZE"), ThreadGroupSize); OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1); //OutEnvironment.CompilerFlags.Add(CFLAG_SkipOptimizations); } }; IMPLEMENT_GLOBAL_SHADER(FVSMUpdateViewInstanceStateCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCacheGPUInvalidation.usf", "VSMUpdateViewInstanceStateCS", SF_Compute); BEGIN_SHADER_PARAMETER_STRUCT(FVirtualShadowMapInvalidationSceneUniforms, RENDERER_API) SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer, CacheInstanceAsDynamic) SHADER_PARAMETER(uint32, InstanceStateViewWordStride) END_SHADER_PARAMETER_STRUCT() DECLARE_SCENE_UB_STRUCT(FVirtualShadowMapInvalidationSceneUniforms, VSMCache, RENDERER_API) class FVirtualShadowMapInvalidationSceneRenderer : public ISceneExtensionRenderer { DECLARE_SCENE_EXTENSION_RENDERER(FVirtualShadowMapInvalidationSceneRenderer, FVirtualShadowMapArrayCacheManager); public: FVirtualShadowMapInvalidationSceneRenderer(FSceneRendererBase& InSceneRenderer, FVirtualShadowMapArrayCacheManager& InCacheManager) : ISceneExtensionRenderer(InSceneRenderer) , CacheManager(InCacheManager) { } virtual void UpdateSceneUniformBuffer(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUniforms) override { FSceneRendererBase& SceneRenderer = GetSceneRenderer(); const int32 NumInstances = SceneRenderer.Scene->GPUScene.GetNumInstances(); const int32 NumViews = SceneRenderer.Views.Num(); int32 CacheInstanceAsDynamicInstanceMaskWordStride = CacheManager.InstanceStateMaskWordStride; // Note: we can't check this here because ... dynamic primitives might have allocated more instances. Those need to be handled somehow wrt this. TBD. I guess they need a bit in the mask? Or we just return 0? // check(CacheInstanceAsDynamicInstanceMaskWordStride * 32 >= NumInstances); // Allocate space for WordStride words for each view, x2 for storing state change mask int32 NumCacheInstanceAsDynamicWords = 2 * CacheInstanceAsDynamicInstanceMaskWordStride * NumViews; // If there are no instances or views, leave SceneUB with defaults. if (NumCacheInstanceAsDynamicWords > 0) { CacheInstanceAsDynamicRDG = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), NumCacheInstanceAsDynamicWords), TEXT("Shadow.Virtual.CacheInstanceAsDynamic")); AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(CacheInstanceAsDynamicRDG), 0u); FVirtualShadowMapInvalidationSceneUniforms VirtualShadowMapInvalidationSceneUniforms; VirtualShadowMapInvalidationSceneUniforms.CacheInstanceAsDynamic = GraphBuilder.CreateSRV(CacheInstanceAsDynamicRDG); VirtualShadowMapInvalidationSceneUniforms.InstanceStateViewWordStride = CacheInstanceAsDynamicInstanceMaskWordStride; SceneUniforms.Set(SceneUB::VSMCache, VirtualShadowMapInvalidationSceneUniforms); } } virtual void PreRender(FRDGBuilder& GraphBuilder) override { // This happens post visibility update so here we know what instances are deemed deforming. // Early out if there are no instances (or no views?) if (CacheInstanceAsDynamicRDG == nullptr) { return; } FSceneRendererBase& SceneRenderer = GetSceneRenderer(); ERHIFeatureLevel::Type FeatureLevel = SceneRenderer.Scene->GetFeatureLevel(); FVSMUpdateViewInstanceStateCS::FParameters SharedParameters; SharedParameters.Scene = SceneRenderer.GetSceneUniforms().GetBuffer(GraphBuilder); const int32 NumInstances = CacheManager.Scene.GPUScene.GetNumInstances(); const int32 NumViews = SceneRenderer.Views.Num(); const int32 InvalidationQueueMax = NumInstances * NumViews; GPUInvalidateInstancesArgsRDG = CreateAndClearIndirectDispatchArgs(GraphBuilder, FeatureLevel, TEXT("Shadow.Virtual.GPUInvalidateInstancesArgs"), FIntVector3(0,1,1), 1u, 4u); // Worst-case allocation of NumInstances X NumView slots, though it's a transient buffer so probably ok GPUInvalidationQueueRDG = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), InvalidationQueueMax), TEXT("Shadow.Virtual.GPUInvalidationQueue")); SharedParameters.OutCacheInstanceAsDynamic = GraphBuilder.CreateUAV(CacheInstanceAsDynamicRDG, ERDGUnorderedAccessViewFlags::SkipBarrier); SharedParameters.MaxValidInstanceIndex = NumInstances; SharedParameters.OutInvalidationArgs = GraphBuilder.CreateUAV(GPUInvalidateInstancesArgsRDG, ERDGUnorderedAccessViewFlags::SkipBarrier); SharedParameters.OutInvalidationQueue = GraphBuilder.CreateUAV(GPUInvalidationQueueRDG, ERDGUnorderedAccessViewFlags::SkipBarrier); SharedParameters.InvalidationQueueMax = InvalidationQueueMax; SharedParameters.StateWordStride = CacheManager.InstanceStateMaskWordStride; bool bWasAnyInvalidationsQueued = false; for (int32 ViewIndex = 0; ViewIndex < SceneRenderer.Views.Num(); ++ViewIndex) { FViewInfo& View = SceneRenderer.Views[ViewIndex]; if (!View.PersistentViewId.IsValid()) { continue; } FVSMUpdateViewInstanceStateCS::FParameters* PassParameters = GraphBuilder.AllocParameters(&SharedParameters); PassParameters->SceneRendererViewId = ViewIndex; check(CacheManager.ViewData.IsValidIndex(View.PersistentViewId.Index)); PassParameters->InOutViewInstanceState = GraphBuilder.CreateUAV(CacheManager.ViewData[View.PersistentViewId.Index].InstanceState.Register(GraphBuilder)); auto ComputeShader = GetGlobalShaderMap(FeatureLevel)->GetShader(); // Run one thread per word FComputeShaderUtils::AddPass( GraphBuilder, RDG_EVENT_NAME("VSMUpdateViewInstanceState"), ComputeShader, PassParameters, FComputeShaderUtils::GetGroupCount(CacheManager.InstanceStateMaskWordStride, FVSMUpdateViewInstanceStateCS::ThreadGroupSize) ); bWasAnyInvalidationsQueued = true; } // Run the resultant invalidations right away, this could be deferred and flushed later instead if desired. if (CacheManager.IsCacheDataAvailable() && bWasAnyInvalidationsQueued && CacheManager.PrevUniformParameters.NumFullShadowMaps > 0) { RDG_EVENT_SCOPE(GraphBuilder, "Shadow.Virtual.ProcessInvalidationsGPU"); FVirtualShadowMapArrayCacheManager::FInvalidationPassCommon InvalidationPassCommon = CacheManager.GetUniformParametersForInvalidation(GraphBuilder, SceneRenderer.GetSceneUniforms()); FProcessInvalidationQueueGPUCS::FParameters* PassParameters = GraphBuilder.AllocParameters(); PassParameters->InvalidationArgs = GraphBuilder.CreateSRV(GPUInvalidateInstancesArgsRDG); PassParameters->InvalidationQueue = GraphBuilder.CreateSRV(GPUInvalidationQueueRDG); PassParameters->InvalidationQueueMax = InvalidationQueueMax; PassParameters->IndirectArgsBuffer = GPUInvalidateInstancesArgsRDG; CacheManager.SetInvalidateInstancePagesParameters(GraphBuilder, InvalidationPassCommon, &PassParameters->InvalidatePagesParameters); FProcessInvalidationQueueGPUCS::FPermutationDomain PermutationVector; PermutationVector.Set(PassParameters->InvalidatePagesParameters.HZBTextureArray != nullptr); auto ComputeShader = GetGlobalShaderMap(FeatureLevel)->GetShader(PermutationVector); FComputeShaderUtils::AddPass(GraphBuilder, RDG_EVENT_NAME("ProcessInvalidationQueueGPU"), ComputeShader, PassParameters, GPUInvalidateInstancesArgsRDG, 0); } } private: FRDGBuffer* CacheInstanceAsDynamicRDG = nullptr; FRDGBuffer* GPUInvalidationQueueRDG = nullptr; FRDGBuffer* GPUInvalidateInstancesArgsRDG = nullptr; FVirtualShadowMapArrayCacheManager& CacheManager; }; ISceneExtensionRenderer* FVirtualShadowMapArrayCacheManager::CreateRenderer(FSceneRendererBase& InSceneRenderer, const FEngineShowFlags& EngineShowFlags) { // NOTE: We need this check because shader platform can change during scene destruction so we need to ensure we // don't try and run shaders on a new platform that doesn't support VSMs... if (UseVirtualShadowMaps(Scene.GetShaderPlatform(), Scene.GetFeatureLevel()) && !EngineShowFlags.HitProxies && EngineShowFlags.VirtualShadowMapPersistentData) { return new FVirtualShadowMapInvalidationSceneRenderer(InSceneRenderer, *this); } return nullptr; } static void GetSceneUBDefaultParameters(FVirtualShadowMapInvalidationSceneUniforms& OutParameters, FRDGBuilder& GraphBuilder) { OutParameters.CacheInstanceAsDynamic = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder, 4u)); OutParameters.InstanceStateViewWordStride = 0u; } IMPLEMENT_SCENE_UB_STRUCT(FVirtualShadowMapInvalidationSceneUniforms, VSMCache, GetSceneUBDefaultParameters); #undef LOCTEXT_NAMESPACE