Files
UnrealEngine/Engine/Shaders/Private/VirtualShadowMaps/VirtualShadowMapThrottle.usf
2025-05-18 13:04:45 +08:00

162 lines
6.4 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "../Common.ush"
#include "VirtualShadowMapHandle.ush"
#include "VirtualShadowMapProjectionStructs.ush"
#include "/Engine/Shared/VirtualShadowMapDefinitions.h"
#include "/Engine/Shared/LightDefinitions.h"
// Throttle buffer produced for the previous frame. Read for the previous per-entry throttle values
// and for the previous frame-wide throttle value (stored as float bits).
StructuredBuffer<uint> PrevThrottleBuffer;
// Nanite cluster counts from the previous frame: a header with the total HW/SW cluster counts,
// followed by one entry per virtual shadow map holding its HW/SW cluster counts.
StructuredBuffer<uint> PrevNanitePerformanceFeedback;
// Indexed by the previous frame's VSM data index; provides the id the same shadow map has this frame.
StructuredBuffer<FNextVirtualShadowMapData> NextVirtualShadowMapData;
// Upper bound on the thread ids that ProcessPrevFramePerfDataCS remaps.
uint NextVirtualShadowMapDataCount;
// Upper bound on the thread ids that UpdateThrottleParametersCS processes.
uint NumThrottleBufferEntries;
// Throttle buffer for the current frame. Header: total HW/SW clusters, max HW/SW clusters, frame-wide throttle.
// Per-entry: HW clusters, SW clusters, throttle. Throttle values are floats stored via asuint().
RWStructuredBuffer<uint> OutThrottleBuffer;
// Per-VSM projection data addressed with VSM_PSD_STRIDE. The light type is read from it and the
// resolution LOD bias is modified in place.
RWByteAddressBuffer InOutProjectionData;
// When >= 0 this value is used directly as the frame-wide throttle intensity; otherwise the
// intensity is derived from the cluster-count cost heuristic and ThrottleLoadBudget.
float DynResThrottleStrength;
// Subtracted from the total cost heuristic; the difference drives the frame-wide throttle up or down.
float ThrottleLoadBudget;
// Weight given to the previous per-entry throttle value when blending it with the newly computed one.
float ThrottleEMAHistoryWeight;
// Extra resolution LOD bias applied at full throttle intensity to directional lights.
float ThrottleMaxBiasDirectional;
// Extra resolution LOD bias applied at full throttle intensity to all other light types.
float ThrottleMaxBiasLocal;
// Carries last frame's Nanite performance feedback over into this frame's throttle buffer.
//
// One thread per previous-frame virtual shadow map:
//  - thread 0 additionally forwards the frame-wide HW/SW cluster totals into the header,
//  - every thread whose shadow map still exists this frame copies its HW/SW cluster counts and its
//    previous throttle value into the slot of the *new* data index, and folds its counts into the
//    header maxima with InterlockedMax.
// NOTE(review): the header maxima are only ever raised here, so this presumably relies on the
// buffer being cleared before the dispatch - confirm against the host code.
[numthreads(64, 1, 1)]
void ProcessPrevFramePerfDataCS(uint DispatchThreadId : SV_DispatchThreadID)
{
    // Frame-wide totals only need to be written once.
    if (DispatchThreadId == 0)
    {
        OutThrottleBuffer[VSM_TB_HEADER_TOTAL_HW_CLUSTERS] = PrevNanitePerformanceFeedback[VSM_NPF_HEADER_TOTAL_HW_CLUSTERS];
        OutThrottleBuffer[VSM_TB_HEADER_TOTAL_SW_CLUSTERS] = PrevNanitePerformanceFeedback[VSM_NPF_HEADER_TOTAL_SW_CLUSTERS];
    }

    // Threads past the end of the remap table have nothing to do.
    if (DispatchThreadId >= NextVirtualShadowMapDataCount)
    {
        return;
    }

    FVirtualShadowMapHandle SourceHandle = FVirtualShadowMapHandle::MakeFromId(DispatchThreadId);
    const uint SourceIndex = SourceHandle.GetDataIndex();

    const FNextVirtualShadowMapData Remap = NextVirtualShadowMapData[SourceIndex];
    FVirtualShadowMapHandle TargetHandle = FVirtualShadowMapHandle::MakeFromId(Remap.NextVirtualShadowMapId);

    // The shadow map has no counterpart this frame, so its feedback is dropped.
    if (!TargetHandle.IsValid())
    {
        return;
    }

    const uint FeedbackBase = VSM_NPF_SIZEOF_HEADER + VSM_NPF_SIZEOF_ENTRY * SourceIndex;
    const uint SourceThrottleBase = VSM_TB_SIZEOF_HEADER + VSM_TB_SIZEOF_ENTRY * SourceIndex;
    const uint TargetThrottleBase = VSM_TB_SIZEOF_HEADER + VSM_TB_SIZEOF_ENTRY * TargetHandle.GetDataIndex();

    const uint EntryClustersHW = PrevNanitePerformanceFeedback[FeedbackBase + VSM_NPF_ENTRY_HW_CLUSTERS];
    const uint EntryClustersSW = PrevNanitePerformanceFeedback[FeedbackBase + VSM_NPF_ENTRY_SW_CLUSTERS];

    // Re-home the per-light data under the data index used this frame.
    OutThrottleBuffer[TargetThrottleBase + VSM_TB_ENTRY_HW_CLUSTERS] = EntryClustersHW;
    OutThrottleBuffer[TargetThrottleBase + VSM_TB_ENTRY_SW_CLUSTERS] = EntryClustersSW;
    OutThrottleBuffer[TargetThrottleBase + VSM_TB_ENTRY_THROTTLE] = PrevThrottleBuffer[SourceThrottleBase + VSM_TB_ENTRY_THROTTLE];

    // Track the largest per-light cluster counts across all surviving shadow maps.
    InterlockedMax(OutThrottleBuffer[VSM_TB_HEADER_MAX_HW_CLUSTERS], EntryClustersHW);
    InterlockedMax(OutThrottleBuffer[VSM_TB_HEADER_MAX_SW_CLUSTERS], EntryClustersSW);
}
// Estimates a rendering cost from Nanite cluster counts as a weighted sum of the
// hardware-rasterized and software-rasterized cluster counts.
float CalculateCostHeuristic(const float NaniteClustersHW, const float NaniteClustersSW)
{
    // todo: can significantly improve the fit of this heuristic with page count data (requested, cached, invalidated)
    const float CostPerClusterHW = 0.00012176f;
    const float CostPerClusterSW = 0.00002700f;
    return CostPerClusterHW * NaniteClustersHW + CostPerClusterSW * NaniteClustersSW;
}
// Adds ExtraBias to the resolution LOD bias stored for the given virtual shadow map.
// The value is read with a plain Load and written back with a plain Store (no interlocked operation).
void ApplyResolutionLODBias(RWByteAddressBuffer ProjectionData, FVirtualShadowMapHandle VirtualShadowMapHandle, float ExtraBias)
{
    // Byte address of this shadow map's ResolutionLodBias field.
    const uint BiasAddress = VirtualShadowMapHandle.GetDataIndex() * VSM_PSD_STRIDE + VSM_PSD_OFFSET_RESOLUTION_LOD_BIAS;

    float ResolutionLodBias = asfloat(ProjectionData.Load(BiasAddress));
    ResolutionLodBias += ExtraBias;

    ProjectionData.Store(BiasAddress, asuint(ResolutionLodBias));
}
// Fetches the light type word stored in the projection data of the given virtual shadow map.
uint GetLightType(RWByteAddressBuffer ProjectionData, FVirtualShadowMapHandle VirtualShadowMapHandle)
{
    const uint EntryBase = VirtualShadowMapHandle.GetDataIndex() * VSM_PSD_STRIDE;
    return ProjectionData.Load(EntryBase + VSM_PSD_OFFSET_LIGHT_TYPE);
}
// Derives this frame's throttle intensities and applies them as extra resolution LOD bias.
//
// Every thread computes the same frame-wide throttle intensity:
//  - DynResThrottleStrength when it is >= 0, otherwise
//  - the previous frame-wide intensity nudged by how far the total cost heuristic is from
//    ThrottleLoadBudget (clamped per-step, result saturated to [0, 1]).
// Thread 0 stores that value in the header. Each thread below NumThrottleBufferEntries then computes
// a per-shadow-map intensity, scales it by the light-type specific maximum bias, adds the result to
// the shadow map's resolution LOD bias and records the intensity for the next frame.
[numthreads(64, 1, 1)]
void UpdateThrottleParametersCS(uint DispatchThreadId : SV_DispatchThreadID)
{
    const uint SumClustersHW = OutThrottleBuffer[VSM_TB_HEADER_TOTAL_HW_CLUSTERS];
    const uint SumClustersSW = OutThrottleBuffer[VSM_TB_HEADER_TOTAL_SW_CLUSTERS];
    const uint PeakClustersHW = OutThrottleBuffer[VSM_TB_HEADER_MAX_HW_CLUSTERS];
    const uint PeakClustersSW = OutThrottleBuffer[VSM_TB_HEADER_MAX_SW_CLUSTERS];

    float GlobalThrottleIntensity;
    if (DynResThrottleStrength >= 0)
    {
        // An externally supplied strength overrides the budget-driven controller.
        GlobalThrottleIntensity = DynResThrottleStrength;
    }
    else
    {
        // hand-tuned parameters
        const float Response = 0.5f;
        const float MaxStepUp = 0.2f;
        const float MaxStepDown = -0.01f;

        const float OverBudget = CalculateCostHeuristic(SumClustersHW, SumClustersSW) - ThrottleLoadBudget;
        const float Step = clamp(OverBudget * Response, MaxStepDown, MaxStepUp);
        const float LastGlobalIntensity = asfloat(PrevThrottleBuffer[VSM_TB_HEADER_TOTAL_THROTTLE]);
        GlobalThrottleIntensity = saturate(LastGlobalIntensity + Step);
    }

    // The frame-wide value is identical on every thread; one write is enough.
    if (DispatchThreadId == 0)
    {
        OutThrottleBuffer[VSM_TB_HEADER_TOTAL_THROTTLE] = asuint(GlobalThrottleIntensity);
    }

    if (DispatchThreadId >= NumThrottleBufferEntries)
    {
        return;
    }

    FVirtualShadowMapHandle Handle = FVirtualShadowMapHandle::MakeFromId(DispatchThreadId);
    const uint EntryBase = VSM_TB_SIZEOF_HEADER + VSM_TB_SIZEOF_ENTRY * Handle.GetDataIndex();
    const uint EntryClustersHW = OutThrottleBuffer[EntryBase + VSM_TB_ENTRY_HW_CLUSTERS];
    const uint EntryClustersSW = OutThrottleBuffer[EntryBase + VSM_TB_ENTRY_SW_CLUSTERS];

    // Likely no data available when both counts are zero: fall back to the global throttle value.
    float ThrottleIntensity = GlobalThrottleIntensity;
    if (EntryClustersHW + EntryClustersSW != 0)
    {
        // The slot still holds the throttle value carried over from the previous frame.
        const float LastIntensity = asfloat(OutThrottleBuffer[EntryBase + VSM_TB_ENTRY_THROTTLE]);

        // Scale the global throttle by this light's cost relative to the most expensive light.
        const float PeakCost = CalculateCostHeuristic(PeakClustersHW, PeakClustersSW);
        const float EntryCost = CalculateCostHeuristic(EntryClustersHW, EntryClustersSW);
        const float CostContribution = EntryCost / PeakCost;
        ThrottleIntensity = saturate(CostContribution * GlobalThrottleIntensity);

        // Blend with the previous value using ThrottleEMAHistoryWeight as the history weight.
        ThrottleIntensity = ThrottleIntensity * (1.0 - ThrottleEMAHistoryWeight) + LastIntensity * ThrottleEMAHistoryWeight;
    }

    float MaxBias = ThrottleMaxBiasLocal;
    if (GetLightType(InOutProjectionData, Handle) == LIGHT_TYPE_DIRECTIONAL)
    {
        MaxBias = ThrottleMaxBiasDirectional;
    }

    ApplyResolutionLODBias(InOutProjectionData, Handle, ThrottleIntensity * MaxBias);
    OutThrottleBuffer[EntryBase + VSM_TB_ENTRY_THROTTLE] = asuint(ThrottleIntensity);
}