// Files
// UnrealEngine/Engine/Shaders/Private/HairStrands/HairStrandsVoxelRasterCompute.usf
// 2025-05-18 13:04:45 +08:00
//
// 1327 lines
// 50 KiB
// HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
#define HAIR_STRANDS_PARAMETERS 1
#include "../Common.ush"
#include "../Matrices.ush"
#include "../SceneTextureParameters.ush"
#include "../PositionReconstructionCommon.ush"
#include "../DeferredShadingCommon.ush"
#include "../ShaderPrint.ush"
#include "HairStrandsAABBCommon.ush"
#include "HairStrandsVisibilityCommon.ush"
#include "HairStrandsClusterCommon.ush"
#include "HairStrandsVertexFactoryCommon.ush"
#include "HairStrandsVoxelPageCommon.ush"
#include "HairStrandsDeepShadowCommonStruct.ush"
// Slot layout of the page counter buffer:
//  - slot GLOBAL_PAGE_COUNTER_INDEX: total pages allocated across all macro groups
//  - slots GROUPS_PAGE_COUNTER_INDEX + MacroGroupId: per-macro-group page counts
#define GLOBAL_PAGE_COUNTER_INDEX 0
#define GROUPS_PAGE_COUNTER_INDEX 1
///////////////////////////////////////////////////////////////////////////////////////////////////
// Page allocation
#if SHADER_ALLOCATEPAGEINDEX
// Inputs/outputs for the page-index allocation pass (AllocatePageIndex).
float CPUPageWorldSize;  // World-space size of one page, computed on CPU
float CPUVoxelWorldSize; // World-space size of one voxel, computed on CPU
uint bUseCPUVoxelWorldSize; // When adaptive voxel size is disabled, we use CPU voxel size value
uint TotalPageIndexCount; // This is the max page index count;
uint PageResolution; // Resolution of a page
uint MacroGroupCount; // Number of macro groups handled by this dispatch
uint IndirectDispatchGroupSize; // Group size of the dispatch consuming the indirect args
uint bDoesMacroGroupSupportVoxelization; // Bitmask: bit i set when macro group i supports voxelization
// For testing parity with CPU version
float4 CPU_TranslatedWorldMinAABB[MAX_HAIR_MACROGROUP_COUNT];
float4 CPU_TranslatedWorldMaxAABB[MAX_HAIR_MACROGROUP_COUNT];
int4 CPU_PageIndexResolution[MAX_HAIR_MACROGROUP_COUNT];
uint CPU_bUseCPUData; // >0: use the CPU_* arrays above instead of the GPU buffers
Buffer<float> GPUVoxelWorldSize;         // [0] = adaptive (GPU-driven) min voxel size
Buffer<float> MacroGroupVoxelSizeBuffer; // Per-macro-group requested voxel size
Buffer<int> MacroGroupAABBBuffer;        // Tight-fitting per-macro-group AABBs
RWBuffer<int> MacroGroupVoxelAlignedAABBBuffer;        // Output: AABBs snapped to page boundaries
RWBuffer<uint4> OutPageIndexResolutionAndOffsetBuffer; // Output: xyz = page-index resolution, w = linear offset
RWBuffer<uint> OutPageIndexAllocationIndirectBufferArgs; // Output: per-macro-group indirect dispatch args
// AllocatePageIndex maps one thread per macro group and sizes its group-shared
// array for a single thread group, so the two constants must agree.
// (Fix: the original #error message was a tautology — it compared
// MAX_HAIR_MACROGROUP_COUNT against itself instead of naming GROUP_SIZE.)
#if GROUP_SIZE != MAX_HAIR_MACROGROUP_COUNT
#error GROUP_SIZE needs to match MAX_HAIR_MACROGROUP_COUNT
#endif
// Sentinel marking a failed page-index allocation in the prefix-sum below.
#define INVALID_OFFSET 0xFFFFFFFF
// Holds per-group page counts, then (after the scan) per-group page-index offsets.
groupshared uint PageIndexOffsets[MAX_HAIR_MACROGROUP_COUNT];
// This code assume we have less than 32 macro group (which fit into a single CU/SM)
// One thread per macro group:
//  1) Read the macro group AABB (CPU-parity or GPU path) and derive the page world size.
//  2) Compute the page-index grid resolution and snap the AABB max to a page multiple.
//  3) Thread 0 serially prefix-sums the per-group page-index counts into offsets.
//  4) Write out resolution+offset, the snapped AABB, and indirect dispatch args.
[numthreads(GROUP_SIZE, 1, 1)]
void AllocatePageIndex(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const uint MacroGroupId = DispatchThreadId.x;
	FHairAABB Bound = InitHairAABB();
	float PageWorldSize = CPUPageWorldSize;
	bool bIsValid = MacroGroupId < MacroGroupCount;
	if (bIsValid)
	{
		// Bit i of the mask is set when macro group i supports voxelization.
		const bool bSupportVoxelization = (bDoesMacroGroupSupportVoxelization >> MacroGroupId) & 0x1;
		if (CPU_bUseCPUData > 0)
		{
			// CPU-parity path: use the bounds uploaded from the CPU.
			Bound.Min = CPU_TranslatedWorldMinAABB[MacroGroupId].xyz;
			Bound.Max = CPU_TranslatedWorldMaxAABB[MacroGroupId].xyz;
		}
		else
		{
			Bound = ReadHairAABB(MacroGroupId, MacroGroupAABBBuffer);
		}
		// Voxel size: the fixed CPU value, or the adaptive GPU value clamped from
		// below by the per-group requested size; quantized for stability.
		const float VoxelWorldSize = QuantizeVoxelWorldSize(bUseCPUVoxelWorldSize ? CPUVoxelWorldSize : max(GPUVoxelWorldSize[0], MacroGroupVoxelSizeBuffer[MacroGroupId]));
		PageWorldSize = VoxelWorldSize * PageResolution;
		// Inverted/empty bound or non-voxelizable group: collapse to a zero bound.
		if (any(Bound.Min > Bound.Max) || !bSupportVoxelization)
		{
			Bound.Min = 0;
			Bound.Max = 0;
			bIsValid = false;
		}
	}

	// Page index allocation
	int3 PageIndexResolution = 0;
	{
		// Snap the max AABB to the voxel size.
		// The contents of MacroGroupAABBBuffer (tight fitting AABBs) and MacroGroupVoxelAlignedAABBBuffer diverge here
		// because the macro group AABBs for voxelization need to be snapped to the voxel page boundary.
		// Allocate enough pages to cover the AABB, where page (0,0,0) origin sit on MinAABB.
		if (bIsValid)
		{
			float3 MacroGroupSize = Bound.Max - Bound.Min;
			if (CPU_bUseCPUData > 0)
			{
				PageIndexResolution = CPU_PageIndexResolution[MacroGroupId].xyz;
			}
			else
			{
				PageIndexResolution = ceil(MacroGroupSize / PageWorldSize);
			}
			Bound.Max = (PageIndexResolution * PageWorldSize) + Bound.Min; // Snap Bound's Max to page size
		}

		// Publish this group's page-index count for the scan below.
		const uint TotalPageIndex = PageIndexResolution.x * PageIndexResolution.y * PageIndexResolution.z;
		PageIndexOffsets[MacroGroupId] = TotalPageIndex;
		GroupMemoryBarrierWithGroupSync();

		// Serial exclusive scan on thread 0 (groups are always ordered by index).
		// Once the running total exceeds TotalPageIndexCount, every remaining
		// group gets INVALID_OFFSET so its allocation is rejected below.
		if (DispatchThreadId.x == 0)
		{
			bool bValidAllocation = true;
			uint PageIndexOffset = 0;
			for (uint LocalMacroGroupId = 0; LocalMacroGroupId < MacroGroupCount; ++LocalMacroGroupId)
			{
				const uint PageCount = PageIndexOffsets[LocalMacroGroupId];
				bValidAllocation = bValidAllocation && (PageIndexOffset + PageCount <= TotalPageIndexCount);
				PageIndexOffsets[LocalMacroGroupId] = bValidAllocation ? PageIndexOffset : INVALID_OFFSET;
				PageIndexOffset += PageCount;
			}
		}
		GroupMemoryBarrierWithGroupSync();

		const uint PageIndexOffset = PageIndexOffsets[MacroGroupId];
		bIsValid = bIsValid && (PageIndexOffset != INVALID_OFFSET);
		if (bIsValid)
		{
			OutPageIndexResolutionAndOffsetBuffer[MacroGroupId] = uint4(PageIndexResolution, PageIndexOffset);
			WriteHairAABB(MacroGroupId, Bound, MacroGroupVoxelAlignedAABBBuffer);
		}
		else
		{
			// Clear all output if the allocation is not valid
			OutPageIndexResolutionAndOffsetBuffer[MacroGroupId] = uint4(0, 0, 0, 0);
			WriteDispatchIndirectArgs(OutPageIndexAllocationIndirectBufferArgs, MacroGroupId, 0, 1, 1);
		}
	}

	if (!bIsValid)
	{
		return;
	}

	// Prepare indirect buffer for doing the actual page index allocation and filling the page index
	{
		const uint AllocatedPageIndexCount = PageIndexResolution.x * PageIndexResolution.y * PageIndexResolution.z;
		WriteDispatchIndirectArgs(OutPageIndexAllocationIndirectBufferArgs, MacroGroupId, DivideAndRoundUp(AllocatedPageIndexCount, IndirectDispatchGroupSize), 1, 1);
	}
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_MARKVALID_PREPARE
// Parameters for the MarkValid pass: flags page-index cells overlapped by hair.
uint InstanceRegisteredIndex; // Index of the instance AABB inside GroupAABBsBuffer
uint ClusterOffset;           // First cluster of this instance in ClusterAABBsBuffer
uint ClusterCount;            // Number of clusters for this instance
uint MacroGroupId;            // Macro group being processed
uint bUseMacroGroupBoundCPU;  // Non-zero: use the CPU-provided bounds below
float3 MacroGroupBoundCPU_TranslatedWorldMinAABB;
float3 MacroGroupBoundCPU_TranslatedWorldMaxAABB;
float3 TranslatedWorldOffsetCorrection; // View0 -> ViewX translated-world offset
Buffer<int> GroupAABBsBuffer;
Buffer<int> ClusterAABBsBuffer;
Buffer<int> MacroGroupVoxelAlignedAABBBuffer; // Page-snapped macro group AABBs
Buffer<uint4> PageIndexResolutionAndOffsetBuffer; // xyz = resolution, w = linear offset
RWBuffer<uint> OutValidPageIndexBuffer; // Bit 0 set for page-index cells touched by hair
// PageIndexBuffer is sampled with linear coordinate computed from the 3d page coordinate. VALID NODE ARE NOT COMPACTED. It contains the LINEAR PAGE INDEX (to map to the 3d volume).
// PageIndexCoordBuffer is sampled with linear coordinate for allocated nodes. VALID NODE ARE COMPACTED. It contains the 3d page coordinate and ClustedId. Only used for opaque voxel injection.
#if PERMUTATION_USE_CLUSTER
// One thread per cluster: computes the page-index cells covered by the cluster
// AABB and atomically marks each of them valid (bit 0).
[numthreads(GROUP_SIZE, 1, 1)]
void MarkValid_PrepareCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const uint ClusterIndex = DispatchThreadId.x;
	if (ClusterIndex >= ClusterCount)
	{
		return;
	}

	// Cluster AABBs are stored as 6 consecutive ints: min.xyz then max.xyz.
	const uint BaseClusterIndex = 6 * (ClusterOffset + ClusterIndex);
	FHairAABB ClusterBound;
	ClusterBound.Min.x = float(ClusterAABBsBuffer[BaseClusterIndex + 0]);
	ClusterBound.Min.y = float(ClusterAABBsBuffer[BaseClusterIndex + 1]);
	ClusterBound.Min.z = float(ClusterAABBsBuffer[BaseClusterIndex + 2]);
	ClusterBound.Max.x = float(ClusterAABBsBuffer[BaseClusterIndex + 3]);
	ClusterBound.Max.y = float(ClusterAABBsBuffer[BaseClusterIndex + 4]);
	ClusterBound.Max.z = float(ClusterAABBsBuffer[BaseClusterIndex + 5]);

	// Reject empty/inverted or non-finite cluster bounds.
	if (any(ClusterBound.Min >= ClusterBound.Max))
		return;
	if (any(!IsFinite(ClusterBound.Min)) || any(!IsFinite(ClusterBound.Max)))
		return;

	const uint4 PageIndexResolutionAndOffset = PageIndexResolutionAndOffsetBuffer.Load(MacroGroupId);
	FHairAABB MacroGroupBound = ReadHairAABB(MacroGroupId, MacroGroupVoxelAlignedAABBBuffer);
	const int3 PageIndexResolution = PageIndexResolutionAndOffset.xyz;
	const uint PageIndexOffset = PageIndexResolutionAndOffset.w;

	// Reject empty/inverted or non-finite macro group bounds.
	if (any(MacroGroupBound.Min >= MacroGroupBound.Max))
		return;
	if (any(!IsFinite(MacroGroupBound.Min)) || any(!IsFinite(MacroGroupBound.Max)))
		return;

	// Range of page-index cells covered by the cluster, clamped to the grid.
	uint3 MinCoord = PositionToCoord(ClusterBound.Min, MacroGroupBound.Min, MacroGroupBound.Max, PageIndexResolution);
	uint3 MaxCoord = PositionToCoord(ClusterBound.Max, MacroGroupBound.Min, MacroGroupBound.Max, PageIndexResolution);
	uint3 PageIndexResolutionMinusOne = uint3(PageIndexResolution - 1);
	MinCoord = clamp(MinCoord, uint3(0, 0, 0), PageIndexResolutionMinusOne);
	MaxCoord = clamp(MaxCoord, uint3(0, 0, 0), PageIndexResolutionMinusOne);
	const uint3 CoordResolution = (MaxCoord - MinCoord) + 1;
	const uint ScatterCount = CoordResolution.x * CoordResolution.y * CoordResolution.z;

	// Arbitrary large cap (e.g., 100x10x10 pages covered). Guards against the
	// degenerate case where simulation deforms strands so far that the cluster
	// becomes arbitrarily large.
	if (ScatterCount > 10000)
		return;

	// NOTE(review): uint->float conversion always yields a finite value, so
	// these two checks can never fire; presumably leftover defensive code — confirm.
	if (any(!IsFinite(float3(MinCoord))))
		return;
	if (any(!IsFinite(float3(MaxCoord))))
		return;

	// Mark every covered page-index cell as valid.
	for (uint z = MinCoord.z; z <= MaxCoord.z; ++z)
	{
		for (uint y = MinCoord.y; y <= MaxCoord.y; ++y)
		{
			for (uint x = MinCoord.x; x <= MaxCoord.x; ++x)
			{
				const uint3 PageIndexCoord = uint3(x, y, z);
				const uint LinearPageIndexCoord = CoordToIndex(PageIndexCoord, PageIndexResolution, PageIndexOffset);
				InterlockedOr(OutValidPageIndexBuffer[LinearPageIndexCoord], 1u);
			}
		}
	}
}
#else // PERMUTATION_USE_CLUSTER
// Non-cluster variant: one thread per page-index cell of the macro group grid;
// cells that fall inside the instance (group) AABB footprint get bit 0 set.
[numthreads(GROUP_SIZE, GROUP_SIZE, GROUP_SIZE)]
void MarkValid_PrepareCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint4 ResolutionAndOffset = PageIndexResolutionAndOffsetBuffer.Load(MacroGroupId);
	const uint3 GridResolution = ResolutionAndOffset.xyz;
	const uint GridOffset = ResolutionAndOffset.w;

	const uint3 CellCoord = DispatchThreadId;
	if (any(CellCoord >= GridResolution))
		return;

	// Either take the CPU-provided bounds for both volumes, or read them from buffers.
	FHairAABB MacroBound;
	FHairAABB InstanceBound;
	if (bUseMacroGroupBoundCPU)
	{
		MacroBound.Min = MacroGroupBoundCPU_TranslatedWorldMinAABB;
		MacroBound.Max = MacroGroupBoundCPU_TranslatedWorldMaxAABB;
		// HAIR_TODO: Can we have reliable primitive AABB to have tigher bound?
		InstanceBound = MacroBound;
	}
	else
	{
		MacroBound = ReadHairAABB(MacroGroupId, MacroGroupVoxelAlignedAABBBuffer);
		InstanceBound = ReadHairAABB(InstanceRegisteredIndex, GroupAABBsBuffer);
		// Correct View0 translated world offset to ViewX translated world offset
		InstanceBound.Min += TranslatedWorldOffsetCorrection;
		InstanceBound.Max += TranslatedWorldOffsetCorrection;
	}

	const uint3 MinCoord = PositionToCoord(InstanceBound.Min, MacroBound.Min, MacroBound.Max, GridResolution);
	const uint3 MaxCoord = PositionToCoord(InstanceBound.Max, MacroBound.Min, MacroBound.Max, GridResolution);
	if (any(!IsFinite(float3(MinCoord))))
		return;
	if (any(!IsFinite(float3(MaxCoord))))
		return;

	// Tag this cell when it falls inside the instance footprint.
	const bool bOverlapped = all(CellCoord >= MinCoord) && all(CellCoord <= MaxCoord);
	if (bOverlapped)
	{
		const uint LinearCellIndex = CoordToIndex(CellCoord, GridResolution, GridOffset);
		InterlockedOr(OutValidPageIndexBuffer[LinearCellIndex], 1u);
	}
}
#endif // PERMUTATION_USE_CLUSTER
#endif // SHADER_MARKVALID_PREPARE
///////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_ALLOCATE
// Parameters for the page allocation pass (AllocateCS).
uint MacroGroupId;
uint PageCount;          // Physical page pool capacity
uint CPU_PageIndexCount; // CPU-driven fallbacks used when PERMUTATION_GPU_DRIVEN == 0
uint CPU_PageIndexOffset;
uint3 CPU_PageIndexResolution;
Buffer<uint4> PageIndexResolutionAndOffsetBuffer;
Buffer<int> IndirectBufferArgs;
RWBuffer<uint> RWPageIndexGlobalCounter; // [0] = global page count, [1 + group] = per-group count
RWBuffer<uint> RWPageIndexBuffer;        // Per cell: allocated page index or INVALID_VOXEL_PAGE_INDEX
RWBuffer<uint> RWPageToPageIndexBuffer;  // Reverse mapping: physical page -> page-index cell
RWBuffer<uint4> RWPageIndexCoordBuffer;  // Per allocated page: 3D page coord + macro group id
groupshared uint LocalCounter; // Number of valid cells found by this thread group
groupshared uint GroupBase[2]; // [0] = global base page index, [1] = per-group base index
// Allocates physical pages for the page-index cells marked valid by the
// MarkValid pass: each valid cell gets a page from the global pool via a
// two-level (group-local, then global) atomic counter scheme, and both the
// forward (cell -> page) and reverse (page -> cell) mappings are written.
[numthreads(GROUP_SIZE, 1, 1)]
void AllocateCS(uint GroupIndex : SV_GroupIndex, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	// Reset the group-local allocation state.
	if (GroupIndex == 0)
	{
		GroupBase[0] = 0;
		GroupBase[1] = 0;
		LocalCounter = 0;
	}
	GroupMemoryBarrierWithGroupSync();

#if PERMUTATION_GPU_DRIVEN == 1
	const uint4 PageIndexResolutionAndOffset = PageIndexResolutionAndOffsetBuffer.Load(MacroGroupId);
	const uint3 PageIndexResolution = PageIndexResolutionAndOffset.xyz;
	const uint PageIndexOffset = PageIndexResolutionAndOffset.w;
	const uint PageIndexCount = PageIndexResolution.x * PageIndexResolution.y * PageIndexResolution.z;
#else
	const uint3 PageIndexResolution = CPU_PageIndexResolution;
	const uint PageIndexOffset = CPU_PageIndexOffset;
	const uint PageIndexCount = CPU_PageIndexCount;
#endif

	// A cell is valid when the MarkValid pass tagged it (non-zero).
	const uint GridIndex = DispatchThreadId.x + PageIndexOffset;
	bool bIsValid = false;
	if (DispatchThreadId.x < PageIndexCount)
	{
		bIsValid = RWPageIndexBuffer[GridIndex] > 0;
	}

	// Rank this thread's cell among the group's valid cells.
	uint Offset = 0;
	if (bIsValid)
	{
		InterlockedAdd(LocalCounter, 1u, Offset);
	}
	GroupMemoryBarrierWithGroupSync();

	if (GroupIndex == 0)
	{
		// * Add page count to global counter for global tracking
		// * Add page count to the group counter for per-group work
		InterlockedAdd(RWPageIndexGlobalCounter[GLOBAL_PAGE_COUNTER_INDEX], LocalCounter, GroupBase[0]);
		InterlockedAdd(RWPageIndexGlobalCounter[GROUPS_PAGE_COUNTER_INDEX + MacroGroupId], LocalCounter, GroupBase[1]);
	}
	GroupMemoryBarrierWithGroupSync();

	if (bIsValid)
	{
		const uint PageIndex0 = GroupBase[0] + Offset; // Global page index
		const uint PageIndex1 = GroupBase[1] + Offset; // Group page index
		// Allocation fails once the physical page pool is exhausted.
		const bool bIsAllocationValid = PageIndex0 < PageCount;
		RWPageIndexBuffer[GridIndex] = bIsAllocationValid ? PageIndex0 : INVALID_VOXEL_PAGE_INDEX;
		if (bIsAllocationValid)
		{
			RWPageToPageIndexBuffer[PageIndex0] = GridIndex;
		}
		// Output the coordinates of the allocated page for indirect dispatch usage
		// If the allocation failed (ran out of pages), then we mark the IndexCoord with an invalid GroupID
		const uint LinearIndex = DispatchThreadId.x;
		const uint3 PageIndexCoord = IndexToCoord(LinearIndex, PageIndexResolution);
		RWPageIndexCoordBuffer[PageIndexOffset + PageIndex1] = uint4(PageIndexCoord, bIsAllocationValid ? MacroGroupId : INVALID_MACRO_GROUP_ID);
	}
	// Mark page index as invalid
	// Ensures that even if we write more (due to a larger dispatch count than needed), we do not stomp another instance group's page index
	else if (DispatchThreadId.x < PageIndexCount)
	{
		RWPageIndexBuffer[GridIndex] = INVALID_VOXEL_PAGE_INDEX;
	}
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_ADDDESC
// Parameters for the node-descriptor write pass (AddDescCS).
float3 CPU_TranslatedWorldMinAABB; // CPU fallbacks used when PERMUTATION_GPU_DRIVEN == 0
uint MacroGroupId;
float3 CPU_TranslatedWorldMaxAABB;
uint CPU_PageIndexOffset;
int3 CPU_PageIndexResolution;
float CPU_VoxelWorldSize;
uint bUseCPUVoxelWorldSize; // When adaptive voxel size is disabled, we use CPU voxel size value
Buffer<float> GPU_VoxelWorldSize; // [0] = adaptive (GPU-driven) min voxel size
Buffer<int> MacroGroupVoxelAlignedAABBBuffer; // Page-snapped macro group AABBs
Buffer<float> MacroGroupVoxelSizeBuffer;      // Per-macro-group requested voxel size
Buffer<uint4> PageIndexResolutionAndOffsetBuffer; // xyz = resolution, w = linear offset
RWStructuredBuffer<FPackedVirtualVoxelNodeDesc> OutNodeDescBuffer; // One packed node per macro group
// Writes the packed voxel-node descriptor for one macro group, gathering its
// AABB, page-index layout and voxel size either from GPU buffers
// (PERMUTATION_GPU_DRIVEN) or from the CPU-provided parameters.
[numthreads(1, 1, 1)]
void AddDescCS(uint GroupIndex : SV_GroupIndex, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	FVirtualVoxelNodeDesc Node;
#if PERMUTATION_GPU_DRIVEN == 1
	const uint4 ResolutionAndOffset = PageIndexResolutionAndOffsetBuffer.Load(MacroGroupId);
	const FHairAABB AlignedBound = ReadHairAABB(MacroGroupId, MacroGroupVoxelAlignedAABBBuffer);
	const float GroupVoxelSize = MacroGroupVoxelSizeBuffer[MacroGroupId];
	Node.TranslatedWorldMinAABB = AlignedBound.Min;
	Node.TranslatedWorldMaxAABB = AlignedBound.Max;
	Node.PageIndexResolution = ResolutionAndOffset.xyz;
	Node.PageIndexOffset = ResolutionAndOffset.w;
	// Adaptive size is clamped from below by the per-group requested size.
	Node.VoxelWorldSize = bUseCPUVoxelWorldSize ? CPU_VoxelWorldSize : max(GPU_VoxelWorldSize[0], GroupVoxelSize);
#else
	Node.TranslatedWorldMinAABB = CPU_TranslatedWorldMinAABB;
	Node.TranslatedWorldMaxAABB = CPU_TranslatedWorldMaxAABB;
	Node.PageIndexResolution = CPU_PageIndexResolution;
	Node.PageIndexOffset = CPU_PageIndexOffset;
	Node.VoxelWorldSize = CPU_VoxelWorldSize;
#endif
	OutNodeDescBuffer[MacroGroupId] = PackVoxelNode(Node);
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
// Preapare indirect buffer
#if SHADER_ADDINDIRECTBUFFER
// Parameters for building the per-page indirect dispatch args.
uint PageResolution; // Voxel resolution of one page (cubic)
uint MacroGroupCount;
int3 IndirectGroupSize; // Thread-group size of the consuming dispatches
Buffer<uint> PageIndexGlobalCounter; // [0] = global page count, [1 + group] = per-group counts
RWBuffer<uint> OutIndirectArgsBuffer;
// Emits DispatchIndirect args sized to touch every voxel of every allocated
// page: X spans the voxels of one page, Z spans the allocated pages.
void WriteArgs(uint WriteIndex, uint AllocatedPageCount)
{
	const uint VoxelsPerPage = PageResolution * PageResolution * PageResolution;
	const uint GroupCountX = DivideAndRoundUp(VoxelsPerPage, IndirectGroupSize.x);
	const uint GroupCountZ = DivideAndRoundUp(AllocatedPageCount, IndirectGroupSize.z);
	WriteDispatchIndirectArgs(OutIndirectArgsBuffer, WriteIndex, GroupCountX, 1, GroupCountZ);
}
// Fills the indirect-args buffer: slot 0 is sized by the total page count,
// then one slot per macro group sized by that group's own page count.
[numthreads(GROUP_SIZE, 1, 1)]
void AddIndirectBufferCS(uint GroupIndex : SV_GroupIndex, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint ThreadIndex = DispatchThreadId.x;

	// Total pages allocated across *all* macro groups.
	if (ThreadIndex == 0)
	{
		WriteArgs(GLOBAL_PAGE_COUNTER_INDEX, PageIndexGlobalCounter[GLOBAL_PAGE_COUNTER_INDEX]);
	}

	// Pages allocated for this thread's macro group.
	if (ThreadIndex < MacroGroupCount)
	{
		const uint SlotIndex = GROUPS_PAGE_COUNTER_INDEX + ThreadIndex;
		WriteArgs(SlotIndex, PageIndexGlobalCounter[SlotIndex]);
	}
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
// Indirect clear
#if SHADER_INDPAGECLEAR
// Parameters for the indirect page-clear pass.
Buffer<uint> PageIndexGlobalCounter; // [0] = number of allocated pages
uint VirtualVoxelParams_PageResolution;
int3 VirtualVoxelParams_PageCountResolution;
Buffer<uint4> VirtualVoxelParams_PageIndexCoordBuffer; // Not read by this kernel — pages map linearly (see below)
RWTexture3D<uint> OutPageTexture;
// Zeroes every voxel of every allocated page. Allocated pages are laid out
// linearly in the 3D page atlas, so the allocated-page index maps directly to
// a page slot without consulting PageIndexBuffer.
[numthreads(GROUP_SIZE_X, 1, GROUP_SIZE_Z)]
void VoxelIndPageClearCS(uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint VoxelIndex = DispatchThreadId.x; // Linear voxel index within one page
	const uint PageSlot = DispatchThreadId.z;   // Allocated page being cleared
	const uint VoxelsPerPage = VirtualVoxelParams_PageResolution * VirtualVoxelParams_PageResolution * VirtualVoxelParams_PageResolution;
	if (PageSlot >= PageIndexGlobalCounter[GLOBAL_PAGE_COUNTER_INDEX] || VoxelIndex >= VoxelsPerPage)
	{
		return;
	}

	const uint3 PageCoord = IndexToCoord(PageSlot, VirtualVoxelParams_PageCountResolution);
	const int3 PageBase = PageCoord * VirtualVoxelParams_PageResolution;
	const uint3 LocalCoord = IndexToCoord(VoxelIndex, VirtualVoxelParams_PageResolution.xxx);
	OutPageTexture[PageBase + LocalCoord] = 0;
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_ADAPTIVE_FEEDBACK
// Debug visualization toggle for the feedback pass (requires ShaderPrint).
#define DEBUG_ENABLE 0
#if DEBUG_ENABLE
#include "../ShaderPrint.ush"
#endif
uint CPUAllocatedPageCount;  // Page pool capacity decided on the CPU
float CPUMinVoxelWorldSize;  // Smallest voxel size the CPU requested
float AdaptiveCorrectionThreshold; // Margin applied to the target to avoid oscillation
float AdaptiveCorrectionSpeed;     // Lerp factor toward the target size when shrinking
Buffer<uint> PageIndexGlobalCounter;    // [0] = pages actually allocated this frame
Buffer<float> CurrGPUMinVoxelWorldSize; // [0] = current adaptive voxel size
RWBuffer<float> NextGPUMinVoxelWorldSize; // [0] = adaptive voxel size for next frame
// Snaps a voxel size to the nearest 1/1000 unit so that tiny frame-to-frame
// variations do not make the adaptive size oscillate.
// (Note: the granularity is 0.001, not the 0.01 an older comment implied.)
float RoundHairVoxelSize(float In)
{
	const float Scale = 1000.f;
	return floor(In * Scale + 0.5f) * (1.f / Scale);
}
// Adaptive voxel-size feedback: compares how many pages the GPU actually
// allocated against the CPU-side pool capacity, and derives the minimum voxel
// world size to use next frame (grow when over budget, ease back toward the
// requested size when under budget).
[numthreads(1, 1, 1)]
void FeedbackCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const float CurrVoxelWorldSize = RoundHairVoxelSize(CurrGPUMinVoxelWorldSize[0]);

	// Voxel pages represent a volume. To derive a better estimate of the ratio by which voxel size needs to be scaled,
	// compute the cubic root of this ratio.
	//
	// AllocatedPage AllocatedRes^3
	// ------------- = -------------- = VolumeRatio = LinearRatio^3
	// MaxPage MaxRes^3

	// Ratio used for predicting voxel size increase
	const uint GPUAllocatedPageCount = PageIndexGlobalCounter[GLOBAL_PAGE_COUNTER_INDEX];
	const float VolumeRatio = float(GPUAllocatedPageCount) / float(CPUAllocatedPageCount);
	const float LinearRatio = pow(VolumeRatio, 1.f / 3.f);

	// Ratio used for predicting voxel size decrease (i.e. when requested allocation fit,
	// but the voxel size does not match the (more precise) target).
	// In this case, we add a threshold/margin to the target, so that there is no oscillation.
	const float VolumeRatio_Thres = float(GPUAllocatedPageCount) / float(CPUAllocatedPageCount * AdaptiveCorrectionThreshold);
	const float LinearRatio_Thres = pow(max(VolumeRatio_Thres, 0.f), 1.f / 3.f);

	// If the page pool is not large enough increase voxel size
	float NextVoxelWorldSize = CPUMinVoxelWorldSize;
	if (GPUAllocatedPageCount > CPUAllocatedPageCount)
	{
		//NextVoxelWorldSize = CurrVoxelWorldSize * LinearRatio;
		NextVoxelWorldSize = CurrVoxelWorldSize * LinearRatio_Thres;
	}
	// If the page pool is large enough but the voxels are larger than the requested size decrease voxel size
	else if (GPUAllocatedPageCount < CPUAllocatedPageCount && CurrVoxelWorldSize > CPUMinVoxelWorldSize)
	{
		// Ease toward the target (AdaptiveCorrectionSpeed blends current vs target).
		const float TargetVoxelWorldSize = CurrVoxelWorldSize * LinearRatio_Thres;
		NextVoxelWorldSize = max(CPUMinVoxelWorldSize, lerp(CurrVoxelWorldSize, TargetVoxelWorldSize, AdaptiveCorrectionSpeed));
	}
	//else if (GPUAllocatedPageCount > CPUAllocatedPageCount * AdaptiveCorrectionThreshold)
	//{
	//	const float TargetVoxelWorldSize = CurrVoxelWorldSize * LinearRatio_Thres;
	//	NextVoxelWorldSize = max(CPUMinVoxelWorldSize, lerp(CurrVoxelWorldSize, TargetVoxelWorldSize, AdaptiveCorrectionSpeed));
	//}
	else
	{
		NextVoxelWorldSize = CPUMinVoxelWorldSize;
	}

	// Clamp voxel size into a reasonable amount (e.g. 0.1mm - 100mm)
	const float ClampMinVoxelWorldSize = 0.01f;
	const float ClampMaxVoxelWorldSize = 10.0f;
	NextVoxelWorldSize = clamp(RoundHairVoxelSize(NextVoxelWorldSize), ClampMinVoxelWorldSize, ClampMaxVoxelWorldSize);

	// Debug
#if DEBUG_ENABLE
	FFontColor CPUColor = FontEmerald;
	FFontColor GPUColor = FontOrange;
	FFontColor CstColor = FontSilver;
	FShaderPrintContext Context = InitShaderPrintContext(true, uint2(700, 50));
	Print(Context, TEXT(" ------------------------------- "), FontSilver); Newline(Context);
	Print(Context, TEXT("| Allocations |"), FontSilver); Newline(Context);
	Print(Context, TEXT(" ------------------------------- "), FontSilver); Newline(Context);
	Print(Context, TEXT("GPU Allocated "), GPUColor);
	Print(Context, GPUAllocatedPageCount, GPUColor);
	Newline(Context);
	Print(Context, TEXT("CPU Allocated "), CPUColor);
	Print(Context, CPUAllocatedPageCount, CPUColor);
	Newline(Context);
	Print(Context, TEXT("GPU Curr Min. Size "), GPUColor);
	Print(Context, CurrVoxelWorldSize, GPUColor);
	Newline(Context);
	Print(Context, TEXT("GPU Next Min. Size "), GPUColor);
	Print(Context, NextVoxelWorldSize, GPUColor);
	Newline(Context);
	Print(Context, TEXT("CPU Min. Size "), CPUColor);
	Print(Context, CPUMinVoxelWorldSize, CPUColor);
	Newline(Context);
	Print(Context, TEXT("Correction Thres. "), CstColor);
	Print(Context, AdaptiveCorrectionThreshold, CstColor);
	Newline(Context);
	Print(Context, TEXT("Correction Speed "), CstColor);
	Print(Context, AdaptiveCorrectionSpeed, CstColor);
	Newline(Context);
#endif

	// Update state data (the second rounding is redundant after the clamp, but harmless).
	NextGPUMinVoxelWorldSize[0] = RoundHairVoxelSize(NextVoxelWorldSize);
}
#endif // SHADER_ADAPTIVE_FEEDBACK
///////////////////////////////////////////////////////////////////////////
// Voxel Raster Compute
#if SHADER_RASTERCOMPUTE
// Parameters for the compute voxel rasterization pass.
uint MaxRasterCount; // Max steps walked along one hair segment
uint FrameIdMod8;    // Temporal jitter seed (only referenced when JITTER_ENABLE)
uint MacroGroupId;
uint VertexCount;    // Control-point count of the instance
uint VirtualVoxelParams_PageIndexCount;
uint VirtualVoxelParams_PageResolution;
uint3 VirtualVoxelParams_PageCountResolution;
uint3 VirtualVoxelParams_PageTextureResolution;
Buffer<uint> VirtualVoxelParams_PageIndexBuffer;
StructuredBuffer<FPackedVirtualVoxelNodeDesc> VirtualVoxelParams_NodeDescBuffer;
RWTexture3D<uint> OutPageTexture; // Fixed-point density accumulation target
float CoverageScale; // Extra scale applied to the accumulated coverage
#define JITTER_ENABLE 0
// Builds a per-pixel 3D jitter vector from three decorrelated interleaved
// gradient noise samples (the seed is remixed per component).
float3 GetHairVoxelJitter(uint2 PixelCoord, uint Seed)
{
	const float JitterX = InterleavedGradientNoise(PixelCoord.xy, Seed);
	const float JitterY = InterleavedGradientNoise(PixelCoord.xy, Seed * 117);
	const float JitterZ = InterleavedGradientNoise(PixelCoord.xy, Seed * 7901);
	return float3(JitterX, JitterY, JitterZ);
}
// Compute rasterization of hair segments into the voxel pages: each thread
// takes one control-point pair (a segment), walks it at a sub-voxel step,
// and atomically accumulates fixed-point coverage into the page texture.
[numthreads(GROUP_SIZE, 1, 1)]
void MainCS(uint2 DispatchThreadID : SV_DispatchThreadID)
{
	uint VertexIndex0 = DispatchThreadID.x;
	uint VertexIndex1 = VertexIndex0 + 1;
	bool bIsValid = VertexIndex0 < VertexCount && VertexIndex1 < VertexCount;
	if (!bIsValid)
		return;

#if PERMUTATION_CULLING == 1
	if (HairStrandsVF_bCullingEnable)
	{
		// After culling, the effective vertex count lives in the indirect args ([3]).
		const uint VertexCountAfterCulling = HairStrandsVF_CullingIndirectBuffer[3];
		uint FetchIndex0 = VertexIndex0;
		uint FetchIndex1 = VertexIndex1;
		bIsValid = FetchIndex0 < VertexCountAfterCulling&& FetchIndex1 < VertexCountAfterCulling;
		if (!bIsValid)
		{
			return;
		}
		// Remap through the culling index buffer to the surviving control points.
		FetchIndex1 = min(FetchIndex0 + 1, VertexCountAfterCulling - 1);
		VertexIndex0 = HairStrandsVF_CullingIndexBuffer[FetchIndex0];
		VertexIndex1 = HairStrandsVF_CullingIndexBuffer[FetchIndex1];
	}
#endif

	const float3 PositionOffset = HairStrandsVF_GetHairInstancePositionOffset();
	const FHairControlPoint CP0 = ReadHairControlPoint(
		HairStrandsVF_PositionBuffer,
		VertexIndex0,
		PositionOffset,
		HairStrandsVF_Radius,
		HairStrandsVF_RootScale,
		HairStrandsVF_TipScale);

	// A strand's last control point starts no segment.
	if (CP0.Type == HAIR_CONTROLPOINT_END)
		return;

	const FHairControlPoint CP1 = ReadHairControlPoint(
		HairStrandsVF_PositionBuffer,
		VertexIndex1,
		PositionOffset,
		HairStrandsVF_Radius,
		HairStrandsVF_RootScale,
		HairStrandsVF_TipScale);

	const FVirtualVoxelNodeDesc NodeDesc = UnpackVoxelNode(VirtualVoxelParams_NodeDescBuffer[MacroGroupId], VirtualVoxelParams_PageResolution);

	// Per-endpoint coverage: fiber radius relative to half the voxel size, capped at 1.
	const float DiameterToRadius = 0.5f;
	const float3 TranslatedWP0 = mul(float4(CP0.Position, 1), HairStrandsVF_LocalToTranslatedWorldPrimitiveTransform).xyz;
	const float HairCoverage0 = CP0.WorldRadius / max(CP0.WorldRadius, DiameterToRadius * NodeDesc.VoxelWorldSize);
	const float3 TranslatedWP1 = mul(float4(CP1.Position, 1), HairStrandsVF_LocalToTranslatedWorldPrimitiveTransform).xyz;
	const float HairCoverage1 = CP1.WorldRadius / max(CP1.WorldRadius, DiameterToRadius * NodeDesc.VoxelWorldSize);

	// In order to reduce aliasing, we increase the number of steps. This makes the result more comparable to the raster pass.
	const float LineStepMultiplier = 1.5f;
	const float ScaledVoxelWorldSize = NodeDesc.VoxelWorldSize / LineStepMultiplier;

	FVirtualVoxelCommonDesc CommonDesc;
	CommonDesc.PageCountResolution = VirtualVoxelParams_PageCountResolution;
	CommonDesc.PageTextureResolution = VirtualVoxelParams_PageTextureResolution;
	CommonDesc.PageResolution = VirtualVoxelParams_PageResolution;
	// NOTE(review): VirtualVoxel.* is not declared among this section's
	// parameters — presumably a globally-bound uniform struct; confirm it is
	// bound for this permutation.
	CommonDesc.PageResolutionLog2 = VirtualVoxel.PageResolutionLog2;

	// Count the number of fibers which are within a cylinder defined by the voxel size,
	// and the distance between the origin and the extent of the volume
	// This assumes that the voxel volume is cubic (i.e. equal dimensions on all sides)
	const float3 LineSegment = TranslatedWP1 - TranslatedWP0;
	const float LineLength = length(LineSegment);
	const float3 StepD = normalize(LineSegment) * ScaledVoxelWorldSize;

	// Step according to voxel size
	int3 CurrentPageIndexCoord = -1;
	bool bIsPageValid = false;
	uint3 PageCoord = 0;

	// If we have long segments we could break them into batches (e.g. 8 voxels long), queue them, and indirect dispatch them.
	// This would make the workload more uniform/coherent. Currently, breaking into smaller batches does not seem to save a
	// lot of cost
#if JITTER_ENABLE
	// NOTE(review): dead code while JITTER_ENABLE == 0. This call does not match
	// GetHairVoxelJitter(uint2, uint) declared above (3 arguments, float2 first
	// argument, and VirtualVoxel.JitterMode) — it would need fixing before
	// jitter can be enabled.
	const float3 Jitter = GetHairVoxelJitter(frac(CP0.Position.xy), FrameIdMod8, VirtualVoxel.JitterMode) * 2 - 1;
#else
	const float3 Jitter = 0;
#endif

	// Bound the per-segment cost to MaxRasterCount steps.
	const float fMaxStep = LineLength / ScaledVoxelWorldSize;
	const float MaxStep = float(min(ceil(fMaxStep), MaxRasterCount));
	int3 PreviousCoord = -1;
	for (float StepIt = 0.0f; StepIt < MaxStep; ++StepIt)
	{
		const float U = (StepIt + 0.5f) / float(MaxStep);
		const float Radius = lerp(CP0.WorldRadius, CP1.WorldRadius, U);
		const float3 HitP = TranslatedWP0 + StepIt * StepD + Jitter * Radius;
		const int3 VolumeCoord = clamp((HitP - NodeDesc.TranslatedWorldMinAABB) / NodeDesc.VoxelWorldSize, 0, NodeDesc.VirtualResolution-1);
		const int3 PageIndexCoord = VolumeCoord / CommonDesc.PageResolution;

		// Update page index only when needed
		const bool bHasPageIndexChanged = any(PageIndexCoord != CurrentPageIndexCoord);
		if (bHasPageIndexChanged)
		{
			CurrentPageIndexCoord = PageIndexCoord;
			const uint LinearPageIndexCoord = CoordToIndex(PageIndexCoord, NodeDesc.PageIndexResolution, NodeDesc.PageIndexOffset);
			const uint PageIndex = VirtualVoxelParams_PageIndexBuffer.Load(LinearPageIndexCoord);
			bIsPageValid = PageIndex != INVALID_VOXEL_PAGE_INDEX;
			if (bIsPageValid)
			{
				PageCoord = IndexToCoord(PageIndex, CommonDesc.PageCountResolution);
			}
		}

		if (bIsPageValid)
		{
			const int3 VoxelPageBase = PageCoord * CommonDesc.PageResolution;
			const int3 VoxelPageOffset = VolumeCoord - PageIndexCoord * CommonDesc.PageResolution;
			const int3 VoxelPageCoord = VoxelPageBase + VoxelPageOffset;

			// Ensure we don't write multiple times within the same voxel.
			// This can happen for small hair segments, where both start & end points could be written into the same voxel.
			const bool bRasterize = any(VoxelPageCoord != PreviousCoord);
			if (bRasterize)
			{
				const float VoxelFixPointScale = GetVoxelDensityFixPointScale();
				const float HairCoverage = lerp(HairCoverage0, HairCoverage1, U);
				uint RawData = HairCoverage * VoxelFixPointScale * HairStrandsVF_Density * CoverageScale;
				InterlockedAdd(OutPageTexture[VoxelPageCoord], RawData);

				// Grooms having raytraced geometry will cast shadow on opaque geometry with their RT geometry, not with their
				// voxelization. To avoid doubling/incorrect shadowing, we mark voxels with the no-shadow-casting flag.
				// This adds a significant cost when used.
				if (HasHairFlags(HairStrandsVF_Flags, HAIR_FLAGS_RAYTRACING_GEOMETRY))
				{
					InterlockedOr(OutPageTexture[VoxelPageCoord], VOXEL_CAST_NO_SHADOW_MASK);
				}
			}
			PreviousCoord = VoxelPageCoord;
		}
	}
}
#endif // SHADER_RASTERCOMPUTE
///////////////////////////////////////////////////////////////////////////
// Inject opaque surface into voxels
#if SHADER_INJECTOPAQUE_VIRTUALVOXEL
// Parameters for injecting opaque scene depth into the voxel pages.
uint MacroGroupId;
float2 SceneDepthResolution; // Scene depth buffer size in pixels
uint VoxelBiasCount; // Depth bias toward the camera, expressed in voxels
uint VoxelMarkCount; // Thickness of the marked opaque layer, in voxels
RWTexture3D<uint> OutPageTexture;
uint VirtualVoxelParams_PageCount;
uint VirtualVoxelParams_PageIndexCount;
uint VirtualVoxelParams_PageResolution;
uint3 VirtualVoxelParams_PageCountResolution;
Buffer<uint> VirtualVoxelParams_PageIndexBuffer;
Buffer<uint> VirtualVoxelParams_AllocatedPageCountBuffer; // [0] = allocated page count
Buffer<uint4>VirtualVoxelParams_PageIndexCoordBuffer;     // Per allocated page: 3D coord + macro group id
StructuredBuffer<FPackedVirtualVoxelNodeDesc> VirtualVoxelParams_NodeDescBuffer;
// Marks voxels that sit just behind opaque scene geometry as opaque
// (via VOXEL_OPAQUE_ADD), so that subsequent voxel traversals account for
// scene occluders inside the hair voxel volume.
// Thread mapping: X = linear voxel index within a page, Z = allocated page index.
[numthreads(GROUP_SIZE_X, 1, GROUP_SIZE_Z)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint LinearVoxelCoord = DispatchThreadId.x;
	const uint AllocatedPageIndex = DispatchThreadId.z;
	// Number of voxels in one (cubic) page.
	const uint VoxelCountPerPage = VirtualVoxelParams_PageResolution.x * VirtualVoxelParams_PageResolution.x * VirtualVoxelParams_PageResolution.x;
	// NOTE(review): VirtualVoxelParams_PageCount is declared outside this view — presumably the total page capacity; confirm.
	const bool bValidVoxel = AllocatedPageIndex < VirtualVoxelParams_AllocatedPageCountBuffer[0] && LinearVoxelCoord < VirtualVoxelParams_PageCount * VoxelCountPerPage;
	if (!bValidVoxel)
	{
		return;
	}
	// Local voxel coordinate within the page.
	// NOTE(review): the y/z modulos after IndexToCoord with a .xxx resolution look redundant —
	// confirm against IndexToCoord's wrapping behavior in HairStrandsVoxelPageCommon.ush.
	uint3 VoxelCoordOffset = IndexToCoord(LinearVoxelCoord, VirtualVoxelParams_PageResolution.xxx);
	VoxelCoordOffset.y = VoxelCoordOffset.y % VirtualVoxelParams_PageResolution.x;
	VoxelCoordOffset.z = VoxelCoordOffset.z % VirtualVoxelParams_PageResolution.x;
	// Fetch the macro group's voxel node and the page-index coordinate of this allocated page.
	const FVirtualVoxelNodeDesc NodeDesc = UnpackVoxelNode(VirtualVoxelParams_NodeDescBuffer[MacroGroupId], VirtualVoxelParams_PageResolution);
	const uint4 PageIndexCoord = VirtualVoxelParams_PageIndexCoordBuffer[NodeDesc.PageIndexOffset + AllocatedPageIndex];
	const uint LinearPageIndexCoord = CoordToIndex(PageIndexCoord.xyz, NodeDesc.PageIndexResolution, NodeDesc.PageIndexOffset);
	// PageIndexCoord have invalid .w component if we run out of available page during the allocation
	const bool bIsValid = LinearPageIndexCoord < VirtualVoxelParams_PageIndexCount && PageIndexCoord.w != INVALID_MACRO_GROUP_ID;
	if (bIsValid)
	{
		const uint PageIndex = VirtualVoxelParams_PageIndexBuffer.Load(LinearPageIndexCoord);
		if (PageIndex != INVALID_VOXEL_PAGE_INDEX)
		{
			// Absolute voxel coordinate of this thread inside the macro group volume.
			const uint3 VoxelCoordBase = PageIndexCoord.xyz * VirtualVoxelParams_PageResolution;
			const uint3 VoxelCoord = VoxelCoordBase + VoxelCoordOffset;
			// Voxel center in translated world space, then projected to clip space of the primary view.
			const float3 TranslatedWorldPosition = VoxelCoord * NodeDesc.VoxelWorldSize + NodeDesc.TranslatedWorldMinAABB;
			float4 ClipPos = mul(float4(TranslatedWorldPosition, 1), PrimaryView.TranslatedWorldToClip);
			ClipPos /= ClipPos.w;
			// Pull the voxel depth toward the camera by VoxelBiasCount voxels to avoid self-marking.
			const float DepthBias = VoxelBiasCount * NodeDesc.VoxelWorldSize;
			const float VoxelDepth = ConvertFromDeviceZ(ClipPos.z) - DepthBias;
			// Clip space -> viewport UV -> buffer UV -> scene pixel coordinate.
			float2 SceneUV = float2(0.5f * (ClipPos.x + 1), 1 - 0.5f * (ClipPos.y + 1));
			SceneUV = ViewportUVToBufferUV(SceneUV);
			const float2 ScenePixelCoord = SceneUV * SceneDepthResolution;
			// Voxels projecting outside the screen cannot be tested against scene depth.
			const bool bIsOnScreen = SceneUV.x >= 0 && SceneUV.x < 1 && SceneUV.y >= 0 && SceneUV.y < 1;
			if (!bIsOnScreen)
				return;
			// Scene depth and reconstructed world position at the voxel's projected pixel.
			const float ClosestDepth = ConvertFromDeviceZ(SceneDepthTexture.Load(uint3(ScenePixelCoord, 0)).x);
			const float3 SceneTranslatedWorldPos = ReconstructTranslatedWorldPositionFromDepth(SceneUV, ClosestDepth);
			// Only inject opaque geometry that actually lies inside this macro group's AABB.
			const bool bIsInVolume =
			SceneTranslatedWorldPos.x >= NodeDesc.TranslatedWorldMinAABB.x && SceneTranslatedWorldPos.x < NodeDesc.TranslatedWorldMaxAABB.x &&
			SceneTranslatedWorldPos.y >= NodeDesc.TranslatedWorldMinAABB.y && SceneTranslatedWorldPos.y < NodeDesc.TranslatedWorldMaxAABB.y &&
			SceneTranslatedWorldPos.z >= NodeDesc.TranslatedWorldMinAABB.z && SceneTranslatedWorldPos.z < NodeDesc.TranslatedWorldMaxAABB.z;
			if (!bIsInVolume)
				return;
			// Inject opaque depth on a thin layer (Dist < DistThreshold) for avoiding weird projection
			if (ClosestDepth < VoxelDepth && abs(ClosestDepth - VoxelDepth) < VoxelMarkCount * NodeDesc.VoxelWorldSize)
			{
				// Convert the absolute voxel coordinate into its physical page-texture coordinate.
				const uint3 VoxelPageIndexCoord = VoxelCoord / VirtualVoxelParams_PageResolution;
				const uint3 VoxelIndexCoordBase = VoxelPageIndexCoord * VirtualVoxelParams_PageResolution;
				const uint3 VoxelPageOffset = VoxelCoord - VoxelIndexCoordBase;
				const uint3 PageCoord = IndexToCoord(PageIndex, VirtualVoxelParams_PageCountResolution);
				const int3 VoxelPageBase = PageCoord * VirtualVoxelParams_PageResolution;
				const int3 VoxelPageCoord = VoxelPageOffset + VoxelPageBase;
				// Atomically set the opaque flag; other threads may write the same voxel concurrently.
				InterlockedOr(OutPageTexture[VoxelPageCoord], VOXEL_OPAQUE_ADD);
			}
		}
	}
}
#endif // SHADER_INJECTOPAQUE_VIRTUALVOXEL
///////////////////////////////////////////////////////////////////////////
#if SHADER_DEPTH_INJECTION
float2 OutputResolution;
uint MacroGroupId;
uint AtlasSlotIndex;
float3 LightDirection;
uint bIsDirectional;
float3 TranslatedLightPosition;
StructuredBuffer<FDeepShadowViewInfo> DeepShadowViewInfoBuffer;
// Vertex shader rasterizing the macro group's voxel AABB as a triangle list
// (12 triangles / 36 vertices), projected into the deep-shadow atlas slot.
void MainVS(
	uint VertexId : SV_VertexID,
	out float4 OutPosition : SV_POSITION,
	out float3 OutTranslatedWorldPosition : WORLD_POSITION)
{
	// Fetch the macro group's voxel volume bounds.
	const FPackedVirtualVoxelNodeDesc PackedNode = VirtualVoxel.NodeDescBuffer[MacroGroupId];
	const FVirtualVoxelNodeDesc NodeDesc = UnpackVoxelNode(PackedNode, VirtualVoxel.PageResolution);

	const float3 BoundMin = NodeDesc.TranslatedWorldMinAABB;
	const float3 BoundMax = NodeDesc.TranslatedWorldMaxAABB;
	const float3 Center = (BoundMin + BoundMax) * 0.5f;
	const float3 Extent = (BoundMax - BoundMin) * 0.5f;

	// Sign offsets of the 8 AABB corners (corner i = Center + CornerSigns[i] * Extent).
	const float3 CornerSigns[8] =
	{
		float3(-1, -1, -1),
		float3(+1, -1, -1),
		float3(+1, +1, -1),
		float3(-1, +1, -1),
		float3(-1, -1, +1),
		float3(+1, -1, +1),
		float3(+1, +1, +1),
		float3(-1, +1, +1)
	};

	// Triangle list covering the 6 box faces, as corner indices.
	// TODO: Move this to an actual vertex/index buffer
	const uint CornerIndices[36] =
	{
		0, 1, 2,  0, 2, 3,
		4, 5, 6,  4, 6, 7,
		0, 1, 5,  0, 5, 4,
		2, 3, 7,  2, 7, 6,
		1, 2, 6,  1, 6, 5,
		3, 0, 4,  3, 4, 7
	};

	// Out-of-range vertex ids degenerate to the origin (matches the original switch with no default case).
	float3 TranslatedWorldPosition = 0;
	if (VertexId < 36)
	{
		TranslatedWorldPosition = Center + CornerSigns[CornerIndices[VertexId]] * Extent;
	}

	// Project into the atlas slot's (scaled/biased) clip space.
	const FDeepShadowViewInfo DeepShadowViewInfo = DeepShadowViewInfoBuffer[AtlasSlotIndex];
	const float4x4 TranslatedWorldToClipMatrix = DeepShadowViewInfo.TranslatedWorldToClipScaledBiased;

	OutTranslatedWorldPosition = TranslatedWorldPosition;
	OutPosition = mul(float4(TranslatedWorldPosition, 1), TranslatedWorldToClipMatrix);
}
//#define VOXEL_TRAVERSAL_TYPE VOXEL_TRAVERSAL_LINEAR_MIPMAP
#define VOXEL_TRAVERSAL_TYPE VOXEL_TRAVERSAL_LINEAR
#include "HairStrandsVoxelPageTraversal.ush"
// Pixel shader: traces a shadow ray from the rasterized voxel-volume surface
// along the light direction; where hair density is found, outputs the hit's
// device depth into the deep-shadow atlas, otherwise discards the pixel.
void MainPS(
	in float4 InPosition : SV_POSITION,
	in float3 InTranslatedWorldPosition : WORLD_POSITION,
	out float OutDepth : SV_DEPTH)
{
	OutDepth = 0;

	const float2 PixelCoord = InPosition.xy;
	const float2 UV = PixelCoord / float2(OutputResolution); // todo view rect offset

	const float DistanceThreshold = 1000;
	const bool bDebugEnabled = false;
	const float3 SampleRandom = GetHairVoxelJitter(PixelCoord, View.StateFrameIndexMod8, VirtualVoxel.JitterMode);

	// Directional lights march along the constant light direction; local lights
	// march away from the light position through the shaded point.
	const float3 RayDirection = bIsDirectional ? LightDirection : normalize(InTranslatedWorldPosition - TranslatedLightPosition);
	const float3 RayStart = InTranslatedWorldPosition;
	const float3 RayEnd = RayStart + RayDirection * DistanceThreshold;

	// Voxel page layout shared by all macro groups.
	FVirtualVoxelCommonDesc CommonDesc;
	CommonDesc.PageCountResolution = VirtualVoxel.PageCountResolution;
	CommonDesc.PageTextureResolution = VirtualVoxel.PageTextureResolution;
	CommonDesc.PageResolution = VirtualVoxel.PageResolution;
	CommonDesc.PageResolutionLog2 = VirtualVoxel.PageResolutionLog2;

	// This macro group's voxel node.
	const FVirtualVoxelNodeDesc NodeDesc = UnpackVoxelNode(VirtualVoxel.NodeDescBuffer[MacroGroupId], VirtualVoxel.PageResolution);

	FHairTraversalSettings TraversalSettings = InitHairTraversalSettings();
	TraversalSettings.DensityScale = VirtualVoxel.DensityScale;
	TraversalSettings.CountThreshold = 0.9f; // GetOpaqueVoxelValue();
	TraversalSettings.DistanceThreshold = DistanceThreshold;
	TraversalSettings.bDebugEnabled = bDebugEnabled;
	TraversalSettings.SteppingScale = VirtualVoxel.SteppingScale_Shadow;
	TraversalSettings.Random = SampleRandom;
	TraversalSettings.TanConeAngle = 0;
	TraversalSettings.bIsPrimaryRay = true;
	TraversalSettings.bUseOpaqueVisibility = true;
	TraversalSettings.PixelRadius = -1;
	TraversalSettings.ForcedMip = -1;

	const FHairTraversalResult TraversalResult = ComputeHairCountVirtualVoxel(
		RayStart,
		RayEnd,
		CommonDesc,
		NodeDesc,
		VirtualVoxel.PageIndexBuffer,
		VirtualVoxel.PageTexture,
		TraversalSettings);

	if (!(TraversalResult.HairCount > 0))
	{
		// No hair along the ray: leave the depth target untouched.
		discard;
	}
	else
	{
		// Project the hit point into the atlas slot's clip space and output device depth.
		const FDeepShadowViewInfo DeepShadowViewInfo = DeepShadowViewInfoBuffer[AtlasSlotIndex];
		const float4x4 TranslatedWorldToClipMatrix = DeepShadowViewInfo.TranslatedWorldToClipScaledBiased;
		const float3 HitP = RayStart + normalize(RayEnd - RayStart) * TraversalResult.HitT;
		const float4 ClipP = mul(float4(HitP, 1), TranslatedWorldToClipMatrix);
		OutDepth = ClipP.z / ClipP.w;
	}
}
#endif //SHADER_DEPTH_INJECTION
///////////////////////////////////////////////////////////////////////////
// Common function for mipmapping voxels
#if SHADER_MIP_VIRTUALVOXEL || SHADER_MIP_INDIRECTARGS
// Downsamples eight packed voxel density values (a 2x2x2 neighborhood) into one.
// The packed layout combines an opaque counter (VOXEL_OPAQUE_MASK/SHIFT), a hair
// counter (VOXEL_HAIR_MASK), and a no-shadow-cast flag (VOXEL_CAST_NO_SHADOW_MASK).
uint ComputeMipDensity(
	const uint RawDensity0,
	const uint RawDensity1,
	const uint RawDensity2,
	const uint RawDensity3,
	const uint RawDensity4,
	const uint RawDensity5,
	const uint RawDensity6,
	const uint RawDensity7)
{
	const uint RawDensities[8] =
	{
		RawDensity0, RawDensity1, RawDensity2, RawDensity3,
		RawDensity4, RawDensity5, RawDensity6, RawDensity7
	};

	float TotalOpaque = 0;
	uint TotalHair = 0;
	bool bHasShadowCaster = false;

	[unroll]
	for (uint Index = 0; Index < 8; ++Index)
	{
		const uint Raw = RawDensities[Index];
		TotalOpaque += (Raw & VOXEL_OPAQUE_MASK) >> VOXEL_OPAQUE_SHIFT;
		TotalHair += Raw & VOXEL_HAIR_MASK;
		// Propagate no shadow casting flag, only if other valid/non-empty voxels are also no-shadow caster
		bHasShadowCaster = bHasShadowCaster || ((Raw & VOXEL_HAIR_MASK) > 0 && (Raw & VOXEL_CAST_NO_SHADOW_MASK) == 0);
	}

	// Average the opaque counter, keeping it >= 1 when any source voxel was opaque.
	const uint OutTotalOpaque = uint(clamp(TotalOpaque / 8.f, TotalOpaque > 0 ? 1 : 0, 0x7F)) << VOXEL_OPAQUE_SHIFT;

	const bool bHasData = TotalHair > 0;
	TotalHair /= 8;

	// Insure that if a voxel contains some hair data, its total hair remains > 0 after averaging.
	// This is important for pruning invalid/empty page later on, to not remove non-empty page (which
	// could arise due to numerical precision).
	return min(uint(VOXEL_HAIR_MASK), bHasData ? max(TotalHair, 1u) : 0u) | OutTotalOpaque | (bHasShadowCaster ? 0u : VOXEL_CAST_NO_SHADOW_MASK);
}
#endif // SHADER_MIP_VIRTUALVOXEL || SHADER_MIP_INDIRECTARGS
///////////////////////////////////////////////////////////////////////////
#if SHADER_MIP_VIRTUALVOXEL
#include "../MortonCode.ush"
uint bPatchEmptyPage;
int3 PageCountResolution;
uint PageResolution;
uint SourceMip;
uint TargetMip;
Buffer<uint> AllocatedPageCountBuffer;
Texture3D<uint> InDensityTexture;
RWTexture3D<uint> OutDensityTexture;
// Builds a 3D Morton (Z-order) key: X bits land at offset 0, Y at offset 1,
// Z at offset 2 of each interleaved bit triplet. Inverse of MortonDecode3.
uint MortonEncode3(uint3 Voxel)
{
	const uint EncodedX = MortonCode3(Voxel.x);
	const uint EncodedY = MortonCode3(Voxel.y) << 1;
	const uint EncodedZ = MortonCode3(Voxel.z) << 2;
	return EncodedX | EncodedY | EncodedZ;
}
// Recovers the 3D coordinate from a Morton (Z-order) key. Inverse of MortonEncode3.
uint3 MortonDecode3(uint Morton)
{
	const uint DecodedX = ReverseMortonCode3(Morton);
	const uint DecodedY = ReverseMortonCode3(Morton >> 1);
	const uint DecodedZ = ReverseMortonCode3(Morton >> 2);
	return uint3(DecodedX, DecodedY, DecodedZ);
}
#if PERMUTATION_MIP_AGGREGATE
Buffer<uint> PageToPageIndexBuffer;
RWBuffer<uint> OutPageIndexBuffer;
RWTexture3D<uint> OutDensityTexture2;
RWTexture3D<uint> OutDensityTexture1;
groupshared uint g_Density4[64][GROUP_SIZE_Z];
groupshared uint g_Density2[8][GROUP_SIZE_Z];
#endif
// GroupSize is [64u,1u,16u]
// Generates voxel density mips: each output voxel averages a 2x2x2 neighborhood
// of the source mip (see ComputeMipDensity). With PERMUTATION_MIP_AGGREGATE, the
// same dispatch also reduces down to 2x2x2 and 1x1x1 per page through groupshared
// memory, and optionally invalidates page indices for fully empty pages.
// Thread mapping: X = Morton-ordered voxel within a page, Z = allocated page index.
[numthreads(GROUP_SIZE_X, 1, GROUP_SIZE_Z)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint PageIndex = DispatchThreadId.z;
	const uint3 PageCoord = IndexToCoord(PageIndex, PageCountResolution);
	// Threads past the allocated page count do no work, but still participate in barriers below.
	const bool bValid = PageIndex < AllocatedPageCountBuffer[0];
	// Dummy initialization for pleasing compiler
#if PERMUTATION_MIP_AGGREGATE && COMPILER_FXC
	if (GroupThreadId.x < 8 && GroupThreadId.z < GROUP_SIZE_Z)
	{
		g_Density2[GroupThreadId.x][GroupThreadId.z] = 0;
	}
#endif
	uint Total = 0;
	if (bValid)
	{
		const uint InPageResolution = PageResolution >> SourceMip;
		const uint OutPageResolution = PageResolution >> TargetMip;
		const uint TotalVoxelPerOutPageResolution = OutPageResolution * OutPageResolution * OutPageResolution;
		if (DispatchThreadId.x < TotalVoxelPerOutPageResolution)
		{
			// Morton decode gives the output voxel position within the page; the
			// matching source neighborhood starts at twice that coordinate.
			const uint VoxelIndex = DispatchThreadId.x;
			const uint3 OutVoxelCoordOffset = MortonDecode3(VoxelIndex);
			const uint3 InVoxelCoordOffset = OutVoxelCoordOffset << 1;
			const uint3 OutVoxelCoord = PageCoord * OutPageResolution + OutVoxelCoordOffset;
			const uint3 InVoxelCoord = PageCoord * InPageResolution + InVoxelCoordOffset;
			// The 2x2x2 source voxels feeding this output voxel.
			const uint3 InVoxelCoord0 = InVoxelCoord;
			const uint3 InVoxelCoord1 = InVoxelCoord0 + uint3(1, 0, 0);
			const uint3 InVoxelCoord2 = InVoxelCoord0 + uint3(0, 1, 0);
			const uint3 InVoxelCoord3 = InVoxelCoord0 + uint3(1, 1, 0);
			const uint3 InVoxelCoord4 = InVoxelCoord0 + uint3(0, 0, 1);
			const uint3 InVoxelCoord5 = InVoxelCoord0 + uint3(1, 0, 1);
			const uint3 InVoxelCoord6 = InVoxelCoord0 + uint3(0, 1, 1);
			const uint3 InVoxelCoord7 = InVoxelCoord0 + uint3(1, 1, 1);
			const uint RawDensity0 = InDensityTexture[InVoxelCoord0];
			const uint RawDensity1 = InDensityTexture[InVoxelCoord1];
			const uint RawDensity2 = InDensityTexture[InVoxelCoord2];
			const uint RawDensity3 = InDensityTexture[InVoxelCoord3];
			const uint RawDensity4 = InDensityTexture[InVoxelCoord4];
			const uint RawDensity5 = InDensityTexture[InVoxelCoord5];
			const uint RawDensity6 = InDensityTexture[InVoxelCoord6];
			const uint RawDensity7 = InDensityTexture[InVoxelCoord7];
			Total = ComputeMipDensity(
				RawDensity0,
				RawDensity1,
				RawDensity2,
				RawDensity3,
				RawDensity4,
				RawDensity5,
				RawDensity6,
				RawDensity7);
			OutDensityTexture[OutVoxelCoord] = Total;
#if PERMUTATION_MIP_AGGREGATE
			// Store 4x4x4 values
			// NOTE(review): the aggregate path assumes the target mip page resolution is 4
			// (64 voxels, fully filling g_Density4) — confirm with the dispatching code.
			if (GroupThreadId.x < 64)
			{
				uint StoreIndex = MortonEncode3(OutVoxelCoordOffset);
				g_Density4[StoreIndex][GroupThreadId.z] = Total;
			}
#endif
		}
	}
#if PERMUTATION_MIP_AGGREGATE
	// Barriers are executed by all threads (including invalid ones) to keep group sync uniform.
	GroupMemoryBarrierWithGroupSync();
	// Target Page Res: 2x2x2
	if (bValid && GroupThreadId.x < 8)
	{
		// Each of the 8 threads reduces one consecutive Morton octet of the 4x4x4 data.
		const uint Total4 = ComputeMipDensity(
			g_Density4[GroupThreadId.x*8+0][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+1][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+2][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+3][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+4][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+5][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+6][GroupThreadId.z],
			g_Density4[GroupThreadId.x*8+7][GroupThreadId.z]);
		const uint3 StoreVoxel = MortonDecode3(GroupThreadId.x*8)>>1;
		const uint StoreIndex = MortonEncode3(StoreVoxel); // GroupThreadId.x >> 3;
		g_Density2[StoreIndex][GroupThreadId.z] = Total4;
		const uint OutPageResolution = 2;
		const uint3 OutVoxelCoord = PageCoord * OutPageResolution + StoreVoxel;
		OutDensityTexture2[OutVoxelCoord] = Total4;
	}
	GroupMemoryBarrierWithGroupSync();
	// Target Page Res: 1x1x1
	if (bValid && GroupThreadId.x < 1)
	{
		// Single thread reduces the remaining 2x2x2 block to one value per page.
		const uint Total2 = ComputeMipDensity(
			g_Density2[0][GroupThreadId.z],
			g_Density2[1][GroupThreadId.z],
			g_Density2[2][GroupThreadId.z],
			g_Density2[3][GroupThreadId.z],
			g_Density2[4][GroupThreadId.z],
			g_Density2[5][GroupThreadId.z],
			g_Density2[6][GroupThreadId.z],
			g_Density2[7][GroupThreadId.z]);
		const uint OutPageResolution = 1;
		const uint3 OutVoxelCoord = PageCoord * OutPageResolution + 0;
		OutDensityTexture1[OutVoxelCoord] = Total2;
		// Update the page index with invalid page index if the voxel does not contain any data.
		// This allow to save tracing cost when evaluating the transmittance.
		const bool bIsEmpty = (Total2 & VOXEL_HAIR_MASK) == 0;
		if (bPatchEmptyPage > 0 && bIsEmpty)
		{
			const uint PageIndexOffset = PageToPageIndexBuffer[PageIndex];
			OutPageIndexBuffer[PageIndexOffset] = INVALID_VOXEL_PAGE_INDEX;
		}
	}
#endif
}
#endif
///////////////////////////////////////////////////////////////////////////
#if SHADER_MIP_INDIRECTARGS
#include "HairStrandsVoxelPageCommon.ush"
uint PageResolution;
uint TargetMipIndex;
int3 DispatchGroupSize;
Buffer<uint> InIndirectArgs;
RWBuffer<uint> OutIndirectArgs;
// Patches the X dimension of the mip-generation indirect dispatch arguments:
// one thread per voxel of a page at the target mip resolution, rounded up to
// the dispatch group size. Y and Z are forwarded unmodified from the source args.
[numthreads(1, 1, 1)]
void MainCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	// (Removed unused local 'MacroGroupId' = DispatchThreadId.x — dead code.)
	const uint TargetPageResolution = PageResolution >> TargetMipIndex;
	const uint TotalVoxelCount = TargetPageResolution * TargetPageResolution * TargetPageResolution;
	const uint DispatchX = DivideAndRoundUp(TotalVoxelCount, DispatchGroupSize.x);
	WriteDispatchIndirectArgs(OutIndirectArgs, 0, DispatchX, InIndirectArgs[1], InIndirectArgs[2]);
}
#endif
///////////////////////////////////////////////////////////////////////////