Files
UnrealEngine/Engine/Shaders/Private/HairStrands/HairStrandsRaytracingGeometry.usf
2025-05-18 13:04:45 +08:00

410 lines
12 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "../Common.ush"
#include "HairStrandsClusterCommon.ush"
#include "HairStrandsVertexFactoryCommon.ush"
///////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_RT_GEOMETRY
// Shader parameters for the ray tracing geometry generation pass.

// Number of control points; ids at or above this value are treated as out of range.
uint PointCount;
// Radius and root/tip scale forwarded as-is to ReadHairControlPoint.
float HairStrandsVF_HairRadius;
float HairStrandsVF_HairRootScale;
float HairStrandsVF_HairTipScale;
// Index passed to ReadRenPositionOffset to select this instance's position offset.
uint HairStrandsVF_RegisteredIndex;
// Source of the position offset (read through ReadRenPositionOffset).
StructuredBuffer<float4> PositionOffsetBuffer;
// Encoded control points, decoded by ReadHairControlPoint.
ByteAddressBuffer PositionBuffer;
// Two float4 per control point: entry 2*Id is used as the X axis, entry 2*Id+1 as the Z axis.
Buffer<float4> TangentBuffer;
// Output: two float4 per AABB in the procedural permutation, otherwise four vertices per control point.
RWBuffer<float4> OutputPositionBuffer;
#if PERMUTATION_PROCEDURAL_PRIMITIVE
// Number of AABBs emitted per segment.
uint RaytracingProceduralSplits;
#else
// Output: 24 indices (8 triangles) per segment.
RWStructuredBuffer<uint> OutputIndexBuffer;
#endif
// When non-zero (and PERMUTATION_CULLING == 1), vertex indices are remapped through the culling index buffer.
uint HairStrandsVF_bCullingEnable;
#if PERMUTATION_CULLING == 1
// Element [3] is read as the vertex count after culling.
Buffer<uint> HairStrandsVF_CullingIndirectBuffer;
// Maps a post-culling vertex index to its source control point index.
Buffer<uint> HairStrandsVF_CullingIndexBuffer;
// NOTE(review): not referenced anywhere in this section — confirm whether it is still needed.
Buffer<uint> HairStrandsVF_CullingIndirectBufferArgs;
#endif
// Decodes the control point stored at ControlPointId in PositionBuffer.
// Ids at or beyond PointCount never touch the buffer and yield a zero-initialized control point.
FHairControlPoint GetControlPoint(uint ControlPointId)
{
	if (ControlPointId >= PointCount)
	{
		return (FHairControlPoint)0;
	}

	return ReadHairControlPoint(
		PositionBuffer,
		ControlPointId,
		ReadRenPositionOffset(PositionOffsetBuffer, HairStrandsVF_RegisteredIndex),
		HairStrandsVF_HairRadius,
		HairStrandsVF_HairRootScale,
		HairStrandsVF_HairTipScale);
}
// Builds the tangent frame of a control point.
// Rows are (X, Y, Z): X and Z come from TangentBuffer (two entries per control point), Y = cross(Z, X).
float3x3 GetTangentBasis(uint ControlPointId)
{
	const uint FirstEntry = 2 * ControlPointId;
	const float3 AxisX = TangentBuffer[FirstEntry].xyz;
	const float3 AxisZ = TangentBuffer[FirstEntry + 1].xyz;
	return float3x3(AxisX, cross(AxisZ, AxisX), AxisZ);
}
#if PERMUTATION_PROCEDURAL_PRIMITIVE
// Stores one AABB into OutputPositionBuffer as two consecutive float4:
//   [2*i + 0] = (Lo.x, Lo.y, Lo.z, Hi.x)
//   [2*i + 1] = (Hi.y, Hi.z, 0, 0)
void SetAABB(uint InVertexIndex, float3 Lo, float3 Hi)
{
	const uint FirstSlot = 2 * InVertexIndex;
	OutputPositionBuffer[FirstSlot + 0] = float4(Lo, Hi.x);
	OutputPositionBuffer[FirstSlot + 1] = float4(Hi.yz, 0.0, 0.0);
}
#endif
// Fills every primitive belonging to segment InVertexIndex with invalid values.
// - Procedural permutation: each of the segment's AABB splits is written with NaN bounds.
// - Triangle permutation: all 24 indices point at the same vertex, producing degenerate triangles.
void SetInvalidSegment(uint InVertexIndex)
{
#if PERMUTATION_PROCEDURAL_PRIMITIVE
	const float QuietNaN = asfloat(0x7FC00000);
	for (uint SplitIt = 0; SplitIt < RaytracingProceduralSplits; SplitIt++)
	{
		SetAABB(RaytracingProceduralSplits * InVertexIndex + SplitIt, QuietNaN, QuietNaN);
	}
#else
	const uint IndicesPerSegment = 8 * 3; // 8 triangles, 3 indices each
	const uint SharedVertex = InVertexIndex * 4;
	const uint FirstOutIndex = InVertexIndex * IndicesPerSegment;
	UNROLL
	for (uint It = 0; It < IndicesPerSegment; It++)
	{
		OutputIndexBuffer[FirstOutIndex + It] = SharedVertex;
	}
#endif
}
// TODO: this is not used at the moment
// Cap generation is currently disabled: the segment is simply marked invalid.
// The disabled branch would emit a single quad (two triangles) and pad the remaining indices.
void SetCapSegment(uint InVertexIndex)
{
#if 1
	SetInvalidSegment(InVertexIndex);
#else
	const uint IndicesPerSegment = 8 * 3; // 8 triangles, 3 indices each
	const uint FirstVertex = InVertexIndex * 4;
	const uint FirstOutIndex = InVertexIndex * IndicesPerSegment;

	// first triangle of the quad
	OutputIndexBuffer[FirstOutIndex + 0] = FirstVertex;
	OutputIndexBuffer[FirstOutIndex + 1] = FirstVertex + 1;
	OutputIndexBuffer[FirstOutIndex + 2] = FirstVertex + 2;
	// second triangle of the quad
	OutputIndexBuffer[FirstOutIndex + 3] = FirstVertex;
	OutputIndexBuffer[FirstOutIndex + 4] = FirstVertex + 2;
	OutputIndexBuffer[FirstOutIndex + 5] = FirstVertex + 3;

	// fill the rest with degenerate triangles
	UNROLL
	for (uint It = 6; It < IndicesPerSegment; It++)
	{
		OutputIndexBuffer[FirstOutIndex + It] = FirstVertex;
	}
#endif
}
#if !PERMUTATION_PROCEDURAL_PRIMITIVE
// Emits the 8 triangles (4 quads) connecting the ring of 4 vertices at InVertexIndex
// with the ring of 4 vertices of the next control point.
void SetBodySegment(uint InVertexIndex)
{
	const uint IndicesPerQuad = 6;
	const uint IndicesPerSegment = 4 * IndicesPerQuad;
	const uint LowerRing = InVertexIndex * 4;
	const uint UpperRing = LowerRing + 4;
	const uint FirstOutIndex = InVertexIndex * IndicesPerSegment;

	UNROLL
	for (uint Side = 0; Side < 4; Side++)
	{
		// Previous corner on the ring, wrapping around: 3, 0, 1, 2
		const uint PrevSide = (Side + 3) % 4;
		const uint Out = FirstOutIndex + Side * IndicesPerQuad;

		// Note that this vertex ordering is carefully chosen to work with the Vertex Factory in RT mode
		// In particular, we _must_ put one of the bottom points as the last vertex of the triangle
		// so that the interpolation step will copy the right HairControlPointId onwards
		OutputIndexBuffer[Out + 0] = UpperRing + PrevSide;
		OutputIndexBuffer[Out + 1] = UpperRing + Side;
		OutputIndexBuffer[Out + 2] = LowerRing + PrevSide;

		OutputIndexBuffer[Out + 3] = UpperRing + Side;
		OutputIndexBuffer[Out + 4] = LowerRing + Side;
		OutputIndexBuffer[Out + 5] = LowerRing + PrevSide;
	}
}
#endif
// Returns half the surface area of the AABB enclosing two spheres.
// A and B are (center.xyz, radius); the result is dx*dy + dy*dz + dz*dx of the box extent.
float GetAABBHalfAreaForSegment(float4 A, float4 B)
{
	const float3 MinCorner = min(A.xyz - A.w, B.xyz - B.w);
	const float3 MaxCorner = max(A.xyz + A.w, B.xyz + B.w);
	const float3 Extent = MaxCorner - MinCorner;
	return dot(Extent.xyz, Extent.yzx);
}
// True when every component of the control point position is neither infinite nor NaN
// and does not exceed INFINITE_FLOAT.
bool IsValidPosition(FHairControlPoint P)
{
	const bool bNoInf   = all(!isinf(P.Position));
	const bool bNoNaN   = all(!isnan(P.Position));
	const bool bBounded = all(P.Position <= INFINITE_FLOAT);
	return bNoInf && bNoNaN && bBounded;
}
// Generates ray tracing geometry for hair strands. Each thread owns exactly one segment, identified by
// VertexIndex0 (= dispatch thread id), spanning control points VertexIndex0 and VertexIndex0 + 1.
// All writes issued by a thread target the output slots of segment VertexIndex0.
//
// - Procedural permutation: writes RaytracingProceduralSplits AABBs per segment (NaN AABBs for invalid segments).
// - Triangle permutation: writes 4 vertices around the control point and 24 indices per segment.
//
// When culling is enabled, the output slot remains VertexIndex0 but the source control points are fetched
// through HairStrandsVF_CullingIndexBuffer.
[numthreads(GROUP_SIZE, 1, 1)]
void MainCS(uint DispatchThreadId : SV_DispatchThreadID)
{
	uint VertexIndex0 = DispatchThreadId;
	uint VertexIndex1 = VertexIndex0 + 1;

	// Both end points of the segment must exist
	const bool bIsValid = VertexIndex0 < PointCount && VertexIndex1 < PointCount;
	if (!bIsValid)
	{
		SetInvalidSegment(VertexIndex0);
		return;
	}

	uint SourceIndex0 = VertexIndex0;
	uint SourceIndex1 = VertexIndex1;
#if PERMUTATION_CULLING == 1
	if (HairStrandsVF_bCullingEnable)
	{
		const uint VertexCountAfterCulling = HairStrandsVF_CullingIndirectBuffer[3];
		if (VertexIndex1 >= VertexCountAfterCulling)
		{
			SetInvalidSegment(VertexIndex0);
			return;
		}
		// Remap compacted indices to their source control points
		SourceIndex0 = HairStrandsVF_CullingIndexBuffer[VertexIndex0];
		SourceIndex1 = HairStrandsVF_CullingIndexBuffer[VertexIndex1];
	}
#endif

	FHairControlPoint P0 = GetControlPoint(SourceIndex0);

#if PERMUTATION_PROCEDURAL_PRIMITIVE
	if (P0.Type == HAIR_CONTROLPOINT_END || !IsValidPosition(P0))
	{
		// this vertex doesn't map to a valid segment
		SetInvalidSegment(VertexIndex0);
	}
	else
	{
		// fetch the other point and create the aabb around the line
		FHairControlPoint P1 = GetControlPoint(SourceIndex1);
		if (!IsValidPosition(P1))
		{
			// The segment owned by this thread is VertexIndex0. Invalidating VertexIndex1 instead would
			// leave this thread's AABBs unwritten and stomp on the slots owned by the next thread.
			SetInvalidSegment(VertexIndex0);
		}
		else
		{
			float4 A = float4(P0.Position, P0.WorldRadius);
			float4 B = float4(P1.Position, P1.WorldRadius);
			if ((A.w == 0 && B.w == 0) || length2(B.xyz - A.xyz) == 0 || any(!IsFinite(A)) || any(!IsFinite(B)))
			{
				// this segment is degenerate, don't bother adding it
				SetInvalidSegment(VertexIndex0);
			}
			else
			{
				// measure aabb surface area prior to splitting
				float FullArea = GetAABBHalfAreaForSegment(A, B);
				float4 SplitA = A;
				float SumArea = 0.0;
				// Emit one AABB per sub-segment, interpolating position and radius linearly
				for (uint i = 0; i < RaytracingProceduralSplits; i++)
				{
					float4 SplitB = lerp(A, B, float(i + 1) / float(RaytracingProceduralSplits));
					float3 Lo = min(SplitA.xyz - SplitA.w, SplitB.xyz - SplitB.w);
					float3 Hi = max(SplitA.xyz + SplitA.w, SplitB.xyz + SplitB.w);
					SumArea += GetAABBHalfAreaForSegment(SplitA, SplitB);
					SetAABB(RaytracingProceduralSplits * VertexIndex0 + i, Lo, Hi);
					SplitA = SplitB;
				}
#if 1
				if (FullArea <= 2 * SumArea)
				{
					// not worth splitting? use full bbox and make other splits degenerate
					float3 Lo = min(A.xyz - A.w, B.xyz - B.w);
					float3 Hi = max(A.xyz + A.w, B.xyz + B.w);
					SetAABB(RaytracingProceduralSplits * VertexIndex0 + 0, Lo, Hi);
					for (uint i = 1; i < RaytracingProceduralSplits; i++)
					{
						const float NaN = asfloat(0x7FC00000);
						SetAABB(RaytracingProceduralSplits * VertexIndex0 + i, NaN, NaN);
					}
				}
#endif
			}
		}
	}
#else
	// expand current vertex into 4 points around the curve
	const float3x3 TangentBasis = GetTangentBasis(SourceIndex0);
	const float3 N = TangentBasis[0];
	const float3 B = TangentBasis[1];
	const uint BaseOutIndex = VertexIndex0 * 4;
	{
		const float3 Px0 = P0.Position - N * P0.WorldRadius;
		const float3 Px1 = P0.Position + N * P0.WorldRadius;
		const float3 Py0 = P0.Position - B * P0.WorldRadius;
		const float3 Py1 = P0.Position + B * P0.WorldRadius;
		OutputPositionBuffer[BaseOutIndex    ] = float4(Px0, 1);
		OutputPositionBuffer[BaseOutIndex + 1] = float4(Py0, 1);
		OutputPositionBuffer[BaseOutIndex + 2] = float4(Px1, 1);
		OutputPositionBuffer[BaseOutIndex + 3] = float4(Py1, 1);
	}

	if (P0.Type == HAIR_CONTROLPOINT_END || !IsValidPosition(P0))
	{
		// last vertex of a curve, we could create a cap, but this would complicate the logic in the vertex factory
		// ignore this for now on the assumption that most hairs will have a small tip width
		SetInvalidSegment(VertexIndex0);
	}
	else
	{
		// regular point, build the body of the cylinder
		SetBodySegment(VertexIndex0);
	}
#endif
}
#endif // SHADER_RT_GEOMETRY
///////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_POSITION_CHANGED
#include "../ShaderPrint.ush"
#include "HairStrandsAABBCommon.ush"
// The reduction in MainCS is written out for exactly 1024 threads per group.
#if GROUP_SIZE != 1024
#error GROUP_SIZE has changed, please update the reduction code.
#endif
// Shader parameters for the position-changed detection pass.

// Index passed to ReadHairAABB to fetch the bound drawn for debugging.
uint InstanceResgisteredIndex;
// Number of control points; threads at or above this index do no comparison.
uint PointCount;
// When non-zero, a red bounding box is drawn for grooms whose positions changed.
uint bDrawInvalidElement;
// Threshold compared against the squared distance between current and previous positions.
float PositionThreshold2;
// When non-zero, vertex indices are remapped through the culling index buffer.
uint HairStrandsVF_bCullingEnable;
// Element [3] is read as the vertex count after culling.
Buffer<uint> HairStrandsVF_CullingIndirectBuffer;
// Maps a post-culling vertex index to its source control point index.
Buffer<uint> HairStrandsVF_CullingIndexBuffer;
// Encoded control points of the current and previous frame, decoded by ReadHairControlPoint.
ByteAddressBuffer CurrPositionBuffer;
ByteAddressBuffer PrevPositionBuffer;
// Source of the bounding box read through ReadHairAABB.
Buffer<int> GroupAABBBuffer;
// Element [0] is non-zero once any groom has been flagged as changed.
RWBuffer<uint> InvalidationBuffer;
// Element [0] counts debug draws; only the first two increments actually draw.
RWBuffer<uint> InvalidationPrintCounter;
// One flag per thread of the group, OR-reduced in MainCS.
groupshared uint s_InvalidMask[GROUP_SIZE];
// Detects whether any control point moved by more than PositionThreshold2 (squared distance) between
// PrevPositionBuffer and CurrPositionBuffer. Each thread compares one control point and stores a flag in
// groupshared memory; the group then OR-reduces the flags and thread 0 sets InvalidationBuffer[0] if any is set.
[numthreads(GROUP_SIZE, 1, 1)]
void MainCS(uint2 DispatchThreadId : SV_DispatchThreadID, uint LinearIndex : SV_GroupIndex)
{
	// Early out: avoid full comparison if previous groom have already invalidated the frame
	const bool bIsAlreadyInvalid = InvalidationBuffer[0] > 0;

	s_InvalidMask[LinearIndex] = 0;
	if (!bIsAlreadyInvalid)
	{
		// Fetch vertex index if culling is enabled
		uint VertexIndex0 = DispatchThreadId.x;
		bool bIsValid = VertexIndex0 < PointCount;
		if (bIsValid && HairStrandsVF_bCullingEnable)
		{
			const uint VertexCountAfterCulling = HairStrandsVF_CullingIndirectBuffer[3];
			const uint FetchIndex0 = DispatchThreadId.x;
			bIsValid = FetchIndex0 < VertexCountAfterCulling;
			if (bIsValid)
			{
				VertexIndex0 = HairStrandsVF_CullingIndexBuffer[FetchIndex0];
			}
		}

		// Position comparison
		if (bIsValid)
		{
			const FHairControlPoint Curr = ReadHairControlPoint(CurrPositionBuffer, VertexIndex0, float3(0, 0, 0), 1, 1, 1);
			const FHairControlPoint Prev = ReadHairControlPoint(PrevPositionBuffer, VertexIndex0, float3(0, 0, 0), 1, 1, 1);
			const float3 Diff = Curr.Position - Prev.Position;
			const bool bIsInvalid = dot(Diff, Diff) > PositionThreshold2;

			// Draw red bounding box around the groom having some changed position
			if (bIsInvalid && bDrawInvalidElement)
			{
				uint Offset = 0;
				InterlockedAdd(InvalidationPrintCounter[0], 1, Offset);
				// Only the first two threads to get here draw
				if (Offset < 2)
				{
					const FHairAABB Bound = ReadHairAABB(InstanceResgisteredIndex, GroupAABBBuffer);
					AddAABBTWS(Bound.Min, Bound.Max, ColorRed);
				}
			}

			s_InvalidMask[LinearIndex] = bIsInvalid ? 1u : 0u;
		}
	}

	// Reduction
	// Every thread's flag must be visible before the first step reads slots written by other threads.
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 512)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 512];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 256)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 256];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 128)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 128];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 64)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 64];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 32];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 16];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 8];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 4];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_InvalidMask[LinearIndex] = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 2];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 1)
	{
		// Final step: combine the last two slots and publish the result
		const uint InvalidMask = s_InvalidMask[LinearIndex] | s_InvalidMask[LinearIndex + 1];
		if (InvalidMask > 0)
		{
			InterlockedOr(InvalidationBuffer[0], 1u);
		}
	}
}
#endif // SHADER_POSITION_CHANGED