Files
UnrealEngine/Engine/Shaders/Private/GpuSkinCacheComputeShader.usf
2025-05-18 13:04:45 +08:00

395 lines
15 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
GpuSkinCacheComputeShader.usf: Perform vertex skinning into a buffer to avoid skinning in the vertex shader.
=============================================================================*/
#include "Common.ush"
#include "Definitions.usf"
#include "SceneData.ush"
#include "VertexFactoryCommon.ush"
#include "GpuSkinCommon.ush"
#include "DynamicMeshBounds.ush"
#include "WaveOpUtil.ush"
// 0/1 setup by CPU
// #define GPUSKIN_APEX_CLOTH
// 0/1 setup by CPU
// #define GPUSKIN_MORPH_BLEND
// Set to 1 to enable passing through values from vertex buffer (ie do not skin).
// When enabled, SkinCacheUpdateBatchCS overwrites its skinned outputs with the raw input position/tangent values.
#define GPUSKIN_DEBUG_PASSTHROUGH 0
// Type used for the per-vertex blended bone transform (3 rows x 4 columns).
#define FBoneMatrix float3x4
// Bone transform data; handed to the ComputeBoneMatrixWith*Influences helpers when building the blend matrix.
STRONG_TYPE Buffer<float4> BoneMatrices;
#if GPUSKIN_MORPH_BLEND
// float3 DeltaPosition, PackedNormal, see FMorphGPUSkinVertex
STRONG_TYPE Buffer<float> MorphBuffer;
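// data offset to start indexing the data in MorphBuffer; it is added to the vertex index before GetMorphBuffer scales by 6 floats per vertex, so it is expressed in morph vertices rather than single floats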
uint MorphBufferOffset;
/**
 * Fetches the morph target deltas for one vertex from MorphBuffer.
 *
 * @param VertexIndex		Index of the morph vertex; each vertex occupies 6 consecutive floats
 * @param DeltaPosition		Receives floats 0..2 of the vertex record (position delta)
 * @param DeltaTangentZ		Receives floats 3..5 of the vertex record (tangent Z delta)
 */
void GetMorphBuffer(uint VertexIndex, out float3 DeltaPosition, out half3 DeltaTangentZ)
{
	// The CPU side asserts "check(MorphStride == sizeof(float) * 6);":
	// 3 position floats followed by 3 tangent Z floats, as laid out by FMorphGPUSkinVertex.
	const uint FirstFloat = VertexIndex * (3 + 3);

	const float3 PositionDelta = float3(
		MorphBuffer[FirstFloat + 0],
		MorphBuffer[FirstFloat + 1],
		MorphBuffer[FirstFloat + 2]);
	DeltaPosition = PositionDelta;

	DeltaTangentZ = half3(
		MorphBuffer[FirstFloat + 3],
		MorphBuffer[FirstFloat + 4],
		MorphBuffer[FirstFloat + 5]);
}
#endif
#if GPUSKIN_APEX_CLOTH
// This must match NUM_INFLUENCES_PER_VERTEX in ClothingMeshUtils.cpp and GpuSkinVertexFactory.ush
// It represents the maximum number of influences so perhaps should be renamed. Right now we are keeping the name matching for easy search.
// TODO: Make this easier to change without messing things up
#define NUM_INFLUENCES_PER_VERTEX 5
uint ClothNumInfluencesPerVertex;
// One cloth influence of a render vertex: ties the vertex to a triangle of the simulated cloth mesh.
// Filled in by GetClothBuffer and consumed by the cloth section of SkinCacheUpdateBatchCS.
struct FVertexTriangleInfluence
{
// xy: barycentric weights of the first two triangle vertices (the third is derived as 1 - x - y);
// w: scale applied to the vertex normals when offsetting from the triangle
float4 PositionBaryCoordsAndDist;
// xyz: weights applied to the three triangle vertices; w: scale applied to the vertex normals.
// Used to build a point whose offset from the simulated position yields the cloth normal (TangentZ).
float4 NormalBaryCoordsAndDist;
// Same layout as NormalBaryCoordsAndDist, used to derive the cloth tangent (TangentX)
float4 TangentBaryCoordsAndDist;
// xyz: vertex indices into ClothPositionsAndNormalsBuffer;
// w: influence is skipped when this is not below 0xFFFF, otherwise it scales the cloth blend weight as (1 - w / 65535)
uint4 SourceMeshVertIndices;
// Per-influence weight; only read when ClothNumInfluencesPerVertex > 1
float Weight;
};
// In ClothBuffer: every influence occupies this many consecutive float4 entries
// (position bary/dist, normal bary/dist, tangent bary/dist, packed source indices + weight).
// The macro deliberately has no trailing semicolon so that it expands to a plain constant
// and can be used inside larger expressions.
#define NUM_FLOAT4S_PER_VERTEX_INFLUENCE 4
Buffer<float4> ClothBuffer;
// Simulated cloth vertices, three float2 entries per vertex:
// (pos.x, pos.y), (pos.z, normal.x), (normal.y, normal.z)
Buffer<float2> ClothPositionsAndNormalsBuffer;
// Added to (ClothNumInfluencesPerVertex * VertexIndex) when addressing ClothBuffer, i.e. counted in influences
uint ClothBufferOffset;
// Scales the per-influence cloth weight used to blend between skinned and simulated results
float ClothBlendWeight;
/** Transform from cloth space (relative to cloth root bone) to local/component space */
float4x4 ClothToLocal;
/** Scale of the owner actor */
float3 WorldScale;
/**
 * Unpacks the cloth influences of one vertex from ClothBuffer.
 *
 * Only the first ClothNumInfluencesPerVertex entries of Influences are written.
 *
 * @param VertexIndex		Vertex whose influences are fetched
 * @param ClothIndexOffset	Offset, in influences, added to the vertex's first influence index
 * @param Influences		Receives the unpacked influences
 */
void GetClothBuffer(uint VertexIndex, uint ClothIndexOffset, out FVertexTriangleInfluence Influences[NUM_INFLUENCES_PER_VERTEX])
{
	const uint FirstInfluence = ClothIndexOffset + VertexIndex * ClothNumInfluencesPerVertex;

	UNROLL_N(NUM_INFLUENCES_PER_VERTEX)
	for (int InfluenceIndex = 0; InfluenceIndex < ClothNumInfluencesPerVertex; ++InfluenceIndex)
	{
		// Index of the first float4 belonging to this influence
		const uint Base = (FirstInfluence + InfluenceIndex) * NUM_FLOAT4S_PER_VERTEX_INFLUENCE;

		// The fourth float4 carries raw bits: two 16-bit indices in each of x and y, the weight in z
		const uint4 PackedBits = asuint(ClothBuffer[Base + 3]);
		const uint2 LowHalves = PackedBits.xy & 0xffff;
		const uint2 HighHalves = (PackedBits.xy >> 16) & 0xffff;

		Influences[InfluenceIndex].PositionBaryCoordsAndDist = ClothBuffer[Base + 0];
		Influences[InfluenceIndex].NormalBaryCoordsAndDist = ClothBuffer[Base + 1];
		Influences[InfluenceIndex].TangentBaryCoordsAndDist = ClothBuffer[Base + 2];
		Influences[InfluenceIndex].SourceMeshVertIndices = uint4(LowHalves.x, HighHalves.x, LowHalves.y, HighHalves.y);
		Influences[InfluenceIndex].Weight = asfloat(PackedBits.z);
	}
}
#endif
// Per-vertex input data gathered by UnpackedVertex() before skinning.
// Which members exist depends on the GPUSKIN_* permutation flags.
struct FVertexUnpacked
{
// Input tangent after TangentBias_SkinCache; xyz is skinned, w is written back to the output unchanged
half4 TangentX;
// Input normal (tangent Z) after TangentBias_SkinCache; xyz is skinned, w is written back to the output unchanged
half4 TangentZ;
#if !GPUSKIN_UNLIMITED_BONE_INFLUENCE
// Bone indices and weights read from InputWeightStream (limited-influence path only)
FGPUSkinIndexAndWeight IndexAndWeights;
#endif // !GPUSKIN_UNLIMITED_BONE_INFLUENCE
#if GPUSKIN_MORPH_BLEND
// morph target, added to the Position
float3 DeltaPosition;
// morph target, added to the TangentZ and then used to derive new TangentX and TangentY, -2..2
half3 DeltaTangentZ;
#endif
#if GPUSKIN_APEX_CLOTH
// Cloth influences for this vertex; only the first ClothNumInfluencesPerVertex entries are filled by GetClothBuffer
FVertexTriangleInfluence Influences[NUM_INFLUENCES_PER_VERTEX];
#endif
};
// Working values computed per vertex inside SkinCacheUpdateBatchCS.
struct FVertexFactoryIntermediates
{
// Blend Matrix (used for position/tangents)
FBoneMatrix BlendMatrix;
// Unpacked position (includes DeltaPosition if GPUSKIN_MORPH_BLEND)
float3 UnpackedPosition;
// Tangent Basis (includes DeltaTangentZ if GPUSKIN_MORPH_BLEND)
// NOTE(review): not written by the visible kernel other than its zero-initialization - confirm whether it is still needed
half3x3 TangentToLocal;
#if GPUSKIN_APEX_CLOTH
// Weighted sum of the cloth-simulated positions of all valid influences, normalized by the weight sum;
// transformed by ClothToLocal before being blended with the skinned position
float3 SimulatedPosition;
#endif
};
#if GPUSKIN_UNLIMITED_BONE_INFLUENCE
// Bits 0-7 => Size of the bone weight index in bytes / bits 8-15 => Size of the bone weight weights value in bytes
uint InputWeightIndexSize;
// Per-vertex lookup entry, indexed by the input vertex offset and forwarded to ComputeBoneMatrixWithUnlimitedInfluences
Buffer<uint> InputWeightLookupStream;
#endif
// Packed bone indices/weights
Buffer<uint> InputWeightStream;
// Source tangents, two entries per vertex (GPUSKIN_VB_OFFSET_TANGENT_X / GPUSKIN_VB_OFFSET_TANGENT_Z)
Buffer<SNORM float4> TangentInputBuffer;
// Destination tangents, two entries per vertex (GPUSKIN_RWBUFFER_OFFSET_TANGENT_X / GPUSKIN_RWBUFFER_OFFSET_TANGENT_Z)
RWBuffer<TANGENT_RWBUFFER_FORMAT> TangentBufferUAV;
// Source positions, three floats per vertex
Buffer<float> PositionInputBuffer;
// Destination positions, three floats per vertex
RWBuffer<float> PositionBufferUAV;
// Vertex offset added to the thread's vertex index when reading the input buffers
uint InputStreamStart;
// Vertex offset added to the thread's vertex index when writing the output buffers
uint SkinCacheStart;
// Number of vertices to process; threads with an index at or beyond this exit immediately
uint NumVertices;
// Base offset into InputWeightStream for this dispatch
uint InputWeightStart;
// Per-vertex stride of the weight data; divided by 4 before use (presumably bytes -> uint elements, confirm against the CPU code)
uint InputWeightStride;
#if ENABLE_DYNAMIC_MESH_BOUNDS
// Bounds storage: element (DynamicBoundsOffset * 2) holds the minimum, element (DynamicBoundsOffset * 2 + 1) the maximum
RWStructuredBuffer<int4> OutBoundsBufferUAV;
// Slot in OutBoundsBufferUAV to update; a negative value disables the bounds update
int DynamicBoundsOffset;
#endif
/**
 * Gathers all per-vertex inputs required for skinning.
 *
 * @param MorphIndex		Vertex index passed to GetMorphBuffer (GPUSKIN_MORPH_BLEND only)
 * @param ClothIndex		Vertex index passed to GetClothBuffer (GPUSKIN_APEX_CLOTH only)
 * @param ClothIndexOffset	Influence offset passed to GetClothBuffer (GPUSKIN_APEX_CLOTH only)
 * @param VertexIndex		Index of the vertex in TangentInputBuffer (two entries per vertex)
 * @param WeightOffset		Offset of the vertex's data in InputWeightStream
 * @return the unpacked vertex; members present depend on the active permutation flags
 */
FVertexUnpacked UnpackedVertex(uint MorphIndex, uint ClothIndex, uint ClothIndexOffset, uint VertexIndex, uint WeightOffset)
{
	FVertexUnpacked Result;

	// Tangent X and tangent Z are stored as two consecutive entries per vertex
	const uint TangentBase = 2 * VertexIndex;
	Result.TangentX = TangentBias_SkinCache(TangentInputBuffer[TangentBase + GPUSKIN_VB_OFFSET_TANGENT_X]);
	Result.TangentZ = TangentBias_SkinCache(TangentInputBuffer[TangentBase + GPUSKIN_VB_OFFSET_TANGENT_Z]);

#if !GPUSKIN_UNLIMITED_BONE_INFLUENCE
	// The unlimited-influence path reads its weights later, while building the blend matrix
	Result.IndexAndWeights = GetBlendIndicesAndWeights(InputWeightStream, WeightOffset);
#endif

#if GPUSKIN_MORPH_BLEND
	GetMorphBuffer(MorphIndex, Result.DeltaPosition, Result.DeltaTangentZ);
#endif

#if GPUSKIN_APEX_CLOTH
	GetClothBuffer(ClothIndex, ClothIndexOffset, Result.Influences);
#endif

	return Result;
}
/** Returns the unpacked vertex position transformed by the blended bone matrix. */
float3 SkinPosition( FVertexFactoryIntermediates Intermediates )
{
	// mul(Matrix, Vector) ordering is intentional: bone matrices are stored
	// transposed for tighter packing.
	const float4 HomogeneousPosition = float4(Intermediates.UnpackedPosition, 1);
	const float3 SkinnedPosition = mul( Intermediates.BlendMatrix, HomogeneousPosition );
	return SkinnedPosition;
}
// Skin cache compute kernel: each thread skins one vertex (index = DispatchThreadID.x).
// It reads the source position, tangents and bone weights, builds the blended bone matrix,
// optionally applies morph target deltas (GPUSKIN_MORPH_BLEND) or blends in cloth simulation
// results (GPUSKIN_APEX_CLOTH), and writes the skinned position and tangents to
// PositionBufferUAV / TangentBufferUAV. With ENABLE_DYNAMIC_MESH_BOUNDS it also folds the
// skinned position into OutBoundsBufferUAV.
// GroupID and GroupThreadID are not used by the body.
[numthreads(64,1,1)]
void SkinCacheUpdateBatchCS(uint3 GroupID : SV_GroupID,
uint3 DispatchThreadID : SV_DispatchThreadID,
uint3 GroupThreadID : SV_GroupThreadID)
{
uint VertexIndex = DispatchThreadID.x;
// Threads past the end of the vertex range have nothing to do
if (VertexIndex >= NumVertices)
{
return;
}
// Find offset for regular (position/tangent/UV) vertex buffer
uint InputOffset = InputStreamStart + VertexIndex;
uint OutputOffset = SkinCacheStart + VertexIndex;
// Find offset for skin weight buffer
// (InputWeightStride / 4: presumably converts a byte stride into uint elements - confirm against the CPU code)
uint WeightOffset = InputWeightStart + (VertexIndex * (InputWeightStride/4));
// Gather the inputs; the morph index is offset by MorphBufferOffset and the cloth offset is only
// meaningful when the corresponding permutation flag is enabled.
#if GPUSKIN_MORPH_BLEND
#if GPUSKIN_APEX_CLOTH
FVertexUnpacked Unpacked = UnpackedVertex(MorphBufferOffset + VertexIndex, VertexIndex, ClothBufferOffset, InputOffset, WeightOffset);
#else
FVertexUnpacked Unpacked = UnpackedVertex(MorphBufferOffset + VertexIndex, VertexIndex, 0, InputOffset, WeightOffset);
#endif
#else
#if GPUSKIN_APEX_CLOTH
FVertexUnpacked Unpacked = UnpackedVertex(VertexIndex, VertexIndex, ClothBufferOffset, InputOffset, WeightOffset);
#else
FVertexUnpacked Unpacked = UnpackedVertex(VertexIndex, VertexIndex, 0, InputOffset, WeightOffset);
#endif
#endif
// Perform the skinning
FVertexFactoryIntermediates Intermediates = (FVertexFactoryIntermediates)0;
// The first branch is a disabled debugging aid that forces an identity blend matrix
#if 0
// Test no blend mtx
Intermediates.BlendMatrix[0] = float4(1,0,0,0);
Intermediates.BlendMatrix[1] = float4(0,1,0,0);
Intermediates.BlendMatrix[2] = float4(0,0,1,0);
#elif GPUSKIN_UNLIMITED_BONE_INFLUENCE
Intermediates.BlendMatrix = ComputeBoneMatrixWithUnlimitedInfluences(BoneMatrices, InputWeightStream, InputWeightIndexSize, InputWeightLookupStream[InputOffset]);
#else
Intermediates.BlendMatrix = ComputeBoneMatrixWithLimitedInfluences(BoneMatrices, Unpacked.IndexAndWeights);
#endif
// Positions are stored as three consecutive floats per vertex
Intermediates.UnpackedPosition.x = PositionInputBuffer[InputOffset * 3 + 0];
Intermediates.UnpackedPosition.y = PositionInputBuffer[InputOffset * 3 + 1];
Intermediates.UnpackedPosition.z = PositionInputBuffer[InputOffset * 3 + 2];
half3 LocalTangentX = Unpacked.TangentX.xyz;
half3 LocalTangentZ = Unpacked.TangentZ.xyz;
#if GPUSKIN_MORPH_BLEND
{
Intermediates.UnpackedPosition += Unpacked.DeltaPosition;
// calc new normal by offsetting it with the delta
LocalTangentZ = normalize( LocalTangentZ + Unpacked.DeltaTangentZ);
// derive the new tangent by orthonormalizing the new normal against
// the base tangent vector (assuming these are normalized)
LocalTangentX = normalize( LocalTangentX - (dot(LocalTangentX, LocalTangentZ) * LocalTangentZ) );
}
#else
// NOTE(review): the cloth locals below (ClothTangentX, ClothTangentZ, ClothWeight) are declared only when
// GPUSKIN_MORPH_BLEND is off, yet they are referenced further down under a plain "#if GPUSKIN_APEX_CLOTH".
// Enabling both flags at once would therefore fail to compile; presumably the CPU side never requests
// that combination - confirm.
#if GPUSKIN_APEX_CLOTH
float3 ClothTangentX = float3(0,0,0);
float3 ClothTangentZ = float3(0,0,0);
Intermediates.SimulatedPosition = float3(0,0,0);
float3 NormalPosition = float3(0,0,0);
float3 TangentPosition = float3(0,0,0);
int NumInfluences = 0;
float SumWeights = 0.0;
float ClothWeight = 0.0;
const float3 WorldScaleAbs = abs(WorldScale); // World scale can't be used mirrored to calculate the clothing positions and tangents since the cloth normals are then reversed
// Accumulate the weighted contribution of every valid influence
UNROLL_N(NUM_INFLUENCES_PER_VERTEX)
for (int i = 0; i < ClothNumInfluencesPerVertex; ++i )
{
const FVertexTriangleInfluence Influence = Unpacked.Influences[i];
// Influences whose w index is 0xFFFF or above are skipped
if( Influence.SourceMeshVertIndices.w < 0xFFFF )
{
++NumInfluences;
// Each simulated vertex occupies three float2 entries: (pos.xy), (pos.z, normal.x), (normal.yz).
// A/B/C are the triangle's vertex positions, NA/NB/NC the matching normals.
float3 A = float3(ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.x * 3], ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.x * 3 + 1].x);
float3 B = float3(ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.y * 3], ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.y * 3 + 1].x);
float3 C = float3(ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.z * 3], ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.z * 3 + 1].x);
float3 NA = float3(ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.x * 3 + 1].y, ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.x * 3 + 2]);
float3 NB = float3(ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.y * 3 + 1].y, ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.y * 3 + 2]);
float3 NC = float3(ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.z * 3 + 1].y, ClothPositionsAndNormalsBuffer[Influence.SourceMeshVertIndices.z * 3 + 2]);
// The w index doubles as a 16-bit blend factor: 0 gives the full ClothBlendWeight, values near 65535 give almost none
ClothWeight += ClothBlendWeight * (1.0f - (Influence.SourceMeshVertIndices.w / 65535.0f));
float Weight = 1.0f;
if ( ClothNumInfluencesPerVertex > 1 )
{
// Weight is packed in the last coordinate
Weight = Influence.Weight;
SumWeights += Weight;
}
else
{
// Single influence, weight is 1.0
Weight = 1.0f;
SumWeights = 1.0f;
}
// Points offset from the triangle that are later used to derive the cloth normal and tangent directions
NormalPosition += Weight * (Influence.NormalBaryCoordsAndDist.x * (A + NA * Influence.NormalBaryCoordsAndDist.w * WorldScaleAbs.x)
+ Influence.NormalBaryCoordsAndDist.y * (B + NB * Influence.NormalBaryCoordsAndDist.w * WorldScaleAbs.y)
+ Influence.NormalBaryCoordsAndDist.z * (C + NC * Influence.NormalBaryCoordsAndDist.w * WorldScaleAbs.z));
TangentPosition += Weight * (Influence.TangentBaryCoordsAndDist.x * (A + NA * Influence.TangentBaryCoordsAndDist.w * WorldScaleAbs.x)
+ Influence.TangentBaryCoordsAndDist.y * (B + NB * Influence.TangentBaryCoordsAndDist.w * WorldScaleAbs.y)
+ Influence.TangentBaryCoordsAndDist.z * (C + NC * Influence.TangentBaryCoordsAndDist.w * WorldScaleAbs.z));
// For the position only two barycentric weights are stored; the third is derived so that they sum to 1
float3 TriangleBary = float3(Influence.PositionBaryCoordsAndDist.x,
Influence.PositionBaryCoordsAndDist.y,
1.0f - Influence.PositionBaryCoordsAndDist.x - Influence.PositionBaryCoordsAndDist.y);
float3 SimPosition = TriangleBary.x*(A+NA*Influence.PositionBaryCoordsAndDist.w * WorldScaleAbs.x)
+ TriangleBary.y*(B+NB*Influence.PositionBaryCoordsAndDist.w * WorldScaleAbs.y)
+ TriangleBary.z*(C+NC*Influence.PositionBaryCoordsAndDist.w * WorldScaleAbs.z);
Intermediates.SimulatedPosition += Weight * SimPosition;
}
}
// Normalize the accumulated values by the weight sum; the threshold guards against dividing by a near-zero sum
if ( NumInfluences > 0 && SumWeights > 1e-4f )
{
float InvWeight = 1.0f / SumWeights;
Intermediates.SimulatedPosition *= InvWeight;
TangentPosition *= InvWeight;
NormalPosition *= InvWeight;
// Directions from the simulated position towards the offset points give the cloth tangent and normal
ClothTangentX = normalize(TangentPosition - Intermediates.SimulatedPosition);
ClothTangentZ = normalize(NormalPosition - Intermediates.SimulatedPosition);
// Simulated cloth data are in cloth space so need to change into local space
ClothTangentX = mul(ClothTangentX, (half3x3)ClothToLocal);
ClothTangentZ = mul(ClothTangentZ, (half3x3)ClothToLocal);
}
else
{
Intermediates.SimulatedPosition = float3(0, 0, 0);
}
// With multiple influences the accumulated blend weight is averaged over the configured influence count
// (not over the number of valid influences)
if (ClothNumInfluencesPerVertex > 1)
{
ClothWeight /= ClothNumInfluencesPerVertex;
}
#endif // GPUSKIN_APEX_CLOTH
#endif // GPUSKIN_MORPH_BLEND
float3 SPos = SkinPosition(Intermediates);
#if GPUSKIN_APEX_CLOTH
// Transform simulated position from cloth space into local space and blend with skinned position
float4 TransformedSimulatedPos = mul(float4(Intermediates.SimulatedPosition.xyz, 1), ClothToLocal);
SPos = lerp(SPos.xyz, TransformedSimulatedPos.xyz, ClothWeight);
#endif
// Tangents are transformed by the 3x3 part of the blend matrix and renormalized
half3 TangentX = normalize(mul((half3x3)Intermediates.BlendMatrix, LocalTangentX));
half3 TangentZ = normalize(mul((half3x3)Intermediates.BlendMatrix, LocalTangentZ));
#if GPUSKIN_APEX_CLOTH
// Linear blend between skinned and cloth tangents; the result is not renormalized here
TangentX = ClothTangentX * ClothWeight + TangentX * (1.0f - ClothWeight);
TangentZ = ClothTangentZ * ClothWeight + TangentZ * (1.0f - ClothWeight);
#endif
PositionBufferUAV[OutputOffset * 3 + 0] = SPos.x;
PositionBufferUAV[OutputOffset * 3 + 1] = SPos.y;
PositionBufferUAV[OutputOffset * 3 + 2] = SPos.z;
// If wave vote is not supported this just hits atomic operations directly, which is slow.
// NOTE(review): as written, the bounds update below is compiled only when COMPILER_SUPPORTS_WAVE_VOTE is set;
// the sentence above presumably describes the WaveInterlocked* helpers themselves - confirm.
#if ENABLE_DYNAMIC_MESH_BOUNDS && COMPILER_SUPPORTS_WAVE_VOTE
if (DynamicBoundsOffset >= 0)
{
// Minimum is rounded down and maximum rounded up so the integer box always contains the position
WaveInterlockedMin(OutBoundsBufferUAV[DynamicBoundsOffset * 2].x, int(floor(SPos.x)));
WaveInterlockedMin(OutBoundsBufferUAV[DynamicBoundsOffset * 2].y, int(floor(SPos.y)));
WaveInterlockedMin(OutBoundsBufferUAV[DynamicBoundsOffset * 2].z, int(floor(SPos.z)));
WaveInterlockedMax(OutBoundsBufferUAV[DynamicBoundsOffset * 2 + 1].x, int(ceil(SPos.x)));
WaveInterlockedMax(OutBoundsBufferUAV[DynamicBoundsOffset * 2 + 1].y, int(ceil(SPos.y)));
WaveInterlockedMax(OutBoundsBufferUAV[DynamicBoundsOffset * 2 + 1].z, int(ceil(SPos.z)));
}
#endif
// The w components of the input tangents are passed through unchanged
TangentBufferUAV[2 * OutputOffset + GPUSKIN_RWBUFFER_OFFSET_TANGENT_X] = TangentUnbias_SkinCache(half4(TangentX, Unpacked.TangentX.w));
TangentBufferUAV[2 * OutputOffset + GPUSKIN_RWBUFFER_OFFSET_TANGENT_Z] = TangentUnbias_SkinCache(half4(TangentZ, Unpacked.TangentZ.w));
#if GPUSKIN_DEBUG_PASSTHROUGH
// Passthrough debug code: overwrite the skinned outputs with the raw, unmodified inputs
PositionBufferUAV[OutputOffset * 3 + 0] = PositionInputBuffer[InputOffset * 3 + 0];
PositionBufferUAV[OutputOffset * 3 + 1] = PositionInputBuffer[InputOffset * 3 + 1];
PositionBufferUAV[OutputOffset * 3 + 2] = PositionInputBuffer[InputOffset * 3 + 2];
TangentBufferUAV[2 * OutputOffset + GPUSKIN_RWBUFFER_OFFSET_TANGENT_X] = TangentInputBuffer[2 * InputOffset + GPUSKIN_VB_OFFSET_TANGENT_X];
TangentBufferUAV[2 * OutputOffset + GPUSKIN_RWBUFFER_OFFSET_TANGENT_Z] = TangentInputBuffer[2 * InputOffset + GPUSKIN_VB_OFFSET_TANGENT_Z];
#endif
}