// Copyright Epic Games, Inc. All Rights Reserved.

/*==============================================================================
	ParticleBoundsShader.usf: Shader to compute the bounds of GPU particles.
==============================================================================*/

/*------------------------------------------------------------------------------
	Compile time parameters:
		THREAD_COUNT - The number of threads to launch per workgroup.
------------------------------------------------------------------------------*/

#include "Common.ush"
#include "ParticleSimulationCommon.ush"

/** The number of threads with which to perform raking. */
#define RAKING_THREADS 32
/** Padding required for raking operations (half of RAKING_THREADS). */
#define RAKING_PADDING (RAKING_THREADS >> 1)
/** Largest finite 32-bit float; used as the identity value for min(). */
#define MAX_FLT (+3.402823466e+38f)
/** Most negative finite 32-bit float; used as the identity value for max(). */
#define MIN_FLT (-3.402823466e+38f)

/**
 * Input buffer containing particle indices.
 * The shader scales .xy by TilePageScale and adds GetTilePageOffset(.z, TilePageScale)
 * to obtain the coordinate at which the particle is looked up in PositionTexture.
 */
Buffer<float4> InParticleIndices;
/** Scale applied to InParticleIndices.xy; also forwarded to GetTilePageOffset(). */
float2 TilePageScale;

/** Texture containing particle positions. */
Texture2D PositionTexture;

/** Output bounds buffer: two entries per workgroup, MinXYZ then MaxXYZ. */
RWBuffer<float4> OutBounds;

/**
 * Local storage for bounds to be reduced by this workgroup.
 * Holds THREAD_COUNT minima followed by THREAD_COUNT maxima, and is then
 * reused as scratch space for the raking reduction.
 */
groupshared float3 LocalBounds[2 * THREAD_COUNT];

/** Particle texture dimensions, in texels. */
int TextureSizeX;
int TextureSizeY;

/**
 * Compute shader entry point: reduces the positions of a range of particles to
 * one axis-aligned min/max pair per workgroup.
 *
 * Work distribution: the input is processed in chunks of THREAD_COUNT entries.
 * The first ParticleBounds.ExtraChunkCount groups handle ChunksPerGroup + 1
 * chunks each, every other group handles ChunksPerGroup chunks.
 *
 * Reduction stages:
 *   1. Every thread accumulates private bounds over its entries.
 *   2. The private bounds are written to LocalBounds (minima, then maxima).
 *   3. The first RAKING_THREADS threads fold the minima and the next
 *      RAKING_THREADS threads fold the maxima down to RAKING_THREADS values.
 *   4. Those values are raked down to a single min and a single max.
 *
 * Output: OutBounds[2 * GroupId] receives the minimum and
 * OutBounds[2 * GroupId + 1] the maximum, both with w = 0.
 *
 * The code expects THREAD_COUNT to be a multiple of RAKING_THREADS and at
 * least 2 * RAKING_THREADS; otherwise stage 3 skips entries or the branch that
 * folds the maxima never executes for all raking lanes.
 *
 * @param GroupThreadId  Thread index within the workgroup (only .x is used).
 * @param GroupIdXYZ     Workgroup index (only .x is used).
 */
[numthreads(THREAD_COUNT,1,1)]
void ComputeParticleBounds(
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 GroupIdXYZ : SV_GroupID )
{
	// Thread-private particle bounds: [0] = running minimum, [1] = running maximum.
	float3 PrivateBounds[2];

	// Determine group and thread IDs.
	const uint ThreadId = GroupThreadId.x;
	const uint GroupId = GroupIdXYZ.x;

	// Raking layout inside LocalBounds:
	//   min segment: [0, RAKING_THREADS)                       followed by RAKING_PADDING entries
	//   max segment: [RAKING_THREADS + RAKING_PADDING, ... )   followed by RAKING_PADDING entries
	const uint RakingThreadId = ThreadId & (RAKING_THREADS - 1);
	const uint RakingThreadOffset = (ThreadId < RAKING_THREADS) ? 0 : RAKING_THREADS + RAKING_PADDING;
	const uint RakingIndex = RakingThreadId + RakingThreadOffset;

	// Setup chunk indices. Groups below ExtraChunkCount take one extra chunk.
	uint FirstChunkIndex;
	uint ChunkCount;
	if ( GroupId < ParticleBounds.ExtraChunkCount )
	{
		FirstChunkIndex = GroupId * (ParticleBounds.ChunksPerGroup + 1);
		ChunkCount = ParticleBounds.ChunksPerGroup + 1;
	}
	else
	{
		FirstChunkIndex = GroupId * ParticleBounds.ChunksPerGroup + ParticleBounds.ExtraChunkCount;
		ChunkCount = ParticleBounds.ChunksPerGroup;
	}

	// Initialize bounds to the identity values of min() and max().
	PrivateBounds[0] = float3( MAX_FLT, MAX_FLT, MAX_FLT );
	PrivateBounds[1] = float3( MIN_FLT, MIN_FLT, MIN_FLT );

	// Buffer offsets.
	uint InputIndex = FirstChunkIndex * THREAD_COUNT + ThreadId;
	const uint OutputIndex = 2 * GroupId;

	for ( uint ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex )
	{
		// The last chunk may be partially filled.
		if ( InputIndex < ParticleBounds.ParticleCount )
		{
			// Read in the particle index and its position.
			float3 ParticleIndices = InParticleIndices[InputIndex].xyz;
			const float2 ParticleIndex = ParticleIndices.xy * TilePageScale.xy + GetTilePageOffset(ParticleIndices.z, TilePageScale);
			int3 ParticleTexel = int3(ParticleIndex.xy * int2(TextureSizeX, TextureSizeY), 0);
			const float4 ParticlePosition = PositionTexture.Load(ParticleTexel);

			// NOTE(review): presumably .w is the particle's relative time and values
			// above 1 mark particles that should not contribute - confirm against the simulation shader.
			if ( ParticlePosition.w <= 1.0f )
			{
				PrivateBounds[0] = min( PrivateBounds[0], ParticlePosition.xyz );
				PrivateBounds[1] = max( PrivateBounds[1], ParticlePosition.xyz );
			}
		}
		InputIndex += THREAD_COUNT;
	}

	// Store bounds in shared memory: minima first, maxima THREAD_COUNT entries later.
	LocalBounds[ThreadId] = PrivateBounds[0];
	LocalBounds[ThreadId + THREAD_COUNT] = PrivateBounds[1];

	// Acquire LocalBounds.
	GroupMemoryBarrierWithGroupSync();

	// Perform thread-local reductions: each raking thread folds one strided
	// column of THREAD_COUNT / RAKING_THREADS entries.
	if ( ThreadId < RAKING_THREADS )
	{
		PrivateBounds[0] = LocalBounds[RakingThreadId];
		for ( uint Row = 1; Row < THREAD_COUNT / RAKING_THREADS; ++Row )
		{
			PrivateBounds[0] = min( PrivateBounds[0], LocalBounds[Row * RAKING_THREADS + RakingThreadId] );
		}
	}
	else if ( ThreadId < RAKING_THREADS * 2 )
	{
		PrivateBounds[1] = LocalBounds[RakingThreadId + THREAD_COUNT];
		for ( uint Row = 1; Row < THREAD_COUNT / RAKING_THREADS; ++Row )
		{
			PrivateBounds[1] = max( PrivateBounds[1], LocalBounds[Row * RAKING_THREADS + RakingThreadId + THREAD_COUNT] );
		}
	}

	// Release LocalBounds: all reads above must finish before it is overwritten.
	GroupMemoryBarrierWithGroupSync();

	// Store reductions into the two raking segments.
	if ( ThreadId < RAKING_THREADS )
	{
		LocalBounds[RakingThreadId] = PrivateBounds[0];
	}
	else if ( ThreadId < RAKING_THREADS * 2 )
	{
		LocalBounds[RakingThreadId + RAKING_THREADS + RAKING_PADDING] = PrivateBounds[1];
	}

	// Clear raking padding with the identity value of the respective reduction.
	if ( ThreadId < RAKING_PADDING )
	{
		// Padding directly after the min segment.
		LocalBounds[ThreadId + RAKING_THREADS] = MAX_FLT;
	}
	else if ( ThreadId < RAKING_PADDING * 2 )
	{
		// Padding directly after the max segment. ThreadId starts at RAKING_PADDING
		// in this branch, so rebase it to zero before adding the padding's start
		// offset; otherwise the write lands RAKING_PADDING entries too far and the
		// entries actually read by the max reduction keep stale data.
		LocalBounds[(ThreadId - RAKING_PADDING) + RAKING_THREADS * 2 + RAKING_PADDING] = MIN_FLT;
	}

	// Acquire LocalBounds.
	GroupMemoryBarrierWithGroupSync();

	// Intra-warp reductions.
	// NOTE(review): there is no barrier between the raking steps below; this
	// appears to rely on the RAKING_THREADS threads of a segment executing in
	// lockstep - confirm for the targeted hardware/compilers.
	if ( ThreadId < RAKING_THREADS )
	{
		PrivateBounds[0] = LocalBounds[RakingIndex];
		[unroll]
		for ( uint RakingOffset = 1; RakingOffset < RAKING_THREADS; RakingOffset <<= 1 )
		{
			PrivateBounds[0] = min( PrivateBounds[0], LocalBounds[RakingIndex + RakingOffset] );
			LocalBounds[RakingIndex] = PrivateBounds[0];
		}
	}
	else if ( ThreadId < RAKING_THREADS * 2 )
	{
		PrivateBounds[1] = LocalBounds[RakingIndex];
		[unroll]
		for ( uint RakingOffset = 1; RakingOffset < RAKING_THREADS; RakingOffset <<= 1 )
		{
			PrivateBounds[1] = max( PrivateBounds[1], LocalBounds[RakingIndex + RakingOffset] );
			LocalBounds[RakingIndex] = PrivateBounds[1];
		}
	}

	// Acquire LocalBounds.
	GroupMemoryBarrierWithGroupSync();

	// Store bounds for this work group: thread 0 writes the minimum (first entry
	// of the min segment), thread 1 the maximum (first entry of the max segment).
	if ( ThreadId < 2 )
	{
		OutBounds[OutputIndex + ThreadId] = float4(LocalBounds[ThreadId * (RAKING_THREADS + RAKING_PADDING)],0);
	}
}