562 lines
14 KiB
HLSL
562 lines
14 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
	WaveBroadcastIntrinsics.ush: Exposes intrinsics to perform broadcasting
	between lanes of the same wave.
=============================================================================*/
|
|
|
|
#pragma once
|
|
|
|
#include "Platform.ush"
|
|
|
|
|
|
// PLATFORM_SUPPORTS_WAVE_READ_AT is 1 when the compiler reports wave permute support,
// or when SM6-level wave operations are available; 0 otherwise.
#if COMPILER_SUPPORTS_WAVE_PERMUTE || (FEATURE_LEVEL >= FEATURE_LEVEL_SM6) || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
	#define PLATFORM_SUPPORTS_WAVE_READ_AT 1
#else
	#define PLATFORM_SUPPORTS_WAVE_READ_AT 0
#endif
|
|
|
|
// PLATFORM_SUPPORTS_WAVE_BROADCAST is 1 when the compiler reports GCN swizzle support,
// or when SM6-level wave operations are available; 0 otherwise.
#if COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN || (FEATURE_LEVEL >= FEATURE_LEVEL_SM6) || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
	#define PLATFORM_SUPPORTS_WAVE_BROADCAST 1
#else
	#define PLATFORM_SUPPORTS_WAVE_BROADCAST 0
#endif
|
|
|
|
// PLATFORM_SUPPORTS_WAVE_ROTATE is 1 when the compiler defines and enables RDNA swizzle
// support, or when SM6-level wave operations are available; 0 otherwise.
#if (defined(COMPILER_SUPPORTS_WAVE_SWIZZLE_RDNA) && COMPILER_SUPPORTS_WAVE_SWIZZLE_RDNA) || (FEATURE_LEVEL >= FEATURE_LEVEL_SM6) || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
	#define PLATFORM_SUPPORTS_WAVE_ROTATE 1
#else
	#define PLATFORM_SUPPORTS_WAVE_ROTATE 0
#endif
|
|
|
|
|
|
/** Operation codes understood by WaveBroadcast() (stored in FWaveBroadcastSettings::Operation). */
#define WAVE_BROADCAST_NOP           0	// Value is returned unchanged.
#define WAVE_BROADCAST_READ_LANE_AT  1	// Read the value held by an explicit source lane.
#define WAVE_BROADCAST_READ_LDS_AT   2	// Exchange values through the groupshared array.
#define WAVE_BROADCAST_GCN_SWIZZLE   3	// Source lane derived from and / or / xor masks.
#define WAVE_BROADCAST_ROTATE        4	// Source lane derived from a rotation inside a lane group.
|
|
|
|
/** Compile-time description of the broadcast that WaveBroadcast() should perform. */
struct FWaveBroadcastSettings
{
	// One of the WAVE_BROADCAST_* operation codes.
	uint Operation;

	// Index to read from (READ_LANE_AT and READ_LDS_AT operations).
	uint SourceLaneIndex;

	// Index of the calling thread; the slot it writes for the READ_LDS_AT operation.
	uint GroupThreadIndex;

	// GCN_SWIZZLE operation: masks combined with the lane index as ((lane & And) | Or) ^ Xor.
	uint SwizzleAnd;
	uint SwizzleOr;
	uint SwizzleXor;

	// ROTATE operation: signed lane offset, and the mask of lane index bits that are
	// taken from the destination lane unchanged.
	int Rotate;
	uint RotateFixMask;
};
|
|
|
|
/** Groupshared array used by the WAVE_BROADCAST_READ_LDS_AT operation.
 *  Only declared when the including shader defines WAVE_BROADCAST_GROUPSIZE.
 */
#ifdef WAVE_BROADCAST_GROUPSIZE
groupshared uint SharedBroadcastLDS[WAVE_BROADCAST_GROUPSIZE];
#endif
|
|
|
|
/** Returns the index of the source lane that a destination lane reads from.
 *
 * @param Settings       Description of the broadcast.
 * @param DestLaneIndex  Index of the lane receiving the value.
 * @return The source lane index. DestLaneIndex itself is returned for
 *         WAVE_BROADCAST_NOP and for any operation code not handled below.
 */
CALL_SITE_DEBUGLOC
uint GetWaveBroadcastSourceLaneIndex(const FWaveBroadcastSettings Settings, uint DestLaneIndex)
{
	// Both explicit-read operations carry their source index directly.
	if (Settings.Operation == WAVE_BROADCAST_READ_LANE_AT || Settings.Operation == WAVE_BROADCAST_READ_LDS_AT)
	{
		return Settings.SourceLaneIndex;
	}

	if (Settings.Operation == WAVE_BROADCAST_GCN_SWIZZLE)
	{
		// Apply and -> or -> xor to the lane index; the result is reduced modulo 32.
		return (((DestLaneIndex & Settings.SwizzleAnd) | Settings.SwizzleOr) ^ Settings.SwizzleXor) % 32;
	}

	if (Settings.Operation == WAVE_BROADCAST_ROTATE)
	{
		// Bits selected by RotateFixMask come from the destination lane untouched;
		// the remaining bits come from the offset lane index.
		return (Settings.RotateFixMask & DestLaneIndex) | ((~Settings.RotateFixMask) & uint(DestLaneIndex + Settings.Rotate));
	}

	return DestLaneIndex;
}
|
|
|
|
/** Generic broadcast of a 32-bit unsigned value.
 *
 * Dispatches on Settings.Operation. Each operation is only compiled in when its
 * guard is set (PLATFORM_SUPPORTS_WAVE_READ_AT, WAVE_BROADCAST_GROUPSIZE,
 * PLATFORM_SUPPORTS_WAVE_BROADCAST, PLATFORM_SUPPORTS_WAVE_ROTATE). For
 * WAVE_BROADCAST_NOP, or an operation that is not compiled in, x is returned unchanged.
 *
 * @param Settings  Description of the broadcast.
 * @param x         Value contributed by the calling lane.
 * @return The value read from the source lane selected by Settings.
 */
CALL_SITE_DEBUGLOC
uint WaveBroadcast(const FWaveBroadcastSettings Settings, uint x)
{
#if PLATFORM_SUPPORTS_WAVE_READ_AT
	if (Settings.Operation == WAVE_BROADCAST_READ_LANE_AT)
	{
	#if FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
		return WaveReadLaneAt(x, Settings.SourceLaneIndex);
	#else
		#error Unimplemented
	#endif
	}
#endif

#if defined(WAVE_BROADCAST_GROUPSIZE)
	if (Settings.Operation == WAVE_BROADCAST_READ_LDS_AT)
	{
		// Barrier, publish this thread's value, barrier again, then read the requested slot.
		GroupMemoryBarrierWithGroupSync();
		SharedBroadcastLDS[Settings.GroupThreadIndex] = x;
		GroupMemoryBarrierWithGroupSync();
		return SharedBroadcastLDS[Settings.SourceLaneIndex];
	}
#endif

#if PLATFORM_SUPPORTS_WAVE_BROADCAST
	if (Settings.Operation == WAVE_BROADCAST_GCN_SWIZZLE)
	{
	#if COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN
		return WaveLaneSwizzleGCN(x, Settings.SwizzleAnd, Settings.SwizzleOr, Settings.SwizzleXor);
	#elif FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
		// No native swizzle: compute the source lane and read it directly.
		return WaveReadLaneAt(x, GetWaveBroadcastSourceLaneIndex(Settings, WaveGetLaneIndex()));
	#else
		#error Unimplemented
	#endif
	}
#endif

#if PLATFORM_SUPPORTS_WAVE_ROTATE
	if (Settings.Operation == WAVE_BROADCAST_ROTATE)
	{
	#if defined(COMPILER_SUPPORTS_WAVE_SWIZZLE_RDNA) && COMPILER_SUPPORTS_WAVE_SWIZZLE_RDNA
		return WaveLaneRotateSwizzleRDNA(x, /* rotate_amount = */ Settings.Rotate, Settings.RotateFixMask & 0x1F);
	#elif FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
		// No native rotate: compute the source lane and read it directly.
		return WaveReadLaneAt(x, GetWaveBroadcastSourceLaneIndex(Settings, WaveGetLaneIndex()));
	#else
		#error Unimplemented
	#endif
	}
#endif

	return x;
}
|
|
|
|
/** Broadcasts a uint2 one component at a time through the scalar uint overload. */
CALL_SITE_DEBUGLOC
uint2 WaveBroadcast(const FWaveBroadcastSettings Settings, uint2 v)
{
	uint2 Result;
	Result.x = WaveBroadcast(Settings, v.x);
	Result.y = WaveBroadcast(Settings, v.y);
	return Result;
}
|
|
|
|
/** Broadcasts a uint3 one component at a time through the scalar uint overload. */
CALL_SITE_DEBUGLOC
uint3 WaveBroadcast(const FWaveBroadcastSettings Settings, uint3 v)
{
	uint3 Result;
	Result.x = WaveBroadcast(Settings, v.x);
	Result.y = WaveBroadcast(Settings, v.y);
	Result.z = WaveBroadcast(Settings, v.z);
	return Result;
}
|
|
|
|
/** Broadcasts a uint4 one component at a time through the scalar uint overload. */
CALL_SITE_DEBUGLOC
uint4 WaveBroadcast(const FWaveBroadcastSettings Settings, uint4 v)
{
	uint4 Result;
	Result.x = WaveBroadcast(Settings, v.x);
	Result.y = WaveBroadcast(Settings, v.y);
	Result.z = WaveBroadcast(Settings, v.z);
	Result.w = WaveBroadcast(Settings, v.w);
	return Result;
}
|
|
|
|
|
|
/** Broadcasts an int by reinterpreting its bits as uint and using the uint overload. */
CALL_SITE_DEBUGLOC
int WaveBroadcast(const FWaveBroadcastSettings Settings, int x)
{
	const uint SentBits = asuint(x);
	const uint ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint(ReceivedBits);
}
|
|
|
|
/** Broadcasts an int2 by reinterpreting its bits as uint2 and using the uint2 overload. */
CALL_SITE_DEBUGLOC
int2 WaveBroadcast(const FWaveBroadcastSettings Settings, int2 v)
{
	const uint2 SentBits = asuint(v);
	const uint2 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint(ReceivedBits);
}
|
|
|
|
/** Broadcasts an int3 by reinterpreting its bits as uint3 and using the uint3 overload. */
CALL_SITE_DEBUGLOC
int3 WaveBroadcast(const FWaveBroadcastSettings Settings, int3 v)
{
	const uint3 SentBits = asuint(v);
	const uint3 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint(ReceivedBits);
}
|
|
|
|
/** Broadcasts an int4 by reinterpreting its bits as uint4 and using the uint4 overload. */
CALL_SITE_DEBUGLOC
int4 WaveBroadcast(const FWaveBroadcastSettings Settings, int4 v)
{
	const uint4 SentBits = asuint(v);
	const uint4 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint(ReceivedBits);
}
|
|
|
|
|
|
/** Broadcasts a float by reinterpreting its bits as uint and using the uint overload. */
CALL_SITE_DEBUGLOC
float WaveBroadcast(const FWaveBroadcastSettings Settings, float x)
{
	const uint SentBits = asuint(x);
	const uint ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat(ReceivedBits);
}
|
|
|
|
/** Broadcasts a float2 by reinterpreting its bits as uint2 and using the uint2 overload. */
CALL_SITE_DEBUGLOC
float2 WaveBroadcast(const FWaveBroadcastSettings Settings, float2 v)
{
	const uint2 SentBits = asuint(v);
	const uint2 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat(ReceivedBits);
}
|
|
|
|
/** Broadcasts a float3 by reinterpreting its bits as uint3 and using the uint3 overload. */
CALL_SITE_DEBUGLOC
float3 WaveBroadcast(const FWaveBroadcastSettings Settings, float3 v)
{
	const uint3 SentBits = asuint(v);
	const uint3 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat(ReceivedBits);
}
|
|
|
|
/** Broadcasts a float4 by reinterpreting its bits as uint4 and using the uint4 overload. */
CALL_SITE_DEBUGLOC
float4 WaveBroadcast(const FWaveBroadcastSettings Settings, float4 v)
{
	const uint4 SentBits = asuint(v);
	const uint4 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat(ReceivedBits);
}
|
|
|
|
|
|
/** Broadcasts a bool by converting it to uint, using the uint overload, and converting back. */
CALL_SITE_DEBUGLOC
bool WaveBroadcast(const FWaveBroadcastSettings Settings, bool x)
{
	const uint SentValue = uint(x);
	const uint ReceivedValue = WaveBroadcast(Settings, SentValue);
	return bool(ReceivedValue);
}
|
|
|
|
/** Broadcasts a bool2 one component at a time through the scalar bool overload. */
CALL_SITE_DEBUGLOC
bool2 WaveBroadcast(const FWaveBroadcastSettings Settings, bool2 v)
{
	bool2 Result;
	Result.x = WaveBroadcast(Settings, v.x);
	Result.y = WaveBroadcast(Settings, v.y);
	return Result;
}
|
|
|
|
/** Broadcasts a bool3 one component at a time through the scalar bool overload. */
CALL_SITE_DEBUGLOC
bool3 WaveBroadcast(const FWaveBroadcastSettings Settings, bool3 v)
{
	bool3 Result;
	Result.x = WaveBroadcast(Settings, v.x);
	Result.y = WaveBroadcast(Settings, v.y);
	Result.z = WaveBroadcast(Settings, v.z);
	return Result;
}
|
|
|
|
/** Broadcasts a bool4 one component at a time through the scalar bool overload. */
CALL_SITE_DEBUGLOC
bool4 WaveBroadcast(const FWaveBroadcastSettings Settings, bool4 v)
{
	bool4 Result;
	Result.x = WaveBroadcast(Settings, v.x);
	Result.y = WaveBroadcast(Settings, v.y);
	Result.z = WaveBroadcast(Settings, v.z);
	Result.w = WaveBroadcast(Settings, v.w);
	return Result;
}
|
|
|
|
|
|
#if PLATFORM_SUPPORTS_REAL_TYPES
|
|
|
|
/** Broadcasts a uint16_t by widening it to uint, using the uint overload, and narrowing back. */
CALL_SITE_DEBUGLOC
uint16_t WaveBroadcast(const FWaveBroadcastSettings Settings, uint16_t x)
{
	const uint Widened = uint(x);
	const uint Received = WaveBroadcast(Settings, Widened);
	return uint16_t(Received);
}
|
|
|
|
/** Broadcasts a uint16_t2.
 *
 * When COMPILER_SUPPORT_UINT16_BITCAST is set, the vector is bit-cast to one uint and
 * sent with a single broadcast. Otherwise both components are packed by hand into one
 * uint (x in the low 16 bits, y in the high 16 bits). The per-component arm is kept
 * but is disabled by the `#elif 1` above it.
 */
CALL_SITE_DEBUGLOC
uint16_t2 WaveBroadcast(const FWaveBroadcastSettings Settings, uint16_t2 v)
{
#if COMPILER_SUPPORT_UINT16_BITCAST
	uint Packed = bit_cast_uint(v);
	uint Received = WaveBroadcast(Settings, Packed);
	return bit_cast_uint16_t2(Received);
#elif 1
	uint Packed = uint(v.x) | (uint(v.y) << 16u);
	uint Received = WaveBroadcast(Settings, Packed);
	return uint16_t2(Received, Received >> 16u);
#else
	return uint16_t2(
		WaveBroadcast(Settings, v.x),
		WaveBroadcast(Settings, v.y));
#endif
}
|
|
|
|
/** Broadcasts a uint16_t3: .xy goes through the uint16_t2 overload, .z through the scalar one. */
CALL_SITE_DEBUGLOC
uint16_t3 WaveBroadcast(const FWaveBroadcastSettings Settings, uint16_t3 v)
{
	uint16_t3 Result;
	Result.xy = WaveBroadcast(Settings, v.xy);
	Result.z = WaveBroadcast(Settings, v.z);
	return Result;
}
|
|
|
|
/** Broadcasts a uint16_t4 as two uint16_t2 halves (.xy and .zw). */
CALL_SITE_DEBUGLOC
uint16_t4 WaveBroadcast(const FWaveBroadcastSettings Settings, uint16_t4 v)
{
	uint16_t4 Result;
	Result.xy = WaveBroadcast(Settings, v.xy);
	Result.zw = WaveBroadcast(Settings, v.zw);
	return Result;
}
|
|
|
|
|
|
/** Broadcasts an int16_t by reinterpreting its bits as uint16_t and using that overload. */
CALL_SITE_DEBUGLOC
int16_t WaveBroadcast(const FWaveBroadcastSettings Settings, int16_t x)
{
	const uint16_t SentBits = asuint16(x);
	const uint16_t ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint16(ReceivedBits);
}
|
|
|
|
/** Broadcasts an int16_t2 by reinterpreting its bits as uint16_t2 and using that overload. */
CALL_SITE_DEBUGLOC
int16_t2 WaveBroadcast(const FWaveBroadcastSettings Settings, int16_t2 v)
{
	const uint16_t2 SentBits = asuint16(v);
	const uint16_t2 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint16(ReceivedBits);
}
|
|
|
|
/** Broadcasts an int16_t3 by reinterpreting its bits as uint16_t3 and using that overload. */
CALL_SITE_DEBUGLOC
int16_t3 WaveBroadcast(const FWaveBroadcastSettings Settings, int16_t3 v)
{
	const uint16_t3 SentBits = asuint16(v);
	const uint16_t3 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint16(ReceivedBits);
}
|
|
|
|
/** Broadcasts an int16_t4 by reinterpreting its bits as uint16_t4 and using that overload. */
CALL_SITE_DEBUGLOC
int16_t4 WaveBroadcast(const FWaveBroadcastSettings Settings, int16_t4 v)
{
	const uint16_t4 SentBits = asuint16(v);
	const uint16_t4 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asint16(ReceivedBits);
}
|
|
|
|
|
|
/** Broadcasts a half by reinterpreting its bits as uint16_t and using that overload. */
CALL_SITE_DEBUGLOC
half WaveBroadcast(const FWaveBroadcastSettings Settings, half x)
{
	const uint16_t SentBits = asuint16(x);
	const uint16_t ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat16(ReceivedBits);
}
|
|
|
|
/** Broadcasts a half2 by reinterpreting its bits as uint16_t2 and using that overload. */
CALL_SITE_DEBUGLOC
half2 WaveBroadcast(const FWaveBroadcastSettings Settings, half2 v)
{
	const uint16_t2 SentBits = asuint16(v);
	const uint16_t2 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat16(ReceivedBits);
}
|
|
|
|
/** Broadcasts a half3 by reinterpreting its bits as uint16_t3 and using that overload. */
CALL_SITE_DEBUGLOC
half3 WaveBroadcast(const FWaveBroadcastSettings Settings, half3 v)
{
	const uint16_t3 SentBits = asuint16(v);
	const uint16_t3 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat16(ReceivedBits);
}
|
|
|
|
/** Broadcasts a half4 by reinterpreting its bits as uint16_t4 and using that overload. */
CALL_SITE_DEBUGLOC
half4 WaveBroadcast(const FWaveBroadcastSettings Settings, half4 v)
{
	const uint16_t4 SentBits = asuint16(v);
	const uint16_t4 ReceivedBits = WaveBroadcast(Settings, SentBits);
	return asfloat16(ReceivedBits);
}
|
|
|
|
#endif // PLATFORM_SUPPORTS_REAL_TYPES
|
|
|
|
|
|
|
|
/** Builds settings for a no-op broadcast: Operation is WAVE_BROADCAST_NOP and every
 *  other field is zero.
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitNopBroadcast()
{
	// Zero all fields in one go, then name the operation explicitly.
	FWaveBroadcastSettings Nop = (FWaveBroadcastSettings)0;
	Nop.Operation = WAVE_BROADCAST_NOP;
	return Nop;
}
|
|
|
|
|
|
/** Builds settings that read another lane's register.
 *
 * Requires PLATFORM_SUPPORTS_WAVE_READ_AT.
 *
 *	return src[SourceLaneIndex];
 *
 * Note: SourceLaneIndex may be dynamic, as allowed by SM 6.0's WaveReadLaneAt().
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitWaveReadLaneAt(uint SourceLaneIndex)
{
	FWaveBroadcastSettings ReadLaneAt = InitNopBroadcast();
	ReadLaneAt.SourceLaneIndex = SourceLaneIndex;
	ReadLaneAt.Operation = WAVE_BROADCAST_READ_LANE_AT;
	return ReadLaneAt;
}
|
|
|
|
/** Builds settings that read another group thread's register through LDS.
 *
 * Requires WAVE_BROADCAST_GROUPSIZE to be defined so the groupshared array is allocated.
 *
 *	LDS[GroupThreadIndex] = src;
 *	return LDS[SourceLaneIndex];
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitWaveReadLDS(uint SourceLaneIndex, uint GroupThreadIndex)
{
	FWaveBroadcastSettings ReadLDS = InitNopBroadcast();
	ReadLDS.GroupThreadIndex = GroupThreadIndex;
	ReadLDS.SourceLaneIndex = SourceLaneIndex;
	ReadLDS.Operation = WAVE_BROADCAST_READ_LDS_AT;
	return ReadLDS;
}
|
|
|
|
/** Converts a broadcast so that it can be performed through LDS (e.g. on SM5).
 *
 * Requires WAVE_BROADCAST_GROUPSIZE to be defined so the groupshared array is allocated.
 *
 * The source index is computed with GetWaveBroadcastSourceLaneIndex(), using
 * GroupThreadIndex as the destination lane, and the read is then routed through LDS.
 * Example:
 *
 *	const FWaveBroadcastSettings BroadcastSettings = InitWaveReadLaneAt(SourceLaneIndex);
 *
 *	#if PLATFORM_SUPPORTS_WAVE_READ_AT
 *		uint Dest = WaveBroadcast(BroadcastSettings, Src);
 *	#else
 *		uint Dest = WaveBroadcast(ConvertWaveBroadcastToLDS(BroadcastSettings, GroupThreadIndex), Src);
 *	#endif
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings ConvertWaveBroadcastToLDS(const FWaveBroadcastSettings Settings, uint GroupThreadIndex)
{
	const uint SourceIndex = GetWaveBroadcastSourceLaneIndex(Settings, /* DestLaneIndex = */ GroupThreadIndex);
	return InitWaveReadLDS(SourceIndex, GroupThreadIndex);
}
|
|
|
|
/** Builds settings for a butterfly-pattern broadcast.
 *
 * Requires PLATFORM_SUPPORTS_WAVE_BROADCAST.
 *
 *	return src[laneId ^ XorButterFly];
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitWaveXorButterfly(const uint XorButterFly)
{
	FWaveBroadcastSettings Butterfly = InitNopBroadcast();
	Butterfly.Operation = WAVE_BROADCAST_GCN_SWIZZLE;

	// Keep the low five lane bits, force none, flip the requested ones.
	Butterfly.SwizzleAnd = 0x1F;
	Butterfly.SwizzleOr = 0x00;
	Butterfly.SwizzleXor = XorButterFly;

	return Butterfly;
}
|
|
|
|
/** Swaps the left lanes with the right lanes within each lane group
 *  (size is power of two in [2; 64]).
 *
 * Requires PLATFORM_SUPPORTS_WAVE_BROADCAST.
 *
 * If a lane is not active, the VGPR value returned is 0.
 *
 *	LaneGroupSize = 8
 *
 *	         |  lane group (size=8)  |
 *	x =      | 0  1  2  3| 4  5  6  7| 8  9 ...
 *
 *	return   | 4  5  6  7| 0  1  2  3|12 13 ...
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitWaveSwapWithinLaneGroup(const uint LaneGroupSize)
{
	// Flipping the bit worth half the group size exchanges the two halves.
	const uint HalfGroupSize = LaneGroupSize >> 1;
	return InitWaveXorButterfly(/* XorButterFly = */ HalfGroupSize);
}
|
|
|
|
/** Broadcasts one inner lane group over its whole lane group
 *  (size is power of two in [2; 64]).
 *
 * Requires PLATFORM_SUPPORTS_WAVE_BROADCAST.
 *
 * If a lane is not active, the VGPR value returned is 0.
 *
 *	LaneGroupSize = 8
 *	InnerLaneGroupSize = 2
 *	InnerLaneGroupId = 1
 *
 *	         |  lane group (size=8)  |
 *	x =      | 0  1  2  3  4  5  6  7| 8  9 ...
 *
 *	return   | 2  3  2  3  2  3  2  3|10 11 ...
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitWaveBroadcastLaneGroup(const uint LaneGroupSize, const uint InnerLaneGroupSize, const uint InnerLaneGroupId)
{
	const uint InnerGroupCount = LaneGroupSize / InnerLaneGroupSize;

	// With power-of-two sizes, these are the lane index bits that pick the inner group
	// inside its lane group; they are cleared by the AND mask and replaced by the OR mask.
	const uint InnerGroupSelectBits = (InnerGroupCount - 1) * InnerLaneGroupSize;

	FWaveBroadcastSettings Broadcast = InitNopBroadcast();
	Broadcast.Operation = WAVE_BROADCAST_GCN_SWIZZLE;
	Broadcast.SwizzleAnd = ~InnerGroupSelectBits;
	Broadcast.SwizzleOr = InnerLaneGroupId * InnerLaneGroupSize;
	Broadcast.SwizzleXor = 0x00;
	return Broadcast;
}
|
|
|
|
/** Rotates lanes (-127 to +127) within each lane group (size is power of two in [2; 128]).
 *
 * Note: RDNA can only do this with LaneGroupSize <= 32.
 *
 * Requires PLATFORM_SUPPORTS_WAVE_ROTATE.
 *
 * If a lane is not active, the VGPR value returned is 0.
 *
 *	LaneGroupSize = 8
 *	LaneRotation = +3
 *
 *	         |  lane group (size=8)  |
 *	x =      | 0  1  2  3  4  5  6  7| 8  9 ...
 *
 *	return   | 3  4  5  6  7  0  1  2|11 12 ...
 *
 * @param LaneGroupSize  Number of lanes in each group that is rotated independently.
 * @param LaneRotation   Signed offset added to the lane index inside its group.
 * @return Settings with Operation set to WAVE_BROADCAST_ROTATE; all fields not used by
 *         the rotate operation hold the InitNopBroadcast() defaults.
 */
CALL_SITE_DEBUGLOC
FWaveBroadcastSettings InitWaveRotateLaneGroup(const uint LaneGroupSize, const int LaneRotation)
{
	// Start from the nop settings so that no field is returned uninitialized,
	// matching the other Init*() helpers.
	FWaveBroadcastSettings Settings = InitNopBroadcast();
	Settings.Operation = WAVE_BROADCAST_ROTATE;
	Settings.Rotate = LaneRotation;

	// Lane index bits above the group size are kept fixed; only the low bits rotate.
	Settings.RotateFixMask = ~(LaneGroupSize - 1);
	return Settings;
}
|