Files
UnrealEngine/Engine/Shaders/Private/DoubleFloat.ush
2025-05-18 13:04:45 +08:00

639 lines
22 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
// A high-precision floating point type, consisting of two 32-bit floats (High & Low).
// 'High' stores the core value, 'Low' stores the residual error that couldn't fit in 'High'.
// This combination has roughly 2x24=48 significant bits (each float carries a 24-bit significand, 23 of them stored explicitly), providing twice the precision a float offers.
// Operations are slower than floats, but faster than doubles on consumer GPUs (with potentially greater support)
// Platforms that don't support fused multiply-add and INVARIANT may see decreased performance or reduced precision.
//
// Based on:
// [0] Thall, A. (2006), Extended-precision floating-point numbers for GPU computation.
// [1] Mioara Maria Joldes, Jean-Michel Muller, Valentina Popescu. (2017), Tight and rigourous error bounds for basic building blocks of double-word arithmetic.
// [2] Vincent Lefevre, Nicolas Louvet, Jean-Michel Muller, Joris Picot, and Laurence Rideau. (2022), Accurate Calculation of Euclidean Norms using Double-Word Arithmetic
// [3] Jean-Michel Muller and Laurence Rideau. (2022), Formalization of Double-Word Arithmetic, and Comments on "Tight and Rigorous Error Bounds for Basic Building Blocks of Double-Word Arithmetic"
// [4] T. J. Dekker. (1971), A floating-point technique for extending the available precision.
// When set to 1, the code paths in this file guarded by UE_DF_FORCE_FP32_OPS use plain single-float math
// (e.g. a simple add and promote) and skip folding low-order residuals into matrices. Defaults to 0.
#ifndef UE_DF_FORCE_FP32_OPS
#define UE_DF_FORCE_FP32_OPS 0
#endif
// When set to 1, the code paths in this file guarded by UE_DF_NO_FAST_MATH call the non-"Fast" helper variants
// instead of the "Fast" ones. Defaults to 0.
#ifndef UE_DF_NO_FAST_MATH
#define UE_DF_NO_FAST_MATH 0
#endif
#include "RandomPCG.ush"
/* FDFScalar */
// Template parameters for the scalar instantiation. They are consumed by the operations header
// included after the struct definition and are #undef'd again right after that include.
#define FDFType FDFScalar
#define FDFTypeDeriv FDFScalarDeriv
#define FFloatType float
#define FBoolType bool
#define GNumComponents 1
#define DFConstructor MakeDFScalar
#if COMPILER_SUPPORTS_HLSL2021
// Forward declarations of the free functions that the struct's member functions call,
// needed because the struct body is parsed before those functions are defined.
struct FDFType;
FDFType DFAdd(FDFType Lhs, FDFType Rhs);
FDFType DFAdd(FDFType Lhs, FFloatType Rhs);
FDFType DFSubtract(FDFType Lhs, FDFType Rhs);
FDFType DFSubtract(FDFType Lhs, FFloatType Rhs);
FDFType DFMultiply(FDFType Lhs, FDFType Rhs);
FDFType DFMultiply(FDFType Lhs, FFloatType Rhs);
FDFType DFDivide(FDFType Lhs, FDFType Rhs);
FDFType DFDivide(FDFType Lhs, FFloatType Rhs);
FDFType DFNegate(FDFType V);
FBoolType DFGreater(FDFType Lhs, FFloatType Rhs);
FBoolType DFLess(FDFType Lhs, FFloatType Rhs);
FFloatType DFDemote(FDFType V);
#endif
// Double-float scalar: a value represented as the pair (High, Low) of 32-bit floats.
struct FDFScalar
{
// Core value.
float High;
// Residual error that could not be represented in High.
float Low;
#if COMPILER_SUPPORTS_HLSL2021
// Operator overloads are kept below for reference but are currently disabled.
/*FDFType operator+(FDFType Rhs) { return DFAdd(this, Rhs); }
FDFType operator+(FFloatType Rhs) { return DFAdd(this, Rhs); }
FDFType operator-(FDFType Rhs) { return DFSubtract(this, Rhs); }
FDFType operator-(FFloatType Rhs) { return DFSubtract(this, Rhs); }
FDFType operator*(FDFType Rhs) { return DFMultiply(this, Rhs); }
FDFType operator*(FFloatType Rhs) { return DFMultiply(this, Rhs); }
FDFType operator/(FDFType Rhs) { return DFDivide(this, Rhs); }
FDFType operator/(FFloatType Rhs) { return DFDivide(this, Rhs); }*/
//Commented out until supported by HlslParser
/*FDFType operator-() { return DFNegate(this); }
FBoolType operator>(FFloatType Rhs) { return DFGreater(this, Rhs); }
FBoolType operator<(FFloatType Rhs) { return DFLess(this, Rhs); }*/
// Member shorthand that forwards to the free function DFDemote (FFloatType is 'float' for this instantiation).
FFloatType Demote() { return DFDemote(this); }
#endif
};
// Pull in the shared operations header while the scalar template macros are still defined,
// then clear those macros so later sections can define them again for other types.
#include "DoubleFloatOperations.ush"
#undef FDFType
#undef FDFTypeDeriv
#undef FFloatType
#undef FBoolType
#undef GNumComponents
#undef DFConstructor
// One-component "vector" constructor: a scalar is returned as-is.
FDFScalar MakeDFVector(FDFScalar X)
{
	return X;
}

// Component access on a plain float. The index C is ignored; the float becomes the
// high part of the result and the low part is zero.
FDFScalar DFGetComponent(float V, int C)
{
	return MakeDFScalar(V, 0);
}

// Component access on a DF scalar. The index C is ignored and V is returned unchanged.
FDFScalar DFGetComponent(FDFScalar V, int C)
{
	return V;
}
// Length of a one-component float "vector" is its absolute value.
float DFLength(float V)
{
	return abs(V);
}

// Normalizing a one-component float "vector" yields its sign, cast to float.
float DFNormalize(float V)
{
	return (float)sign(V);
}
// Length of a DF scalar: forwards to DFAbs.
FDFScalar DFLength(FDFScalar V)
{
	const FDFScalar Magnitude = DFAbs(V);
	return Magnitude;
}
// Normalizes a one-component DF value. As with the float overload (which returns sign(V)),
// the normalized form of a scalar is its sign, returned here as a DF scalar with a zero low part.
// Previously this returned DFPromote(V), i.e. the input unchanged, which is not a normalized value.
FDFScalar DFNormalize(FDFScalar V)
{
	return MakeDFScalar(DFSign(V), 0);
}
// Normalizes a DF scalar and returns the result as a plain float: the value of DFSign(V).
float DFNormalizeDemote(FDFScalar V)
{
	const float Sign = DFSign(V);
	return Sign;
}
/* FDFVector2 */
// Template parameters for the 2-component instantiation; consumed by the vector definition
// header included below and cleared again immediately afterwards.
#define FDFType FDFVector2
#define FDFTypeDeriv FDFVector2Deriv
#define FFloatType float2
#define FBoolType bool2
#define GNumComponents 2
#define DFConstructor MakeDFVector2
#define DFBroadcastFunc DFBroadcast2
#include "DoubleFloatVectorDefinition.ush"
#undef FDFType
#undef FDFTypeDeriv
#undef FFloatType
#undef FBoolType
#undef GNumComponents
#undef DFConstructor
#undef DFBroadcastFunc
// Builds a DF 2-vector from two DF scalars by gathering their high parts and low parts separately.
FDFVector2 MakeDFVector(FDFScalar X, FDFScalar Y)
{
	const float2 HighParts = float2(X.High, Y.High);
	const float2 LowParts = float2(X.Low, Y.Low);
	return MakeDFVector2(HighParts, LowParts);
}
/* FDFVector3 */
// Template parameters for the 3-component instantiation; consumed by the vector definition
// header included below and cleared again immediately afterwards.
#define FDFType FDFVector3
#define FDFTypeDeriv FDFVector3Deriv
#define FFloatType float3
#define FBoolType bool3
#define GNumComponents 3
#define DFConstructor MakeDFVector3
#define DFBroadcastFunc DFBroadcast3
#include "DoubleFloatVectorDefinition.ush"
#undef FDFType
#undef FDFTypeDeriv
#undef FFloatType
#undef FBoolType
#undef GNumComponents
#undef DFConstructor
#undef DFBroadcastFunc
// DF 3-vector constructors. Each overload concatenates the high parts of its arguments into one
// float3 and the low parts into another, in argument order.
FDFVector3 MakeDFVector(FDFScalar X, FDFScalar Y, FDFScalar Z)
{
	const float3 HighParts = float3(X.High, Y.High, Z.High);
	const float3 LowParts = float3(X.Low, Y.Low, Z.Low);
	return MakeDFVector3(HighParts, LowParts);
}

FDFVector3 MakeDFVector(FDFScalar X, FDFVector2 YZ)
{
	const float3 HighParts = float3(X.High, YZ.High);
	const float3 LowParts = float3(X.Low, YZ.Low);
	return MakeDFVector3(HighParts, LowParts);
}

FDFVector3 MakeDFVector(FDFVector2 XY, FDFScalar Z)
{
	const float3 HighParts = float3(XY.High, Z.High);
	const float3 LowParts = float3(XY.Low, Z.Low);
	return MakeDFVector3(HighParts, LowParts);
}
/* FDFVector4 */
// Template parameters for the 4-component instantiation; consumed by the vector definition
// header included below and cleared again immediately afterwards.
#define FDFType FDFVector4
#define FDFTypeDeriv FDFVector4Deriv
#define FFloatType float4
#define FBoolType bool4
#define GNumComponents 4
#define DFConstructor MakeDFVector4
#define DFBroadcastFunc DFBroadcast4
#include "DoubleFloatVectorDefinition.ush"
#undef FDFType
#undef FDFTypeDeriv
#undef FFloatType
#undef FBoolType
#undef GNumComponents
#undef DFConstructor
#undef DFBroadcastFunc
// DF 4-vector constructors. Each overload concatenates the high parts of its arguments into one
// float4 and the low parts into another, in argument order.
FDFVector4 MakeDFVector(FDFScalar X, FDFScalar Y, FDFScalar Z, FDFScalar W)
{
	const float4 HighParts = float4(X.High, Y.High, Z.High, W.High);
	const float4 LowParts = float4(X.Low, Y.Low, Z.Low, W.Low);
	return MakeDFVector4(HighParts, LowParts);
}

FDFVector4 MakeDFVector(FDFScalar X, FDFScalar Y, FDFVector2 ZW)
{
	const float4 HighParts = float4(X.High, Y.High, ZW.High);
	const float4 LowParts = float4(X.Low, Y.Low, ZW.Low);
	return MakeDFVector4(HighParts, LowParts);
}

FDFVector4 MakeDFVector(FDFScalar X, FDFVector2 YZ, FDFScalar W)
{
	const float4 HighParts = float4(X.High, YZ.High, W.High);
	const float4 LowParts = float4(X.Low, YZ.Low, W.Low);
	return MakeDFVector4(HighParts, LowParts);
}

FDFVector4 MakeDFVector(FDFVector2 XY, FDFScalar Z, FDFScalar W)
{
	const float4 HighParts = float4(XY.High, Z.High, W.High);
	const float4 LowParts = float4(XY.Low, Z.Low, W.Low);
	return MakeDFVector4(HighParts, LowParts);
}

FDFVector4 MakeDFVector(FDFVector2 XY, FDFVector2 ZW)
{
	const float4 HighParts = float4(XY.High, ZW.High);
	const float4 LowParts = float4(XY.Low, ZW.Low);
	return MakeDFVector4(HighParts, LowParts);
}

FDFVector4 MakeDFVector(FDFScalar X, FDFVector3 YZW)
{
	const float4 HighParts = float4(X.High, YZW.High);
	const float4 LowParts = float4(X.Low, YZW.Low);
	return MakeDFVector4(HighParts, LowParts);
}

FDFVector4 MakeDFVector(FDFVector3 XYZ, FDFScalar W)
{
	const float4 HighParts = float4(XYZ.High, W.High);
	const float4 LowParts = float4(XYZ.Low, W.Low);
	return MakeDFVector4(HighParts, LowParts);
}

// Plain-float W: it goes into the high part only, and the matching low component is zero.
FDFVector4 MakeDFVector(FDFVector3 In, float W)
{
	const float4 HighParts = float4(In.High, W);
	const float4 LowParts = float4(In.Low, 0);
	return MakeDFVector4(HighParts, LowParts);
}
// Named component accessors: shorthand for DFGetComponent with component index 0 (X) through 3 (W).
#define DFGetX(V) DFGetComponent(V, 0)
#define DFGetY(V) DFGetComponent(V, 1)
#define DFGetZ(V) DFGetComponent(V, 2)
#define DFGetW(V) DFGetComponent(V, 3)
// Returns the first three components of a DF 4-vector, taking .xyz of both the high and low parts.
FDFVector3 DFGetXYZ(FDFVector4 V)
{
	const float3 HighXYZ = V.High.xyz;
	const float3 LowXYZ = V.Low.xyz;
	return MakeDFVector3(HighXYZ, LowXYZ);
}
// DFSwizzle: build a new DF value from the components of V selected by the given indices.
// Single-index overloads return one DF scalar. For a scalar input the index is ignored.
FDFScalar DFSwizzle(FDFScalar V, int C0)
{
	return V;
}

FDFScalar DFSwizzle(FDFVector2 V, int C0)
{
	const FDFScalar Picked = DFGetComponent(V, C0);
	return Picked;
}

FDFScalar DFSwizzle(FDFVector3 V, int C0)
{
	const FDFScalar Picked = DFGetComponent(V, C0);
	return Picked;
}

FDFScalar DFSwizzle(FDFVector4 V, int C0)
{
	const FDFScalar Picked = DFGetComponent(V, C0);
	return Picked;
}
// Two-index swizzles: the result holds component C0 of V followed by component C1 of V.
// A scalar input is replicated into both slots and the indices are ignored.
FDFVector2 DFSwizzle(FDFScalar V, int C0, int C1)
{
	return MakeDFVector(V, V);
}

FDFVector2 DFSwizzle(FDFVector2 V, int C0, int C1)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	return MakeDFVector(First, Second);
}

FDFVector2 DFSwizzle(FDFVector3 V, int C0, int C1)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	return MakeDFVector(First, Second);
}

FDFVector2 DFSwizzle(FDFVector4 V, int C0, int C1)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	return MakeDFVector(First, Second);
}
// Three-index swizzles: the result holds components C0, C1 and C2 of V, in that order.
// A scalar input is replicated into all slots and the indices are ignored.
FDFVector3 DFSwizzle(FDFScalar V, int C0, int C1, int C2)
{
	return MakeDFVector(V, V, V);
}

FDFVector3 DFSwizzle(FDFVector2 V, int C0, int C1, int C2)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	const FDFScalar Third = DFGetComponent(V, C2);
	return MakeDFVector(First, Second, Third);
}

FDFVector3 DFSwizzle(FDFVector3 V, int C0, int C1, int C2)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	const FDFScalar Third = DFGetComponent(V, C2);
	return MakeDFVector(First, Second, Third);
}

FDFVector3 DFSwizzle(FDFVector4 V, int C0, int C1, int C2)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	const FDFScalar Third = DFGetComponent(V, C2);
	return MakeDFVector(First, Second, Third);
}
// Four-index swizzles: the result holds components C0, C1, C2 and C3 of V, in that order.
// A scalar input is replicated into all slots and the indices are ignored.
FDFVector4 DFSwizzle(FDFScalar V, int C0, int C1, int C2, int C3)
{
	return MakeDFVector(V, V, V, V);
}

FDFVector4 DFSwizzle(FDFVector2 V, int C0, int C1, int C2, int C3)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	const FDFScalar Third = DFGetComponent(V, C2);
	const FDFScalar Fourth = DFGetComponent(V, C3);
	return MakeDFVector(First, Second, Third, Fourth);
}

FDFVector4 DFSwizzle(FDFVector3 V, int C0, int C1, int C2, int C3)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	const FDFScalar Third = DFGetComponent(V, C2);
	const FDFScalar Fourth = DFGetComponent(V, C3);
	return MakeDFVector(First, Second, Third, Fourth);
}

FDFVector4 DFSwizzle(FDFVector4 V, int C0, int C1, int C2, int C3)
{
	const FDFScalar First = DFGetComponent(V, C0);
	const FDFScalar Second = DFGetComponent(V, C1);
	const FDFScalar Third = DFGetComponent(V, C2);
	const FDFScalar Fourth = DFGetComponent(V, C3);
	return MakeDFVector(First, Second, Third, Fourth);
}
// Alias: DFHackToFloat expands to DFDemote.
// NOTE(review): the name suggests it marks call sites where demotion is a stopgap - confirm intent with callers.
#define DFHackToFloat DFDemote
// Transforms *to* absolute world space
// The full transform is 'M' followed by a translation by 'PostTranslation'.
struct FDFMatrix
{
// Float matrix applied first.
float4x4 M;
float3 PostTranslation; // Added to result, *after* multiplying 'M'
};
// Returns a copy of M whose 4th column is replaced by (0, 0, 0, 1); the input's
// own 4th column is discarded.
float4x4 Make4x3Matrix(float4x4 M)
{
	return float4x4(
		float4(M[0].xyz, 0.0f),
		float4(M[1].xyz, 0.0f),
		float4(M[2].xyz, 0.0f),
		float4(M[3].xyz, 1.0f));
}
// Builds a matrix that is identity except for the last row, which holds (Offset, 1).
float4x4 MakeTranslationMatrix(float3 Offset)
{
	return float4x4(
		float4(1.0f, 0.0f, 0.0f, 0.0f),
		float4(0.0f, 1.0f, 0.0f, 0.0f),
		float4(0.0f, 0.0f, 1.0f, 0.0f),
		float4(Offset, 1.0f));
}
// Builds an FDFMatrix from a post-translation and a matrix, forcing the matrix's
// 4th column to (0, 0, 0, 1) via Make4x3Matrix.
FDFMatrix MakeDFMatrix4x3(float3 PostTranslation, float4x4 InMatrix)
{
	FDFMatrix Out;
	Out.M = Make4x3Matrix(InMatrix);
	Out.PostTranslation = PostTranslation;
	return Out;
}
// Builds an FDFMatrix from a post-translation and a matrix; both are stored unmodified.
FDFMatrix MakeDFMatrix(float3 PostTranslation, float4x4 InMatrix)
{
	FDFMatrix Out;
	Out.M = InMatrix;
	Out.PostTranslation = PostTranslation;
	return Out;
}
// Returns the translation of the full transform as a DF vector: the sum of PostTranslation
// and the xyz of the matrix's last row. Which summation helper is used depends on the
// UE_DF_FORCE_FP32_OPS / UE_DF_NO_FAST_MATH switches.
FDFVector3 DFGetOrigin(FDFMatrix InMatrix)
{
	const float3 OuterOffset = InMatrix.PostTranslation;
	const float3 InnerOffset = InMatrix.M[3].xyz;
#if UE_DF_FORCE_FP32_OPS
	// Plain float add, then widen.
	return DFPromote(OuterOffset + InnerOffset);
#elif UE_DF_NO_FAST_MATH
	return DFTwoSum(OuterOffset, InnerOffset);
#else
	return DFFastTwoSum(OuterOffset, InnerOffset);
#endif
}
// Transforms *from* absolute world space
// The full transform is a translation by -PreTranslation followed by 'M'.
struct FDFInverseMatrix
{
// Float matrix applied after the pre-translation has been removed from the input.
float4x4 M;
float3 PreTranslation; // Subtracted from input position *before* multiplying 'M'
#if !COMPILER_SUPPORTS_HLSL2021
// Extra member that makes this struct's layout differ from FDFMatrix; code constructing this struct sets it to 0.
int Dummy; // avoid problem with HLSL overloading/implicit cast (should be DCE'd)
#endif
};
// Builds an FDFInverseMatrix from a pre-translation and a matrix; both are stored unmodified.
// The Dummy member, when present, is zeroed so the struct is fully initialized.
FDFInverseMatrix MakeDFInverseMatrix(float3 PreTranslation, float4x4 InMatrix)
{
	FDFInverseMatrix Out;
#if !COMPILER_SUPPORTS_HLSL2021
	Out.Dummy = 0;
#endif
	Out.M = InMatrix;
	Out.PreTranslation = PreTranslation;
	return Out;
}
// Builds an FDFInverseMatrix from a pre-translation and a matrix, forcing the matrix's
// 4th column to (0, 0, 0, 1) via Make4x3Matrix.
FDFInverseMatrix MakeDFInverseMatrix4x3(float3 PreTranslation, float4x4 InMatrix)
{
	return MakeDFInverseMatrix(PreTranslation, Make4x3Matrix(InMatrix));
}
// Transforms a float position (w = 1) by an FDFMatrix and returns a DF vector:
// the position is multiplied by 'M', then PostTranslation is combined with the xyz
// of that product through DFTwoSum. No divide by w is performed.
FDFVector3 DFMultiply(float3 Vector, FDFMatrix InMatrix)
{
	const float4 Homogeneous = float4(Vector, 1);
	const float4 Transformed = mul(Homogeneous, InMatrix.M);
	return DFTwoSum(InMatrix.PostTranslation, Transformed.xyz);
}
// Transforms a float4 by an FDFMatrix and returns a DF 4-vector: the vector is multiplied
// by 'M', then (PostTranslation, 0) is combined with the product through DFTwoSum.
// The w component therefore receives no translation.
FDFVector4 DFMultiply(float4 Vector, FDFMatrix InMatrix)
{
	const float4 Transformed = mul(Vector, InMatrix.M);
	const float4 Offset = float4(InMatrix.PostTranslation, 0);
	return DFTwoSum(Offset, Transformed);
}
// Transforms a DF position by an FDFInverseMatrix and returns a float3:
// PreTranslation is removed via DFFastSubtractDemote, the float result is extended with w = 1
// and multiplied by 'M'. Only xyz of the product is returned; no divide by w is performed.
float3 DFMultiplyDemote(FDFVector3 Vector, FDFInverseMatrix InMatrix)
{
	const float3 Relative = DFFastSubtractDemote(Vector, InMatrix.PreTranslation);
	const float4 Transformed = mul(float4(Relative, 1), InMatrix.M);
	return Transformed.xyz;
}
// Same shape as DFMultiplyDemote for positions, but removes PreTranslation with
// DFFastLocalSubtractDemote; the inputs must satisfy that helper's requirements.
float3 DFFastMultiplyDemote(FDFVector3 Vector, FDFInverseMatrix InMatrix)
{
	const float3 Relative = DFFastLocalSubtractDemote(Vector, InMatrix.PreTranslation);
	const float4 Transformed = mul(float4(Relative, 1), InMatrix.M);
	return Transformed.xyz;
}
// Transforms a DF 4-vector by an FDFInverseMatrix and returns a float4:
// (PreTranslation, 0) is removed via DFFastSubtractDemote, then the float result is multiplied by 'M'.
float4 DFMultiplyDemote(FDFVector4 Vector, FDFInverseMatrix InMatrix)
{
	const float4 Offset = float4(InMatrix.PreTranslation, 0);
	const float4 Relative = DFFastSubtractDemote(Vector, Offset);
	return mul(Relative, InMatrix.M);
}
// Same shape as the float4 DFMultiplyDemote, but removes (PreTranslation, 0) with
// DFFastLocalSubtractDemote; the inputs must satisfy that helper's requirements.
float4 DFFastMultiplyDemote(FDFVector4 Vector, FDFInverseMatrix InMatrix)
{
	const float4 Offset = float4(InMatrix.PreTranslation, 0);
	const float4 Relative = DFFastLocalSubtractDemote(Vector, Offset);
	return mul(Relative, InMatrix.M);
}
// Appends a float matrix to an FDFInverseMatrix: the result keeps Lhs.PreTranslation
// and uses Lhs.M * Rhs as its matrix.
FDFInverseMatrix DFMultiply(FDFInverseMatrix Lhs, float4x4 Rhs)
{
	const float4x4 Combined = mul(Lhs.M, Rhs);
	return MakeDFInverseMatrix(Lhs.PreTranslation, Combined);
}
// Prepends a float matrix to an FDFMatrix: the result keeps Rhs.PostTranslation
// and uses Lhs * Rhs.M as its matrix.
FDFMatrix DFMultiply(float4x4 Lhs, FDFMatrix Rhs)
{
	const float4x4 Combined = mul(Lhs, Rhs.M);
	return MakeDFMatrix(Rhs.PostTranslation, Combined);
}
// Applies a translation after M (M * translate(Translation)) by adding Translation to the
// xyz of the last row. All other elements of M are returned unchanged.
float4x4 MultiplyTranslation(float4x4 M, float3 Translation)
{
	const float4 ShiftedRow = float4(M[3].xyz + Translation, M[3].w);
	return float4x4(M[0], M[1], M[2], ShiftedRow);
}
// Applies a translation before M: returns translate(Translation) * M.
float4x4 MultiplyTranslation(float3 Translation, float4x4 M)
{
	const float4x4 TranslationMatrix = MakeTranslationMatrix(Translation);
	return mul(TranslationMatrix, M);
}
// Computes Lhs * T as a plain float matrix, where T is a translation matrix built from Rhs.
// PostTranslation and Rhs are summed and demoted with DFFastAddDemote, and the float
// result is applied after Lhs.M.
float4x4 DFMultiplyTranslationDemote(FDFMatrix Lhs, FDFVector3 Rhs)
{
	const float3 Offset = DFFastAddDemote(Lhs.PostTranslation, Rhs);
	return MultiplyTranslation(Lhs.M, Offset);
}
// Variant of DFMultiplyTranslationDemote for the case where the translation vector roughly
// cancels the translation stored in the matrix.
// Precondition: Lhs.PostTranslation and -Rhs must meet the requirements of DFFastLocalSubtractDemote.
// With UE_DF_NO_FAST_MATH set, this simply forwards to DFMultiplyTranslationDemote.
float4x4 DFFastMultiplyTranslationDemote(FDFMatrix Lhs, FDFVector3 Rhs)
{
#if UE_DF_NO_FAST_MATH
	return DFMultiplyTranslationDemote(Lhs, Rhs);
#else
	// PostTranslation + Rhs, written as PostTranslation - (-Rhs).
	const float3 Offset = DFFastLocalSubtractDemote(Lhs.PostTranslation, DFNegate(Rhs));
	return MultiplyTranslation(Lhs.M, Offset);
#endif
}
// Computes Lhs * T as a plain float matrix, where T is a translation matrix built from the
// float vector Rhs. PostTranslation and Rhs are summed with INVARIANT_ADD and the result
// is applied after Lhs.M.
float4x4 DFMultiplyTranslationDemote(FDFMatrix Lhs, float3 Rhs)
{
	const float3 Offset = INVARIANT_ADD(Lhs.PostTranslation, Rhs);
	return MultiplyTranslation(Lhs.M, Offset);
}
// Computes Lhs * T as an FDFMatrix, where T is a translation matrix built from Rhs.
// PostTranslation and Rhs are combined with DFTwoSum; the high part becomes the new
// PostTranslation, and (unless UE_DF_FORCE_FP32_OPS is set) the low part is folded into the matrix.
FDFMatrix DFMultiplyTranslation(FDFMatrix Lhs, float3 Rhs)
{
	const FDFVector3 Sum = DFTwoSum(Lhs.PostTranslation, Rhs);
#if UE_DF_FORCE_FP32_OPS
	const float4x4 Adjusted = Lhs.M;
#else
	const float4x4 Adjusted = MultiplyTranslation(Lhs.M, Sum.Low);
#endif
	return MakeDFMatrix(Sum.High, Adjusted);
}
// Computes Lhs * T as an FDFMatrix, where Lhs is a float matrix and T is a translation matrix
// built from the DF vector Rhs. Rhs.High becomes PostTranslation, and (unless
// UE_DF_FORCE_FP32_OPS is set) Rhs.Low is folded into the matrix.
FDFMatrix DFMultiplyTranslation(float4x4 Lhs, FDFVector3 Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	const float4x4 Adjusted = Lhs;
#else
	const float4x4 Adjusted = MultiplyTranslation(Lhs, Rhs.Low);
#endif
	return MakeDFMatrix(Rhs.High, Adjusted);
}
// Computes T * Rhs as an FDFInverseMatrix, where T is a translation matrix built from Lhs.
// Lhs and -Rhs.PreTranslation are summed with DFFastAdd; the negated high part becomes the
// new PreTranslation, and (unless UE_DF_FORCE_FP32_OPS is set) the low part is folded in
// front of the matrix.
FDFInverseMatrix DFMultiplyTranslation(FDFVector3 Lhs, FDFInverseMatrix Rhs)
{
	const FDFVector3 Sum = DFFastAdd(Lhs, -Rhs.PreTranslation);
#if UE_DF_FORCE_FP32_OPS
	const float4x4 Adjusted = Rhs.M;
#else
	const float4x4 Adjusted = MultiplyTranslation(Sum.Low, Rhs.M);
#endif
	return MakeDFInverseMatrix(-Sum.High, Adjusted);
}
// Computes T * Rhs as a plain float matrix, where T is a translation matrix built from Lhs.
// Lhs and -Rhs.PreTranslation are summed and demoted with DFFastAddDemote, and the float
// result is applied in front of Rhs.M.
float4x4 DFMultiplyTranslationDemote(FDFVector3 Lhs, FDFInverseMatrix Rhs)
{
	const float3 Offset = DFFastAddDemote(Lhs, -Rhs.PreTranslation);
	return MultiplyTranslation(Offset, Rhs.M);
}
// Variant of DFMultiplyTranslationDemote for the case where the translation vector roughly
// cancels the translation stored in the matrix.
// Precondition: Lhs and Rhs.PreTranslation must meet the requirements of DFFastLocalSubtractDemote.
// With UE_DF_NO_FAST_MATH set, this simply forwards to DFMultiplyTranslationDemote.
float4x4 DFFastMultiplyTranslationDemote(FDFVector3 Lhs, FDFInverseMatrix Rhs)
{
#if UE_DF_NO_FAST_MATH
	return DFMultiplyTranslationDemote(Lhs, Rhs);
#else
	const float3 Offset = DFFastLocalSubtractDemote(Lhs, Rhs.PreTranslation);
	return MultiplyTranslation(Offset, Rhs.M);
#endif
}
// Computes T * Rhs as an FDFInverseMatrix, where T is a translation matrix built from the
// float vector Lhs. Lhs and -Rhs.PreTranslation are combined with DFTwoSum; the negated
// high part becomes the new PreTranslation, and (unless UE_DF_FORCE_FP32_OPS is set) the
// low part is folded in front of the matrix.
FDFInverseMatrix DFMultiplyTranslation(float3 Lhs, FDFInverseMatrix Rhs)
{
	const FDFVector3 Sum = DFTwoSum(Lhs, -Rhs.PreTranslation);
#if UE_DF_FORCE_FP32_OPS
	const float4x4 Adjusted = Rhs.M;
#else
	const float4x4 Adjusted = MultiplyTranslation(Sum.Low, Rhs.M);
#endif
	return MakeDFInverseMatrix(-Sum.High, Adjusted);
}
// Computes T * Rhs as an FDFInverseMatrix, where T is a translation matrix built from the DF
// vector Lhs and Rhs is a float matrix. -Lhs.High becomes PreTranslation, and (unless
// UE_DF_FORCE_FP32_OPS is set) Lhs.Low is folded in front of the matrix.
FDFInverseMatrix DFMultiplyTranslation(FDFVector3 Lhs, float4x4 Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	const float4x4 Adjusted = Rhs;
#else
	const float4x4 Adjusted = MultiplyTranslation(Lhs.Low, Rhs);
#endif
	return MakeDFInverseMatrix(-Lhs.High, Adjusted);
}
// Returns the float matrix of an FDFMatrix cast to float3x3; the translation is not involved.
float3x3 DFToFloat3x3(FDFMatrix Value)
{
	const float3x3 Linear = (float3x3)Value.M;
	return Linear;
}

// Returns the float matrix of an FDFInverseMatrix cast to float3x3; the translation is not involved.
float3x3 DFToFloat3x3(FDFInverseMatrix Value)
{
	const float3x3 Linear = (float3x3)Value.M;
	return Linear;
}
// Transforms a direction by the float3x3 part of an FDFMatrix; no translation is applied.
float3 DFMultiplyVector(float3 Vector, FDFMatrix InMatrix)
{
	const float3x3 Linear = (float3x3)InMatrix.M;
	return mul(Vector, Linear);
}

// Transforms a direction by the float3x3 part of an FDFInverseMatrix; no translation is applied.
float3 DFMultiplyVector(float3 Vector, FDFInverseMatrix InMatrix)
{
	const float3x3 Linear = (float3x3)InMatrix.M;
	return mul(Vector, Linear);
}
// Promotion of values that are already DF matrices is a pass-through.
FDFMatrix DFPromote(FDFMatrix Value)
{
	return Value;
}

FDFInverseMatrix DFPromoteInverse(FDFInverseMatrix Value)
{
	return Value;
}

// Promotion of a float matrix wraps it with a zero translation.
FDFMatrix DFPromote(float4x4 Value)
{
	const float3 NoTranslation = (float3)0;
	return MakeDFMatrix(NoTranslation, Value);
}

FDFInverseMatrix DFPromoteInverse(float4x4 Value)
{
	const float3 NoTranslation = (float3)0;
	return MakeDFInverseMatrix(NoTranslation, Value);
}
// Collapses an FDFMatrix into one float matrix by applying PostTranslation after M.
float4x4 DFDemote(FDFMatrix V)
{
	const float4x4 Collapsed = MultiplyTranslation(V.M, V.PostTranslation);
	return Collapsed;
}

// A float matrix is already demoted; returned unchanged.
float4x4 DFDemote(float4x4 V)
{
	return V;
}

// Collapses an FDFInverseMatrix into one float matrix by applying a translation of
// -PreTranslation in front of M.
float4x4 DFDemote(FDFInverseMatrix V)
{
	const float4x4 Collapsed = MultiplyTranslation(-V.PreTranslation, V.M);
	return Collapsed;
}
// Transforms a local-space position to translated world space and returns it with w = 1.
// The rotation/scale part (first three rows of LocalToWorld.M) is applied to the local position
// on its own, the matrix origin is moved into translated world space separately, and the
// two float results are added at the end.
float4 DFTransformLocalToTranslatedWorld(float3 LocalPosition, FDFMatrix LocalToWorld, FDFVector3 PreViewTranslation)
{
	// x*Row0 + (y*Row1 + z*Row2), evaluated innermost-first with fused multiply-adds
	const float3 ZTerm = LocalPosition.zzz * LocalToWorld.M[2].xyz;
	const float3 YZTerm = FMA(LocalPosition.yyy, LocalToWorld.M[1].xyz, ZTerm);
	const float3 RotatedScaledPosition = FMA(LocalPosition.xxx, LocalToWorld.M[0].xyz, YZTerm);
#if UE_DF_NO_FAST_MATH
	const float3 TranslatedOrigin = DFAddDemote(DFGetOrigin(LocalToWorld), PreViewTranslation);
#else
	// PostTranslation + PreViewTranslation (written as a subtraction of the negation), then the matrix's own translation row
	const float3 TranslatedOrigin = DFFastLocalSubtractDemote(LocalToWorld.PostTranslation, DFNegate(PreViewTranslation)) + LocalToWorld.M[3].xyz;
#endif
	return float4(RotatedScaledPosition + TranslatedOrigin, 1.0f);
}
// Adds PreViewTranslation to a world position and returns the float result.
// Assumes the result fits in FP32 (i.e. is close to the view origin).
// Implemented as WorldPosition - (-PreViewTranslation) via DFFastLocalSubtractDemote.
float3 DFFastToTranslatedWorld(FDFVector3 WorldPosition, FDFVector3 PreViewTranslation)
{
	const FDFVector3 NegatedTranslation = DFNegate(PreViewTranslation);
	return DFFastLocalSubtractDemote(WorldPosition, NegatedTranslation);
}

// Overload for a world position that is already a plain float3.
float3 DFFastToTranslatedWorld(float3 WorldPosition, FDFVector3 PreViewTranslation)
{
	const FDFVector3 NegatedTranslation = DFNegate(PreViewTranslation);
	return DFFastLocalSubtractDemote(WorldPosition, NegatedTranslation);
}
// Converts a WorldToX matrix into a TranslatedWorldToX float matrix by placing a translation
// of -PreViewTranslation in front of it.
float4x4 DFFastToTranslatedWorld(FDFInverseMatrix WorldToLocal, FDFVector3 PreViewTranslation)
{
	const FDFVector3 NegatedTranslation = DFNegate(PreViewTranslation);
	return DFFastMultiplyTranslationDemote(NegatedTranslation, WorldToLocal);
}
// Converts an XToWorld matrix into an XToTranslatedWorld float matrix by appending a
// translation of PreViewTranslation.
float4x4 DFFastToTranslatedWorld(FDFMatrix LocalToWorld, FDFVector3 PreViewTranslation)
{
	const float4x4 ToTranslatedWorld = DFFastMultiplyTranslationDemote(LocalToWorld, PreViewTranslation);
	return ToTranslatedWorld;
}
// Computes Lhs * Factor + Rhs and returns it as a DF vector (sum in the high part, error term in the low part).
// NOTE(review): as with other "Fast" two-sum forms, this presumably requires |Lhs * Factor| >= |Rhs| for the
// error term to be exact - confirm against callers.
FDFVector3 DFMultiplyLHSAndFastTwoSum(float3 Lhs, float Factor, float3 Rhs)
{
// S = Lhs * Factor + Rhs, via a fused multiply-add.
const float3 S = INVARIANT_FMA(Lhs, Factor, Rhs);
// E = Rhs - (S - Lhs * Factor): the part of Rhs that did not make it into S.
const float3 E = INVARIANT_SUB(Rhs, INVARIANT_FMA(-Lhs, Factor, S));
return MakeDFVector3(S, E);
}
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM6 || PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
// Wave-lane read for FDFMatrix: fetches each matrix row and the post-translation from
// lane SrcIndex and reassembles them into a new FDFMatrix.
FDFMatrix WaveReadLaneAt(FDFMatrix In, uint SrcIndex)
{
	const float4 Row0 = WaveReadLaneAt(In.M[0], SrcIndex);
	const float4 Row1 = WaveReadLaneAt(In.M[1], SrcIndex);
	const float4 Row2 = WaveReadLaneAt(In.M[2], SrcIndex);
	const float4 Row3 = WaveReadLaneAt(In.M[3], SrcIndex);
	const float3 Translation = WaveReadLaneAt(In.PostTranslation, SrcIndex);
	return MakeDFMatrix(Translation, float4x4(Row0, Row1, Row2, Row3));
}
// Wave-lane read for FDFInverseMatrix: fetches each matrix row and the pre-translation from
// lane SrcIndex and reassembles them into a new FDFInverseMatrix.
// The result is built through MakeDFInverseMatrix so that the Dummy member (present when
// !COMPILER_SUPPORTS_HLSL2021) is initialized as well; assigning the fields by hand left it
// uninitialized in the returned struct.
FDFInverseMatrix WaveReadLaneAt(FDFInverseMatrix In, uint SrcIndex)
{
	float4x4 M;
	M[0] = WaveReadLaneAt(In.M[0], SrcIndex);
	M[1] = WaveReadLaneAt(In.M[1], SrcIndex);
	M[2] = WaveReadLaneAt(In.M[2], SrcIndex);
	M[3] = WaveReadLaneAt(In.M[3], SrcIndex);
	const float3 PreTranslation = WaveReadLaneAt(In.PreTranslation, SrcIndex);
	return MakeDFInverseMatrix(PreTranslation, M);
}
#endif
// PCG-based hash of a DF 2-vector. The bit patterns of (High.x, High.y, Low.x, Low.y)
// are packed into one int4 and hashed with a single Rand4DPCG32 call.
uint4 DFHashPCG(FDFVector2 In)
{
	const float4 Packed = float4(In.High, In.Low);
	const int4 Bits = asint(Packed);
	return Rand4DPCG32(Bits);
}
// LCG-based hash of a DF 2-vector: the bit patterns of all four floats are summed and the
// sum is run through two rounds of State * Multiplier + Increment.
uint DFHash_LCG(FDFVector2 In)
{
	const uint Multiplier = 1664525u;
	const uint Increment = 1013904223u;

	uint State = asuint(In.High.x) + asuint(In.High.y)
		+ asuint(In.Low.x) + asuint(In.Low.y);

	State = State * Multiplier + Increment;
	// second round improves visual quality significantly, even though the compiler premultiplies these values, so no extra ops
	State = State * Multiplier + Increment;
	return State;
}
// PCG-based hash of a DF 3-vector. Higher quality but more expensive than the LCG variant:
// pick this one when statistical robustness matters, and LCG when visually plausible noise is enough.
// The high part is hashed first; the low part's bits are then added to that hash and the sum is hashed again.
uint3 DFHashPCG(FDFVector3 In)
{
	const int3 HighBits = asint(In.High);
	const int3 LowBits = asint(In.Low);
	const uint3 FirstRound = Rand3DPCG32(HighBits);
	return Rand3DPCG32(FirstRound + LowBits);
}
// LCG-based hash of a DF 3-vector. Lower quality than the PCG variant, at roughly 1/8th of
// its instruction count. The bit patterns of all six floats are summed and the sum is run
// through two rounds of State * Multiplier + Increment.
uint DFHash_LCG(FDFVector3 In)
{
	const uint Multiplier = 1664525u;
	const uint Increment = 1013904223u;

	uint State = asuint(In.High.x) + asuint(In.High.y) + asuint(In.High.z)
		+ asuint(In.Low.x) + asuint(In.Low.y) + asuint(In.Low.z);

	State = State * Multiplier + Increment;
	// second round improves visual quality significantly, even though the compiler premultiplies these values, so no extra ops
	State = State * Multiplier + Increment;
	return State;
}
// PCG-based hash of a DF 4-vector. The high part is hashed first; the low part's bits are
// then added to that hash and the sum is hashed again.
uint4 DFHash_PCG(FDFVector4 In)
{
	const int4 HighBits = asint(In.High);
	const int4 LowBits = asint(In.Low);
	const uint4 FirstRound = Rand4DPCG32(HighBits);
	return Rand4DPCG32(FirstRound + LowBits);
}
// LCG-based hash of a DF 4-vector: the bit patterns of all eight floats are summed and the
// sum is run through two rounds of State * Multiplier + Increment.
uint DFHash_LCG(FDFVector4 In)
{
	const uint Multiplier = 1664525u;
	const uint Increment = 1013904223u;

	uint State = asuint(In.High.x) + asuint(In.High.y) + asuint(In.High.z) + asuint(In.High.w)
		+ asuint(In.Low.x) + asuint(In.Low.y) + asuint(In.Low.z) + asuint(In.Low.w);

	State = State * Multiplier + Increment;
	// second round improves visual quality significantly, even though the compiler premultiplies these values, so no extra ops
	State = State * Multiplier + Increment;
	return State;
}