1004 lines
27 KiB
HLSL
1004 lines
27 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
// Builds a double float directly from the given High and Low parts, without 'rebalancing' them.
// Use DFTwoSum instead if the inputs are not already in the correct high/low format.
FDFType DFConstructor(FFloatType High, FFloatType Low)
{
	FDFType Packed;
	Packed.Low = Low;
	Packed.High = High;
	return Packed;
}
|
|
|
|
// Identity overload: the value already is a double float.
FDFType DFPromote(FDFType Value)
{
	return Value;
}

// Convert to double float: Value becomes the High part, the Low part is zero.
FDFType DFPromote(FFloatType Value)
{
	const FFloatType ZeroLow = (FFloatType)0;
	return DFConstructor(Value, ZeroLow);
}
|
|
|
|
// Reduce to a single precision float by adding the High and Low parts.
// (This could just return Value.High if you're sure the input is in the correct DF format)
FFloatType DFDemote(FDFType Value)
{
	const FFloatType Collapsed = Value.High + Value.Low;
	return Collapsed;
}

// Identity overload: the value already is single precision.
FFloatType DFDemote(FFloatType Value)
{
	return Value;
}
|
|
|
|
// Negates both parts of the double float.
FDFType DFNegate(FDFType Value)
{
	const FFloatType NegHigh = -Value.High;
	const FFloatType NegLow = -Value.Low;
	return DFConstructor(NegHigh, NegLow);
}
|
|
|
|
// Sign of the value; only the High part is inspected.
FFloatType DFSign(FDFType Value)
{
	const FFloatType HighSign = (FFloatType)sign(Value.High);
	return HighSign;
}
|
|
|
|
// Absolute value: both parts are multiplied by the sign reported by DFSign.
FDFType DFAbs(FDFType Value)
{
	const FFloatType ValueSign = DFSign(Value);
	const FFloatType AbsHigh = ValueSign * Value.High;
	const FFloatType AbsLow = ValueSign * Value.Low;
	return DFConstructor(AbsHigh, AbsLow);
}
|
|
|
|
/**************/
|
|
/** ADDITION **/
|
|
/**************/
|
|
|
|
// Adds two single precision values and returns the result as one double float.
// [1] Algorithm 2
FDFType DFTwoSum(FFloatType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs + Rhs);
#else
	const FFloatType Sum = INVARIANT_ADD(Lhs, Rhs);
	// Parts of the rounded sum attributed to each operand...
	const FFloatType RhsPart = INVARIANT_SUB(Sum, Lhs);
	const FFloatType LhsPart = INVARIANT_SUB(Sum, RhsPart);
	// ...and what each operand contributed beyond that part.
	const FFloatType LhsErr = INVARIANT_SUB(Lhs, LhsPart);
	const FFloatType RhsErr = INVARIANT_SUB(Rhs, RhsPart);
	const FFloatType Err = INVARIANT_ADD(LhsErr, RhsErr);
	return DFConstructor(Sum, Err);
#endif
}
|
|
|
|
// An optimized version of DFTwoSum, under the assumption that
// a = 0 or b = 0, or e1 >= e2, where e1 and e2 are the exponents of a (Lhs) and b (Rhs) respectively.
// [1] Algorithm 1
// The worst case precision if this assumption is violated, is just regular fp32 precision
FDFType DFFastTwoSum(FFloatType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs + Rhs);
#else
	const FFloatType Sum = INVARIANT_ADD(Lhs, Rhs);
	const FFloatType RhsPart = INVARIANT_SUB(Sum, Lhs);
	const FFloatType Err = INVARIANT_SUB(Rhs, RhsPart);
	return DFConstructor(Sum, Err);
#endif
}
|
|
|
|
// Aliases for DFTwoSum (both the regular and the 'fast' float+float add forward to it)
FDFType DFAdd(FFloatType Lhs, FFloatType Rhs)
{
	return DFTwoSum(Lhs, Rhs);
}

FDFType DFFastAdd(FFloatType Lhs, FFloatType Rhs)
{
	return DFTwoSum(Lhs, Rhs);
}
|
|
|
|
// Double float + double float.
// [1] Algorithm 6
FDFType DFAdd(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High + Rhs.High);
#else
	// lossless add of the High parts and of the Low parts
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs.High);
	const FDFType LowSum = DFTwoSum(Lhs.Low, Rhs.Low);

	// merge the Low sum in one part at a time, rebalancing after each step
	const FDFType Merged = DFFastTwoSum(HighSum.High, HighSum.Low + LowSum.High);
	return DFFastTwoSum(Merged.High, Merged.Low + LowSum.Low);
#endif
}
|
|
|
|
// Double float + single float.
// [1] Algorithm 4
FDFType DFAdd(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High + Rhs);
#else
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs);
	const FFloatType Tail = Lhs.Low + HighSum.Low;
	return DFFastTwoSum(HighSum.High, Tail);
#endif
}

// Single float + double float: forwards to the overload above with the operands swapped.
FDFType DFAdd(FFloatType Lhs, FDFType Rhs)
{
	return DFAdd(Rhs, Lhs);
}
|
|
|
|
// Double float + double float, returned as a single precision float.
// Same steps as DFAdd(FDFType, FDFType), except the final rebalance is replaced by a plain add.
FFloatType DFAddDemote(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High + Rhs.High;
#else
	// lossless add of the High parts and of the Low parts
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs.High);
	const FDFType LowSum = DFTwoSum(Lhs.Low, Rhs.Low);

	// merge and rebalance once, then fold the remaining tail into a single float
	const FDFType Merged = DFFastTwoSum(HighSum.High, HighSum.Low + LowSum.High);
	const FFloatType Tail = Merged.Low + LowSum.Low;
	return Merged.High + Tail;
#endif
}
|
|
|
|
// Double float + single float, returned as a single precision float.
FFloatType DFAddDemote(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High + Rhs;
#else
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs);
	const FFloatType Tail = Lhs.Low + HighSum.Low;
	return HighSum.High + Tail;
#endif
}

// Single float + double float: forwards with the operands swapped.
FFloatType DFAddDemote(FFloatType Lhs, FDFType Rhs)
{
	return DFAddDemote(Rhs, Lhs);
}

// Single float + single float: a plain fp32 add.
FFloatType DFAddDemote(FFloatType Lhs, FFloatType Rhs)
{
	return Lhs + Rhs;
}
|
|
|
|
// Cheaper double float + double float: the two Low parts are added with a plain fp32 add
// and merged into the High sum with a single rebalance.
FDFType DFFastAdd(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High + Rhs.High);
#else
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs.High);
	const FFloatType Tail = HighSum.Low + (Lhs.Low + Rhs.Low);
	return DFFastTwoSum(HighSum.High, Tail);
#endif
}
|
|
|
|
// Cheaper double float + single float.
FDFType DFFastAdd(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High + Rhs);
#else
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs);
	const FFloatType Tail = HighSum.Low + Lhs.Low;
	return DFFastTwoSum(HighSum.High, Tail);
#endif
}

// Single float + double float: forwards with the operands swapped.
FDFType DFFastAdd(FFloatType Lhs, FDFType Rhs)
{
	return DFFastAdd(Rhs, Lhs);
}
|
|
|
|
// Cheaper double float + double float, returned as a single precision float.
FFloatType DFFastAddDemote(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High + Rhs.High;
#else
	const FDFType HighSum = DFTwoSum(Lhs.High, Rhs.High);
	const FFloatType Tail = HighSum.Low + (Lhs.Low + Rhs.Low);
	return HighSum.High + Tail;
#endif
}

// Double float + single float: forwards to DFAddDemote.
FFloatType DFFastAddDemote(FDFType Lhs, FFloatType Rhs)
{
	return DFAddDemote(Lhs, Rhs);
}

// Single float + double float: forwards with the operands swapped.
FFloatType DFFastAddDemote(FFloatType Lhs, FDFType Rhs)
{
	return DFFastAddDemote(Rhs, Lhs);
}

// Single float + single float: a plain fp32 add.
FFloatType DFFastAddDemote(FFloatType Lhs, FFloatType Rhs)
{
	return Rhs + Lhs;
}
|
|
|
|
/*****************/
|
|
/** SUBTRACTION **/
|
|
/*****************/
|
|
|
|
// Subtraction is implemented as DFAdd with a negated right-hand side.
FDFType DFSubtract(FFloatType Lhs, FFloatType Rhs)
{
	return DFAdd(Lhs, -Rhs);
}

FDFType DFSubtract(FDFType Lhs, FDFType Rhs)
{
	return DFAdd(Lhs, DFNegate(Rhs));
}

FDFType DFSubtract(FDFType Lhs, FFloatType Rhs)
{
	return DFAdd(Lhs, -Rhs);
}

FDFType DFSubtract(FFloatType Lhs, FDFType Rhs)
{
	return DFAdd(Lhs, DFNegate(Rhs));
}
|
|
|
|
// Subtraction returned as a single precision float.
// The float-float case is a plain fp32 subtract; the others call DFAddDemote with a negated right-hand side.
FFloatType DFSubtractDemote(FFloatType Lhs, FFloatType Rhs)
{
	return Lhs - Rhs;
}

FFloatType DFSubtractDemote(FDFType Lhs, FDFType Rhs)
{
	return DFAddDemote(Lhs, DFNegate(Rhs));
}

FFloatType DFSubtractDemote(FDFType Lhs, FFloatType Rhs)
{
	return DFAddDemote(Lhs, -Rhs);
}

FFloatType DFSubtractDemote(FFloatType Lhs, FDFType Rhs)
{
	return DFAddDemote(Lhs, DFNegate(Rhs));
}
|
|
|
|
// 'Fast' subtraction is implemented as DFFastAdd with a negated right-hand side.
FDFType DFFastSubtract(FFloatType Lhs, FFloatType Rhs)
{
	return DFFastAdd(Lhs, -Rhs);
}

FDFType DFFastSubtract(FDFType Lhs, FDFType Rhs)
{
	return DFFastAdd(Lhs, DFNegate(Rhs));
}

FDFType DFFastSubtract(FDFType Lhs, FFloatType Rhs)
{
	return DFFastAdd(Lhs, -Rhs);
}

FDFType DFFastSubtract(FFloatType Lhs, FDFType Rhs)
{
	return DFFastAdd(Lhs, DFNegate(Rhs));
}
|
|
|
|
// 'Fast' subtraction returned as a single precision float.
// The float-float case is a plain fp32 subtract; the others call DFFastAddDemote with a negated right-hand side.
FFloatType DFFastSubtractDemote(FFloatType Lhs, FFloatType Rhs)
{
	return Lhs - Rhs;
}

FFloatType DFFastSubtractDemote(FDFType Lhs, FDFType Rhs)
{
	return DFFastAddDemote(Lhs, DFNegate(Rhs));
}

FFloatType DFFastSubtractDemote(FDFType Lhs, FFloatType Rhs)
{
	return DFFastAddDemote(Lhs, -Rhs);
}

FFloatType DFFastSubtractDemote(FFloatType Lhs, FDFType Rhs)
{
	return DFFastAddDemote(Lhs, DFNegate(Rhs));
}
|
|
|
|
// Subtract using only 2 ops instead of ~20, and convert the result to fp32 (1 op)
// CAREFUL, this has strict input preconditions to get output that has any precision at all.
// Assuming you require a precision of 2^-4, the precondition for Lhs and Rhs is
// (abs(Lhs) < 2^20 && abs(Rhs) < 2^20) ||
// ( abs(Lhs) < 2^43 && abs(Rhs) < 2^43 && Sign(Lhs) == Sign(Rhs) && abs(trunc(log2(abs(Lhs))) - trunc(log2(abs(Rhs)))) < 1 )
// As the preconditions imply, this does not work for addition due to FP rounding.
FFloatType DFFastLocalSubtractDemote(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High - Rhs.High;
#elif UE_DF_NO_FAST_MATH
	return DFSubtractDemote(Lhs, Rhs);
#else
	// Subtract part by part, then add the two differences.
	const FFloatType HighDiff = INVARIANT_SUB(Lhs.High, Rhs.High);
	const FFloatType LowDiff = INVARIANT_SUB(Lhs.Low, Rhs.Low);
	return INVARIANT_ADD(HighDiff, LowDiff);
#endif
}
|
|
|
|
// Double float - single float variant of the local subtract: Rhs has no Low part,
// so Lhs.Low is added to the High difference directly.
FFloatType DFFastLocalSubtractDemote(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High - Rhs;
#elif UE_DF_NO_FAST_MATH
	return DFSubtractDemote(Lhs, Rhs);
#else
	const FFloatType HighDiff = INVARIANT_SUB(Lhs.High, Rhs);
	return INVARIANT_ADD(HighDiff, Lhs.Low);
#endif
}

// Single float - double float variant: promotes Lhs and forwards to the double float overload.
FFloatType DFFastLocalSubtractDemote(FFloatType Lhs, FDFType Rhs)
{
	const FDFType PromotedLhs = DFPromote(Lhs);
	return DFFastLocalSubtractDemote(PromotedLhs, Rhs);
}
|
|
|
|
/********************/
|
|
/** MULTIPLICATION **/
|
|
/********************/
|
|
|
|
// Product of two single precision values as a double float, without using FMA.
// [4] Algorithm 'mul12' (Page 241)
FDFType DFTwoProductNoFMA(FFloatType Lhs, FFloatType Rhs)
{
	const FFloatType Constant = 4097; // = 2^(t - t/2) + 1, where t is the number of mantissa bits

	// Split Lhs into a head (LhsHead) and a tail (LhsTail)
	const FFloatType LhsScaled = INVARIANT_MUL(Lhs, Constant);
	const FFloatType LhsDelta = INVARIANT_SUB(Lhs, LhsScaled);
	const FFloatType LhsHead = INVARIANT_ADD(LhsDelta, LhsScaled);
	const FFloatType LhsTail = INVARIANT_SUB(Lhs, LhsHead);

	// Split Rhs the same way
	const FFloatType RhsScaled = INVARIANT_MUL(Rhs, Constant);
	const FFloatType RhsDelta = INVARIANT_SUB(Rhs, RhsScaled);
	const FFloatType RhsHead = INVARIANT_ADD(RhsDelta, RhsScaled);
	const FFloatType RhsTail = INVARIANT_SUB(Rhs, RhsHead);

	// Combine the partial products
	const FFloatType HeadProduct = INVARIANT_MUL(LhsHead, RhsHead);
	const FFloatType CrossProduct = INVARIANT_ADD(INVARIANT_MUL(LhsHead, RhsTail), INVARIANT_MUL(LhsTail, RhsHead));
	const FFloatType Product = INVARIANT_ADD(HeadProduct, CrossProduct);
	const FFloatType HeadResidual = INVARIANT_SUB(HeadProduct, Product);
	const FFloatType Err = INVARIANT_ADD(INVARIANT_ADD(HeadResidual, CrossProduct), INVARIANT_MUL(LhsTail, RhsTail));
	return DFFastTwoSum(Product, Err);
}
|
|
|
|
// Multiplies two single precision scalars into one double precision scalar. (AKA Fast2Mult)
// [1] Algorithm 3
FDFType DFTwoProduct(FFloatType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs * Rhs);
#elif !PLATFORM_SUPPORTS_FMA
	return DFTwoProductNoFMA(Lhs, Rhs);
#else
	const FFloatType Product = Lhs * Rhs;
	// Lhs * Rhs - Product evaluated through INVARIANT_FMA becomes the Low part
	const FFloatType Err = INVARIANT_FMA(Lhs, Rhs, -Product);
	return DFConstructor(Product, Err);
#endif
}

// Alias for DFTwoProduct
FDFType DFMultiply(FFloatType Lhs, FFloatType Rhs)
{
	return DFTwoProduct(Lhs, Rhs);
}
|
|
|
|
// Double float * double float.
// [1] Algorithm 11.
// If no FMA is available, mad() expansion makes this equivalent to [1] Algorithm 10
FDFType DFMultiply(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High * Rhs.High);
#else
	const FDFType HighProduct = DFTwoProduct(Lhs.High, Rhs.High);

	// Accumulate the cross terms (High*Low and Low*High) and the Low part of the High product
	const FFloatType CrossHL = INVARIANT_MUL(Lhs.High, Rhs.Low);
	const FFloatType CrossSum = INVARIANT_FMA(Lhs.Low, Rhs.High, CrossHL);
	const FFloatType Tail = INVARIANT_ADD(CrossSum, HighProduct.Low);

	return DFFastTwoSum(HighProduct.High, Tail);
#endif
}
|
|
|
|
// Double float * single float.
// [1] Algorithm 9
// If no FMA is available, mad() expansion makes this equivalent to [1] Algorithm 8
FDFType DFMultiply(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High * Rhs);
#else
	const FDFType HighProduct = DFTwoProduct(Lhs.High, Rhs);
	const FFloatType Tail = INVARIANT_FMA(Lhs.Low, Rhs, HighProduct.Low);
	return DFFastTwoSum(HighProduct.High, Tail);
#endif
}

// Single float * double float: forwards with the operands swapped.
FDFType DFMultiply(FFloatType Lhs, FDFType Rhs)
{
	return DFMultiply(Rhs, Lhs);
}
|
|
|
|
// Double float * double float, returned as a single precision float.
// Same steps as DFMultiply(FDFType, FDFType), with the final rebalance replaced by one add.
FFloatType DFMultiplyDemote(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High * Rhs.High;
#else
	const FDFType HighProduct = DFTwoProduct(Lhs.High, Rhs.High);
	const FFloatType CrossHL = INVARIANT_MUL(Lhs.High, Rhs.Low);
	const FFloatType CrossSum = INVARIANT_FMA(Lhs.Low, Rhs.High, CrossHL);
	const FFloatType Tail = INVARIANT_ADD(CrossSum, HighProduct.Low);
	return INVARIANT_ADD(HighProduct.High, Tail);
#endif
}
|
|
|
|
// Double float * single float, returned as a single precision float.
FFloatType DFMultiplyDemote(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High * Rhs;
#else
	const FDFType HighProduct = DFTwoProduct(Lhs.High, Rhs);
	const FFloatType Tail = INVARIANT_FMA(Lhs.Low, Rhs, HighProduct.Low);
	return INVARIANT_ADD(HighProduct.High, Tail);
#endif
}

// Single float * double float: forwards with the operands swapped.
FFloatType DFMultiplyDemote(FFloatType Lhs, FDFType Rhs)
{
	return DFMultiplyDemote(Rhs, Lhs);
}

// Single float * single float: a plain fp32 multiply.
FFloatType DFMultiplyDemote(FFloatType Lhs, FFloatType Rhs)
{
	return Lhs * Rhs;
}
|
|
|
|
// Fast multiplication that assumes the factor (Rhs) is a power of two.
// If this assumption is valid, no precision is lost.
// Each part is simply scaled by Rhs; no rebalancing is done.
FDFType DFMultiplyByPow2(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High * Rhs);
#else
	const FFloatType ScaledHigh = Lhs.High * Rhs;
	const FFloatType ScaledLow = Lhs.Low * Rhs;
	return DFConstructor(ScaledHigh, ScaledLow);
#endif
}
|
|
|
|
// Square of a double float: the value multiplied by itself.
FDFType DFSqr(FDFType V)
{
	return DFMultiply(V, V);
}

// Square of a single float, returned as a double float.
FDFType DFSqr(FFloatType V)
{
	return DFTwoProduct(V, V);
}
|
|
|
|
/**************/
|
|
/** DIVISION **/
|
|
/**************/
|
|
|
|
// Forward declaration: DFDivide falls back to DFFastDivide when FMA is not supported.
FDFType DFFastDivide(FDFType Lhs, FDFType Rhs);

// Double float / double float.
// [1] Algorithm 18
FDFType DFDivide(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High / Rhs.High);
#elif !PLATFORM_SUPPORTS_FMA
	return DFFastDivide(Lhs, Rhs);
#else
	// fp32 estimate of 1 / Rhs
	const FFloatType RcpEstimate = INVARIANT_DIV(1.0, Rhs.High);

	// Residual 1 - Rhs * RcpEstimate, built from the High and Low parts of Rhs
	const FFloatType ResidualHigh = INVARIANT_FMA(-Rhs.High, RcpEstimate, (FFloatType)1);
	const FFloatType ResidualLow = INVARIANT_MUL(-Rhs.Low, RcpEstimate);
	const FDFType Residual = DFFastTwoSum(ResidualHigh, ResidualLow);

	// Refine the estimate, then multiply the numerator by the refined reciprocal
	const FDFType Correction = DFMultiply(Residual, RcpEstimate);
	const FDFType Rcp = DFAdd(Correction, RcpEstimate);
	return DFMultiply(Lhs, Rcp);
#endif
}
|
|
|
|
// Double float / single float.
// [1] Algorithm 15
FDFType DFDivide(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High / Rhs);
#else
	// fp32 quotient of the High part
	const FFloatType QuotientHigh = INVARIANT_DIV(Lhs.High, Rhs);

	// Remainder Lhs - QuotientHigh * Rhs, accumulated part by part
	const FDFType Back = DFTwoProduct(QuotientHigh, Rhs);
	const FFloatType RemHigh = INVARIANT_SUB(Lhs.High, Back.High);
	const FFloatType RemMid = INVARIANT_SUB(RemHigh, Back.Low);
	const FFloatType Remainder = INVARIANT_ADD(RemMid, Lhs.Low);

	const FFloatType QuotientLow = INVARIANT_DIV(Remainder, Rhs);
	return DFFastTwoSum(QuotientHigh, QuotientLow);
#endif
}
|
|
|
|
// Single float / single float, returned as a double float.
// Promotes the numerator and forwards to DFDivide(FDFType, FFloatType).
FDFType DFDivide(FFloatType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs / Rhs);
#else
	const FDFType Numerator = DFPromote(Lhs);
	return DFDivide(Numerator, Rhs);
#endif
}

// Single float / double float.
// Promotes the numerator and forwards to DFDivide(FDFType, FDFType).
FDFType DFDivide(FFloatType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs / Rhs.High);
#else
	const FDFType Numerator = DFPromote(Lhs);
	return DFDivide(Numerator, Rhs);
#endif
}
|
|
|
|
// Double float / double float.
// Less precise than 18, but almost half the ops
// [1] Algorithm 17
FDFType DFFastDivide(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High / Rhs.High);
#elif UE_DF_NO_FAST_MATH && PLATFORM_SUPPORTS_FMA
	// Only defer to DFDivide when FMA is available: without FMA, DFDivide(FDFType, FDFType)
	// itself forwards to this function, so deferring would make the two call each other.
	return DFDivide(Lhs, Rhs);
#else
	// fp32 quotient of the High parts
	const FFloatType Th = INVARIANT_DIV(Lhs.High, Rhs.High);

	// Remainder Lhs - Rhs * Th, evaluated part by part
	const FDFType R = DFMultiply(Rhs, Th);
	const FFloatType Ph = INVARIANT_SUB(Lhs.High, R.High);
	const FFloatType Dl = INVARIANT_SUB(Lhs.Low, R.Low);
	const FFloatType D = INVARIANT_ADD(Ph, Dl);

	// Correction term for the quotient
	const FFloatType Tl = INVARIANT_DIV(D, Rhs.High);
	return DFFastTwoSum(Th, Tl);
#endif
}
|
|
|
|
// 'Fast' double float / single float.
FDFType DFFastDivide(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High / Rhs);
#elif UE_DF_NO_FAST_MATH && PLATFORM_SUPPORTS_FMA
	return DFDivide(Lhs, Rhs);
#else
	// fp32 quotient of the High part
	const FFloatType QuotientHigh = INVARIANT_DIV(Lhs.High, Rhs);

	// Remainder Lhs - Rhs * QuotientHigh, evaluated part by part
	const FDFType Back = DFTwoProduct(Rhs, QuotientHigh);
	const FFloatType RemHigh = INVARIANT_SUB(Lhs.High, Back.High);
	const FFloatType RemLow = INVARIANT_SUB(Lhs.Low, Back.Low);
	const FFloatType Remainder = INVARIANT_ADD(RemHigh, RemLow);

	const FFloatType QuotientLow = INVARIANT_DIV(Remainder, Rhs);
	return DFFastTwoSum(QuotientHigh, QuotientLow);
#endif
}

// Single float numerators are promoted and forwarded to the double float numerator overloads.
FDFType DFFastDivide(FFloatType Lhs, FDFType Rhs)
{
	return DFFastDivide(DFPromote(Lhs), Rhs);
}

FDFType DFFastDivide(FFloatType Lhs, FFloatType Rhs)
{
	return DFFastDivide(DFPromote(Lhs), Rhs);
}
|
|
|
|
// 'Fast' double float / double float, returned as a single precision float.
// Same steps as DFFastDivide(FDFType, FDFType), with the final rebalance replaced by a plain add.
FFloatType DFFastDivideDemote(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High / Rhs.High;
#elif UE_DF_NO_FAST_MATH
	return DFDemote(DFDivide(Lhs, Rhs));
#else
	const FFloatType QuotientHigh = INVARIANT_DIV(Lhs.High, Rhs.High);

	// Remainder Lhs - Rhs * QuotientHigh, evaluated part by part
	const FDFType Back = DFMultiply(Rhs, QuotientHigh);
	const FFloatType RemHigh = INVARIANT_SUB(Lhs.High, Back.High);
	const FFloatType RemLow = INVARIANT_SUB(Lhs.Low, Back.Low);
	const FFloatType Remainder = INVARIANT_ADD(RemHigh, RemLow);

	const FFloatType QuotientLow = INVARIANT_DIV(Remainder, Rhs.High);
	return QuotientHigh + QuotientLow;
#endif
}
|
|
|
|
// 'Fast' double float / single float, returned as a single precision float.
FFloatType DFFastDivideDemote(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High / Rhs;
#elif UE_DF_NO_FAST_MATH
	return DFDemote(DFDivide(Lhs, Rhs));
#else
	const FFloatType QuotientHigh = INVARIANT_DIV(Lhs.High, Rhs);

	// Remainder Lhs - Rhs * QuotientHigh, evaluated part by part
	const FDFType Back = DFTwoProduct(Rhs, QuotientHigh);
	const FFloatType RemHigh = INVARIANT_SUB(Lhs.High, Back.High);
	const FFloatType RemLow = INVARIANT_SUB(Lhs.Low, Back.Low);
	const FFloatType Remainder = INVARIANT_ADD(RemHigh, RemLow);

	const FFloatType QuotientLow = INVARIANT_DIV(Remainder, Rhs);
	return QuotientHigh + QuotientLow;
#endif
}

// Single float / double float: promotes the numerator and forwards.
FFloatType DFFastDivideDemote(FFloatType Lhs, FDFType Rhs)
{
	return DFFastDivideDemote(DFPromote(Lhs), Rhs);
}

// Single float / single float: a plain fp32 divide.
FFloatType DFFastDivideDemote(FFloatType Lhs, FFloatType Rhs)
{
	return Lhs / Rhs;
}
|
|
|
|
// Fast division that assumes the divisor (Rhs) is a power of two.
// If this assumption is valid, no precision is lost.
// Each part is simply divided by Rhs; no rebalancing is done.
FDFType DFDivideByPow2(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(Lhs.High / Rhs);
#else
	const FFloatType ScaledHigh = Lhs.High / Rhs;
	const FFloatType ScaledLow = Lhs.Low / Rhs;
	return DFConstructor(ScaledHigh, ScaledLow);
#endif
}
|
|
|
|
// Reciprocal as a double float, computed as DFFastDivide(1, V).
FDFType DFRcp(FDFType V)
{
	const FFloatType One = (FFloatType)1.0;
	return DFFastDivide(One, V);
}

// Reciprocal as a single float; only the High part of V is used.
FFloatType DFRcpDemote(FDFType V)
{
	return rcp(V.High);
}
|
|
|
|
/****************/
|
|
/** COMPARISON **/
|
|
/****************/
|
|
|
|
// Approximate equality: true where the magnitude of (Lhs - Rhs), demoted to single precision,
// is below Threshold.
// abs() keeps the test symmetric; comparing the signed difference would report
// "equal" whenever Lhs is smaller than Rhs, no matter how far apart they are.
FBoolType DFEqualsApprox(FDFType Lhs, FDFType Rhs, float Threshold)
{
	return abs(DFSubtractDemote(Lhs, Rhs)) < Threshold;
}

FBoolType DFEqualsApprox(FDFType Lhs, FFloatType Rhs, float Threshold)
{
	return abs(DFSubtractDemote(Lhs, Rhs)) < Threshold;
}

FBoolType DFEqualsApprox(FFloatType Lhs, FDFType Rhs, float Threshold)
{
	return abs(DFSubtractDemote(Lhs, Rhs)) < Threshold;
}
|
|
|
|
// Exact equality: both the High and the Low parts must match.
FBoolType DFEquals(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High == Rhs.High;
#elif COMPILER_SUPPORTS_HLSL2021
	return and(Lhs.High == Rhs.High, Lhs.Low == Rhs.Low);
#else
	return Lhs.High == Rhs.High && Lhs.Low == Rhs.Low;
#endif
}

// Mixed overloads: the single float operand is promoted, then compared as above.
FBoolType DFEquals(FDFType Lhs, FFloatType Rhs)
{
	const FDFType PromotedRhs = DFPromote(Rhs);
	return DFEquals(Lhs, PromotedRhs);
}

FBoolType DFEquals(FFloatType Lhs, FDFType Rhs)
{
	const FDFType PromotedLhs = DFPromote(Lhs);
	return DFEquals(PromotedLhs, Rhs);
}
|
|
|
|
// Selects between Lhs and Rhs via select(S, ...), applied to the High and Low parts separately.
FDFType DFSelect(FBoolType S, FDFType Lhs, FDFType Rhs)
{
	const FFloatType PickedHigh = select(S, Lhs.High, Rhs.High);
	const FFloatType PickedLow = select(S, Lhs.Low, Rhs.Low);
	return DFConstructor(PickedHigh, PickedLow);
}

// A single float operand contributes itself as the High part and zero as the Low part.
FDFType DFSelect(FBoolType S, FDFType Lhs, FFloatType Rhs)
{
	const FFloatType PickedHigh = select(S, Lhs.High, Rhs);
	const FFloatType PickedLow = select(S, Lhs.Low, (FFloatType)0);
	return DFConstructor(PickedHigh, PickedLow);
}

FDFType DFSelect(FBoolType S, FFloatType Lhs, FDFType Rhs)
{
	const FFloatType PickedHigh = select(S, Lhs, Rhs.High);
	const FFloatType PickedLow = select(S, (FFloatType)0, Rhs.Low);
	return DFConstructor(PickedHigh, PickedLow);
}
|
|
|
|
// Lhs > Rhs: decided by the High parts, with the Low parts breaking ties when the High parts are equal.
FBoolType DFGreater(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High > Rhs.High;
#elif COMPILER_SUPPORTS_HLSL2021
	return or(Lhs.High > Rhs.High, and(Lhs.High == Rhs.High, Lhs.Low > Rhs.Low));
#else
	return Lhs.High > Rhs.High || (Lhs.High == Rhs.High && Lhs.Low > Rhs.Low);
#endif
}
|
|
|
|
// Lhs < Rhs: decided by the High parts, with the Low parts breaking ties when the High parts are equal.
FBoolType DFLess(FDFType Lhs, FDFType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High < Rhs.High;
#elif COMPILER_SUPPORTS_HLSL2021
	return or(Lhs.High < Rhs.High, and(Lhs.High == Rhs.High, Lhs.Low < Rhs.Low));
#else
	return Lhs.High < Rhs.High || (Lhs.High == Rhs.High && Lhs.Low < Rhs.Low);
#endif
}
|
|
|
|
// Lhs > Rhs for a single float Rhs: when Lhs.High equals Rhs, the sign of Lhs.Low decides.
FBoolType DFGreater(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High > Rhs;
#elif COMPILER_SUPPORTS_HLSL2021
	return or(Lhs.High > Rhs, and(Lhs.High == Rhs, Lhs.Low > 0.0));
#else
	return Lhs.High > Rhs || (Lhs.High == Rhs && Lhs.Low > 0.0);
#endif
}
|
|
|
|
// Lhs < Rhs for a single float Rhs: when Lhs.High equals Rhs, the sign of Lhs.Low decides.
FBoolType DFLess(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return Lhs.High < Rhs;
#elif COMPILER_SUPPORTS_HLSL2021
	return or(Lhs.High < Rhs, and(Lhs.High == Rhs, Lhs.Low < 0.0));
#else
	return Lhs.High < Rhs || (Lhs.High == Rhs && Lhs.Low < 0.0);
#endif
}
|
|
|
|
// Single float on the left: swap the operands and use the mirrored comparison.
FBoolType DFGreater(FFloatType Lhs, FDFType Rhs)
{
	return DFLess(Rhs, Lhs);
}

FBoolType DFLess(FFloatType Lhs, FDFType Rhs)
{
	return DFGreater(Rhs, Lhs);
}
|
|
|
|
// Lhs >= Rhs, expressed as the negation of DFLess.
FBoolType DFGreaterEqual(FDFType Lhs, FDFType Rhs)
{
	return !DFLess(Lhs, Rhs);
}

FBoolType DFGreaterEqual(FDFType Lhs, FFloatType Rhs)
{
	return !DFLess(Lhs, Rhs);
}

FBoolType DFGreaterEqual(FFloatType Lhs, FDFType Rhs)
{
	return !DFLess(Lhs, Rhs);
}
|
|
|
|
// Lhs <= Rhs, expressed as the negation of DFGreater.
FBoolType DFLessEqual(FDFType Lhs, FFloatType Rhs)
{
	return !DFGreater(Lhs, Rhs);
}

FBoolType DFLessEqual(FDFType Lhs, FDFType Rhs)
{
	return !DFGreater(Lhs, Rhs);
}

FBoolType DFLessEqual(FFloatType Lhs, FDFType Rhs)
{
	return !DFGreater(Lhs, Rhs);
}
|
|
|
|
// Minimum: Lhs where DFLess(Lhs, Rhs) holds, Rhs otherwise.
FDFType DFMin(FDFType Lhs, FDFType Rhs)
{
	const FBoolType bLhsIsLess = DFLess(Lhs, Rhs);
	return DFSelect(bLhsIsLess, Lhs, Rhs);
}

// Mixed overloads: the single float operand is promoted before selecting.
FDFType DFMin(FDFType Lhs, FFloatType Rhs)
{
	const FBoolType bLhsIsLess = DFLess(Lhs, Rhs);
	return DFSelect(bLhsIsLess, Lhs, DFPromote(Rhs));
}

FDFType DFMin(FFloatType Lhs, FDFType Rhs)
{
	const FBoolType bLhsIsLess = DFLess(Lhs, Rhs);
	return DFSelect(bLhsIsLess, DFPromote(Lhs), Rhs);
}
|
|
|
|
// Maximum: Rhs where DFLess(Lhs, Rhs) holds, Lhs otherwise.
FDFType DFMax(FDFType Lhs, FDFType Rhs)
{
	const FBoolType bLhsIsLess = DFLess(Lhs, Rhs);
	return DFSelect(bLhsIsLess, Rhs, Lhs);
}

// Mixed overloads: the single float operand is promoted before selecting.
FDFType DFMax(FDFType Lhs, FFloatType Rhs)
{
	const FBoolType bLhsIsLess = DFLess(Lhs, Rhs);
	return DFSelect(bLhsIsLess, DFPromote(Rhs), Lhs);
}

FDFType DFMax(FFloatType Lhs, FDFType Rhs)
{
	const FBoolType bLhsIsLess = DFLess(Lhs, Rhs);
	return DFSelect(bLhsIsLess, Rhs, DFPromote(Lhs));
}
|
|
|
|
/**********/
|
|
/** MISC **/
|
|
/**********/
|
|
|
|
// A double float value bundled with two single precision companions.
// NOTE(review): Ddx/Ddy presumably hold the screen-space derivatives of Value; confirm against users of this struct.
struct FDFTypeDeriv
{
	FDFType Value;   // the double float value itself
	FFloatType Ddx;  // single precision
	FFloatType Ddy;  // single precision
};
|
|
|
|
// Square root of a double float, returned as a double float.
// [2] Algorithm 8
FDFType DFSqrt(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(sqrt(V.High));
#else
	// fp32 root of the High part
	const FFloatType Sh = sqrt(V.High);
	// Residual V - Sh*Sh
	const FFloatType P1 = INVARIANT_FMA(-Sh, Sh, V.High); // Note: precision loss here if FMA is not supported
	const FFloatType P2 = INVARIANT_ADD(V.Low, P1);

	// Correction term P2 / (2 * Sh). When Sh is zero this is a division by zero (0/0 = NaN for V == 0),
	// so a zero correction is selected for those components instead.
	const FBoolType bZeroRoot = (Sh == (FFloatType)0);
	const FFloatType Sl = select(bZeroRoot, (FFloatType)0, INVARIANT_DIV(P2, (2.0 * Sh)));

	// equivalent to (remark 3.5)
	//precise FFloatType Tl = P2 / Sh;
	//precise FFloatType Zh = FMA((FFloatType)0.5, Tl, Sh);
	//precise FFloatType D = Zh - Sh;
	//precise FFloatType Zl = FMA((FFloatType)0.5, Tl, -D);
	//return DFConstructor(Zh, Zl);

	return DFFastTwoSum(Sh, Sl);
#endif
}
|
|
|
|
// Square root of a double float, returned as a single precision float.
// [2] Algorithm 9
FFloatType DFSqrtDemote(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return sqrt(V.High);
#else
	// fp32 root of the High part
	const FFloatType Sh = sqrt(V.High);
	// Residual V - Sh*Sh
	const FFloatType P1 = INVARIANT_FMA(-Sh, Sh, V.High); // Note: precision loss here if FMA is not supported
	const FFloatType P2 = INVARIANT_ADD(V.Low, P1);

	// Correction term P2 / (2 * Sh). When Sh is zero this is a division by zero (0/0 = NaN for V == 0),
	// so a zero correction is selected for those components instead.
	const FBoolType bZeroRoot = (Sh == (FFloatType)0);
	const FFloatType Sl = select(bZeroRoot, (FFloatType)0, INVARIANT_DIV(P2, (2.0 * Sh)));
	return Sh + Sl;
#endif
}
|
|
|
|
// Reciprocal square root as a double float, computed as 1 / DFSqrt(V).
FDFType DFRsqrt(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(rsqrt(V.High));
#else
	const FDFType Root = DFSqrt(V);
	return DFDivide((FFloatType)1.0, Root);
#endif
}

// Reciprocal square root as a single float; only the High part of V is used.
FFloatType DFRsqrtDemote(FDFType V)
{
	return rsqrt(V.High);
}
|
|
|
|
// Sine of a double float, returned as a single precision float.
FFloatType DFSin(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return sin(V.High);
#else
	FFloatType SinHigh, CosHigh;
	sincos(V.High, SinHigh, CosHigh);

	FFloatType SinLow, CosLow;
	sincos(V.Low, SinLow, CosLow);

	// trigonometric identity for sin(h+l)
	return SinHigh*CosLow + CosHigh*SinLow;
#endif
}
|
|
|
|
// Cosine of a double float, returned as a single precision float.
FFloatType DFCos(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return cos(V.High);
#else
	FFloatType SinHigh, CosHigh;
	sincos(V.High, SinHigh, CosHigh);

	FFloatType SinLow, CosLow;
	sincos(V.Low, SinLow, CosLow);

	// trigonometric identity for cos(h+l)
	return CosHigh*CosLow - SinHigh*SinLow;
#endif
}
|
|
|
|
// Sine and cosine of a double float in one call, both written as single precision floats.
void DFSinCos(FDFType V, out FFloatType Sin, out FFloatType Cos)
{
#if UE_DF_FORCE_FP32_OPS
	sincos(V.High, Sin, Cos);
#else
	FFloatType SinHigh, CosHigh;
	sincos(V.High, SinHigh, CosHigh);

	FFloatType SinLow, CosLow;
	sincos(V.Low, SinLow, CosLow);

	// trigonometric identities for sin(h+l) and cos(h+l)
	Sin = FMA(SinHigh, CosLow, CosHigh*SinLow);
	Cos = FMA(CosHigh, CosLow, -SinHigh*SinLow);
#endif
}
|
|
|
|
// Tangent of a double float, returned as a single precision float.
FFloatType DFTan(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return tan(V.High);
#else
	const FFloatType TanHigh = tan(V.High);
	const FFloatType TanLow = tan(V.Low);

	// trigonometric identity for tan(h+l)
	return (TanHigh + TanLow) / (1.0 - TanHigh * TanLow);
#endif
}
|
|
|
|
// Inverse trigonometric functions: the input is demoted to single precision first,
// then the fp32 intrinsic is applied.
FFloatType DFAsin(FDFType V)
{
	const FFloatType Demoted = DFDemote(V);
	return asin(Demoted);
}

FFloatType DFAcos(FDFType V)
{
	const FFloatType Demoted = DFDemote(V);
	return acos(Demoted);
}

FFloatType DFAtan(FDFType V)
{
	const FFloatType Demoted = DFDemote(V);
	return atan(Demoted);
}
|
|
|
|
// Splits V into integer and fractional parts.
// The integer part is written to Integer as a double float; the fractional part is returned as a single float.
FFloatType DFModf(FDFType V, out FDFType Integer)
{
#if UE_DF_FORCE_FP32_OPS
	Integer.Low = 0;
	return modf(V.High, Integer.High);
#else
	// Split each part on its own
	FFloatType WholeHigh;
	const FFloatType FractionHigh = modf(V.High, WholeHigh);
	FFloatType WholeLow;
	const FFloatType FractionLow = modf(V.Low, WholeLow);

	// The two fractions can add up to a whole number again, so split their sum as well
	FFloatType WholeCarry;
	const FFloatType Fraction = modf(FractionHigh + FractionLow, WholeCarry);

	Integer = DFTwoSum(WholeHigh, WholeLow + WholeCarry);
	return Fraction;
#endif
}
|
|
|
|
// Splits V into integer and fractional parts, both as single precision floats.
// The integer part is written to Integer; the fractional part is returned.
FFloatType DFModfDemote(FDFType V, out FFloatType Integer)
{
#if UE_DF_FORCE_FP32_OPS
	return modf(V.High, Integer);
#else
	// Split each part on its own
	FFloatType WholeHigh;
	const FFloatType FractionHigh = modf(V.High, WholeHigh);
	FFloatType WholeLow;
	const FFloatType FractionLow = modf(V.Low, WholeLow);

	// The two fractions can add up to a whole number again, so split their sum as well
	FFloatType WholeCarry;
	const FFloatType Fraction = modf(FractionHigh + FractionLow, WholeCarry);

	Integer = WholeHigh + WholeLow + WholeCarry;
	return Fraction;
#endif
}
|
|
|
|
// Ceiling of a double float, returned as a double float.
FDFType DFCeil(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(ceil(V.High));
#else
	FFloatType WholeHigh;
	const FFloatType FractionHigh = modf(V.High, WholeHigh);
	FFloatType WholeLow;
	const FFloatType FractionLow = modf(V.Low, WholeLow);

	// Round the combined fraction up and fold it into the low integer part
	const FFloatType AdjustedLow = WholeLow + ceil(FractionHigh + FractionLow);
	return DFTwoSum(WholeHigh, AdjustedLow);
#endif
}
|
|
|
|
// Floor of a double float, returned as a double float.
FDFType DFFloor(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(floor(V.High));
#else
	FFloatType WholeHigh;
	const FFloatType FractionHigh = modf(V.High, WholeHigh);
	FFloatType WholeLow;
	const FFloatType FractionLow = modf(V.Low, WholeLow);

	// Round the combined fraction down and fold it into the low integer part
	const FFloatType AdjustedLow = WholeLow + floor(FractionHigh + FractionLow);
	return DFTwoSum(WholeHigh, AdjustedLow);
#endif
}
|
|
|
|
// Rounds a double float: works on the sign-flipped magnitude, adds 0.5 to the combined fraction,
// floors it, then restores the sign.
FDFType DFRound(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(round(V.High));
#else
	const FFloatType ValueSign = DFSign(V);
	const FDFType Magnitude = DFConstructor(ValueSign*V.High, ValueSign*V.Low);

	FFloatType WholeHigh;
	const FFloatType FractionHigh = modf(Magnitude.High, WholeHigh);
	FFloatType WholeLow;
	const FFloatType FractionLow = modf(Magnitude.Low, WholeLow);

	const FFloatType AdjustedLow = WholeLow + floor(FractionHigh + FractionLow + 0.5);
	return DFTwoSum(ValueSign * WholeHigh, ValueSign * AdjustedLow);
#endif
}
|
|
|
|
// Truncates a double float: floors the sign-flipped magnitude, then restores the sign.
FDFType DFTrunc(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(trunc(V.High));
#else
	const FFloatType ValueSign = DFSign(V);
	const FDFType Magnitude = DFConstructor(ValueSign*V.High, ValueSign*V.Low);

	FFloatType WholeHigh;
	const FFloatType FractionHigh = modf(Magnitude.High, WholeHigh);
	FFloatType WholeLow;
	const FFloatType FractionLow = modf(Magnitude.Low, WholeLow);

	const FFloatType AdjustedLow = WholeLow + floor(FractionHigh + FractionLow);
	return DFTwoSum(ValueSign * WholeHigh, ValueSign * AdjustedLow);
#endif
}
|
|
|
|
// Fractional part as a double float, computed as V - DFFloor(V).
FDFType DFFrac(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(frac(V.High));
#else
	const FDFType Floored = DFFloor(V);
	return DFSubtract(V, Floored);
#endif
}
|
|
|
|
// Fractional part as a single precision float.
// Technically, this function has very low precision due to discontinuity+rounding
// (e.g. 0.99.. ~= 1.0 => frac(0.99..) = frac(1.0) = 0.0)
// but this is irrelevant in practical cases
// TODO: test negative values
FFloatType DFFracDemote(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return frac(V.High);
#else
	// frac of each part, then frac of their sum
	const FFloatType FractionSum = frac(V.High) + frac(V.Low);
	return frac(FractionSum);
#endif
}
|
|
|
|
// Similar to HLSL fmod, this is equivalent to `lhs - rhs * trunc(lhs / rhs)`
// so fmod(-0.1, 1.0) returns -0.1, not 0.9 (as mod does in GLSL, which is the true mathematical modulo operator)
FDFType DFFmod(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(fmod(Lhs.High, Rhs));
#else
	const FDFType Quotient = DFDivide(Lhs, Rhs);
	const FDFType WholeQuotient = DFTrunc(Quotient);
	const FDFType WholeMultiple = DFMultiply(WholeQuotient, Rhs);
	return DFSubtract(Lhs, WholeMultiple);
#endif
}
|
|
|
|
// Same as DFFmod, but the result is returned as a single precision float.
FFloatType DFFmodDemote(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return fmod(Lhs.High, Rhs);
#else
	//DF_TODO: optimize. Is equal to frac(Lhs/Rhs)*Rhs?
	//not equal to fmod(fmod(high)+fmod(low)) due to how fmod handles negative values
	const FDFType Quotient = DFDivide(Lhs, Rhs);
	const FDFType WholeQuotient = DFTrunc(Quotient);
	const FDFType WholeMultiple = DFMultiply(WholeQuotient, Rhs);
	return DFSubtractDemote(Lhs, WholeMultiple);
#endif
}
|
|
|
|
// Modulo by Rhs as a single precision float, computed as frac(Lhs / Rhs) * Rhs.
// The division goes through DFDivideByPow2, which assumes Rhs is a power of two.
FFloatType DFFmodByPow2Demote(FDFType Lhs, FFloatType Rhs)
{
#if UE_DF_FORCE_FP32_OPS
	return fmod(Lhs.High, Rhs);
#else
	const FDFType Scaled = DFDivideByPow2(Lhs, Rhs);
	return DFFracDemote(Scaled) * Rhs;
#endif
}
|
|
|
|
// Linear interpolation between two double floats: the High and Low parts are lerped separately
// and the two results are summed into a double float.
FDFType DFLerp(FDFType Lhs, FDFType Rhs, FFloatType S)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(lerp(Lhs.High, Rhs.High, S));
#else
	//DF_TODO: precision issues
	const FFloatType LerpedHigh = lerp(Lhs.High, Rhs.High, S);
	const FFloatType LerpedLow = lerp(Lhs.Low, Rhs.Low, S);
	return DFTwoSum(LerpedHigh, LerpedLow);
#endif
}
|
|
|
|
// Clamps a double float to [0, 1]: first against the upper bound, then against the lower bound.
FDFType DFSaturate(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(saturate(V.High));
#else
	const FDFType UpperClamped = DFSelect(DFLess(V, 1.0), V, 1.0);
	return DFSelect(DFLess(UpperClamped, 0.0), 0.0, UpperClamped);
#endif
}

// Clamp to [0, 1] as a single float: demote first, then use the fp32 saturate.
FFloatType DFSaturateDemote(FDFType V)
{
	const FFloatType Demoted = DFDemote(V);
	return saturate(Demoted);
}
|
|
|
|
// Smoothstep between the edges Lhs and Rhs evaluated at S, in double float arithmetic:
// T = saturate((S - Lhs) / (Rhs - Lhs)), result = T^2 * (3 - 2T).
FDFType DFSmoothStep(FDFType Lhs, FDFType Rhs, FDFType S)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(smoothstep(Lhs.High, Rhs.High, S.High));
#else
	const FDFType Offset = DFSubtract(S, Lhs);
	const FDFType Range = DFSubtract(Rhs, Lhs);
	const FDFType T = DFSaturate(DFDivide(Offset, Range));

	const FDFType Blend = DFSubtract(3.0f, DFMultiplyByPow2(T, 2.0f));
	return DFMultiply(DFSqr(T), Blend);
#endif
}
|
|
// Smoothstep between the edges Lhs and Rhs evaluated at S, returned as a single float.
// The interpolant is computed in double float arithmetic, demoted, and the polynomial is evaluated in fp32.
FFloatType DFSmoothStepDemote(FDFType Lhs, FDFType Rhs, FDFType S)
{
#if UE_DF_FORCE_FP32_OPS
	return smoothstep(Lhs.High, Rhs.High, S.High);
#else
	const FDFType Offset = DFSubtract(S, Lhs);
	const FDFType Range = DFSubtract(Rhs, Lhs);
	const FFloatType T = DFSaturateDemote(DFDivide(Offset, Range));
	return T*T*(3.0f - (2.0f*T));
#endif
}
|
|
|
|
// Step function: 1 where Rhs >= Lhs, 0 otherwise.
FFloatType DFStep(FDFType Lhs, FDFType Rhs)
{
	const FBoolType bReachedEdge = DFGreaterEqual(Rhs, Lhs);
	return select(bReachedEdge, (FFloatType)1.0f, (FFloatType)0.0f);
}

FFloatType DFStep(FDFType Lhs, FFloatType Rhs)
{
	const FBoolType bReachedEdge = DFGreaterEqual(Rhs, Lhs);
	return select(bReachedEdge, (FFloatType)1.0f, (FFloatType)0.0f);
}

FFloatType DFStep(FFloatType Lhs, FDFType Rhs)
{
	const FBoolType bReachedEdge = DFGreaterEqual(Rhs, Lhs);
	return select(bReachedEdge, (FFloatType)1.0f, (FFloatType)0.0f);
}
|
|
|
|
// No ddxy inside ray tracing shaders
|
|
#if RAYHITGROUPSHADER || RAYMISSHADER || RAYCALLABLESHADER || USE_FORCE_TEXTURE_MIP
|
|
// Stub derivatives for this preprocessor branch: every variant returns zero.
FDFType DFDdx(FDFType V)
{
	return (FDFType)0;
}

FDFType DFDdy(FDFType V)
{
	return (FDFType)0;
}

FFloatType DFDdxDemote(FDFType V)
{
	return (FFloatType)0;
}

FFloatType DFDdyDemote(FDFType V)
{
	return (FFloatType)0;
}
|
|
#else
|
|
|
|
// Derivative along x as a double float: ddx of each part, summed with DFTwoSum.
FDFType DFDdx(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(ddx(V.High));
#else
	const FFloatType DerivHigh = ddx(V.High);
	const FFloatType DerivLow = ddx(V.Low);
	return DFTwoSum(DerivHigh, DerivLow);
#endif
}

// Derivative along y as a double float: ddy of each part, summed with DFTwoSum.
FDFType DFDdy(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return DFPromote(ddy(V.High));
#else
	const FFloatType DerivHigh = ddy(V.High);
	const FFloatType DerivLow = ddy(V.Low);
	return DFTwoSum(DerivHigh, DerivLow);
#endif
}
|
|
|
|
// Derivative along x as a single float: ddx of each part, added in fp32.
FFloatType DFDdxDemote(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return ddx(V.High);
#else
	const FFloatType DerivHigh = ddx(V.High);
	const FFloatType DerivLow = ddx(V.Low);
	return DerivHigh + DerivLow;
#endif
}

// Derivative along y as a single float: ddy of each part, added in fp32.
FFloatType DFDdyDemote(FDFType V)
{
#if UE_DF_FORCE_FP32_OPS
	return ddy(V.High);
#else
	const FFloatType DerivHigh = ddy(V.High);
	const FFloatType DerivLow = ddy(V.Low);
	return DerivHigh + DerivLow;
#endif
}
|
|
#endif
|