// Copyright Epic Games, Inc. All Rights Reserved. #include "trimdtests/Defs.h" #include "trimd/TRiMD.h" using T128Types = ::testing::Types< trimd::scalar::F128 #ifdef TRIMD_ENABLE_SSE , trimd::sse::F128 #endif // TRIMD_ENABLE_SSE #ifdef TRIMD_ENABLE_NEON , trimd::neon::F128 #endif // TRIMD_ENABLE_NEON >; #ifdef TRIMD_ENABLE_SSE bool equal(const trimd::sse::F128& lhs, const trimd::sse::F128& rhs) { return (std::memcmp(&lhs.data, &rhs.data, sizeof(lhs.data)) == 0); } #endif // TRIMD_ENABLE_SSE #ifdef TRIMD_ENABLE_NEON bool equal(const trimd::neon::F128& lhs, const trimd::neon::F128& rhs) { return (std::memcmp(&lhs.data, &rhs.data, sizeof(lhs.data)) == 0); } #endif // TRIMD_ENABLE_NEON template static TF128 frombits(uint32_t bits0, uint32_t bits1, uint32_t bits2, uint32_t bits3) { return TF128{ trimd::bitcast(bits0), trimd::bitcast(bits1), trimd::bitcast(bits2), trimd::bitcast(bits3) }; } static bool equal(const trimd::scalar::F128& lhs, const trimd::scalar::F128& rhs) { return (std::memcmp(lhs.data.data(), rhs.data.data(), sizeof(float) * trimd::scalar::F128::size()) == 0); } template class T128Test : public ::testing::Test { protected: using T128 = T; }; TYPED_TEST_SUITE(T128Test, T128Types, ); TYPED_TEST(T128Test, CheckSize) { ASSERT_EQ(TestFixture::T128::size(), 4ul); } TYPED_TEST(T128Test, Equality) { using F128 = typename TestFixture::T128; F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; F128 v3{1.5f, 2.0f, 3.0f, 4.0f}; F128 v4{1.0f, 2.5f, 3.0f, 4.0f}; F128 v5{1.0f, 2.0f, 3.5f, 4.0f}; F128 v6{1.0f, 2.0f, 3.0f, 4.5f}; F128 m12 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m13 = frombits(0x00000000u, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m14 = frombits(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m15 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu); F128 m16 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x00000000u); ASSERT_TRUE(equal(v1 == v2, m12)); ASSERT_TRUE(equal(v1 == v3, m13)); ASSERT_TRUE(equal(v1 == v4, m14)); ASSERT_TRUE(equal(v1 == v5, m15)); ASSERT_TRUE(equal(v1 == v6, m16)); } TYPED_TEST(T128Test, Inequality) { using F128 = typename TestFixture::T128; F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; F128 v3{1.5f, 2.0f, 3.0f, 4.0f}; F128 v4{1.0f, 2.5f, 3.0f, 4.0f}; F128 v5{1.0f, 2.0f, 3.5f, 4.0f}; F128 v6{1.0f, 2.0f, 3.0f, 4.5f}; F128 m12 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); F128 m13 = frombits(0xFFFFFFFFu, 0x00000000u, 0x00000000u, 0x00000000u); F128 m14 = frombits(0x00000000u, 0xFFFFFFFFu, 0x00000000u, 0x00000000u); F128 m15 = frombits(0x00000000u, 0x00000000u, 0xFFFFFFFFu, 0x00000000u); F128 m16 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0xFFFFFFFFu); ASSERT_TRUE(equal(v1 != v2, m12)); ASSERT_TRUE(equal(v1 != v3, m13)); ASSERT_TRUE(equal(v1 != v4, m14)); ASSERT_TRUE(equal(v1 != v5, m15)); ASSERT_TRUE(equal(v1 != v6, m16)); } TYPED_TEST(T128Test, LessThan) { using F128 = typename TestFixture::T128; F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; F128 v3{2.0f, 3.0f, 4.0f, 5.0f}; F128 v4{0.5f, 1.5f, 2.5f, 3.5f}; F128 m12 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); F128 m13 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m14 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); ASSERT_TRUE(equal(v1 < v2, m12)); ASSERT_TRUE(equal(v1 < v3, m13)); ASSERT_TRUE(equal(v1 < v4, m14)); } TYPED_TEST(T128Test, LessThanOrEqual) { using F128 = typename TestFixture::T128; F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; F128 v3{2.0f, 3.0f, 4.0f, 5.0f}; F128 v4{0.5f, 1.5f, 2.5f, 3.5f}; F128 m12 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m13 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m14 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); ASSERT_TRUE(equal(v1 <= v2, m12)); ASSERT_TRUE(equal(v1 <= v3, m13)); ASSERT_TRUE(equal(v1 <= v4, m14)); } TYPED_TEST(T128Test, GreaterThan) { using F128 = typename TestFixture::T128; F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; F128 v3{2.0f, 3.0f, 4.0f, 5.0f}; F128 v4{0.5f, 1.5f, 2.5f, 3.5f}; F128 m12 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); F128 m13 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); F128 m14 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); ASSERT_TRUE(equal(v1 > v2, m12)); ASSERT_TRUE(equal(v1 > v3, m13)); ASSERT_TRUE(equal(v1 > v4, m14)); } TYPED_TEST(T128Test, GreaterThanOrEqual) { using F128 = typename TestFixture::T128; F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; F128 v3{2.0f, 3.0f, 4.0f, 5.0f}; F128 v4{0.5f, 1.5f, 2.5f, 3.5f}; F128 m12 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); F128 m13 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); F128 m14 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); ASSERT_TRUE(equal(v1 >= v2, m12)); ASSERT_TRUE(equal(v1 >= v3, m13)); ASSERT_TRUE(equal(v1 >= v4, m14)); } TYPED_TEST(T128Test, BitwiseAND) { using F128 = typename TestFixture::T128; F128 v{1.0f, 2.0f, 3.0f, 4.0f}; F128 m1 = frombits(0xFFFFFFFFu, 0x00000000u, 0x00000000u, 0x00000000u); F128 m2 = frombits(0x00000000u, 0xFFFFFFFFu, 0x00000000u, 0x00000000u); F128 m3 = frombits(0x00000000u, 0x00000000u, 0xFFFFFFFFu, 0x00000000u); F128 m4 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0xFFFFFFFFu); F128 vm1{1.0f, 0.0f, 0.0f, 0.0f}; F128 vm2{0.0f, 2.0f, 0.0f, 0.0f}; F128 vm3{0.0f, 0.0f, 3.0f, 0.0f}; F128 vm4{0.0f, 0.0f, 0.0f, 4.0f}; ASSERT_TRUE(equal(v & m1, vm1)); ASSERT_TRUE(equal(v & m2, vm2)); ASSERT_TRUE(equal(v & m3, vm3)); ASSERT_TRUE(equal(v & m4, vm4)); } TYPED_TEST(T128Test, BitwiseOR) { using F128 = typename TestFixture::T128; F128 v1{0.0f, 2.0f, 0.0f, 4.0f}; F128 v2{1.0f, 0.0f, 3.0f, 0.0f}; F128 v12{1.0f, 2.0f, 3.0f, 4.0f}; ASSERT_TRUE(equal(v1 | v2, v12)); } TYPED_TEST(T128Test, BitwiseXOR) { using F128 = typename TestFixture::T128; F128 v1{0.0f, 2.0f, 0.0f, 4.0f}; F128 v2{0.0f, 0.0f, 3.0f, 4.0f}; F128 v12{0.0f, 2.0f, 3.0f, 0.0f}; ASSERT_TRUE(equal(v1 ^ v2, v12)); } TYPED_TEST(T128Test, BitwiseNOT) { using F128 = typename TestFixture::T128; F128 v1 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); F128 v2 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); ASSERT_TRUE(equal(~v1, v2)); ASSERT_TRUE(equal(~v2, v1)); } TYPED_TEST(T128Test, ConstructFromArgs) { typename TestFixture::T128 v{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 expected{1.0f, 2.0f, 3.0f, 4.0f}; ASSERT_TRUE(equal(v, expected)); } TYPED_TEST(T128Test, ConstructFromSingleValue) { typename TestFixture::T128 v{42.0f}; typename TestFixture::T128 expected{42.0f, 42.0f, 42.0f, 42.0f}; ASSERT_TRUE(equal(v, expected)); } TYPED_TEST(T128Test, FromAlignedSource) { alignas(TestFixture::T128::alignment()) const float expected[] = {1.0f, 2.0f, 3.0f, 4.0f}; auto v = TestFixture::T128::fromAlignedSource(expected); alignas(TestFixture::T128::alignment()) float result[TestFixture::T128::size()]; v.alignedStore(result); ASSERT_ELEMENTS_EQ(result, expected, TestFixture::T128::size()); } TYPED_TEST(T128Test, AlignedLoadStore) { alignas(TestFixture::T128::alignment()) const float expected[] = {1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v; v.alignedLoad(expected); alignas(TestFixture::T128::alignment()) float result[TestFixture::T128::size()]; v.alignedStore(result); ASSERT_ELEMENTS_EQ(result, expected, TestFixture::T128::size()); } TYPED_TEST(T128Test, FromUnalignedSource) { const float expected[] = {1.0f, 2.0f, 3.0f, 4.0f}; auto v = TestFixture::T128::fromUnalignedSource(expected); float result[TestFixture::T128::size()]; v.unalignedStore(result); ASSERT_ELEMENTS_EQ(result, expected, TestFixture::T128::size()); } TYPED_TEST(T128Test, UnalignedLoadStore) { const float expected[] = {1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v; v.unalignedLoad(expected); float result[TestFixture::T128::size()]; v.unalignedStore(result); ASSERT_ELEMENTS_EQ(result, expected, TestFixture::T128::size()); } TYPED_TEST(T128Test, LoadSingleValue) { const float source[] = {42.0f, 43.0f, 44.0f, 45.0f}; auto v = TestFixture::T128::loadSingleValue(source); typename TestFixture::T128 expected{42.0f, 0.0f, 0.0f, 0.0f}; ASSERT_TRUE(equal(v, expected)); } TYPED_TEST(T128Test, Sum) { typename TestFixture::T128 v{1.0f, 2.0f, 3.0f, 4.0f}; ASSERT_EQ(v.sum(), 10.0f); } TYPED_TEST(T128Test, CompoundAssignmentAdd) { typename TestFixture::T128 v1{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v2{3.0f, 4.0f, 5.0f, 6.0f}; typename TestFixture::T128 expected{4.0f, 6.0f, 8.0f, 10.0f}; v1 += v2; ASSERT_TRUE(equal(v1, expected)); } TYPED_TEST(T128Test, CompoundAssignmentSub) { typename TestFixture::T128 v1{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v2{3.0f, 4.0f, 5.0f, 6.0f}; typename TestFixture::T128 expected{-2.0f, -2.0f, -2.0f, -2.0f}; v1 -= v2; ASSERT_TRUE(equal(v1, expected)); } TYPED_TEST(T128Test, CompoundAssignmentMul) { typename TestFixture::T128 v1{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v2{3.0f, 4.0f, 5.0f, 6.0f}; typename TestFixture::T128 expected{3.0f, 8.0f, 15.0f, 24.0f}; v1 *= v2; ASSERT_TRUE(equal(v1, expected)); } TYPED_TEST(T128Test, CompoundAssignmentDiv) { typename TestFixture::T128 v1{4.0f, 3.0f, 9.0f, 12.0f}; typename TestFixture::T128 v2{1.0f, 2.0f, 3.0f, 3.0f}; float expected[TestFixture::T128::size()] = {4.0f, 1.5f, 3.0f, 4.0f}; v1 /= v2; float result[TestFixture::T128::size()]; v1.unalignedStore(result); ASSERT_ELEMENTS_NEAR(result, expected, TestFixture::T128::size(), 0.0001f); } TYPED_TEST(T128Test, OperatorAdd) { typename TestFixture::T128 v1{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v2{3.0f, 4.0f, 5.0f, 6.0f}; typename TestFixture::T128 expected{4.0f, 6.0f, 8.0f, 10.0f}; auto v3 = v1 + v2; ASSERT_TRUE(equal(v3, expected)); } TYPED_TEST(T128Test, OperatorSub) { typename TestFixture::T128 v1{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v2{3.0f, 4.0f, 5.0f, 6.0f}; typename TestFixture::T128 expected{-2.0f, -2.0f, -2.0f, -2.0f}; auto v3 = v1 - v2; ASSERT_TRUE(equal(v3, expected)); } TYPED_TEST(T128Test, OperatorMul) { typename TestFixture::T128 v1{1.0f, 2.0f, 3.0f, 4.0f}; typename TestFixture::T128 v2{3.0f, 4.0f, 5.0f, 6.0f}; typename TestFixture::T128 expected{3.0f, 8.0f, 15.0f, 24.0f}; auto v3 = v1 * v2; ASSERT_TRUE(equal(v3, expected)); } TYPED_TEST(T128Test, OperatorDiv) { typename TestFixture::T128 v1{4.0f, 3.0f, 9.0f, 12.0f}; typename TestFixture::T128 v2{1.0f, 2.0f, 3.0f, 3.0f}; float expected[TestFixture::T128::size()] = {4.0f, 1.5f, 3.0f, 4.0f}; auto v3 = v1 / v2; float result[TestFixture::T128::size()]; v3.unalignedStore(result); ASSERT_ELEMENTS_NEAR(result, expected, TestFixture::T128::size(), 0.0001f); } TEST(T128Test, TransposeSquareScalar) { trimd::scalar::F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; trimd::scalar::F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; trimd::scalar::F128 v3{1.0f, 2.0f, 3.0f, 4.0f}; trimd::scalar::F128 v4{1.0f, 2.0f, 3.0f, 4.0f}; trimd::scalar::transpose(v1, v2, v3, v4); trimd::scalar::F128 e1{1.0f, 1.0f, 1.0f, 1.0f}; trimd::scalar::F128 e2{2.0f, 2.0f, 2.0f, 2.0f}; trimd::scalar::F128 e3{3.0f, 3.0f, 3.0f, 3.0f}; trimd::scalar::F128 e4{4.0f, 4.0f, 4.0f, 4.0f}; ASSERT_TRUE(equal(v1, e1)); ASSERT_TRUE(equal(v2, e2)); ASSERT_TRUE(equal(v3, e3)); ASSERT_TRUE(equal(v4, e4)); } TEST(T128Test, AbsScalar) { trimd::scalar::F128 v{-1.0f, 2.0f, -3.0f, 0.0f}; v = trimd::scalar::abs(v); trimd::scalar::F128 e{1.0f, 2.0f, 3.0f, 0.0f}; ASSERT_TRUE(equal(v, e)); } TEST(T128Test, AndNotScalar) { trimd::scalar::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; trimd::scalar::F128 mask1 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); trimd::scalar::F128 mask2 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); trimd::scalar::F128 result1 = trimd::scalar::andnot(mask1, v); trimd::scalar::F128 result2 = trimd::scalar::andnot(mask2, v); trimd::scalar::F128 e1{1.0f, 2.0f, 3.0f, 4.0f}; trimd::scalar::F128 e2{0.0f, 0.0f, 0.0f, 0.0f}; ASSERT_TRUE(equal(result1, e1)); ASSERT_TRUE(equal(result2, e2)); } TEST(T128Test, RsqrtScalar) { trimd::scalar::F128 v{1.0f, 2.0f, 3.0f, 9.0f}; v = trimd::scalar::rsqrt(v); trimd::scalar::F128 e{1.0f, 0.70710678f, 0.57735026f, 0.33333333f}; #ifdef TRIMD_ENABLE_FAST_INVERSE_SQRT static constexpr float threshold = 0.0004f; #else static constexpr float threshold = 0.0002f; #endif // TRIMD_ENABLE_FAST_INVERSE_SQRT ASSERT_TRUE(near(v, e, threshold)); } #ifdef TRIMD_ENABLE_SSE TEST(T128Test, TransposeSquareSSE) { trimd::sse::F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; trimd::sse::F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; trimd::sse::F128 v3{1.0f, 2.0f, 3.0f, 4.0f}; trimd::sse::F128 v4{1.0f, 2.0f, 3.0f, 4.0f}; trimd::sse::transpose(v1, v2, v3, v4); trimd::sse::F128 e1{1.0f, 1.0f, 1.0f, 1.0f}; trimd::sse::F128 e2{2.0f, 2.0f, 2.0f, 2.0f}; trimd::sse::F128 e3{3.0f, 3.0f, 3.0f, 3.0f}; trimd::sse::F128 e4{4.0f, 4.0f, 4.0f, 4.0f}; ASSERT_TRUE(equal(v1, e1)); ASSERT_TRUE(equal(v2, e2)); ASSERT_TRUE(equal(v3, e3)); ASSERT_TRUE(equal(v4, e4)); } TEST(T128Test, AbsSSE) { trimd::sse::F128 v{-1.0f, 2.0f, -3.0f, 0.0f}; v = trimd::sse::abs(v); trimd::sse::F128 e{1.0f, 2.0f, 3.0f, 0.0f}; ASSERT_TRUE(equal(v, e)); } TEST(T128Test, AndNotSSE) { trimd::sse::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; trimd::sse::F128 mask1 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); trimd::sse::F128 mask2 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); trimd::sse::F128 result1 = trimd::sse::andnot(mask1, v); trimd::sse::F128 result2 = trimd::sse::andnot(mask2, v); trimd::sse::F128 e1{1.0f, 2.0f, 3.0f, 4.0f}; trimd::sse::F128 e2{0.0f, 0.0f, 0.0f, 0.0f}; ASSERT_TRUE(equal(result1, e1)); ASSERT_TRUE(equal(result2, e2)); } TEST(T128Test, RsqrtSSE) { trimd::sse::F128 v{1.0f, 2.0f, 3.0f, 9.0f}; v = trimd::sse::rsqrt(v); trimd::sse::F128 e{1.0f, 0.70710678f, 0.57735026f, 0.33333333f}; #ifdef TRIMD_ENABLE_FAST_INVERSE_SQRT static constexpr float threshold = 0.0004f; #else static constexpr float threshold = 0.0003f; #endif // TRIMD_ENABLE_FAST_INVERSE_SQRT ASSERT_TRUE(near(v, e, threshold)); } #ifdef TRIMD_ENABLE_F16C TEST(T128Test, LoadAlignedHalfFloatsSSE) { alignas(trimd::sse::F128::alignment()) const std::uint16_t halfFloats[] = {15360, 16384, 16896, 17408}; trimd::sse::F128 expected{1.0f, 2.0f, 3.0f, 4.0f}; auto v = trimd::sse::F128::fromAlignedSource(halfFloats); ASSERT_TRUE(equal(v, expected)); trimd::sse::F128 v2; v2.alignedLoad(halfFloats); ASSERT_TRUE(equal(v2, expected)); } TEST(T128Test, LoadUnalignedHalfFloatsSSE) { const std::uint16_t halfFloats[] = {15360, 16384, 16896, 17408}; trimd::sse::F128 expected{1.0f, 2.0f, 3.0f, 4.0f}; auto v = trimd::sse::F128::fromUnalignedSource(halfFloats); ASSERT_TRUE(equal(v, expected)); trimd::sse::F128 v2; v2.unalignedLoad(halfFloats); ASSERT_TRUE(equal(v2, expected)); } TEST(T128Test, StoreAlignedHalfFloatsSSE) { const std::uint16_t expected[] = {15360, 16384, 16896, 17408}; alignas(trimd::sse::F128::alignment()) std::uint16_t halfFloats[4ul] = {}; const trimd::sse::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; v.alignedStore(halfFloats); ASSERT_ELEMENTS_EQ(halfFloats, expected, 4ul); } TEST(T128Test, StoreUnalignedHalfFloatsSSE) { const std::uint16_t expected[] = {15360, 16384, 16896, 17408}; std::uint16_t halfFloats[4ul] = {}; const trimd::sse::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; v.unalignedStore(halfFloats); ASSERT_ELEMENTS_EQ(halfFloats, expected, 4ul); } #endif // TRIMD_ENABLE_F16C #endif // TRIMD_ENABLE_SSE #ifdef TRIMD_ENABLE_NEON TEST(T128Test, TransposeSquareNEON) { trimd::neon::F128 v1{1.0f, 2.0f, 3.0f, 4.0f}; trimd::neon::F128 v2{1.0f, 2.0f, 3.0f, 4.0f}; trimd::neon::F128 v3{1.0f, 2.0f, 3.0f, 4.0f}; trimd::neon::F128 v4{1.0f, 2.0f, 3.0f, 4.0f}; trimd::neon::transpose(v1, v2, v3, v4); trimd::neon::F128 e1{1.0f, 1.0f, 1.0f, 1.0f}; trimd::neon::F128 e2{2.0f, 2.0f, 2.0f, 2.0f}; trimd::neon::F128 e3{3.0f, 3.0f, 3.0f, 3.0f}; trimd::neon::F128 e4{4.0f, 4.0f, 4.0f, 4.0f}; ASSERT_TRUE(equal(v1, e1)); ASSERT_TRUE(equal(v2, e2)); ASSERT_TRUE(equal(v3, e3)); ASSERT_TRUE(equal(v4, e4)); } TEST(T128Test, AbsNEON) { trimd::neon::F128 v{-1.0f, 2.0f, -3.0f, 0.0f}; v = trimd::neon::abs(v); trimd::neon::F128 e{1.0f, 2.0f, 3.0f, 0.0f}; ASSERT_TRUE(equal(v, e)); } TEST(T128Test, AndNotNEON) { trimd::neon::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; trimd::neon::F128 mask1 = frombits(0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u); trimd::neon::F128 mask2 = frombits(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); trimd::neon::F128 result1 = trimd::neon::andnot(mask1, v); trimd::neon::F128 result2 = trimd::neon::andnot(mask2, v); trimd::neon::F128 e1{1.0f, 2.0f, 3.0f, 4.0f}; trimd::neon::F128 e2{0.0f, 0.0f, 0.0f, 0.0f}; ASSERT_TRUE(equal(result1, e1)); ASSERT_TRUE(equal(result2, e2)); } TEST(T128Test, RsqrtNEON) { trimd::neon::F128 v{1.0f, 2.0f, 3.0f, 9.0f}; v = trimd::neon::rsqrt(v); trimd::neon::F128 e{1.0f, 0.70710678f, 0.57735026f, 0.33333333f}; #ifdef TRIMD_ENABLE_FAST_INVERSE_SQRT static constexpr float threshold = 0.0004f; #else static constexpr float threshold = 0.0002f; #endif // TRIMD_ENABLE_FAST_INVERSE_SQRT ASSERT_TRUE(near(v, e, threshold)); } #ifdef TRIMD_ENABLE_NEON_FP16 TEST(T128Test, LoadAlignedHalfFloatsNEON) { alignas(trimd::neon::F128::alignment()) const std::uint16_t halfFloats[] = {15360, 16384, 16896, 17408}; trimd::neon::F128 expected{1.0f, 2.0f, 3.0f, 4.0f}; auto v = trimd::neon::F128::fromAlignedSource(halfFloats); ASSERT_TRUE(equal(v, expected)); trimd::neon::F128 v2; v2.alignedLoad(halfFloats); ASSERT_TRUE(equal(v2, expected)); } TEST(T128Test, LoadUnalignedHalfFloatsNEON) { const std::uint16_t halfFloats[] = {15360, 16384, 16896, 17408}; trimd::neon::F128 expected{1.0f, 2.0f, 3.0f, 4.0f}; auto v = trimd::neon::F128::fromUnalignedSource(halfFloats); ASSERT_TRUE(equal(v, expected)); trimd::neon::F128 v2; v2.unalignedLoad(halfFloats); ASSERT_TRUE(equal(v2, expected)); } TEST(T128Test, StoreAlignedHalfFloatsNEON) { const std::uint16_t expected[] = {15360, 16384, 16896, 17408}; alignas(trimd::neon::F128::alignment()) std::uint16_t halfFloats[4ul] = {}; const trimd::neon::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; v.alignedStore(halfFloats); ASSERT_ELEMENTS_EQ(halfFloats, expected, 4ul); } TEST(T128Test, StoreUnalignedHalfFloatsNEON) { const std::uint16_t expected[] = {15360, 16384, 16896, 17408}; std::uint16_t halfFloats[4ul] = {}; const trimd::neon::F128 v{1.0f, 2.0f, 3.0f, 4.0f}; v.unalignedStore(halfFloats); ASSERT_ELEMENTS_EQ(halfFloats, expected, 4ul); } #endif // TRIMD_ENABLE_NEON_FP16 #endif // TRIMD_ENABLE_NEON