// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#ifdef _MSC_VER
    #pragma warning(push)
    #pragma warning(disable : 4365 4987)
#endif
#include <cstddef>
#include <utility>
#ifdef _MSC_VER
    #pragma warning(pop)
#endif

namespace trimd {

namespace fallback {

// Vector type assembled from two T128 halves.
//
// data1 holds the first T128::size() lanes and data2 the following T128::size() lanes;
// every operation below is simply forwarded to both halves. The eight-value constructor
// hands four values to each half, so T128 is expected to be a four-lane type.
template<typename T128>
struct T256 {
    using value_type = typename T128::value_type;

    T128 data1;
    T128 data2;

    // Builds the vector from two already constructed halves.
    T256(const T128& d1, const T128& d2) : data1{d1}, data2{d2} {
    }

    // Value-initializes both halves.
    T256() : data1{}, data2{} {
    }

    // Lane-wise construction: v1..v4 go to the first half, v5..v8 to the second half.
    T256(value_type v1,
         value_type v2,
         value_type v3,
         value_type v4,
         value_type v5,
         value_type v6,
         value_type v7,
         value_type v8) : data1{v1, v2, v3, v4}, data2{v5, v6, v7, v8} {
    }

    // Broadcasts a single value into all eight lanes.
    explicit T256(value_type value) : T256{value, value, value, value, value, value, value, value} {
    }

    // Creates a vector from `source`; the second half is read starting at source + T128::size().
    // Alignment requirements are those of T128::fromAlignedSource.
    template<typename U>
    static T256 fromAlignedSource(const U* source) {
        return T256{T128::fromAlignedSource(source), T128::fromAlignedSource(source + T128::size())};
    }

    // Same as fromAlignedSource, but forwards to T128::fromUnalignedSource.
    template<typename U>
    static T256 fromUnalignedSource(const U* source) {
        return T256{T128::fromUnalignedSource(source), T128::fromUnalignedSource(source + T128::size())};
    }

    // The first half is T128::loadSingleValue(source), the second half is a value-initialized T128.
    template<typename U>
    static T256 loadSingleValue(const U* source) {
        return T256{T128::loadSingleValue(source), T128{}};
    }

    // The prefetch helpers forward `source` to T128 as-is; nothing is issued for
    // source + T128::size().
    template<typename U>
    static void prefetchT0(const U* source) {
        T128::prefetchT0(source);
    }

    template<typename U>
    static void prefetchT1(const U* source) {
        T128::prefetchT1(source);
    }

    template<typename U>
    static void prefetchT2(const U* source) {
        T128::prefetchT2(source);
    }

    template<typename U>
    static void prefetchNTA(const U* source) {
        T128::prefetchNTA(source);
    }

    // Loads both halves from `source` and source + T128::size() via T128::alignedLoad.
    template<typename U>
    void alignedLoad(const U* source) {
        data1.alignedLoad(source);
        data2.alignedLoad(source + T128::size());
    }

    // Loads both halves from `source` and source + T128::size() via T128::unalignedLoad.
    template<typename U>
    void unalignedLoad(const U* source) {
        data1.unalignedLoad(source);
        data2.unalignedLoad(source + T128::size());
    }

    // Stores the first half at `dest` and the second half at dest + T128::size().
    template<typename U>
    void alignedStore(U* dest) const {
        data1.alignedStore(dest);
        data2.alignedStore(dest + T128::size());
    }

    // Same as alignedStore, but forwards to T128::unalignedStore.
    template<typename U>
    void unalignedStore(U* dest) const {
        data1.unalignedStore(dest);
        data2.unalignedStore(dest + T128::size());
    }

    // Sum of the sums of both halves.
    value_type sum() const {
        return data1.sum() + data2.sum();
    }

    // Compound assignments apply the matching T128 operator to each half independently.
    T256& operator+=(const T256& rhs) {
        data1 += rhs.data1;
        data2 += rhs.data2;
        return *this;
    }

    T256& operator-=(const T256& rhs) {
        data1 -= rhs.data1;
        data2 -= rhs.data2;
        return *this;
    }

    T256& operator*=(const T256& rhs) {
        data1 *= rhs.data1;
        data2 *= rhs.data2;
        return *this;
    }

    T256& operator/=(const T256& rhs) {
        data1 /= rhs.data1;
        data2 /= rhs.data2;
        return *this;
    }

    T256& operator&=(const T256& rhs) {
        data1 &= rhs.data1;
        data2 &= rhs.data2;
        return *this;
    }

    T256& operator|=(const T256& rhs) {
        data1 |= rhs.data1;
        data2 |= rhs.data2;
        return *this;
    }

    T256& operator^=(const T256& rhs) {
        data1 ^= rhs.data1;
        data2 ^= rhs.data2;
        return *this;
    }

    // Number of lanes: twice the lane count of T128.
    static constexpr std::size_t size() {
        return T128::size() * 2ul;
    }

    static constexpr std::size_t alignment() {
        // T128 alignment is the minimal requirement, but it might be beneficial to force here an alignment
        // of a theoretical T256, so the autovectorizer might generate better code on platforms not directly
        // supported by TRiMD.
        return T128::alignment();
    }

};

// Comparison operators: the result is a T256 whose halves are the results of the
// corresponding T128 comparison of the matching halves.
template<typename T128>
inline T256<T128> operator==(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{lhs.data1 == rhs.data1, lhs.data2 == rhs.data2};
}

template<typename T128>
inline T256<T128> operator!=(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{lhs.data1 != rhs.data1, lhs.data2 != rhs.data2};
}

template<typename T128>
inline T256<T128> operator<(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{lhs.data1 < rhs.data1, lhs.data2 < rhs.data2};
}

template<typename T128>
inline T256<T128> operator<=(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{lhs.data1 <= rhs.data1, lhs.data2 <= rhs.data2};
}

template<typename T128>
inline T256<T128> operator>(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{lhs.data1 > rhs.data1, lhs.data2 > rhs.data2};
}

template<typename T128>
inline T256<T128> operator>=(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{lhs.data1 >= rhs.data1, lhs.data2 >= rhs.data2};
}

// Binary operators: copy lhs, then apply the matching compound assignment with rhs.
template<typename T128>
inline T256<T128> operator+(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) += rhs;
}

template<typename T128>
inline T256<T128> operator-(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) -= rhs;
}

template<typename T128>
inline T256<T128> operator*(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) *= rhs;
}

template<typename T128>
inline T256<T128> operator/(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) /= rhs;
}

template<typename T128>
inline T256<T128> operator&(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) &= rhs;
}

template<typename T128>
inline T256<T128> operator|(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) |= rhs;
}

template<typename T128>
inline T256<T128> operator^(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>(lhs) ^= rhs;
}

// Applies operator~ of T128 to each half.
template<typename T128>
inline T256<T128> operator~(const T256<T128>& rhs) {
    return T256<T128>{~rhs.data1, ~rhs.data2};
}

// In-place transpose of the 8x8 matrix formed by the eight rows.
//
// Viewing the matrix as four 4x4 blocks [[A, B], [C, D]] (A = rows 0-3 / data1, B = rows 0-3 / data2,
// C = rows 4-7 / data1, D = rows 4-7 / data2), each block is first transposed in place by the T128
// overload of transpose, then the off-diagonal blocks B^T and C^T are exchanged row by row, which
// yields [[A^T, C^T], [B^T, D^T]].
// *INDENT-OFF*
template<typename T128>
inline void transpose(T256<T128>& row0,
                      T256<T128>& row1,
                      T256<T128>& row2,
                      T256<T128>& row3,
                      T256<T128>& row4,
                      T256<T128>& row5,
                      T256<T128>& row6,
                      T256<T128>& row7) {
    transpose(row0.data1, row1.data1, row2.data1, row3.data1);
    transpose(row0.data2, row1.data2, row2.data2, row3.data2);
    transpose(row4.data1, row5.data1, row6.data1, row7.data1);
    transpose(row4.data2, row5.data2, row6.data2, row7.data2);

    std::swap(row0.data2, row4.data1);
    std::swap(row1.data2, row5.data1);
    std::swap(row2.data2, row6.data1);
    std::swap(row3.data2, row7.data1);
}
// *INDENT-ON*

// Applies the T128 overload of abs to each half.
template<typename T128>
inline T256<T128> abs(const T256<T128>& rhs) {
    return T256<T128>{abs(rhs.data1), abs(rhs.data2)};
}

// Applies the T128 overload of andnot to the matching halves; operand order is passed through unchanged.
template<typename T128>
inline T256<T128> andnot(const T256<T128>& lhs, const T256<T128>& rhs) {
    return T256<T128>{andnot(lhs.data1, rhs.data1), andnot(lhs.data2, rhs.data2)};
}

// Applies the T128 overload of rsqrt to each half.
template<typename T128>
inline T256<T128> rsqrt(const T256<T128>& rhs) {
    return T256<T128>{rsqrt(rhs.data1), rsqrt(rhs.data2)};
}

}  // namespace fallback

}  // namespace trimd