// RUN: %{ispc} %s --nostdlib --nowrap --target=sse2-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse2-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i8x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i16x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i8x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i16x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i32x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i64x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE // RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i8x32 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i16x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i32x8 -o %t.o 2>&1 | FileCheck %s 
-check-prefix=CHECK_AVX2 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i32x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i64x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512knl-x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x32 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x64 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x32 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x64 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512 // A note about not running on macOS - it's because of SPR not being supported there. // It should be sufficient to run this test on other platforms and not to complicate // with splitting it to SPR and non-SPR versions. 
// REQUIRES: X86_ENABLED && LLVM_14_0+ && !MACOS_HOST

// Each function below performs exactly one implicit conversion, integer
// division/modulus, or variable shift. The check directive(s) placed directly
// above a function list the performance warning expected for it under the
// given check prefix; a function with no directive for a prefix has no
// expectation recorded for that prefix.

// Float16 -> integers
int8 cvt_fp16_to_i8(float16 f) { return f; }
uint8 cvt_fp16_to_ui8(float16 f) { return f; }
int16 cvt_fp16_to_i16(float16 f) { return f; }
uint16 cvt_fp16_to_ui16(float16 f) { return f; }
int32 cvt_fp16_to_i32(float16 f) { return f; }
// CHECK_AVX2: Performance Warning: Conversion from float16 to uint32 is slow. Use "int32" if possible
uint32 cvt_fp16_to_ui32(float16 f) { return f; }
int64 cvt_fp16_to_i64(float16 f) { return f; }
// CHECK_AVX2: Performance Warning: Conversion from float16 to uint64 is slow. Use "int64" if possible
uint64 cvt_fp16_to_ui64(float16 f) { return f; }

// Float -> integers
int8 cvt_fp32_to_i8(float f) { return f; }
uint8 cvt_fp32_to_ui8(float f) { return f; }
int16 cvt_fp32_to_i16(float f) { return f; }
uint16 cvt_fp32_to_ui16(float f) { return f; }
int32 cvt_fp32_to_i32(float f) { return f; }
// CHECK_SSE: Performance Warning: Conversion from float to uint32 is slow. Use "int32" if possible
// CHECK_AVX2: Performance Warning: Conversion from float to uint32 is slow. Use "int32" if possible
uint32 cvt_fp32_to_ui32(float f) { return f; }
int64 cvt_fp32_to_i64(float f) { return f; }
// CHECK_SSE: Performance Warning: Conversion from float to uint64 is slow. Use "int64" if possible
// CHECK_AVX2: Performance Warning: Conversion from float to uint64 is slow. Use "int64" if possible
uint64 cvt_fp32_to_ui64(float f) { return f; }

// Double -> integers
int8 cvt_fp64_to_i8(double f) { return f; }
uint8 cvt_fp64_to_ui8(double f) { return f; }
int16 cvt_fp64_to_i16(double f) { return f; }
uint16 cvt_fp64_to_ui16(double f) { return f; }
int32 cvt_fp64_to_i32(double f) { return f; }
// CHECK_SSE: Performance Warning: Conversion from double to uint32 is slow. Use "int32" if possible
// CHECK_AVX2: Performance Warning: Conversion from double to uint32 is slow. Use "int32" if possible
uint32 cvt_fp64_to_ui32(double f) { return f; }
int64 cvt_fp64_to_i64(double f) { return f; }
// CHECK_SSE: Performance Warning: Conversion from double to uint64 is slow. Use "int64" if possible
// CHECK_AVX2: Performance Warning: Conversion from double to uint64 is slow. Use "int64" if possible
uint64 cvt_fp64_to_ui64(double f) { return f; }

// Integers -> float16
float16 cvt_i8_to_fp16(int8 i) { return i; }
float16 cvt_ui8_to_fp16(uint8 i) { return i; }
float16 cvt_i16_to_fp16(int16 i) { return i; }
float16 cvt_ui16_to_fp16(uint16 i) { return i; }
float16 cvt_i32_to_fp16(int32 i) { return i; }
// CHECK_AVX2: Performance Warning: Conversion from uint32 to float16 is slow. Use "int32" if possible
float16 cvt_ui32_to_fp16(uint32 i) { return i; }
float16 cvt_i64_to_fp16(int64 i) { return i; }
// NOTE(review): the expected text below suggests "int32" for a uint64 source,
// whereas the uint64 -> float check suggests "int64". Presumably this mirrors
// the compiler's message verbatim - confirm before changing the string.
// CHECK_AVX2: Performance Warning: Conversion from uint64 to float16 is slow. Use "int32" if possible
float16 cvt_ui64_to_fp16(uint64 i) { return i; }

// Integers -> float
float cvt_i8_to_fp32(int8 i) { return i; }
float cvt_ui8_to_fp32(uint8 i) { return i; }
float cvt_i16_to_fp32(int16 i) { return i; }
float cvt_ui16_to_fp32(uint16 i) { return i; }
float cvt_i32_to_fp32(int32 i) { return i; }
// CHECK_SSE: Performance Warning: Conversion from uint32 to float is slow. Use "int32" if possible
// CHECK_AVX2: Performance Warning: Conversion from uint32 to float is slow. Use "int32" if possible
float cvt_ui32_to_fp32(uint32 i) { return i; }
float cvt_i64_to_fp32(int64 i) { return i; }
// CHECK_SSE: Performance Warning: Conversion from uint64 to float is slow. Use "int64" if possible
// CHECK_AVX2: Performance Warning: Conversion from uint64 to float is slow. Use "int64" if possible
float cvt_ui64_to_fp32(uint64 i) { return i; }

// Integers -> double
double cvt_i8_to_fp64(int8 i) { return i; }
double cvt_ui8_to_fp64(uint8 i) { return i; }
double cvt_i16_to_fp64(int16 i) { return i; }
double cvt_ui16_to_fp64(uint16 i) { return i; }
double cvt_i32_to_fp64(int32 i) { return i; }
// CHECK_SSE: Performance Warning: Conversion from uint32 to double is slow. Use "int32" if possible
// CHECK_AVX2: Performance Warning: Conversion from uint32 to double is slow. Use "int32" if possible
double cvt_ui32_to_fp64(uint32 i) { return i; }
double cvt_i64_to_fp64(int64 i) { return i; }
// NOTE(review): as with uint64 -> float16, the expected text suggests "int32"
// for a uint64 source - confirm against the compiler's message.
// CHECK_SSE: Performance Warning: Conversion from uint64 to double is slow. Use "int32" if possible
// CHECK_AVX2: Performance Warning: Conversion from uint64 to double is slow. Use "int32" if possible
double cvt_ui64_to_fp64(uint64 i) { return i; }

// Integer div/mod, no need to check all types.
// One warning is expected per function below, hence a count of 2 per prefix.
// CHECK_SSE-COUNT-2: Performance Warning: Division with varying integer types is very inefficient.
// CHECK_AVX2-COUNT-2: Performance Warning: Division with varying integer types is very inefficient.
// CHECK_AVX512-COUNT-2: Performance Warning: Division with varying integer types is very inefficient.
int32 div_i32(int32 a, int32 b) { return a / b; }
uint32 div_ui32(uint32 a, uint32 b) { return a / b; }
// CHECK_SSE-COUNT-2: Performance Warning: Modulus operator with varying types is very inefficient.
// CHECK_AVX2-COUNT-2: Performance Warning: Modulus operator with varying types is very inefficient.
// CHECK_AVX512-COUNT-2: Performance Warning: Modulus operator with varying types is very inefficient.
int32 mod_i32(int32 a, int32 b) { return a % b; }
uint32 mod_ui32(uint32 a, uint32 b) { return a % b; }

// Shift Right by variable amount
// CHECK_SSE: Performance Warning: Shift right is inefficient for varying shift amounts.
int32 shr_i32(int32 a, int32 b) { return a >> b; }