186 lines
9.1 KiB
Plaintext
186 lines
9.1 KiB
Plaintext
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse2-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse2-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i8x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i16x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i8x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i16x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=sse4.1-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i32x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx1-i64x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_SSE
|
|
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i8x32 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i16x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i32x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i32x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i32x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx2-i64x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX2
|
|
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512knl-x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x32 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512skx-x64 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x4 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x8 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x16 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x32 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
// RUN: %{ispc} %s --nostdlib --nowrap --target=avx512spr-x64 -o %t.o 2>&1 | FileCheck %s -check-prefix=CHECK_AVX512
|
|
|
|
// A note about not running on macOS - it's because of SPR not being supported there.
|
|
// It should be sufficient to run this test on other platforms and not to complicate
|
|
// it by splitting it into SPR and non-SPR versions.
|
|
// REQUIRES: X86_ENABLED && LLVM_14_0+ && !MACOS_HOST
|
|
|
|
// Float16 -> integers
|
|
|
|
// float16 -> int8: the conversion is implicit in the return statement (no cast).
int8 cvt_fp16_to_i8(float16 f) { return f; }
|
|
|
|
// float16 -> uint8: the conversion is implicit in the return statement (no cast).
uint8 cvt_fp16_to_ui8(float16 f) { return f; }
|
|
|
|
// float16 -> int16: the conversion is implicit in the return statement (no cast).
int16 cvt_fp16_to_i16(float16 f) { return f; }
|
|
|
|
// float16 -> uint16: the conversion is implicit in the return statement (no cast).
uint16 cvt_fp16_to_ui16(float16 f) { return f; }
|
|
|
|
// float16 -> int32: the conversion is implicit in the return statement (no cast).
int32 cvt_fp16_to_i32(float16 f) { return f; }
|
|
|
|
// CHECK_AVX2: Performance Warning: Conversion from float16 to uint32 is slow. Use "int32" if possible
|
|
// float16 -> uint32: implicit conversion on return; the expectation line above
// covers the AVX2-prefixed runs.
uint32 cvt_fp16_to_ui32(float16 f) { return f; }
|
|
|
|
// float16 -> int64: the conversion is implicit in the return statement (no cast).
int64 cvt_fp16_to_i64(float16 f) { return f; }
|
|
|
|
// CHECK_AVX2: Performance Warning: Conversion from float16 to uint64 is slow. Use "int64" if possible
|
|
// float16 -> uint64: implicit conversion on return; the expectation line above
// covers the AVX2-prefixed runs.
uint64 cvt_fp16_to_ui64(float16 f) { return f; }
|
|
|
|
// Float -> integers
|
|
|
|
// float -> int8: the conversion is implicit in the return statement (no cast).
int8 cvt_fp32_to_i8(float f) { return f; }
|
|
|
|
// float -> uint8: the conversion is implicit in the return statement (no cast).
uint8 cvt_fp32_to_ui8(float f) { return f; }
|
|
|
|
// float -> int16: the conversion is implicit in the return statement (no cast).
int16 cvt_fp32_to_i16(float f) { return f; }
|
|
|
|
// float -> uint16: the conversion is implicit in the return statement (no cast).
uint16 cvt_fp32_to_ui16(float f) { return f; }
|
|
|
|
// float -> int32: the conversion is implicit in the return statement (no cast).
int32 cvt_fp32_to_i32(float f) { return f; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from float to uint32 is slow. Use "int32" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from float to uint32 is slow. Use "int32" if possible
|
|
// float -> uint32: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
uint32 cvt_fp32_to_ui32(float f) { return f; }
|
|
|
|
// float -> int64: the conversion is implicit in the return statement (no cast).
int64 cvt_fp32_to_i64(float f) { return f; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from float to uint64 is slow. Use "int64" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from float to uint64 is slow. Use "int64" if possible
|
|
// float -> uint64: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
uint64 cvt_fp32_to_ui64(float f) { return f; }
|
|
|
|
// Double -> integers
|
|
|
|
// double -> int8: the conversion is implicit in the return statement (no cast).
int8 cvt_fp64_to_i8(double f) { return f; }
|
|
|
|
// double -> uint8: the conversion is implicit in the return statement (no cast).
uint8 cvt_fp64_to_ui8(double f) { return f; }
|
|
|
|
// double -> int16: the conversion is implicit in the return statement (no cast).
int16 cvt_fp64_to_i16(double f) { return f; }
|
|
|
|
// double -> uint16: the conversion is implicit in the return statement (no cast).
uint16 cvt_fp64_to_ui16(double f) { return f; }
|
|
|
|
// double -> int32: the conversion is implicit in the return statement (no cast).
int32 cvt_fp64_to_i32(double f) { return f; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from double to uint32 is slow. Use "int32" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from double to uint32 is slow. Use "int32" if possible
|
|
// double -> uint32: implicit conversion on return. Both expectation lines above
// need the colon right after the prefix; without it FileCheck does not treat the
// line as a directive and the AVX2-prefixed runs would never verify this warning.
uint32 cvt_fp64_to_ui32(double f) { return f; }
|
|
|
|
// double -> int64: the conversion is implicit in the return statement (no cast).
int64 cvt_fp64_to_i64(double f) { return f; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from double to uint64 is slow. Use "int64" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from double to uint64 is slow. Use "int64" if possible
|
|
// double -> uint64: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
uint64 cvt_fp64_to_ui64(double f) { return f; }
|
|
|
|
// Integers -> float16
|
|
|
|
// int8 -> float16: the conversion is implicit in the return statement (no cast).
float16 cvt_i8_to_fp16(int8 i) { return i; }
|
|
|
|
// uint8 -> float16: the conversion is implicit in the return statement (no cast).
float16 cvt_ui8_to_fp16(uint8 i) { return i; }
|
|
|
|
// int16 -> float16: the conversion is implicit in the return statement (no cast).
float16 cvt_i16_to_fp16(int16 i) { return i; }
|
|
|
|
// uint16 -> float16: the conversion is implicit in the return statement (no cast).
float16 cvt_ui16_to_fp16(uint16 i) { return i; }
|
|
|
|
// int32 -> float16: the conversion is implicit in the return statement (no cast).
float16 cvt_i32_to_fp16(int32 i) { return i; }
|
|
// CHECK_AVX2: Performance Warning: Conversion from uint32 to float16 is slow. Use "int32" if possible
|
|
// uint32 -> float16: implicit conversion on return; the expectation line above
// covers the AVX2-prefixed runs.
float16 cvt_ui32_to_fp16(uint32 i) { return i; }
|
|
|
|
// int64 -> float16: the conversion is implicit in the return statement (no cast).
float16 cvt_i64_to_fp16(int64 i) { return i; }
|
|
// CHECK_AVX2: Performance Warning: Conversion from uint64 to float16 is slow. Use "int32" if possible
|
|
// uint64 -> float16: implicit conversion on return; the expectation line above
// covers the AVX2-prefixed runs.
// NOTE(review): the expected hint names "int32" although the source type is
// uint64 - confirm this matches the text the compiler actually prints.
float16 cvt_ui64_to_fp16(uint64 i) { return i; }
|
|
|
|
// Integers -> float
|
|
|
|
// int8 -> float: the conversion is implicit in the return statement (no cast).
float cvt_i8_to_fp32(int8 i) { return i; }
|
|
|
|
// uint8 -> float: the conversion is implicit in the return statement (no cast).
float cvt_ui8_to_fp32(uint8 i) { return i; }
|
|
|
|
// int16 -> float: the conversion is implicit in the return statement (no cast).
float cvt_i16_to_fp32(int16 i) { return i; }
|
|
|
|
// uint16 -> float: the conversion is implicit in the return statement (no cast).
float cvt_ui16_to_fp32(uint16 i) { return i; }
|
|
|
|
// int32 -> float: the conversion is implicit in the return statement (no cast).
float cvt_i32_to_fp32(int32 i) { return i; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from uint32 to float is slow. Use "int32" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from uint32 to float is slow. Use "int32" if possible
|
|
// uint32 -> float: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
float cvt_ui32_to_fp32(uint32 i) { return i; }
|
|
|
|
// int64 -> float: the conversion is implicit in the return statement (no cast).
float cvt_i64_to_fp32(int64 i) { return i; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from uint64 to float is slow. Use "int64" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from uint64 to float is slow. Use "int64" if possible
|
|
// uint64 -> float: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
float cvt_ui64_to_fp32(uint64 i) { return i; }
|
|
|
|
// Integers -> double
|
|
|
|
// int8 -> double: the conversion is implicit in the return statement (no cast).
double cvt_i8_to_fp64(int8 i) { return i; }
|
|
|
|
// uint8 -> double: the conversion is implicit in the return statement (no cast).
double cvt_ui8_to_fp64(uint8 i) { return i; }
|
|
|
|
// int16 -> double: the conversion is implicit in the return statement (no cast).
double cvt_i16_to_fp64(int16 i) { return i; }
|
|
|
|
// uint16 -> double: the conversion is implicit in the return statement (no cast).
double cvt_ui16_to_fp64(uint16 i) { return i; }
|
|
|
|
// int32 -> double: the conversion is implicit in the return statement (no cast).
double cvt_i32_to_fp64(int32 i) { return i; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from uint32 to double is slow. Use "int32" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from uint32 to double is slow. Use "int32" if possible
|
|
// uint32 -> double: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
double cvt_ui32_to_fp64(uint32 i) { return i; }
|
|
|
|
// int64 -> double: the conversion is implicit in the return statement (no cast).
double cvt_i64_to_fp64(int64 i) { return i; }
|
|
|
|
// CHECK_SSE: Performance Warning: Conversion from uint64 to double is slow. Use "int32" if possible
|
|
// CHECK_AVX2: Performance Warning: Conversion from uint64 to double is slow. Use "int32" if possible
|
|
// uint64 -> double: implicit conversion on return; the two expectation lines
// above cover the SSE- and AVX2-prefixed runs.
// NOTE(review): the expected hint names "int32" although the source type is
// uint64 - confirm this matches the text the compiler actually prints.
double cvt_ui64_to_fp64(uint64 i) { return i; }
|
|
|
|
// Integer div/mod, no need to check all types.
|
|
|
|
// CHECK_SSE-COUNT-2: Performance Warning: Division with varying integer types is very inefficient.
|
|
// CHECK_AVX2-COUNT-2: Performance Warning: Division with varying integer types is very inefficient.
|
|
// CHECK_AVX512-COUNT-2: Performance Warning: Division with varying integer types is very inefficient.
|
|
// Signed int32 division a / b. With div_ui32 this gives two division sites,
// presumably the two occurrences the COUNT-2 expectation lines above look for.
int32 div_i32(int32 a, int32 b) { return a / b; }
|
|
// Unsigned uint32 division a / b; the second division site in this file
// (the first is div_i32).
uint32 div_ui32(uint32 a, uint32 b) { return a / b; }
|
|
|
|
// CHECK_SSE-COUNT-2: Performance Warning: Modulus operator with varying types is very inefficient.
|
|
// CHECK_AVX2-COUNT-2: Performance Warning: Modulus operator with varying types is very inefficient.
|
|
// CHECK_AVX512-COUNT-2: Performance Warning: Modulus operator with varying types is very inefficient.
|
|
// Signed int32 remainder a % b. With mod_ui32 this gives two modulus sites,
// presumably the two occurrences the COUNT-2 expectation lines above look for.
int32 mod_i32(int32 a, int32 b) { return a % b; }
|
|
// Unsigned uint32 remainder a % b; the second modulus site in this file
// (the first is mod_i32).
uint32 mod_ui32(uint32 a, uint32 b) { return a % b; }
|
|
|
|
// Shift Right by variable amount
|
|
|
|
// CHECK_SSE: Performance Warning: Shift right is inefficient for varying shift amounts.
|
|
// Right shift of a by the non-constant amount b; the expectation line above
// covers the SSE-prefixed runs only.
int32 shr_i32(int32 a, int32 b) { return a >> b; }
|