Files
UnrealEngine/Engine/Source/ThirdParty/Intel/ISPC/ispc-1.24.0/tests/lit-tests/vnni-2.ispc
2025-05-18 13:04:45 +08:00

43 lines
2.1 KiB
Plaintext

// Test checks emitted code for VNNI dot product instructions.
// RUN: %{ispc} %s --target=avx512icl-x32 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX2
// RUN: %{ispc} %s --target=avx512spr-x32 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX2
// RUN: %{ispc} %s --target=avx512icl-x64 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX4
// RUN: %{ispc} %s --target=avx512spr-x64 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX4
// REQUIRES: X86_ENABLED && !MACOS_HOST
// Codegen fixture: stores the result of
// dot4add_u8i8packed(a[programIndex], b[programIndex], programIndex)
// into dst[programIndex]. programIndex is passed as the third argument
// (presumably the accumulator operand -- confirm against the ISPC stdlib).
// The directives below expect the vpdpbusd pattern on zmm registers to match
// twice for the x32 targets and four times for the x64 targets.
// CHECK_ALL-LABEL: dot4add_u8i8
// CHECK_ZMMX2-COUNT-2: vpdpbusd {{.*}} %zmm
// CHECK_ZMMX4-COUNT-4: vpdpbusd {{.*}} %zmm
void dot4add_u8i8(uniform int a[], uniform int b[], uniform int dst[]) {
dst[programIndex] = dot4add_u8i8packed(a[programIndex], b[programIndex], programIndex);
}
// Codegen fixture: stores the result of
// dot4add_u8i8packed_sat(a[programIndex], b[programIndex], programIndex)
// into dst[programIndex]. programIndex is passed as the third argument
// (presumably the accumulator operand -- confirm against the ISPC stdlib).
// The directives below expect the vpdpbusds pattern on zmm registers to match
// twice for the x32 targets and four times for the x64 targets.
// CHECK_ALL-LABEL: dot4add_u8i8_sat
// CHECK_ZMMX2-COUNT-2: vpdpbusds {{.*}} %zmm
// CHECK_ZMMX4-COUNT-4: vpdpbusds {{.*}} %zmm
void dot4add_u8i8_sat(uniform int a[], uniform int b[], uniform int dst[]) {
dst[programIndex] = dot4add_u8i8packed_sat(a[programIndex], b[programIndex], programIndex);
}
// This test uses the "unmasked" version of the function. For avx512*x32 and avx512*x64 targets,
// generic masked stores are used, which results in a set of per-lane scalar stores. In that case it is more
// profitable to transform vpdpwssd into vpmaddwd+vpaddd (see https://reviews.llvm.org/D148980), so the function
// is marked unmasked to keep the vpdpwssd instruction that the checks below look for.
// Related discussion: https://github.com/llvm/llvm-project/issues/84182
// Codegen fixture: stores the result of
// dot2add_i16packed(a[programIndex], b[programIndex], programIndex)
// into dst[programIndex]. programIndex is passed as the third argument
// (presumably the accumulator operand -- confirm against the ISPC stdlib).
// The function is declared unmasked.
// The directives below expect the vpdpwssd pattern on zmm registers to match
// twice for the x32 targets and four times for the x64 targets.
// CHECK_ALL-LABEL: dot2add_i16
// CHECK_ZMMX2-COUNT-2: vpdpwssd {{.*}} %zmm
// CHECK_ZMMX4-COUNT-4: vpdpwssd {{.*}} %zmm
unmasked void dot2add_i16(uniform int a[], uniform int b[], uniform int dst[]) {
dst[programIndex] = dot2add_i16packed(a[programIndex], b[programIndex], programIndex);
}
// Codegen fixture: stores the result of
// dot2add_i16packed_sat(a[programIndex], b[programIndex], programIndex)
// into dst[programIndex]. programIndex is passed as the third argument
// (presumably the accumulator operand -- confirm against the ISPC stdlib).
// Unlike dot2add_i16, this function is not declared unmasked.
// The directives below expect the vpdpwssds pattern on zmm registers to match
// twice for the x32 targets and four times for the x64 targets.
// CHECK_ALL-LABEL: dot2add_i16_sat
// CHECK_ZMMX2-COUNT-2: vpdpwssds {{.*}} %zmm
// CHECK_ZMMX4-COUNT-4: vpdpwssds {{.*}} %zmm
void dot2add_i16_sat(uniform int a[], uniform int b[], uniform int dst[]) {
dst[programIndex] = dot2add_i16packed_sat(a[programIndex], b[programIndex], programIndex);
}