27 lines
985 B
Plaintext
27 lines
985 B
Plaintext
// This test checks that no redundant vmovaps instructions are generated for the code below.
|
|
// The generated assembly used to be the following:
|
|
// vmovaps LCPI0_0(%rip), %ymm0 ## ymm0 = [4294967295,4294967295,4294967295,4294967295,0,0,0,0]
|
|
// vmaskmovps (%rdi), %ymm0, %ymm1
|
|
// vmaskmovps (%rsi), %ymm0, %ymm2
|
|
// vmovaps %xmm1, %xmm1 // <-- redundant, as upper part of ymm1 is already zeros.
|
|
// vmovaps %xmm2, %xmm2 // <-- redundant, as upper part of ymm2 is already zeros.
|
|
// vmulps %ymm2, %ymm1, %ymm1
|
|
// vmaskmovps %ymm1, %ymm0, -16(%rsp)
|
|
|
|
// RUN: %{ispc} %s --target=avx2-i32x8 --emit-asm -o - | FileCheck %s
|
|
|
|
// REQUIRES: X86_ENABLED
|
|
|
|
// Four-element float vector; serves as the operand and result type of Mul1.
struct FVector4 {
    float V[4];
};
|
|
|
|
// Multiplies A and B element by element and returns the product by value.
// Only four elements are processed, while the target selected on this test's
// command line (avx2-i32x8) is eight lanes wide, so the loads and the store
// are masked to the lower half of the register, as in the assembly quoted at
// the top of this file.
unmasked uniform FVector4 Mul1(const uniform FVector4 &A, const uniform FVector4 &B) {
    uniform FVector4 Result;
    // The directive below rejects any vmovaps whose source and destination
    // are the same xmm register, i.e. the redundant self-move described above.
    // CHECK-NOT: vmovaps [[REG:%xmm[0-9]+]], [[REG]]
    foreach(i = 0 ... 4) {
        // All four elements of Result.V are written by this loop, so the
        // uninitialized declaration above is never read before being set.
        Result.V[i] = A.V[i] * B.V[i];
    }
    return Result;
}
|