43 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			43 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| // Test checks emitted code for VNNI dot product instructions.
 | |
| 
 | |
| // RUN: %{ispc} %s --target=avx512icl-x32 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX2
 | |
| // RUN: %{ispc} %s --target=avx512spr-x32 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX2
 | |
| 
 | |
| // RUN: %{ispc} %s --target=avx512icl-x64 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX4
 | |
| // RUN: %{ispc} %s --target=avx512spr-x64 --emit-asm -o - | FileCheck %s -check-prefixes=CHECK_ALL,CHECK_ZMMX4
 | |
| 
 | |
| // REQUIRES: X86_ENABLED && !MACOS_HOST
 | |
| 
 | |
| // CHECK_ALL-LABEL: dot4add_u8i8
 | |
| // CHECK_ZMMX2-COUNT-2: vpdpbusd {{.*}} %zmm
 | |
| // CHECK_ZMMX4-COUNT-4: vpdpbusd {{.*}} %zmm
 | |
// Codegen fixture: each program instance stores
// dot4add_u8i8packed(a[programIndex], b[programIndex], programIndex) into dst[programIndex].
// The FileCheck directives directly above expect vpdpbusd with %zmm operands in the
// emitted assembly (two matches for the x32 targets, four for the x64 targets).
// The third argument is presumably the accumulator -- confirm against the ISPC stdlib.
| void dot4add_u8i8(uniform int a[], uniform int b[], uniform int dst[]) {
 | |
|     dst[programIndex] = dot4add_u8i8packed(a[programIndex], b[programIndex], programIndex);
 | |
| }
 | |
| 
 | |
| // CHECK_ALL-LABEL: dot4add_u8i8_sat
 | |
| // CHECK_ZMMX2-COUNT-2: vpdpbusds {{.*}} %zmm
 | |
| // CHECK_ZMMX4-COUNT-4: vpdpbusds {{.*}} %zmm
 | |
// Codegen fixture: each program instance stores
// dot4add_u8i8packed_sat(a[programIndex], b[programIndex], programIndex) into dst[programIndex].
// The FileCheck directives directly above expect vpdpbusds with %zmm operands in the
// emitted assembly (two matches for the x32 targets, four for the x64 targets).
| void dot4add_u8i8_sat(uniform int a[], uniform int b[], uniform int dst[]) {
 | |
|     dst[programIndex] = dot4add_u8i8packed_sat(a[programIndex], b[programIndex], programIndex);
 | |
| }
 | |
| 
 | |
| // For the dot2add_i16 test below, the "unmasked" version of the function is used. This is because for avx512*x32 and avx512*x64 targets
 | |
| // generic masked stores are used, resulting in a set of per-lane scalar stores. In that case it's more profitable
 | |
| // to transform vpdpwssd into vpmaddwd+vpaddd (see https://reviews.llvm.org/D148980).
 | |
| // Related discussion: https://github.com/llvm/llvm-project/issues/84182
 | |
| 
 | |
| // CHECK_ALL-LABEL: dot2add_i16
 | |
| // CHECK_ZMMX2-COUNT-2: vpdpwssd {{.*}} %zmm
 | |
| // CHECK_ZMMX4-COUNT-4: vpdpwssd {{.*}} %zmm
 | |
// Codegen fixture: each program instance stores
// dot2add_i16packed(a[programIndex], b[programIndex], programIndex) into dst[programIndex].
// Declared "unmasked" on purpose; the comment block preceding this test's FileCheck
// directives explains why. Those directives expect vpdpwssd with %zmm operands
// (two matches for the x32 targets, four for the x64 targets).
| unmasked void dot2add_i16(uniform int a[], uniform int b[], uniform int dst[]) {
 | |
|     dst[programIndex] = dot2add_i16packed(a[programIndex], b[programIndex], programIndex);
 | |
| }
 | |
| 
 | |
| // CHECK_ALL-LABEL: dot2add_i16_sat
 | |
| // CHECK_ZMMX2-COUNT-2: vpdpwssds {{.*}} %zmm
 | |
| // CHECK_ZMMX4-COUNT-4: vpdpwssds {{.*}} %zmm
 | |
// Codegen fixture: each program instance stores
// dot2add_i16packed_sat(a[programIndex], b[programIndex], programIndex) into dst[programIndex].
// The FileCheck directives directly above expect vpdpwssds with %zmm operands
// (two matches for the x32 targets, four for the x64 targets).
// NOTE(review): unlike dot2add_i16, this function is not declared "unmasked"; presumably
// the saturating form is not affected by the vpmaddwd+vpaddd split -- confirm.
| void dot2add_i16_sat(uniform int a[], uniform int b[], uniform int dst[]) {
 | |
|     dst[programIndex] = dot2add_i16packed_sat(a[programIndex], b[programIndex], programIndex);
 | |
| }
 |