756 lines
33 KiB
C++
756 lines
33 KiB
C++
/*
|
|
Copyright (c) 2019-2022, Intel Corporation
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of Intel Corporation nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
#define ISPC_IS_WINDOWS
|
|
#elif defined(__linux__)
|
|
#define ISPC_IS_LINUX
|
|
#elif defined(__APPLE__)
|
|
#error "L0 is not supported on macOS"
|
|
#elif defined(__FreeBSD__)
|
|
#error "L0 is not supported on FreeBSD"
|
|
#else
|
|
#error "Host OS was not detected"
|
|
#endif
|
|
|
|
#ifdef ISPC_IS_WINDOWS
|
|
#define _CRT_SECURE_NO_WARNINGS
|
|
#define NOMINMAX
|
|
#pragma warning(disable : 4244)
|
|
#pragma warning(disable : 4305)
|
|
#include <windows.h>
|
|
#endif // ISPC_IS_WINDOWS
|
|
|
|
#include <cassert>
|
|
#include <cfloat>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#ifdef ISPC_IS_LINUX
|
|
#include <malloc.h>
|
|
#endif
|
|
|
|
/******************************/
|
|
|
|
#include <cstdlib>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <level_zero/ze_api.h>
|
|
#include <limits>
|
|
#include <math.h>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
/**
 * Evaluates a Level Zero call exactly once and terminates the process when the
 * call reports a non-zero status.
 *
 * On failure the source location and the numeric status are written to stderr
 * and the process exits with code 1.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a single
 * statement and stays well-formed inside an unbraced if/else. The temporary
 * carries an unusual name so it cannot shadow a variable called `status` that
 * the wrapped expression itself refers to.
 */
#define L0_SAFE_CALL(call) \
    do { \
        const auto l0SafeCallStatus_ = (call); \
        if (l0SafeCallStatus_ != 0) { \
            fprintf(stderr, "%s:%d: L0 error %d\n", __FILE__, __LINE__, (int)l0SafeCallStatus_); \
            exit(1); \
        } \
    } while (0)
|
|
|
|
// Element count of every host array and device buffer used by the tests below.
#define N 64
|
|
|
|
/**
 * Returns the number of result elements to verify.
 *
 * The value is the TEST_WIDTH macro, which has to be supplied on the compiler
 * command line; the build stops with an error when it is missing.
 */
int width() {
#if !defined(TEST_WIDTH)
#error "Unknown or unset TEST_WIDTH value"
#else
    return TEST_WIDTH;
#endif
}
|
|
|
|
// ALIGN requests 64-byte alignment through the GNU attribute syntax; on Windows
// builds it expands to nothing.
#if !(defined(_WIN32) || defined(_WIN64))
#define ALIGN __attribute__((aligned(64)))
#else
#define ALIGN
#endif
|
|
|
|
/**
 * Initializes Level Zero and prepares everything a test needs to launch kernels.
 *
 * Steps performed:
 *   1. zeInit() and driver enumeration.
 *   2. Selection of an Intel GPU (device type GPU, vendor id 0x8086). Matching
 *      devices are counted across all drivers and the one whose zero-based
 *      index equals ISPC_GPU_DEVICE is taken; ISPCRT_GPU_DEVICE is consulted
 *      only when ISPC_GPU_DEVICE is not set. The default index is 0.
 *   3. Creation of a context and an asynchronous, normal-priority command queue.
 *   4. Loading of "test_xe.spv" (or "test_xe.bin" when TEST_ZEBIN is defined)
 *      from the current directory and creation of the module from it.
 *
 * Build flags can be adjusted through ISPCRT_IGC_OPTIONS: a value starting with
 * "+ " is appended to the defaults, a value starting with "= " replaces them.
 *
 * @param hDevice       receives the selected device
 * @param hModule       receives the created module
 * @param hContext      receives the created context
 * @param hCommandQueue receives the created command queue
 *
 * @throws std::runtime_error when ISPCRT_IGC_OPTIONS is set but malformed.
 *
 * Every other failure (Level Zero error, unreadable or empty module file,
 * module build error) is fatal: a diagnostic is printed and the process exits
 * with code 1, because callers have no way to detect a half-initialized state.
 */
static void L0InitContext(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                          ze_command_queue_handle_t &hCommandQueue) {
    L0_SAFE_CALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));

    // Retrieve drivers: the first call reports the count, the second fills the handles.
    uint32_t driverCount = 0;
    L0_SAFE_CALL(zeDriverGet(&driverCount, nullptr));

    std::vector<ze_driver_handle_t> allDrivers(driverCount);
    L0_SAFE_CALL(zeDriverGet(&driverCount, allDrivers.data()));

    // Find an instance of Intel GPU device.
    // User can select particular device using env variable.
    // By default first available device is selected.
    int gpuDeviceToGrab = 0;
    const char *gpuDeviceEnv = getenv("ISPC_GPU_DEVICE");
    if (!gpuDeviceEnv) {
        // Allow using ISPCRT env to make things easier
        gpuDeviceEnv = getenv("ISPCRT_GPU_DEVICE");
    }
    if (gpuDeviceEnv) {
        std::istringstream(gpuDeviceEnv) >> gpuDeviceToGrab;
    }

    int gpuDevice = 0;
    ze_driver_handle_t hDriver = nullptr;
    for (auto &driver : allDrivers) {
        uint32_t deviceCount = 0;
        L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, nullptr));
        std::vector<ze_device_handle_t> allDevices(deviceCount);
        L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, allDevices.data()));

        for (auto &device : allDevices) {
            ze_device_properties_t device_properties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
            L0_SAFE_CALL(zeDeviceGetProperties(device, &device_properties));
            if (device_properties.type == ZE_DEVICE_TYPE_GPU && device_properties.vendorId == 0x8086) {
                gpuDevice++;
                if (gpuDevice == gpuDeviceToGrab + 1) {
                    hDevice = device;
                    hDriver = driver;
                    break;
                }
            }
        }

        // hDriver is set together with hDevice, so it tells whether the search is over
        // regardless of what the caller stored in hDevice beforehand.
        if (hDriver)
            break;
    }

    assert(hDriver);
    assert(hDevice);

    // Create default command context
    ze_context_desc_t contextDesc = {}; // use default values
    L0_SAFE_CALL(zeContextCreate(hDriver, &contextDesc, &hContext));

    // Create a command queue
    ze_command_queue_desc_t commandQueueDesc = {};
    commandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    commandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    L0_SAFE_CALL(zeCommandQueueCreate(hContext, hDevice, &commandQueueDesc, &hCommandQueue));

    // Load the module image into memory.
#ifdef TEST_ZEBIN
    const std::string fn = "test_xe.bin";
#else
    const std::string fn = "test_xe.spv";
#endif
    std::ifstream is(fn, std::ios::binary);
    if (!is.good()) {
        fprintf(stderr, "Open %s failed\n", fn.c_str());
        exit(1);
    }

    is.seekg(0, std::ios::end);
    const std::streamoff fileSize = is.tellg(); // -1 when the position cannot be obtained
    is.seekg(0, std::ios::beg);
    if (fileSize <= 0) {
        fprintf(stderr, "%s is empty or its size cannot be determined\n", fn.c_str());
        exit(1);
    }

    // The vector owns the image and releases it when this function returns.
    std::vector<unsigned char> codeBin(static_cast<size_t>(fileSize));
    is.read(reinterpret_cast<char *>(codeBin.data()), fileSize);
    if (is.gcount() != fileSize) {
        fprintf(stderr, "Read %s failed\n", fn.c_str());
        exit(1);
    }
    is.close();

    std::string igcOptions = "-vc-codegen -no-optimize -Xfinalizer '-presched'";
#ifdef ISPC_IS_LINUX
    // `newspillcost` is not yet supported on Windows in open source
    // TODO: use `newspillcost` for all platforms as soon as it available
    igcOptions += " -Xfinalizer '-newspillcost'";
#endif
    const char *userIgcOptionsEnv = getenv("ISPCRT_IGC_OPTIONS");
    if (userIgcOptionsEnv) {
        const std::string userIgcOptions(userIgcOptionsEnv);
        // A valid value is a two-character prefix followed by at least one more character.
        const bool longEnough = userIgcOptions.length() >= 3;
        const std::string prefix = userIgcOptions.substr(0, 2);

        if (longEnough && prefix == "+ ") {
            igcOptions += ' ' + userIgcOptions.substr(2);
        } else if (longEnough && prefix == "= ") {
            igcOptions = userIgcOptions.substr(2);
        } else {
            throw std::runtime_error("Invalid ISPCRT_IGC_OPTIONS string: " + userIgcOptions);
        }
    }

    // Create module
    ze_module_desc_t moduleDesc = {};
#ifdef TEST_ZEBIN
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
#else
    moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
#endif
    moduleDesc.pInputModule = codeBin.data();
    moduleDesc.inputSize = codeBin.size();
    moduleDesc.pBuildFlags = igcOptions.c_str();

    // Add build log output for easier debugging of the tests
    ze_module_build_log_handle_t buildlog = nullptr;
    const ze_result_t buildStatus = zeModuleCreate(hContext, hDevice, &moduleDesc, &hModule, &buildlog);
    if (buildStatus != ZE_RESULT_SUCCESS) {
        size_t szLog = 0;
        if (buildlog && zeModuleBuildLogGetString(buildlog, &szLog, nullptr) == ZE_RESULT_SUCCESS && szLog > 0) {
            // One extra zeroed byte keeps the buffer printable as a C string in any case.
            std::vector<char> strLog(szLog + 1, '\0');
            if (zeModuleBuildLogGetString(buildlog, &szLog, strLog.data()) == ZE_RESULT_SUCCESS) {
                std::cout << "Build log:" << strLog.data() << std::endl;
            }
        }
        if (buildlog) {
            zeModuleBuildLogDestroy(buildlog);
        }
        // hModule is unusable after a failed build: report the status and stop.
        L0_SAFE_CALL(buildStatus);
    }
    L0_SAFE_CALL(zeModuleBuildLogDestroy(buildlog));
}
|
|
|
|
/**
 * Creates the command list and the kernel object used by one test launch.
 *
 * @param hDevice      device the command list is created for
 * @param hModule      module that contains the kernel
 * @param hContext     context the command list is created in
 * @param hCommandList receives the new command list
 * @param hKernel      receives the new kernel
 * @param name         name of the kernel inside the module
 *
 * The kernel is additionally marked as indirectly accessing device and shared
 * allocations. Any Level Zero failure terminates the process via L0_SAFE_CALL.
 */
static void L0Create_Kernel(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                            ze_command_list_handle_t &hCommandList, ze_kernel_handle_t &hKernel, const char *name) {
    // Create command list
    ze_command_list_desc_t commandListDesc = {};
    L0_SAFE_CALL(zeCommandListCreate(hContext, hDevice, &commandListDesc, &hCommandList));

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = name;
    L0_SAFE_CALL(zeKernelCreate(hModule, &kernelDesc, &hKernel));

    // Set device/shared indirect flags. The flags were previously computed but never
    // handed to the driver, so they had no effect.
    const ze_kernel_indirect_access_flags_t kernel_flags =
        ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    L0_SAFE_CALL(zeKernelSetIndirectAccess(hKernel, kernel_flags));
}
|
|
|
|
/**
 * Appends a kernel launch to the command list, executes the list and waits for
 * the queue to drain.
 *
 * @param hCommandQueue    queue the command list is submitted to
 * @param hCommandList     open command list; it is closed by this function
 * @param hKernel          kernel to launch (arguments must already be set)
 * @param bufsize          number of bytes copied from OUTBuff to return_data
 * @param return_data      host destination of the result copy, may be nullptr
 * @param OUTBuff          source of the result copy, may be nullptr
 * @param groupSpaceWidth  group size in x
 * @param groupSpaceHeight group size in y (z is always 1)
 *
 * A single group (1x1x1) is dispatched. The launch is bracketed by barriers so
 * it is ordered after commands appended earlier and before the result copy.
 * The copy is appended only when both return_data and OUTBuff are non-null.
 * Any Level Zero failure terminates the process via L0_SAFE_CALL.
 */
static void L0Launch_Kernel(ze_command_queue_handle_t &hCommandQueue, ze_command_list_handle_t &hCommandList,
                            ze_kernel_handle_t &hKernel, int bufsize = 0, void *return_data = nullptr,
                            void *OUTBuff = nullptr, int groupSpaceWidth = 1, int groupSpaceHeight = 1) {
    // set group size
    L0_SAFE_CALL(zeKernelSetGroupSize(hKernel, /*x*/ groupSpaceWidth, /*y*/ groupSpaceHeight, /*z*/ 1));

    // set grid size
    ze_group_count_t dispatchTraits = {1, 1, 1};

    // launch
    L0_SAFE_CALL(zeCommandListAppendBarrier(hCommandList, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendLaunchKernel(hCommandList, hKernel, &dispatchTraits, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendBarrier(hCommandList, nullptr, 0, nullptr));

    // copy result to host
    if (return_data && OUTBuff) {
        L0_SAFE_CALL(zeCommandListAppendMemoryCopy(hCommandList, return_data, OUTBuff, bufsize, nullptr, 0, nullptr));
    }

    // dispatch & wait (no timeout)
    L0_SAFE_CALL(zeCommandListClose(hCommandList));
    L0_SAFE_CALL(zeCommandQueueExecuteCommandLists(hCommandQueue, 1, &hCommandList, nullptr));
    L0_SAFE_CALL(zeCommandQueueSynchronize(hCommandQueue, (std::numeric_limits<uint64_t>::max)()));
}
|
|
|
|
/**
 * Runs the "f_v" kernel, whose only argument is the result buffer.
 *
 * A device buffer of N floats is allocated, seeded from return_data, bound as
 * kernel argument 0 and passed to L0Launch_Kernel together with return_data as
 * the host destination. Everything created here is released before returning.
 */
static void L0Launch_F_V(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                         ze_command_queue_handle_t &hCommandQueue, void *return_data) {
    const size_t resultBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_v");

    // Device-side result buffer; its alignment equals its size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, resultBytes, resultBytes, hDevice, &devResult));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, resultBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, resultBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_t" kernel with a caller-chosen group size.
 *
 * A device buffer of N floats is allocated, seeded from return_data and bound
 * as kernel argument 0. groupSpaceWidth and groupSpaceHeight are forwarded to
 * L0Launch_Kernel along with return_data as the host destination. Everything
 * created here is released before returning.
 */
static void L0Launch_F_Threads(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                               ze_command_queue_handle_t &hCommandQueue, void *return_data, int groupSpaceWidth,
                               int groupSpaceHeight) {
    const size_t resultBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_t");

    // Device-side result buffer; its alignment equals its size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, resultBytes, resultBytes, hDevice, &devResult));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, resultBytes, nullptr, 0, nullptr));

    // Bind arguments and run with the requested group dimensions.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, resultBytes, return_data, devResult, groupSpaceWidth,
                    groupSpaceHeight);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_f" kernel: argument 0 is the result buffer, argument 1 a float
 * input buffer.
 *
 * Both device buffers hold N floats and are seeded from return_data and
 * vfloat_data respectively. return_data is passed to L0Launch_Kernel as the
 * host destination. Everything created here is released before returning.
 */
static void L0Launch_F_F(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                         ze_command_queue_handle_t &hCommandQueue, void *return_data, void *vfloat_data) {
    const size_t floatBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_f");

    // Device buffers; alignment equals buffer size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    void *devFloats = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devResult));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devFloats));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devFloats, vfloat_data, floatBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(devFloats), &devFloats));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, floatBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeMemFree(hContext, devFloats));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_fi" kernel: argument 0 is the result buffer, argument 1 a float
 * input buffer and argument 2 an int input buffer.
 *
 * The device buffers hold N elements each and are seeded from return_data,
 * vfloat_data and vint_data. return_data is passed to L0Launch_Kernel as the
 * host destination. Everything created here is released before returning.
 */
static void L0Launch_F_FI(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                          ze_command_queue_handle_t &hCommandQueue, void *return_data, void *vfloat_data,
                          void *vint_data) {
    const size_t floatBytes = N * sizeof(float);
    const size_t intBytes = N * sizeof(int);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_fi");

    // Device buffers; alignment equals buffer size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    void *devFloats = nullptr;
    void *devInts = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devResult));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devFloats));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, intBytes, intBytes, hDevice, &devInts));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devFloats, vfloat_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devInts, vint_data, intBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(devFloats), &devFloats));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 2, sizeof(devInts), &devInts));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, floatBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeMemFree(hContext, devFloats));
    L0_SAFE_CALL(zeMemFree(hContext, devInts));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_fu" kernel: argument 0 is the result buffer, argument 1 a float
 * input buffer and argument 2 the scalar float b passed by value.
 *
 * Both device buffers hold N floats and are seeded from return_data and
 * vfloat_data. return_data is passed to L0Launch_Kernel as the host
 * destination. Everything created here is released before returning.
 */
static void L0Launch_F_FU(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                          ze_command_queue_handle_t &hCommandQueue, void *return_data, void *vfloat_data, float b) {
    const size_t floatBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_fu");

    // Device buffers; alignment equals buffer size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    void *devFloats = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devResult));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devFloats));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devFloats, vfloat_data, floatBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(devFloats), &devFloats));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 2, sizeof(b), &b));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, floatBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeMemFree(hContext, devFloats));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_du" kernel: argument 0 is the float result buffer, argument 1 a
 * double input buffer and argument 2 the scalar double b passed by value.
 *
 * The result buffer holds N floats, the input buffer N doubles; they are
 * seeded from return_data and vdouble_data. return_data is passed to
 * L0Launch_Kernel as the host destination. Everything created here is
 * released before returning.
 */
static void L0Launch_F_DU(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                          ze_command_queue_handle_t &hCommandQueue, void *return_data, void *vdouble_data, double b) {
    const size_t floatBytes = N * sizeof(float);
    const size_t doubleBytes = N * sizeof(double);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_du");

    // Device buffers; alignment equals buffer size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    void *devDoubles = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devResult));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, doubleBytes, doubleBytes, hDevice, &devDoubles));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devDoubles, vdouble_data, doubleBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(devDoubles), &devDoubles));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 2, sizeof(b), &b));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, floatBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeMemFree(hContext, devDoubles));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_duf" kernel: argument 0 is the float result buffer, argument 1 a
 * double input buffer and argument 2 the scalar float b passed by value.
 *
 * The result buffer holds N floats, the input buffer N doubles; they are
 * seeded from return_data and vdouble_data. return_data is passed to
 * L0Launch_Kernel as the host destination. Everything created here is
 * released before returning.
 */
static void L0Launch_F_DUF(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                           ze_command_queue_handle_t &hCommandQueue, void *return_data, void *vdouble_data, float b) {
    const size_t floatBytes = N * sizeof(float);
    const size_t doubleBytes = N * sizeof(double);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_duf");

    // Device buffers; alignment equals buffer size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    void *devDoubles = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devResult));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, doubleBytes, doubleBytes, hDevice, &devDoubles));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devDoubles, vdouble_data, doubleBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(devDoubles), &devDoubles));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 2, sizeof(b), &b));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, floatBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeMemFree(hContext, devDoubles));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "f_di" kernel: argument 0 is the float result buffer, argument 1 a
 * double input buffer and argument 2 an int input buffer.
 *
 * The device buffers hold N elements each and are seeded from return_data,
 * vdouble_data and vint2_data. return_data is passed to L0Launch_Kernel as the
 * host destination. Everything created here is released before returning.
 */
static void L0Launch_F_DI(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                          ze_command_queue_handle_t &hCommandQueue, void *return_data, void *vdouble_data,
                          void *vint2_data) {
    const size_t floatBytes = N * sizeof(float);
    const size_t doubleBytes = N * sizeof(double);
    const size_t intBytes = N * sizeof(int);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "f_di");

    // Device buffers; alignment equals buffer size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    void *devDoubles = nullptr;
    void *devInts = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devResult));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, doubleBytes, doubleBytes, hDevice, &devDoubles));
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, intBytes, intBytes, hDevice, &devInts));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, floatBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devDoubles, vdouble_data, doubleBytes, nullptr, 0, nullptr));
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devInts, vint2_data, intBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(devDoubles), &devDoubles));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 2, sizeof(devInts), &devInts));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, floatBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeMemFree(hContext, devDoubles));
    L0_SAFE_CALL(zeMemFree(hContext, devInts));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "print_uf" kernel, passing the scalar float b by value as
 * argument 0. No buffers are allocated and nothing is copied back to the host.
 */
static void L0Launch_Print_UF(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                              ze_command_queue_handle_t &hCommandQueue, float b) {
    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "print_uf");

    // Bind the scalar and run with the default launch parameters.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(b), &b));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel);

    // Release.
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "print_f" kernel with a float input buffer as argument 0.
 *
 * A device buffer of N floats is allocated and seeded from vfloat_data.
 * Nothing is copied back to the host. Everything created here is released
 * before returning.
 */
static void L0Launch_Print_F(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                             ze_command_queue_handle_t &hCommandQueue, void *vfloat_data) {
    const size_t floatBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "print_f");

    // Device-side input buffer; its alignment equals its size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devFloats = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devFloats));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devFloats, vfloat_data, floatBytes, nullptr, 0, nullptr));

    // Bind arguments and run with the default launch parameters.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devFloats), &devFloats));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devFloats));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "print_fuf" kernel: argument 0 is a float input buffer, argument 1
 * the scalar float b passed by value.
 *
 * A device buffer of N floats is allocated and seeded from vfloat_data.
 * Nothing is copied back to the host. Everything created here is released
 * before returning.
 */
static void L0Launch_Print_FUF(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                               ze_command_queue_handle_t &hCommandQueue, void *vfloat_data, float b) {
    const size_t floatBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "print_fuf");

    // Device-side input buffer; its alignment equals its size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devFloats = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, floatBytes, floatBytes, hDevice, &devFloats));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devFloats, vfloat_data, floatBytes, nullptr, 0, nullptr));

    // Bind arguments and run with the default launch parameters.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devFloats), &devFloats));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(b), &b));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devFloats));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "print_no" kernel, which takes no arguments. No buffers are
 * allocated and nothing is copied back to the host.
 */
static void L0Launch_Print_NO(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                              ze_command_queue_handle_t &hCommandQueue) {
    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;

    // Create, run with the default launch parameters, release.
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "print_no");
    L0Launch_Kernel(hCommandQueue, cmdList, kernel);
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "result" kernel, whose only argument is the output buffer.
 *
 * A device buffer of N floats is allocated, seeded from return_data, bound as
 * kernel argument 0 and passed to L0Launch_Kernel together with return_data as
 * the host destination. Everything created here is released before returning.
 */
static void L0Launch_Result(ze_device_handle_t &hDevice, ze_module_handle_t &hModule, ze_context_handle_t &hContext,
                            ze_command_queue_handle_t &hCommandQueue, void *return_data) {
    const size_t resultBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "result");

    // Device-side output buffer; its alignment equals its size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, resultBytes, resultBytes, hDevice, &devResult));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, resultBytes, nullptr, 0, nullptr));

    // Bind arguments and run.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, resultBytes, return_data, devResult);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "print_result" kernel, which takes no arguments. No buffers are
 * allocated and nothing is copied back to the host.
 */
static void L0Launch_Print_Result(ze_device_handle_t &hDevice, ze_module_handle_t &hModule,
                                  ze_context_handle_t &hContext, ze_command_queue_handle_t &hCommandQueue) {
    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;

    // Create, run with the default launch parameters, release.
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "print_result");
    L0Launch_Kernel(hCommandQueue, cmdList, kernel);
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
/**
 * Runs the "result_t" kernel with a caller-chosen group size.
 *
 * Argument 0 is a device buffer of N floats seeded from return_data;
 * arguments 1 and 2 are groupSpaceWidth and groupSpaceHeight passed by value.
 * The same two values are forwarded to L0Launch_Kernel, together with
 * return_data as the host destination. Everything created here is released
 * before returning.
 */
static void L0Launch_Result_Threads(ze_device_handle_t &hDevice, ze_module_handle_t &hModule,
                                    ze_context_handle_t &hContext, ze_command_queue_handle_t &hCommandQueue,
                                    void *return_data, int groupSpaceWidth, int groupSpaceHeight) {
    const size_t resultBytes = N * sizeof(float);

    ze_command_list_handle_t cmdList;
    ze_kernel_handle_t kernel;
    L0Create_Kernel(hDevice, hModule, hContext, cmdList, kernel, "result_t");

    // Device-side output buffer; its alignment equals its size.
    ze_device_mem_alloc_desc_t devAllocDesc = {};
    void *devResult = nullptr;
    L0_SAFE_CALL(zeMemAllocDevice(hContext, &devAllocDesc, resultBytes, resultBytes, hDevice, &devResult));

    // Upload the host contents.
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(cmdList, devResult, return_data, resultBytes, nullptr, 0, nullptr));

    // Bind arguments and run with the requested group dimensions.
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 0, sizeof(devResult), &devResult));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 1, sizeof(groupSpaceWidth), &groupSpaceWidth));
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, 2, sizeof(groupSpaceHeight), &groupSpaceHeight));
    L0Launch_Kernel(hCommandQueue, cmdList, kernel, resultBytes, return_data, devResult, groupSpaceWidth,
                    groupSpaceHeight);

    // Release.
    L0_SAFE_CALL(zeMemFree(hContext, devResult));
    L0_SAFE_CALL(zeKernelDestroy(kernel));
    L0_SAFE_CALL(zeCommandListDestroy(cmdList));
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
// init data
|
|
struct alignas(4096) AlignedArray {
|
|
float data[N];
|
|
} returned_result, expected_result, vfloat;
|
|
struct alignas(4096) AlignedArray1 {
|
|
int data[N];
|
|
} vint, vint2;
|
|
struct alignas(4096) AlignedArray2 {
|
|
double data[N];
|
|
} vdouble;
|
|
|
|
for (int i = 0; i < N; ++i) {
|
|
returned_result.data[i] = float(-1e20);
|
|
vfloat.data[i] = float(i + 1);
|
|
vdouble.data[i] = double(i + 1);
|
|
vint.data[i] = 2 * (i + 1);
|
|
vint2.data[i] = i + 5;
|
|
}
|
|
|
|
void *return_data = returned_result.data;
|
|
void *expect_data = expected_result.data;
|
|
void *vfloat_data = vfloat.data;
|
|
void *vint_data = vint.data;
|
|
void *vint2_data = vint2.data;
|
|
void *vdouble_data = vdouble.data;
|
|
ze_device_handle_t hDevice = nullptr;
|
|
ze_module_handle_t hModule = nullptr;
|
|
ze_driver_handle_t hDriver = nullptr;
|
|
ze_context_handle_t hContext = nullptr;
|
|
ze_command_queue_handle_t hCommandQueue = nullptr;
|
|
L0InitContext(hDevice, hModule, hContext, hCommandQueue);
|
|
#if (TEST_SIG == 0)
|
|
L0Launch_F_V(hDevice, hModule, hContext, hCommandQueue, return_data);
|
|
#elif (TEST_SIG == 1)
|
|
L0Launch_F_F(hDevice, hModule, hContext, hCommandQueue, return_data, vfloat_data);
|
|
#elif (TEST_SIG == 2)
|
|
float num = 5.0f;
|
|
L0Launch_F_FU(hDevice, hModule, hContext, hCommandQueue, return_data, vfloat_data, num);
|
|
#elif (TEST_SIG == 3)
|
|
L0Launch_F_FI(hDevice, hModule, hContext, hCommandQueue, return_data, vfloat_data, vint_data);
|
|
#elif (TEST_SIG == 4)
|
|
double num = 5.0;
|
|
L0Launch_F_DU(hDevice, hModule, hContext, hCommandQueue, return_data, vdouble_data, num);
|
|
#elif (TEST_SIG == 5)
|
|
float num = 5.0f;
|
|
L0Launch_F_DUF(hDevice, hModule, hContext, hCommandQueue, return_data, vdouble_data, num);
|
|
#elif (TEST_SIG == 6)
|
|
L0Launch_F_DI(hDevice, hModule, hContext, hCommandQueue, return_data, vdouble_data, vint2_data);
|
|
#elif (TEST_SIG == 7)
|
|
// L0Launch_F_SZ(return_data);
|
|
#error "Currently unsupported for Xe"
|
|
#elif (TEST_SIG == 8)
|
|
int groupSpaceWidth = 2;
|
|
int groupSpaceHeight = 16;
|
|
assert(N >= groupSpaceWidth * groupSpaceHeight);
|
|
L0Launch_F_Threads(hDevice, hModule, hContext, hCommandQueue, return_data, groupSpaceWidth, groupSpaceHeight);
|
|
L0Launch_Result_Threads(hDevice, hModule, hContext, hCommandQueue, expect_data, groupSpaceWidth, groupSpaceHeight);
|
|
#elif (TEST_SIG == 32)
|
|
L0Launch_Print_UF(hDevice, hModule, hContext, hCommandQueue, 5.0f);
|
|
#elif (TEST_SIG == 33)
|
|
L0Launch_Print_F(hDevice, hModule, hContext, hCommandQueue, vfloat_data);
|
|
#elif (TEST_SIG == 34)
|
|
L0Launch_Print_FUF(hDevice, hModule, hContext, hCommandQueue, vfloat_data, 5.0f);
|
|
#elif (TEST_SIG == 35)
|
|
L0Launch_Print_NO(hDevice, hModule, hContext, hCommandQueue);
|
|
#else
|
|
#error "Unknown or unset TEST_SIG value"
|
|
#endif
|
|
#if 0
|
|
const bool verbose = true;
|
|
#else
|
|
const bool verbose = false;
|
|
#endif
|
|
#if (TEST_SIG < 8)
|
|
L0Launch_Result(hDevice, hModule, hContext, hCommandQueue, expect_data);
|
|
#elif (TEST_SIG >= 32)
|
|
L0Launch_Print_Result(hDevice, hModule, hContext, hCommandQueue);
|
|
return 0;
|
|
#endif
|
|
L0_SAFE_CALL(zeCommandQueueDestroy(hCommandQueue));
|
|
L0_SAFE_CALL(zeModuleDestroy(hModule));
|
|
L0_SAFE_CALL(zeContextDestroy(hContext));
|
|
|
|
// check results.
|
|
int errors = 0;
|
|
for (int i = 0; i < width(); ++i) {
|
|
if (fabs(returned_result.data[i] - expected_result.data[i]) > 16 * FLT_EPSILON) {
|
|
#ifdef EXPECT_FAILURE
|
|
// bingo, failed
|
|
return 1;
|
|
#else
|
|
printf("%s: value %d disagrees: returned %f [%a], expected %f [%a]\n", argv[0], i, returned_result.data[i],
|
|
returned_result.data[i], expected_result.data[i], expected_result.data[i]);
|
|
++errors;
|
|
#endif // EXPECT_FAILURE
|
|
}
|
|
}
|
|
|
|
#ifdef EXPECT_FAILURE
|
|
// Don't expect to get here
|
|
return 0;
|
|
#else
|
|
return errors > 0;
|
|
#endif
|
|
}
|