// Copyright 2020-2021 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause #pragma once #include "ispcrt.h" // std #include #include #include #include #include namespace ispcrt { ///////////////////////////////////////////////////////////////////////////// // Generic base handle wrapper ////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// template class GenericObject { public: GenericObject(HANDLE_T object = nullptr); virtual ~GenericObject(); GenericObject(const GenericObject ©); GenericObject(GenericObject &&move); GenericObject &operator=(const GenericObject ©); GenericObject &operator=(GenericObject &&move); HANDLE_T handle() const; operator bool() const; protected: void setHandle(HANDLE_T object); HANDLE_T m_handle{nullptr}; }; // Inlined definitions // template inline GenericObject::GenericObject(HANDLE_T object) : m_handle(object) {} template inline GenericObject::~GenericObject() { if (m_handle) ispcrtRelease(m_handle); } template inline GenericObject::GenericObject(const GenericObject ©) { m_handle = copy.handle(); ispcrtRetain(copy.handle()); } template inline GenericObject::GenericObject(GenericObject &&move) { m_handle = move.handle(); move.m_handle = nullptr; } template inline GenericObject &GenericObject::operator=(const GenericObject ©) { m_handle = copy.handle(); ispcrtRetain(copy.handle()); return *this; } template inline GenericObject &GenericObject::operator=(GenericObject &&move) { m_handle = move.handle(); move.m_handle = nullptr; return *this; } template inline HANDLE_T GenericObject::handle() const { return m_handle; } template inline GenericObject::operator bool() const { return handle() != nullptr; } template inline void GenericObject::setHandle(HANDLE_T object) { m_handle = object; ispcrtRetain(m_handle); } ///////////////////////////////////////////////////////////////////////////// // Future wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Future : public GenericObject { public: Future() = default; Future(ISPCRTFuture f); ~Future() = default; bool valid() const; uint64_t time() const; }; inline Future::Future(ISPCRTFuture f) : GenericObject(f) { if (f) ispcrtRetain(f); } inline bool Future::valid() const { return handle() && ispcrtFutureIsValid(handle()); } inline uint64_t Future::time() const { return ispcrtFutureGetTimeNs(handle()); } ///////////////////////////////////////////////////////////////////////////// // Device wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Device : public GenericObject { public: Device() = default; Device(ISPCRTDeviceType type); // deviceIdx is an index of the device in the list of supported devices // The list of the supported devices can be obtained with: // - allDevicesInformation() call or // - deviceCount() call and a series of deviceInformation() calls Device(ISPCRTDeviceType type, uint32_t deviceIdx); void* nativePlatformHandle() const; void* nativeDeviceHandle() const; void* nativeContextHandle() const; // static methods to get information about available devices static uint32_t deviceCount(ISPCRTDeviceType type); static ISPCRTDeviceInfo deviceInformation(ISPCRTDeviceType type, uint32_t deviceIdx); static std::vector allDevicesInformation(ISPCRTDeviceType type); }; // Inlined definitions // inline Device::Device(ISPCRTDeviceType type, uint32_t deviceIdx) : GenericObject(ispcrtGetDevice(type, deviceIdx)) { } inline Device::Device(ISPCRTDeviceType type) : Device(type, 0) {} inline void* Device::nativePlatformHandle() const { return ispcrtPlatformNativeHandle(handle()); } inline void* Device::nativeDeviceHandle() const { return ispcrtDeviceNativeHandle(handle()); } inline void* Device::nativeContextHandle() const { return ispcrtContextNativeHandle(handle()); } inline uint32_t Device::deviceCount(ISPCRTDeviceType type) { return ispcrtGetDeviceCount(type); } inline ISPCRTDeviceInfo Device::deviceInformation(ISPCRTDeviceType type, uint32_t deviceIdx) { ISPCRTDeviceInfo devInfo; ispcrtGetDeviceInfo(type, deviceIdx, &devInfo); return devInfo; } inline std::vector Device::allDevicesInformation(ISPCRTDeviceType type) { auto devCount = ispcrtGetDeviceCount(type); std::vector devInfo(devCount); for (int i = 0; i < devCount; i++) { ispcrtGetDeviceInfo(type, i, &devInfo[i]); } return devInfo; } ///////////////////////////////////////////////////////////////////////////// // Arrays (MemoryView wrapper w/ element type) ////////////////////////////// ///////////////////////////////////////////////////////////////////////////// enum class AllocType { Device, Shared }; template class Array : public GenericObject { public: template using EnableForSharedAllocation = typename std::enable_if<(alloc == AllocType::Shared)>::type*; template using EnableForDeviceAllocation = typename std::enable_if<(alloc == AllocType::Device)>::type*; Array() = default; //////// Constructors that can be used for Device memory allocations //////// // Construct from raw array // template Array(const Device &device, T *appMemory, size_t size, EnableForDeviceAllocation = 0); // Construct from std:: containers (array + vector) // template Array(const Device &device, std::array &arr, EnableForDeviceAllocation = 0); template Array(const Device &device, std::vector &v, EnableForDeviceAllocation = 0); // Construct from single object // template Array(const Device &device, T &obj, EnableForDeviceAllocation = 0); //////// Constructors that can be used for Shared memory allocations //////// // Allocate single object in shared memory template Array(const Device &device, EnableForSharedAllocation = 0); // Allocate multiple objects in shared memory template Array(const Device &device, size_t size, EnableForSharedAllocation = 0); //////// Methods valid only for Device memory allocations //////// // For shared memory objects those will return the same pointer // template T *hostPtr(EnableForDeviceAllocation = 0) const; template T *devicePtr(EnableForDeviceAllocation = 0) const; //////// Methods valid for Shared memory allocations //////// template T *sharedPtr(EnableForSharedAllocation = 0) const; //////// Methods for all types of memory allocations //////// size_t size() const; }; // Inlined definitions // // Device memory allocations template template inline Array::Array(const Device &device, T *appMemory, size_t size, EnableForDeviceAllocation) : GenericObject() { ISPCRTNewMemoryViewFlags flags; flags.allocType = ISPCRT_ALLOC_TYPE_DEVICE; setHandle(ispcrtNewMemoryView(device.handle(), appMemory, size * sizeof(T), &flags)); } template template inline Array::Array(const Device &device, std::array &arr, EnableForDeviceAllocation) : Array(device, arr.data(), N) {} template template inline Array::Array(const Device &device, std::vector &v, EnableForDeviceAllocation) : Array(device, v.data(), v.size()) {} template template inline Array::Array(const Device &device, T &obj, EnableForDeviceAllocation) : Array(device, &obj, 1) {} // Shared memory allocations template template inline Array::Array(const Device &device, EnableForSharedAllocation) : Array(device, 1) {} template template inline Array::Array(const Device &device, size_t size, EnableForSharedAllocation) : GenericObject() { ISPCRTNewMemoryViewFlags flags; flags.allocType = ISPCRT_ALLOC_TYPE_SHARED; setHandle(ispcrtNewMemoryView(device.handle(), nullptr, size * sizeof(T), &flags)); } // Device-only methods template template inline T *Array::hostPtr(EnableForDeviceAllocation) const { return (T *)ispcrtHostPtr(handle()); } template template inline T *Array::devicePtr(EnableForDeviceAllocation) const { return (T *)ispcrtDevicePtr(handle()); } // Shared-only methods template template inline T *Array::sharedPtr(EnableForSharedAllocation) const { return (T *)ispcrtSharedPtr(handle()); } // All other methods template inline size_t Array::size() const { return ispcrtSize(handle()) / sizeof(T); } ///////////////////////////////////////////////////////////////////////////// // Shared Memory Allocator ////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// template class SharedMemoryAllocator { public: using value_type = T; SharedMemoryAllocator() = delete; SharedMemoryAllocator(const Device &device) : m_device(device) {} SharedMemoryAllocator(const SharedMemoryAllocator&) = default; ~SharedMemoryAllocator() = default; SharedMemoryAllocator& operator=(const SharedMemoryAllocator&) = delete; T* allocate(const size_t n); void deallocate(T* const p, const size_t n); protected: Device m_device; std::unordered_map> m_ptrToArray; }; // Inlined definitions // template inline T *SharedMemoryAllocator::allocate(const size_t n) { // Allocate a memory that can be shared between the host and the device auto a = Array(m_device, n); void* ptr = a.sharedPtr(); if (ptr == nullptr) { throw std::bad_alloc(); } m_ptrToArray[ptr] = a; return static_cast(ptr); } template inline void SharedMemoryAllocator::deallocate(T* const p, const size_t) { if (m_ptrToArray.find(p) == m_ptrToArray.end()) throw std::invalid_argument("pointer not allocated with this allocator"); m_ptrToArray.erase(p); } // Provide convinience type for shared memory allocations template using SharedVector = std::vector>; ///////////////////////////////////////////////////////////////////////////// // Module wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Module : public GenericObject { public: Module() = default; Module(const Device &device, const char *moduleName, const ISPCRTModuleOptions &opts = ISPCRTModuleOptions{}); }; // Inlined definitions // inline Module::Module(const Device &device, const char *moduleName, const ISPCRTModuleOptions &opts) : GenericObject(ispcrtLoadModule(device.handle(), moduleName, opts)) {} ///////////////////////////////////////////////////////////////////////////// // Kernel wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Kernel : public GenericObject { public: Kernel() = default; Kernel(const Device &device, const Module &module, const char *kernelName); }; // Inlined definitions // inline Kernel::Kernel(const Device &device, const Module &module, const char *kernelName) : GenericObject(ispcrtNewKernel(device.handle(), module.handle(), kernelName)) {} ///////////////////////////////////////////////////////////////////////////// // TaskQueue wrapper //////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class TaskQueue : public GenericObject { public: TaskQueue() = default; TaskQueue(const Device &device); void barrier() const; template void copyToDevice(const Array &arr) const; template void copyToHost(const Array &arr) const; Future launch(const Kernel &k, size_t dim0) const; Future launch(const Kernel &k, size_t dim0, size_t dim1) const; Future launch(const Kernel &k, size_t dim0, size_t dim1, size_t dim2) const; template Future launch(const Kernel &k, const Array &p, size_t dim0) const; template Future launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1) const; template Future launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1, size_t dim2) const; // start executing, but don't wait for the completion void submit() const; // wait for the command list to be executed (start the execution if needed as well) void sync() const; void* nativeTaskQueueHandle() const; }; // Inlined definitions // inline TaskQueue::TaskQueue(const Device &device) : GenericObject(ispcrtNewTaskQueue(device.handle())) {} inline void TaskQueue::barrier() const { ispcrtDeviceBarrier(handle()); } template inline void TaskQueue::copyToDevice(const Array &arr) const { ispcrtCopyToDevice(handle(), arr.handle()); } template inline void TaskQueue::copyToHost(const Array &arr) const { ispcrtCopyToHost(handle(), arr.handle()); } inline Future TaskQueue::launch(const Kernel &k, size_t dim0) const { return ispcrtLaunch1D(handle(), k.handle(), nullptr, dim0); } inline Future TaskQueue::launch(const Kernel &k, size_t dim0, size_t dim1) const { return ispcrtLaunch2D(handle(), k.handle(), nullptr, dim0, dim1); } inline Future TaskQueue::launch(const Kernel &k, size_t dim0, size_t dim1, size_t dim2) const { return ispcrtLaunch3D(handle(), k.handle(), nullptr, dim0, dim1, dim2); } template inline Future TaskQueue::launch(const Kernel &k, const Array &p, size_t dim0) const { return ispcrtLaunch1D(handle(), k.handle(), p.handle(), dim0); } template inline Future TaskQueue::launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1) const { return ispcrtLaunch2D(handle(), k.handle(), p.handle(), dim0, dim1); } template inline Future TaskQueue::launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1, size_t dim2) const { return ispcrtLaunch3D(handle(), k.handle(), p.handle(), dim0, dim1, dim2); } inline void TaskQueue::sync() const { ispcrtSync(handle()); } inline void* TaskQueue::nativeTaskQueueHandle() const { return ispcrtTaskQueueNativeHandle(handle()); } } // namespace ispcrt