// Copyright 2020-2022 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause #pragma once #include "ispcrt.h" // std #include #include #include #include #include namespace ispcrt { ///////////////////////////////////////////////////////////////////////////// // Generic base handle wrapper ////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// template class GenericObject { public: GenericObject(HANDLE_T object = nullptr); virtual ~GenericObject(); GenericObject(const GenericObject ©); GenericObject(GenericObject &&move); GenericObject &operator=(const GenericObject ©); GenericObject &operator=(GenericObject &&move); HANDLE_T handle() const; operator bool() const; protected: void setHandle(HANDLE_T object); HANDLE_T m_handle{nullptr}; }; // Inlined definitions // template inline GenericObject::GenericObject(HANDLE_T object) : m_handle(object) {} template inline GenericObject::~GenericObject() { if (m_handle) ispcrtRelease(m_handle); } template inline GenericObject::GenericObject(const GenericObject ©) { m_handle = copy.handle(); ispcrtRetain(copy.handle()); } template inline GenericObject::GenericObject(GenericObject &&move) { m_handle = move.handle(); move.m_handle = nullptr; } template inline GenericObject &GenericObject::operator=(const GenericObject ©) { m_handle = copy.handle(); ispcrtRetain(copy.handle()); return *this; } template inline GenericObject &GenericObject::operator=(GenericObject &&move) { m_handle = move.handle(); move.m_handle = nullptr; return *this; } template inline HANDLE_T GenericObject::handle() const { return m_handle; } template inline GenericObject::operator bool() const { return handle() != nullptr; } template inline void GenericObject::setHandle(HANDLE_T object) { m_handle = object; ispcrtRetain(m_handle); } ///////////////////////////////////////////////////////////////////////////// // Future wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Future : public GenericObject { public: Future() = default; Future(ISPCRTFuture f); ~Future() = default; bool valid() const; uint64_t time() const; }; inline Future::Future(ISPCRTFuture f) : GenericObject(f) { if (f) ispcrtRetain(f); } inline bool Future::valid() const { return handle() && ispcrtFutureIsValid(handle()); } inline uint64_t Future::time() const { return ispcrtFutureGetTimeNs(handle()); } ///////////////////////////////////////////////////////////////////////////// // Context wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Context : public GenericObject { public: Context() = default; Context(ISPCRTDeviceType type); Context(ISPCRTDeviceType type, ISPCRTGenericHandle nativeContextHandle); ~Context() = default; void* nativeContextHandle() const; }; // Inlined definitions // inline Context::Context(ISPCRTDeviceType type) : GenericObject(ispcrtNewContext(type)) {} inline Context::Context(ISPCRTDeviceType type, ISPCRTGenericHandle nativeContextHandle) : GenericObject(ispcrtGetContextFromNativeHandle(type, nativeContextHandle)) {} inline void* Context::nativeContextHandle() const { return ispcrtContextNativeHandle(handle()); } ///////////////////////////////////////////////////////////////////////////// // Device wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Module; class Device : public GenericObject { public: Device() = default; Device(ISPCRTDeviceType type); Device(const Context &context); // deviceIdx is an index of the device in the list of supported devices // The list of the supported devices can be obtained with: // - allDevicesInformation() call or // - deviceCount() call and a series of deviceInformation() calls Device(ISPCRTDeviceType type, uint32_t deviceIdx); Device(const Context &context, uint32_t deviceIdx); Device(const Context &context, ISPCRTGenericHandle nativeDeviceHandle); void* nativePlatformHandle() const; void* nativeDeviceHandle() const; void* nativeContextHandle() const; // static methods to get information about available devices static uint32_t deviceCount(ISPCRTDeviceType type); static ISPCRTDeviceInfo deviceInformation(ISPCRTDeviceType type, uint32_t deviceIdx); static std::vector allDevicesInformation(ISPCRTDeviceType type); // link modules void dynamicLinkModules(ISPCRTModule* modules, const uint32_t num); Module staticLinkModules(ISPCRTModule* modules, const uint32_t num); // check memory type ISPCRTAllocationType getMemoryAllocType(void *memBuffer); }; // Inlined definitions // inline Device::Device(ISPCRTDeviceType type, uint32_t deviceIdx) : GenericObject(ispcrtGetDevice(type, deviceIdx)) { } inline Device::Device(ISPCRTDeviceType type) : Device(type, uint32_t(0)) {} inline Device::Device(const Context &context, uint32_t deviceIdx) : GenericObject(ispcrtGetDeviceFromContext(context.handle(), deviceIdx)) { } inline Device::Device(const Context &context) : Device(context, uint32_t(0)) {} inline Device::Device(const Context &context, ISPCRTGenericHandle nativeDeviceHandle) : GenericObject(ispcrtGetDeviceFromNativeHandle(context.handle(), nativeDeviceHandle)) {} inline void* Device::nativePlatformHandle() const { return ispcrtPlatformNativeHandle(handle()); } inline void* Device::nativeDeviceHandle() const { return ispcrtDeviceNativeHandle(handle()); } inline void* Device::nativeContextHandle() const { return ispcrtDeviceContextNativeHandle(handle()); } inline uint32_t Device::deviceCount(ISPCRTDeviceType type) { return ispcrtGetDeviceCount(type); } inline ISPCRTDeviceInfo Device::deviceInformation(ISPCRTDeviceType type, uint32_t deviceIdx) { ISPCRTDeviceInfo devInfo; ispcrtGetDeviceInfo(type, deviceIdx, &devInfo); return devInfo; } inline std::vector Device::allDevicesInformation(ISPCRTDeviceType type) { auto devCount = ispcrtGetDeviceCount(type); std::vector devInfo(devCount); for (int i = 0; i < devCount; i++) { ispcrtGetDeviceInfo(type, i, &devInfo[i]); } return devInfo; } inline void Device::dynamicLinkModules(ISPCRTModule* modules, const uint32_t num) { ispcrtDynamicLinkModules(handle(), (ISPCRTModule*)modules, num); } inline ISPCRTAllocationType Device::getMemoryAllocType(void *memBuffer) { return ispcrtGetMemoryAllocType(handle(), memBuffer); } ///////////////////////////////////////////////////////////////////////////// // Arrays (MemoryView wrapper w/ element type) ////////////////////////////// ///////////////////////////////////////////////////////////////////////////// enum class AllocType { Device, Shared }; enum class SharedMemoryUsageHint { HostDeviceReadWrite, HostWriteDeviceRead, HostReadDeviceWrite }; template class Array : public GenericObject { public: template using EnableForSharedAllocation = typename std::enable_if<(alloc == AllocType::Shared)>::type*; template using EnableForDeviceAllocation = typename std::enable_if<(alloc == AllocType::Device)>::type*; Array() = default; //////// Constructors that can be used for Device memory allocations //////// // Construct from raw array // template Array(const Device &device, T *appMemory, size_t size, EnableForDeviceAllocation = 0); // Construct from std:: containers (array + vector) // template Array(const Device &device, std::array &arr, EnableForDeviceAllocation = 0); template Array(const Device &device, std::vector &v, EnableForDeviceAllocation = 0); // Construct from single object // template Array(const Device &device, T &obj, EnableForDeviceAllocation = 0); //////// Constructors that can be used for Shared memory allocations //////// // Allocate single object in shared memory template Array(const Device &device, SharedMemoryUsageHint SMAT = SharedMemoryUsageHint::HostDeviceReadWrite, EnableForSharedAllocation = 0); // Allocate single object in shared memory for context template Array(const Context &context, SharedMemoryUsageHint SMAT = SharedMemoryUsageHint::HostDeviceReadWrite, EnableForSharedAllocation = 0); // Allocate multiple objects in shared memory template Array(const Device &device, size_t size, SharedMemoryUsageHint SMAT = SharedMemoryUsageHint::HostDeviceReadWrite, EnableForSharedAllocation = 0); // Allocate multiple objects in shared memory for context template Array(const Context &context, size_t size, SharedMemoryUsageHint SMAT = SharedMemoryUsageHint::HostDeviceReadWrite, EnableForSharedAllocation = 0); //////// Methods valid only for Device memory allocations //////// // For shared memory objects those will return the same pointer // template T *hostPtr(EnableForDeviceAllocation = 0) const; template T *devicePtr(EnableForDeviceAllocation = 0) const; //////// Methods valid for Shared memory allocations //////// template T *sharedPtr(EnableForSharedAllocation = 0) const; template SharedMemoryUsageHint smType(EnableForSharedAllocation = 0) const; //////// Methods for all types of memory allocations //////// size_t size() const; AllocType type() const; private: SharedMemoryUsageHint m_smuh{SharedMemoryUsageHint::HostDeviceReadWrite}; }; // Inlined definitions // // Device memory allocations template template inline Array::Array(const Device &device, T *appMemory, size_t size, EnableForDeviceAllocation) : GenericObject() { ISPCRTNewMemoryViewFlags flags; flags.allocType = ISPCRT_ALLOC_TYPE_DEVICE; flags.smHint = ISPCRT_SM_HOST_DEVICE_READ_WRITE; m_handle = ispcrtNewMemoryView(device.handle(), appMemory, size * sizeof(T), &flags); } template template inline Array::Array(const Device &device, std::array &arr, EnableForDeviceAllocation) : Array(device, arr.data(), N) {} template template inline Array::Array(const Device &device, std::vector &v, EnableForDeviceAllocation) : Array(device, v.data(), v.size()) {} template template inline Array::Array(const Device &device, T &obj, EnableForDeviceAllocation) : Array(device, &obj, 1) {} // Shared memory allocations template template inline Array::Array(const Device &device, SharedMemoryUsageHint smuh, EnableForSharedAllocation) : Array(device, 1, smuh) {} template template inline Array::Array(const Context &context, SharedMemoryUsageHint smuh, EnableForSharedAllocation) : Array(context, 1, smuh) {} inline void set_shared_memory_view_flags(ISPCRTNewMemoryViewFlags *p, SharedMemoryUsageHint t) { p->allocType = ISPCRT_ALLOC_TYPE_SHARED; switch (t) { case SharedMemoryUsageHint::HostDeviceReadWrite: p->smHint = ISPCRT_SM_HOST_DEVICE_READ_WRITE; break; case SharedMemoryUsageHint::HostWriteDeviceRead: p->smHint = ISPCRT_SM_HOST_WRITE_DEVICE_READ; break; case SharedMemoryUsageHint::HostReadDeviceWrite: p->smHint = ISPCRT_SM_HOST_READ_DEVICE_WRITE; break; default: throw std::bad_alloc(); } } template template inline Array::Array(const Device &device, size_t size, SharedMemoryUsageHint smuh, EnableForSharedAllocation) : m_smuh(smuh), GenericObject() { ISPCRTNewMemoryViewFlags flags; set_shared_memory_view_flags(&flags, smuh); m_handle = ispcrtNewMemoryView(device.handle(), nullptr, size * sizeof(T), &flags); } template template inline Array::Array(const Context &context, size_t size, SharedMemoryUsageHint smuh, EnableForSharedAllocation) : m_smuh(smuh), GenericObject() { ISPCRTNewMemoryViewFlags flags; set_shared_memory_view_flags(&flags, smuh); m_handle = ispcrtNewMemoryViewForContext(context.handle(), nullptr, size * sizeof(T), &flags); } // Device-only methods template template inline T *Array::hostPtr(EnableForDeviceAllocation) const { return (T *)ispcrtHostPtr(handle()); } template template inline T *Array::devicePtr(EnableForDeviceAllocation) const { return (T *)ispcrtDevicePtr(handle()); } // Shared-only methods template template inline T *Array::sharedPtr(EnableForSharedAllocation) const { return (T *)ispcrtSharedPtr(handle()); } template template inline SharedMemoryUsageHint Array::smType(EnableForSharedAllocation) const { return m_smuh; } // All other methods template inline size_t Array::size() const { return ispcrtSize(handle()) / sizeof(T); } template inline AllocType Array::type() const { return AT; } ///////////////////////////////////////////////////////////////////////////// // Shared Memory Allocator ////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// template class SharedMemoryAllocator { public: using value_type = T; SharedMemoryAllocator() = delete; SharedMemoryAllocator(const Context &context, SharedMemoryUsageHint smuh = SharedMemoryUsageHint::HostDeviceReadWrite) : m_context(context), m_smuh(smuh) {} SharedMemoryAllocator(const SharedMemoryAllocator&) = default; ~SharedMemoryAllocator() = default; SharedMemoryAllocator& operator=(const SharedMemoryAllocator&) = delete; T* allocate(const size_t n); void deallocate(T* const p, const size_t n); SharedMemoryUsageHint smType() const { return m_smuh; } protected: Context m_context; SharedMemoryUsageHint m_smuh; std::unordered_map> m_ptrToArray; }; // Inlined definitions // template inline T *SharedMemoryAllocator::allocate(const size_t n) { // Allocate a memory that can be shared between the host and the device auto a = Array(m_context, n, m_smuh); void* ptr = a.sharedPtr(); if (ptr == nullptr) { throw std::bad_alloc(); } m_ptrToArray[ptr] = a; return static_cast(ptr); } template inline void SharedMemoryAllocator::deallocate(T* const p, const size_t) { if (m_ptrToArray.find(p) == m_ptrToArray.end()) throw std::invalid_argument("pointer not allocated with this allocator"); m_ptrToArray.erase(p); } // Provide convinience type for shared memory allocations template using SharedVector = std::vector>; ///////////////////////////////////////////////////////////////////////////// // Module wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Module : public GenericObject { public: Module() = default; Module(const Device &device, const char *moduleName, const ISPCRTModuleOptions &opts = ISPCRTModuleOptions{}); Module(ISPCRTModule module); void *functionPtr(const char *functionName); }; // Inlined definitions // inline Module::Module(const Device &device, const char *moduleName, const ISPCRTModuleOptions &opts) : GenericObject(ispcrtLoadModule(device.handle(), moduleName, opts)) {} inline Module::Module(ISPCRTModule module) : GenericObject(module) {} inline void* Module::functionPtr(const char *functionName){ return ispcrtFunctionPtr(handle(), functionName); } inline Module Device::staticLinkModules(ISPCRTModule* modules, const uint32_t num) { return Module(ispcrtStaticLinkModules(handle(), (ISPCRTModule*)modules, num)); } ///////////////////////////////////////////////////////////////////////////// // Kernel wrapper /////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class Kernel : public GenericObject { public: Kernel() = default; Kernel(const Device &device, const Module &module, const char *kernelName); }; // Inlined definitions // inline Kernel::Kernel(const Device &device, const Module &module, const char *kernelName) : GenericObject(ispcrtNewKernel(device.handle(), module.handle(), kernelName)) {} ///////////////////////////////////////////////////////////////////////////// // TaskQueue wrapper //////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// class TaskQueue : public GenericObject { public: TaskQueue() = default; TaskQueue(const Device &device); void barrier() const; template void copyToDevice(const Array &arr) const; template void copyToHost(const Array &arr) const; template void copyArray(const Array &arrDst, const Array &arrSrc, const size_t size) const; Future launch(const Kernel &k, size_t dim0) const; Future launch(const Kernel &k, size_t dim0, size_t dim1) const; Future launch(const Kernel &k, size_t dim0, size_t dim1, size_t dim2) const; template Future launch(const Kernel &k, const Array &p, size_t dim0) const; template Future launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1) const; template Future launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1, size_t dim2) const; // start executing, but don't wait for the completion void submit() const; // wait for the command list to be executed (start the execution if needed as well) void sync() const; void* nativeTaskQueueHandle() const; }; // Inlined definitions // inline TaskQueue::TaskQueue(const Device &device) : GenericObject(ispcrtNewTaskQueue(device.handle())) {} inline void TaskQueue::barrier() const { ispcrtDeviceBarrier(handle()); } template inline void TaskQueue::copyToDevice(const Array &arr) const { ispcrtCopyToDevice(handle(), arr.handle()); } template inline void TaskQueue::copyToHost(const Array &arr) const { ispcrtCopyToHost(handle(), arr.handle()); } template inline void TaskQueue::copyArray(const Array &arrDst, const Array &arrSrc, const size_t size) const { ispcrtCopyMemoryView(handle(), arrDst.handle(), arrSrc.handle(), size * sizeof(T)); } inline Future TaskQueue::launch(const Kernel &k, size_t dim0) const { return ispcrtLaunch1D(handle(), k.handle(), nullptr, dim0); } inline Future TaskQueue::launch(const Kernel &k, size_t dim0, size_t dim1) const { return ispcrtLaunch2D(handle(), k.handle(), nullptr, dim0, dim1); } inline Future TaskQueue::launch(const Kernel &k, size_t dim0, size_t dim1, size_t dim2) const { return ispcrtLaunch3D(handle(), k.handle(), nullptr, dim0, dim1, dim2); } template inline Future TaskQueue::launch(const Kernel &k, const Array &p, size_t dim0) const { return ispcrtLaunch1D(handle(), k.handle(), p.handle(), dim0); } template inline Future TaskQueue::launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1) const { return ispcrtLaunch2D(handle(), k.handle(), p.handle(), dim0, dim1); } template inline Future TaskQueue::launch(const Kernel &k, const Array &p, size_t dim0, size_t dim1, size_t dim2) const { return ispcrtLaunch3D(handle(), k.handle(), p.handle(), dim0, dim1, dim2); } inline void TaskQueue::sync() const { ispcrtSync(handle()); } inline void* TaskQueue::nativeTaskQueueHandle() const { return ispcrtTaskQueueNativeHandle(handle()); } } // namespace ispcrt