#pragma once

// UE4-adapted version of https://github.com/max0x7ba/atomic_queue
// Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <type_traits>
#include <utility>
#include <atomic>

#if PLATFORM_USE_SSE2_FOR_THREAD_YIELD
#include <emmintrin.h>
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

namespace atomic_queue {

using std::uint32_t;
using std::uint64_t;
using std::uint8_t;

static inline void spin_loop_pause() noexcept {
// TODO(andriy): x86/x64 only
#if PLATFORM_USE_SSE2_FOR_THREAD_YIELD
    _mm_pause();
#elif PLATFORM_CPU_ARM_FAMILY
# if _MSC_VER
    __yield();
# else
    __asm__ __volatile__("yield");
# endif
#else
#error Implement this for your platform/architecture
#endif
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

namespace details {

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Number of index bits needed to address one element within a cache line,
// i.e. log2(elements_per_cache_line).
template<size_t elements_per_cache_line> struct GetCacheLineIndexBits { static int constexpr value = 0; };
template<> struct GetCacheLineIndexBits<64> { static int constexpr value = 6; };
template<> struct GetCacheLineIndexBits<32> { static int constexpr value = 5; };
template<> struct GetCacheLineIndexBits<16> { static int constexpr value = 4; };
template<> struct GetCacheLineIndexBits< 8> { static int constexpr value = 3; };
template<> struct GetCacheLineIndexBits< 4> { static int constexpr value = 2; };
template<> struct GetCacheLineIndexBits< 2> { static int constexpr value = 1; };

template<bool minimize_contention, unsigned array_size, size_t elements_per_cache_line>
struct GetIndexShuffleBits {
    static int constexpr bits = GetCacheLineIndexBits<elements_per_cache_line>::value;
    static unsigned constexpr min_size = 1u << (bits * 2);
    static int constexpr value = array_size < min_size ? 0 : bits;
};

template<unsigned array_size, size_t elements_per_cache_line>
struct GetIndexShuffleBits<false, array_size, elements_per_cache_line> {
    static int constexpr value = 0;
};
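
// Illustration (added for this edit, not in the upstream source): with 16
// elements per cache line, shuffling uses 4 bits, but only for arrays of at
// least 16 * 16 = 256 elements; below that, and whenever minimize_contention
// is false, shuffling is disabled. Verified at compile time:
static_assert(GetIndexShuffleBits<true, 1024, 16>::value == 4, "large array: shuffle 4 bits");
static_assert(GetIndexShuffleBits<true, 128, 16>::value == 0, "array smaller than min_size: no shuffle");
static_assert(GetIndexShuffleBits<false, 1024, 16>::value == 0, "contention minimization disabled");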

// Multiple writers/readers contend on the same cache line when storing/loading elements at
// subsequent indexes, aka false sharing. For power of 2 ring buffer size it is possible to re-map
// the index in such a way that each subsequent element resides on another cache line, which
// minimizes contention. This is done by swapping the lowest order N bits (which are the index of
// the element within the cache line) with the next N bits (which are the index of the cache line)
// of the element index.
template<int BITS>
constexpr unsigned remap_index(unsigned index) noexcept {
    constexpr unsigned MASK = (1u << BITS) - 1;
    unsigned mix = (index ^ (index >> BITS)) & MASK;
    return index ^ mix ^ (mix << BITS);
}

template<>
constexpr unsigned remap_index<0>(unsigned index) noexcept {
    return index;
}
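
// Illustration (added for this edit, not in the upstream source): with
// BITS = 1 the lowest index bit swaps with the next one, so the neighbouring
// indexes 1 and 2 trade places and land in different cache lines:
static_assert(remap_index<1>(0u) == 0u, "0b00 -> 0b00");
static_assert(remap_index<1>(1u) == 2u, "0b01 -> 0b10");
static_assert(remap_index<1>(2u) == 1u, "0b10 -> 0b01");
static_assert(remap_index<1>(3u) == 3u, "0b11 -> 0b11");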

template<int BITS, class T>
constexpr T& map(T* elements, unsigned index) noexcept {
    index = remap_index<BITS>(index);
    return elements[index];
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

constexpr uint32_t round_up_to_power_of_2(uint32_t a) noexcept {
    --a;
    a |= a >> 1;
    a |= a >> 2;
    a |= a >> 4;
    a |= a >> 8;
    a |= a >> 16;
    ++a;
    return a;
}
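
// Illustration (added for this edit, not in the upstream source): the shifts
// smear the highest set bit of (a - 1) into every lower position, so the
// final increment lands exactly on the next power of 2. Spot checks:
static_assert(round_up_to_power_of_2(uint32_t{1}) == 1u, "1 is already a power of 2");
static_assert(round_up_to_power_of_2(uint32_t{5}) == 8u, "5 rounds up to 8");
static_assert(round_up_to_power_of_2(uint32_t{64}) == 64u, "powers of 2 map to themselves");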

constexpr uint64_t round_up_to_power_of_2(uint64_t a) noexcept {
    --a;
    a |= a >> 1;
    a |= a >> 2;
    a |= a >> 4;
    a |= a >> 8;
    a |= a >> 16;
    a |= a >> 32;
    ++a;
    return a;
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

} // namespace details

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<class Derived>
class AtomicQueueCommon {
protected:
    // Put these on different cache lines to avoid false sharing between readers and writers.
    alignas(PLATFORM_CACHE_LINE_SIZE) std::atomic<unsigned> head_ = {};
    alignas(PLATFORM_CACHE_LINE_SIZE) std::atomic<unsigned> tail_ = {};

    // The special member functions are not thread-safe.

    AtomicQueueCommon() noexcept = default;

    AtomicQueueCommon(AtomicQueueCommon const& b) noexcept
        : head_(b.head_.load(std::memory_order_relaxed))
        , tail_(b.tail_.load(std::memory_order_relaxed)) {}

    AtomicQueueCommon& operator=(AtomicQueueCommon const& b) noexcept {
        head_.store(b.head_.load(std::memory_order_relaxed), std::memory_order_relaxed);
        tail_.store(b.tail_.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }

    void swap(AtomicQueueCommon& b) noexcept {
        unsigned h = head_.load(std::memory_order_relaxed);
        unsigned t = tail_.load(std::memory_order_relaxed);
        head_.store(b.head_.load(std::memory_order_relaxed), std::memory_order_relaxed);
        tail_.store(b.tail_.load(std::memory_order_relaxed), std::memory_order_relaxed);
        b.head_.store(h, std::memory_order_relaxed);
        b.tail_.store(t, std::memory_order_relaxed);
    }

    template<class T, T NIL>
    static T do_pop_atomic(std::atomic<T>& q_element) noexcept {
        if (Derived::spsc_) {
            for (;;) {
                T element = q_element.load(std::memory_order_relaxed);
                if (LIKELY(element != NIL)) {
                    q_element.store(NIL, std::memory_order_release);
                    return element;
                }
                if (Derived::maximize_throughput_)
                    spin_loop_pause();
            }
        }
        else {
            for (;;) {
                T element = q_element.exchange(NIL, std::memory_order_release); // (2) The store to wait for.
                if (LIKELY(element != NIL))
                    return element;
                // Do speculative loads while busy-waiting to avoid broadcasting RFO messages.
                do
                    spin_loop_pause();
                while (Derived::maximize_throughput_ && q_element.load(std::memory_order_relaxed) == NIL);
            }
        }
    }

    template<class T, T NIL>
    static void do_push_atomic(T element, std::atomic<T>& q_element) noexcept {
        assert(element != NIL);
        if (Derived::spsc_) {
            while (UNLIKELY(q_element.load(std::memory_order_relaxed) != NIL))
                if (Derived::maximize_throughput_)
                    spin_loop_pause();
            q_element.store(element, std::memory_order_release);
        }
        else {
            for (T expected = NIL;
                 UNLIKELY(!q_element.compare_exchange_strong(expected, element, std::memory_order_release, std::memory_order_relaxed));
                 expected = NIL) {
                do
                    spin_loop_pause(); // (1) Wait for store (2) to complete.
                while (Derived::maximize_throughput_ && q_element.load(std::memory_order_relaxed) != NIL);
            }
        }
    }

    enum State : unsigned char { EMPTY, STORING, STORED, LOADING };

    template<class T>
    static T do_pop_any(std::atomic<unsigned char>& state, T& q_element) noexcept {
        if (Derived::spsc_) {
            while (UNLIKELY(state.load(std::memory_order_acquire) != STORED))
                if (Derived::maximize_throughput_)
                    spin_loop_pause();
            T element{ std::move(q_element) };
            state.store(EMPTY, std::memory_order_release);
            return element;
        }
        else {
            for (;;) {
                unsigned char expected = STORED;
                // Acquire on success so that the read of q_element below synchronizes
                // with the pusher's release store of STORED.
                if (LIKELY(state.compare_exchange_strong(expected, LOADING, std::memory_order_acquire, std::memory_order_relaxed))) {
                    T element{ std::move(q_element) };
                    state.store(EMPTY, std::memory_order_release);
                    return element;
                }
                // Do speculative loads while busy-waiting to avoid broadcasting RFO messages.
                do
                    spin_loop_pause();
                while (Derived::maximize_throughput_ && state.load(std::memory_order_relaxed) != STORED);
            }
        }
    }

    template<class U, class T>
    static void do_push_any(U&& element, std::atomic<unsigned char>& state, T& q_element) noexcept {
        if (Derived::spsc_) {
            while (UNLIKELY(state.load(std::memory_order_acquire) != EMPTY))
                if (Derived::maximize_throughput_)
                    spin_loop_pause();
            q_element = std::forward<U>(element);
            state.store(STORED, std::memory_order_release);
        }
        else {
            for (;;) {
                unsigned char expected = EMPTY;
                // Acquire on success so that the write to q_element below synchronizes
                // with the popper's release store of EMPTY.
                if (LIKELY(state.compare_exchange_strong(expected, STORING, std::memory_order_acquire, std::memory_order_relaxed))) {
                    q_element = std::forward<U>(element);
                    state.store(STORED, std::memory_order_release);
                    return;
                }
                // Do speculative loads while busy-waiting to avoid broadcasting RFO messages.
                do
                    spin_loop_pause();
                while (Derived::maximize_throughput_ && state.load(std::memory_order_relaxed) != EMPTY);
            }
        }
    }

public:
    template<class T>
    bool try_push(T&& element) noexcept {
        auto head = head_.load(std::memory_order_relaxed);
        if (Derived::spsc_) {
            if (static_cast<int>(head - tail_.load(std::memory_order_relaxed)) >= static_cast<int>(static_cast<Derived&>(*this).size_))
                return false;
            head_.store(head + 1, std::memory_order_relaxed);
        }
        else {
            do {
                if (static_cast<int>(head - tail_.load(std::memory_order_relaxed)) >= static_cast<int>(static_cast<Derived&>(*this).size_))
                    return false;
            } while (UNLIKELY(!head_.compare_exchange_strong(head, head + 1, std::memory_order_acquire, std::memory_order_relaxed))); // This loop is not FIFO.
        }

        static_cast<Derived&>(*this).do_push(std::forward<T>(element), head);
        return true;
    }

    template<class T>
    bool try_pop(T& element) noexcept {
        auto tail = tail_.load(std::memory_order_relaxed);
        if (Derived::spsc_) {
            if (static_cast<int>(head_.load(std::memory_order_relaxed) - tail) <= 0)
                return false;
            tail_.store(tail + 1, std::memory_order_relaxed);
        }
        else {
            do {
                if (static_cast<int>(head_.load(std::memory_order_relaxed) - tail) <= 0)
                    return false;
            } while (UNLIKELY(!tail_.compare_exchange_strong(tail, tail + 1, std::memory_order_acquire, std::memory_order_relaxed))); // This loop is not FIFO.
        }

        element = static_cast<Derived&>(*this).do_pop(tail);
        return true;
    }

    // push/pop never fail: they claim a slot index unconditionally and then
    // busy-wait in do_push/do_pop until the element can be stored/loaded there.
    template<class T>
    void push(T&& element) noexcept {
        unsigned head;
        if (Derived::spsc_) {
            head = head_.load(std::memory_order_relaxed);
            head_.store(head + 1, std::memory_order_relaxed);
        }
        else {
            constexpr auto memory_order = Derived::total_order_ ? std::memory_order_seq_cst : std::memory_order_acquire;
            head = head_.fetch_add(1, memory_order); // FIFO and total order on Intel regardless, as of 2019.
        }
        static_cast<Derived&>(*this).do_push(std::forward<T>(element), head);
    }

    auto pop() noexcept {
        unsigned tail;
        if (Derived::spsc_) {
            tail = tail_.load(std::memory_order_relaxed);
            tail_.store(tail + 1, std::memory_order_relaxed);
        }
        else {
            constexpr auto memory_order = Derived::total_order_ ? std::memory_order_seq_cst : std::memory_order_acquire;
            tail = tail_.fetch_add(1, memory_order); // FIFO and total order on Intel regardless, as of 2019.
        }
        return static_cast<Derived&>(*this).do_pop(tail);
    }

    // "was_" because the result is a stale snapshot by the time the caller can act on it.
    bool was_empty() const noexcept {
        return static_cast<int>(head_.load(std::memory_order_relaxed) - tail_.load(std::memory_order_relaxed)) <= 0;
    }

    bool was_full() const noexcept {
        return static_cast<int>(head_.load(std::memory_order_relaxed) - tail_.load(std::memory_order_relaxed)) >= static_cast<int>(static_cast<Derived const&>(*this).size_);
    }

    unsigned capacity() const noexcept {
        return static_cast<Derived const&>(*this).size_;
    }
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<class T, unsigned SIZE, T NIL = T{}, bool MINIMIZE_CONTENTION = true, bool MAXIMIZE_THROUGHPUT = true, bool TOTAL_ORDER = false, bool SPSC = false>
class AtomicQueue : public AtomicQueueCommon<AtomicQueue<T, SIZE, NIL, MINIMIZE_CONTENTION, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>> {
    using Base = AtomicQueueCommon<AtomicQueue<T, SIZE, NIL, MINIMIZE_CONTENTION, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>>;
    friend Base;

    static constexpr unsigned size_ = MINIMIZE_CONTENTION ? details::round_up_to_power_of_2(SIZE) : SIZE;
    static constexpr int SHUFFLE_BITS = details::GetIndexShuffleBits<MINIMIZE_CONTENTION, size_, PLATFORM_CACHE_LINE_SIZE / sizeof(std::atomic<T>)>::value;
    static constexpr bool total_order_ = TOTAL_ORDER;
    static constexpr bool spsc_ = SPSC;
    static constexpr bool maximize_throughput_ = MAXIMIZE_THROUGHPUT;

    alignas(PLATFORM_CACHE_LINE_SIZE) std::atomic<T> elements_[size_] = {}; // Empty elements are NIL.

    T do_pop(unsigned tail) noexcept {
        std::atomic<T>& q_element = details::map<SHUFFLE_BITS>(elements_, tail % size_);
        return Base::template do_pop_atomic<T, NIL>(q_element);
    }

    void do_push(T element, unsigned head) noexcept {
        std::atomic<T>& q_element = details::map<SHUFFLE_BITS>(elements_, head % size_);
        Base::template do_push_atomic<T, NIL>(element, q_element);
    }

public:
    using value_type = T;

    AtomicQueue() noexcept {
        assert(std::atomic<T>{NIL}.is_lock_free()); // This queue is for atomic elements only. AtomicQueue2 is for non-atomic ones.
        if (T{} != NIL)
            for (auto& element : elements_)
                element.store(NIL, std::memory_order_relaxed);
    }

    AtomicQueue(AtomicQueue const&) = delete;
    AtomicQueue& operator=(AtomicQueue const&) = delete;
};
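
/*
Example (added for this edit, not in the upstream source) -- a fixed-capacity
MPMC queue of pointers; the default NIL sentinel T{} is nullptr, so any
non-null pointer may be enqueued:

    atomic_queue::AtomicQueue<int*, 1024> queue;

    static int value = 42;
    if (queue.try_push(&value)) {   // non-blocking; returns false when full
        int* p = nullptr;
        if (queue.try_pop(p))       // non-blocking; returns false when empty
            assert(*p == 42);
    }
*/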

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<class T, unsigned SIZE, bool MINIMIZE_CONTENTION = true, bool MAXIMIZE_THROUGHPUT = true, bool TOTAL_ORDER = false, bool SPSC = false>
class AtomicQueue2 : public AtomicQueueCommon<AtomicQueue2<T, SIZE, MINIMIZE_CONTENTION, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>> {
    using Base = AtomicQueueCommon<AtomicQueue2<T, SIZE, MINIMIZE_CONTENTION, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>>;
    using State = typename Base::State;
    friend Base;

    static constexpr unsigned size_ = MINIMIZE_CONTENTION ? details::round_up_to_power_of_2(SIZE) : SIZE;
    static constexpr int SHUFFLE_BITS = details::GetIndexShuffleBits<MINIMIZE_CONTENTION, size_, PLATFORM_CACHE_LINE_SIZE / sizeof(State)>::value;
    static constexpr bool total_order_ = TOTAL_ORDER;
    static constexpr bool spsc_ = SPSC;
    static constexpr bool maximize_throughput_ = MAXIMIZE_THROUGHPUT;

    alignas(PLATFORM_CACHE_LINE_SIZE) std::atomic<unsigned char> states_[size_] = {};
    alignas(PLATFORM_CACHE_LINE_SIZE) T elements_[size_] = {};

    T do_pop(unsigned tail) noexcept {
        unsigned index = details::remap_index<SHUFFLE_BITS>(tail % size_);
        return Base::template do_pop_any(states_[index], elements_[index]);
    }

    template<class U>
    void do_push(U&& element, unsigned head) noexcept {
        unsigned index = details::remap_index<SHUFFLE_BITS>(head % size_);
        Base::template do_push_any(std::forward<U>(element), states_[index], elements_[index]);
    }

public:
    using value_type = T;

    AtomicQueue2() noexcept = default;
    AtomicQueue2(AtomicQueue2 const&) = delete;
    AtomicQueue2& operator=(AtomicQueue2 const&) = delete;
};
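
/*
Example (added for this edit, not in the upstream source) -- AtomicQueue2
stores elements that need not be atomic or have a NIL sentinel; each slot is
guarded by its own EMPTY/STORING/STORED/LOADING state byte instead:

    atomic_queue::AtomicQueue2<std::pair<int, float>, 256> queue;

    queue.try_push(std::make_pair(1, 2.0f));
    std::pair<int, float> out;
    if (queue.try_pop(out))
        assert(out.first == 1);
*/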

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<class T, class A = std::allocator<T>, T NIL = T{}, bool MAXIMIZE_THROUGHPUT = true, bool TOTAL_ORDER = false, bool SPSC = false>
class AtomicQueueB : public AtomicQueueCommon<AtomicQueueB<T, A, NIL, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>>,
                     private std::allocator_traits<A>::template rebind_alloc<std::atomic<T>> {
    using Base = AtomicQueueCommon<AtomicQueueB<T, A, NIL, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>>;
    friend Base;

    static constexpr bool total_order_ = TOTAL_ORDER;
    static constexpr bool spsc_ = SPSC;
    static constexpr bool maximize_throughput_ = MAXIMIZE_THROUGHPUT;

    using AllocatorElements = typename std::allocator_traits<A>::template rebind_alloc<std::atomic<T>>;

    static constexpr auto ELEMENTS_PER_CACHE_LINE = PLATFORM_CACHE_LINE_SIZE / sizeof(std::atomic<T>);
    static_assert(ELEMENTS_PER_CACHE_LINE, "Unexpected ELEMENTS_PER_CACHE_LINE.");

    static constexpr auto SHUFFLE_BITS = details::GetCacheLineIndexBits<ELEMENTS_PER_CACHE_LINE>::value;
    static_assert(SHUFFLE_BITS, "Unexpected SHUFFLE_BITS.");

    // AtomicQueueCommon members are stored into by readers and writers.
    // Allocate these immutable members on another cache line which never gets invalidated by stores.
    alignas(PLATFORM_CACHE_LINE_SIZE) unsigned size_;
    std::atomic<T>* elements_;

    T do_pop(unsigned tail) noexcept {
        std::atomic<T>& q_element = details::map<SHUFFLE_BITS>(elements_, tail & (size_ - 1));
        return Base::template do_pop_atomic<T, NIL>(q_element);
    }

    void do_push(T element, unsigned head) noexcept {
        std::atomic<T>& q_element = details::map<SHUFFLE_BITS>(elements_, head & (size_ - 1));
        Base::template do_push_atomic<T, NIL>(element, q_element);
    }

public:
    using value_type = T;

    // The special member functions are not thread-safe.

    AtomicQueueB(unsigned size)
        : size_(std::max(details::round_up_to_power_of_2(size), 1u << (SHUFFLE_BITS * 2)))
        , elements_(AllocatorElements::allocate(size_)) {
        assert(std::atomic<T>{NIL}.is_lock_free()); // This queue is for atomic elements only. AtomicQueueB2 is for non-atomic ones.
        for (auto p = elements_, q = elements_ + size_; p < q; ++p)
            p->store(NIL, std::memory_order_relaxed);
    }

    AtomicQueueB(AtomicQueueB&& b) noexcept
        : Base(static_cast<Base&&>(b))
        , AllocatorElements(static_cast<AllocatorElements&&>(b)) // TODO: This must be noexcept, static_assert that.
        , size_(b.size_)
        , elements_(b.elements_) {
        b.size_ = 0;
        b.elements_ = nullptr;
    }

    AtomicQueueB& operator=(AtomicQueueB&& b) noexcept {
        b.swap(*this);
        return *this;
    }

    ~AtomicQueueB() noexcept {
        if (elements_)
            AllocatorElements::deallocate(elements_, size_); // TODO: This must be noexcept, static_assert that.
    }

    void swap(AtomicQueueB& b) noexcept {
        using std::swap;
        this->Base::swap(b);
        swap(static_cast<AllocatorElements&>(*this), static_cast<AllocatorElements&>(b));
        swap(size_, b.size_);
        swap(elements_, b.elements_);
    }

    friend void swap(AtomicQueueB& a, AtomicQueueB& b) noexcept {
        a.swap(b);
    }
};
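
/*
Example (added for this edit, not in the upstream source) -- capacity is
chosen at run time; the constructor rounds it up to a power of 2 and to at
least 1u << (SHUFFLE_BITS * 2) so index shuffling stays valid:

    atomic_queue::AtomicQueueB<uint32_t> queue(1000); // capacity() == 1024 with a 64-byte cache line

    queue.try_push(7u); // any value other than the NIL sentinel (0 by default) is valid
*/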

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<class T, class A = std::allocator<T>, bool MAXIMIZE_THROUGHPUT = true, bool TOTAL_ORDER = false, bool SPSC = false>
class AtomicQueueB2 : public AtomicQueueCommon<AtomicQueueB2<T, A, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>>,
                      private A,
                      private std::allocator_traits<A>::template rebind_alloc<std::atomic<uint8_t>> {
    using Base = AtomicQueueCommon<AtomicQueueB2<T, A, MAXIMIZE_THROUGHPUT, TOTAL_ORDER, SPSC>>;
    using State = typename Base::State;
    friend Base;

    static constexpr bool total_order_ = TOTAL_ORDER;
    static constexpr bool spsc_ = SPSC;
    static constexpr bool maximize_throughput_ = MAXIMIZE_THROUGHPUT;

    using AllocatorElements = A;
    using AllocatorStates = typename std::allocator_traits<A>::template rebind_alloc<std::atomic<uint8_t>>;

    // AtomicQueueCommon members are stored into by readers and writers.
    // Allocate these immutable members on another cache line which never gets invalidated by stores.
    alignas(PLATFORM_CACHE_LINE_SIZE) unsigned size_;
    std::atomic<unsigned char>* states_;
    T* elements_;

    static constexpr auto STATES_PER_CACHE_LINE = PLATFORM_CACHE_LINE_SIZE / sizeof(State);
    static_assert(STATES_PER_CACHE_LINE, "Unexpected STATES_PER_CACHE_LINE.");

    static constexpr auto SHUFFLE_BITS = details::GetCacheLineIndexBits<STATES_PER_CACHE_LINE>::value;
    static_assert(SHUFFLE_BITS, "Unexpected SHUFFLE_BITS.");

    T do_pop(unsigned tail) noexcept {
        unsigned index = details::remap_index<SHUFFLE_BITS>(tail & (size_ - 1));
        return Base::template do_pop_any(states_[index], elements_[index]);
    }

    template<class U>
    void do_push(U&& element, unsigned head) noexcept {
        unsigned index = details::remap_index<SHUFFLE_BITS>(head & (size_ - 1));
        Base::template do_push_any(std::forward<U>(element), states_[index], elements_[index]);
    }

public:
    using value_type = T;

    // The special member functions are not thread-safe.

    AtomicQueueB2(unsigned size)
        : size_(std::max(details::round_up_to_power_of_2(size), 1u << (SHUFFLE_BITS * 2)))
        , states_(AllocatorStates::allocate(size_))
        , elements_(AllocatorElements::allocate(size_)) {
        for (auto p = states_, q = states_ + size_; p < q; ++p)
            p->store(Base::EMPTY, std::memory_order_relaxed);

        AllocatorElements& ae = *this;
        for (auto p = elements_, q = elements_ + size_; p < q; ++p)
            std::allocator_traits<AllocatorElements>::construct(ae, p);
    }

    AtomicQueueB2(AtomicQueueB2&& b) noexcept
        : Base(static_cast<Base&&>(b))
        , AllocatorElements(static_cast<AllocatorElements&&>(b)) // TODO: This must be noexcept, static_assert that.
        , AllocatorStates(static_cast<AllocatorStates&&>(b)) // TODO: This must be noexcept, static_assert that.
        , size_(b.size_)
        , states_(b.states_)
        , elements_(b.elements_) {
        b.size_ = 0;
        b.states_ = nullptr;
        b.elements_ = nullptr;
    }

    AtomicQueueB2& operator=(AtomicQueueB2&& b) noexcept {
        b.swap(*this);
        return *this;
    }

    ~AtomicQueueB2() noexcept {
        if (elements_) {
            AllocatorElements& ae = *this;
            for (auto p = elements_, q = elements_ + size_; p < q; ++p)
                std::allocator_traits<AllocatorElements>::destroy(ae, p);
            AllocatorElements::deallocate(elements_, size_); // TODO: This must be noexcept, static_assert that.
            AllocatorStates::deallocate(states_, size_); // TODO: This must be noexcept, static_assert that.
        }
    }

    void swap(AtomicQueueB2& b) noexcept {
        using std::swap;
        this->Base::swap(b);
        swap(static_cast<AllocatorElements&>(*this), static_cast<AllocatorElements&>(b));
        swap(static_cast<AllocatorStates&>(*this), static_cast<AllocatorStates&>(b));
        swap(size_, b.size_);
        swap(states_, b.states_);
        swap(elements_, b.elements_);
    }

    friend void swap(AtomicQueueB2& a, AtomicQueueB2& b) noexcept {
        a.swap(b);
    }
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<class Queue>
struct RetryDecorator : Queue {
    using T = typename Queue::value_type;

    using Queue::Queue;

    void push(T element) noexcept {
        while (!this->try_push(element))
            spin_loop_pause();
    }

    T pop() noexcept {
        T element;
        while (!this->try_pop(element))
            spin_loop_pause();
        return element;
    }
};
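
/*
Example (added for this edit, not in the upstream source) -- RetryDecorator
turns the non-blocking try_push/try_pop into blocking push/pop that spin
until they succeed, instead of claiming a slot index up front like the base
push/pop do:

    atomic_queue::RetryDecorator<atomic_queue::AtomicQueue<unsigned, 256>> queue;

    queue.push(1u);                 // spins while the queue is full
    unsigned element = queue.pop(); // spins while the queue is empty
*/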

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

} // namespace atomic_queue