// allocPool/allocPool.hpp

#pragma once

#include <algorithm>
#include <cassert>
#include <concepts>
#include <cstddef>
#include <cstring>
#include <mutex>
#include <new>
#include <thread>
#include <unordered_map>
#include <vector>

/// Satisfied by any type whose instances accept the expression `instance.reset()`,
/// i.e. expose a `reset` member callable without arguments.
/// The result type of `reset()` is not constrained.
template<typename T>
concept resetable = requires(T instance) { instance.reset(); };

/**
 * @brief Pool of heap-allocated, default-constructed @p T objects that are handed out
 *        and taken back without further allocation until the pool is exhausted.
 *
 * Layout invariant: `vec[0 .. pivot)` holds the objects that are currently free and
 * `vec[pivot .. vec.size())` holds the objects that are currently handed out.
 * `positionMap` maps every owned pointer to its current index in `vec`.
 *
 * The pool owns every object it creates; the destructor deletes all of them,
 * including objects that are still handed out at that time.
 *
 * `getPtr()` and `returnPtr()` do not take any lock; `positionMapMutex` only guards
 * the map while the pool populates itself from several worker threads.
 *
 * @tparam T Default-initializable type exposing `reset()`.
 */
template<class T>
    requires std::default_initializable<T> && resetable<T>
class allocPool {
public:
    /**
     * @brief Creates a pool pre-populated with @p allocNumbers objects.
     * @param allocNumbers Number of objects to allocate up front (0 is allowed; the
     *        pool then grows on the first `getPtr()`).
     * @param enableFalseSharingMitigations When true, each worker thread used for
     *        population fills at least one cache line worth of pointer slots.
     */
    explicit allocPool(std::size_t allocNumbers, bool enableFalseSharingMitigations = true)
        : vec(allocNumbers), pivot{allocNumbers}, falseSharingMitigations{enableFalseSharingMitigations} {
        // vec(allocNumbers) already value-initialises every slot to nullptr.
        initArray(allocNumbers);
    }

    /// Deletes every object owned by the pool, whether free or handed out.
    ~allocPool() {
        for (T *obj: vec)
            delete obj;
    }

    // The pool owns raw pointers; copying would lead to double deletion.
    allocPool(const allocPool &) = delete;
    allocPool &operator=(const allocPool &) = delete;

    /**
     * @brief Hands out one free object, growing the pool first if none is free.
     * @return Pointer to an object owned by the pool; give it back with `returnPtr()`.
     */
    T *getPtr() {
        if (pivot == 0)
            resizeVec();

        T *ptrToReturn{vec[0]};
        const std::size_t lastFree{pivot - 1};

        // Move the handed-out object to the end of the free region, then shrink the
        // free region by one so that slot becomes the first "in use" slot.
        std::swap(vec[0], vec[lastFree]);
        positionMap[vec[0]] = 0;
        positionMap[vec[lastFree]] = lastFree;
        pivot = lastFree;
        return ptrToReturn;
    }

    /**
     * @brief Gives an object back to the pool after calling `reset()` on it.
     *
     * Pointers that the pool does not own (including nullptr) and objects that are
     * already free (double return) are ignored.
     *
     * @param ptr Pointer previously obtained from `getPtr()`.
     */
    void returnPtr(T *ptr) {
        // find() instead of operator[] so unknown pointers are not inserted into the map.
        const auto entry = positionMap.find(ptr);
        if (entry == positionMap.end())
            return;

        const std::size_t pos{entry->second};
        if (pos < pivot)
            return; // Already in the free region.

        ptr->reset();

        // pos >= pivot and pos < vec.size(), hence vec[pivot] is a valid in-use slot.
        std::swap(vec[pos], vec[pivot]);
        positionMap[vec[pos]] = pos;
        positionMap[vec[pivot]] = pivot;
        ++pivot;
    }

private:
#ifdef __cpp_lib_hardware_interference_size
    static constexpr std::size_t cacheLineSize{std::hardware_destructive_interference_size};
#else
    // Fallback for standard libraries that do not provide the constant.
    static constexpr std::size_t cacheLineSize{64};
#endif

    std::vector<T *> vec;
    std::mutex positionMapMutex;
    std::unordered_map<T *, std::size_t> positionMap;
    std::size_t pivot;
    bool falseSharingMitigations;

    /**
     * @brief Allocates objects for `vec[0 .. arrSize)` and registers them in `positionMap`.
     *
     * The range is split into equally sized chunks filled by worker threads; whatever
     * does not fit into full chunks is filled on the calling thread.
     */
    void initArray(std::size_t arrSize) {
        if (arrSize == 0)
            return;

        // hardware_concurrency() is allowed to return 0 when the value is unknown.
        std::size_t amountOfThreads{std::thread::hardware_concurrency()};
        if (amountOfThreads == 0)
            amountOfThreads = 1;

        std::size_t amountPerThread{arrSize / amountOfThreads};

        // We try to avoid false sharing by defining a minimum size.
        if (falseSharingMitigations) {
            constexpr std::size_t minObjPerThread{cacheLineSize / sizeof(T *)};
            amountPerThread = std::max(amountPerThread, minObjPerThread);
        }

        // Number of full chunks that fit, never more than the hardware thread count.
        const std::size_t threadCount{
            amountPerThread == 0 ? 0 : std::min(amountOfThreads, arrSize / amountPerThread)};

        // With fewer than two full chunks there is nothing to parallelise.
        if (threadCount < 2) {
            initObjects(0, arrSize);
            return;
        }

        std::vector<std::thread> threads;
        threads.reserve(threadCount);

        for (std::size_t i{}; i < threadCount; ++i)
            threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);

        for (auto &t: threads)
            t.join();

        // Remainder: everything after the last full chunk.
        const std::size_t initialised{threadCount * amountPerThread};
        initObjects(initialised, arrSize - initialised);
    }

    /**
     * @brief Allocates @p amount objects into `vec[startIdx .. startIdx + amount)` and
     *        records their absolute indices in `positionMap`.
     */
    void initObjects(std::size_t startIdx, std::size_t amount) {
        for (std::size_t i{}; i < amount; ++i)
            vec[startIdx + i] = new T;

        // In the future, it should be possible to write a custom hashmap with sections
        // with independent locks, or use a data structure which would be contiguous.
        std::lock_guard<std::mutex> guard(positionMapMutex);
        for (std::size_t i{}; i < amount; ++i)
            positionMap[vec[startIdx + i]] = startIdx + i;
    }

    /**
     * @brief Doubles the pool (or grows an empty pool to one slot).
     *
     * Existing pointers are moved to the tail of `vec` and the freed front range is
     * populated with new objects. Called from `getPtr()` when no free object is left,
     * i.e. when every existing object is handed out.
     */
    void resizeVec() {
        const std::size_t oldSize{vec.size()};
        const std::size_t growth{std::max<std::size_t>(oldSize, 1)};
        vec.resize(oldSize + growth);

        // For oldSize > 0, growth == oldSize, so source and destination never overlap.
        for (std::size_t i{}; i < oldSize; ++i) {
            vec[growth + i] = vec[i];
            vec[i] = nullptr; // Avoid a duplicate owner until the slot is refilled.
            positionMap[vec[growth + i]] = growth + i;
        }

        pivot = growth;
        initArray(growth);
    }
};