allocPool/allocPool.hpp
Timothée Leclaire-Fournier 7f58269866 allocPool: Try to avoid false sharing.
A benchmark of the initArray() function with 50 objects, run with and without the false-sharing mitigations, gives these values on Windows:
- With mitigations: ~5500 microseconds
- Without mitigations: ~9000 microseconds

On Linux:
- With mitigations: ~600 microseconds
- Without mitigations: ~700 microseconds
2024-03-12 11:28:10 -04:00

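The mitigation benchmarked above works by giving each initialization thread at least one full cache line's worth of vector slots, so neighbouring threads never write into the same line. A minimal sketch of that arithmetic, assuming a 64-byte interference span and 8-byte pointers (both platform-dependent) and a standard library that actually provides std::hardware_destructive_interference_size:

#include <cstddef>
#include <cstdio>
#include <new>

int main() {
    // Typically 64 bytes on x86-64; the exact value is implementation-defined.
    constexpr std::size_t lineSize = std::hardware_destructive_interference_size;
    // With 8-byte pointers this yields 64 / 8 = 8, i.e. each thread must
    // initialize at least 8 adjacent T* slots to own whole cache lines.
    constexpr std::size_t minObjPerThread = lineSize / sizeof(void *);
    std::printf("min objects per thread: %zu\n", minObjPerThread);
}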

#pragma once

#include <algorithm>
#include <cassert>
#include <concepts>
#include <cstddef>
#include <cstring>
#include <mutex>
#include <new>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
// A type can live in the pool if it can be reset to a clean state
// before being handed out again.
template<class T>
concept resetable = requires(T val) {
    val.reset();
};

template<class T>
    requires std::default_initializable<T> && resetable<T>
class allocPool {
public:
    // Pre-allocate allocNumbers objects. The vector of pointers is
    // value-initialized to nullptr before initArray() fills it, so no
    // separate memset is needed.
    explicit allocPool(size_t allocNumbers)
        : vec(allocNumbers), pivot{allocNumbers} {
        initArray(allocNumbers);
    }

    ~allocPool() {
        for (auto *i: vec)
            delete i;
    }
    // Free pointers occupy vec[0, pivot); handed-out pointers occupy
    // vec[pivot, vec.size()). Note that getPtr() and returnPtr() are not
    // synchronized; positionMapMutex only guards the map during initialization.
    T *getPtr() {
        if (pivot == 0)
            resizeVec();
        auto *ptrToReturn{vec[0]};
        // Move the handed-out pointer to the end of the free region,
        // then shrink that region by one.
        std::swap(vec[0], vec[pivot - 1]);
        positionMap[vec[0]] = 0;
        positionMap[vec[pivot - 1]] = pivot - 1;
        pivot--;
        return ptrToReturn;
    }

    void returnPtr(T *ptr) {
        size_t pos = positionMap[ptr];
        ptr->reset();
        // Swap the returned pointer back into the free region and grow it.
        std::swap(vec[pos], vec[pivot]);
        positionMap[vec[pos]] = pos;
        positionMap[vec[pivot]] = pivot;
        pivot++;
    }
private:
    std::vector<T *> vec;
    std::mutex positionMapMutex;
    // Maps each pointer to its current index in vec.
    std::unordered_map<T *, size_t> positionMap;
    // Index of the first handed-out slot; everything before it is free.
    size_t pivot;
    void initArray(size_t arrSize) {
        size_t amountOfThreads{std::thread::hardware_concurrency()};
        assert(amountOfThreads);
        size_t amountPerThread{arrSize / amountOfThreads};
        // We try to avoid false sharing by giving each thread at least one
        // cache line's worth of pointers to initialize.
        size_t minObjPerThread{std::hardware_destructive_interference_size / sizeof(void *)};
        amountPerThread = std::max(minObjPerThread, amountPerThread);
        amountOfThreads = arrSize / amountPerThread;

        std::vector<std::thread> threads;
        threads.reserve(amountOfThreads);
        for (size_t i{}; i < amountOfThreads; i++)
            threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);
        for (auto &t: threads)
            t.join();
        // Initialize whatever remainder the threads did not cover.
        size_t covered{amountOfThreads * amountPerThread};
        initObjects(covered, arrSize - covered);
    }
    void initObjects(size_t startIdx, size_t amount) {
        for (size_t i{}; i < amount; i++) {
            vec[startIdx + i] = new T;
        }
        // In the future, it should be possible to write a custom hash map with
        // independently locked sections, or to use a contiguous data structure.
        std::lock_guard<std::mutex> guard(positionMapMutex);
        for (size_t i{}; i < amount; i++) {
            positionMap[vec[startIdx + i]] = startIdx + i;
        }
    }
    void resizeVec() {
        size_t size{vec.size()};
        vec.resize(2 * size);
        pivot = size;
        // resizeVec() only runs when every old pointer is handed out (pivot
        // was 0). Move them into the new upper half so the lower half can be
        // refilled with fresh objects; T* is trivially copyable, so memcpy
        // is valid here.
        memcpy(&(vec[size]), &(vec[0]), sizeof(vec[0]) * size);
        for (size_t i{}; i < size; i++)
            positionMap[vec[size + i]] = size + i;
        initArray(size);
    }
};
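A minimal usage sketch, assuming a hypothetical connection type; any default-constructible type with a reset() member satisfies the pool's constraints:

#include "allocPool.hpp"

// Hypothetical pooled type: default-initializable and resetable.
struct connection {
    int state{};
    void reset() { state = 0; }
};

int main() {
    allocPool<connection> pool{50}; // pre-allocate 50 objects, as in the benchmark
    auto *c = pool.getPtr();        // O(1): take a free object from the front
    c->state = 42;
    pool.returnPtr(c);              // reset() runs before the slot becomes free again
}

The swap-with-pivot scheme keeps both getPtr() and returnPtr() constant-time; the position map exists only so returnPtr() can find a pointer's current slot without scanning the vector.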