allocPool: Try to avoid false sharing.

Benchmarking the initArray() function on a pool of 50 objects, with and without the false sharing mitigations, gives these timings:
- With mitigations: 5508 microseconds
- Without mitigations: 9075 microseconds
This commit is contained in:
Timothée Leclaire-Fournier 2024-03-11 19:44:38 -04:00
parent b0cbb7455b
commit 3c5c9b4a97
2 changed files with 15 additions and 11 deletions

View File

@ -6,6 +6,7 @@
#include <cstddef> #include <cstddef>
#include <cstring> #include <cstring>
#include <mutex> #include <mutex>
#include <new>
#include <thread> #include <thread>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
@ -19,10 +20,10 @@ template<class T>
requires std::default_initializable<T> && resetable<T> requires std::default_initializable<T> && resetable<T>
class allocPool { class allocPool {
public: public:
explicit allocPool(size_t defaultAllocNumbers = 1000) explicit allocPool(size_t allocNumbers)
: vec(defaultAllocNumbers), pivot{defaultAllocNumbers} { : vec(allocNumbers), pivot{allocNumbers} {
memset(&(vec[0]), 0, sizeof(vec[0]) * vec.size()); memset(&(vec[0]), 0, sizeof(vec[0]) * vec.size());
initArray(defaultAllocNumbers); initArray(allocNumbers);
} }
~allocPool() { ~allocPool() {
@ -57,18 +58,19 @@ private:
std::unordered_map<T *, size_t> positionMap; std::unordered_map<T *, size_t> positionMap;
size_t pivot; size_t pivot;
void initArray(size_t amount) { void initArray(size_t arrSize) {
const auto amountOfThreads{std::thread::hardware_concurrency()}; size_t amountOfThreads{std::thread::hardware_concurrency()};
assert(amountOfThreads); assert(amountOfThreads);
const auto amountPerThread{amount / amountOfThreads}; size_t amountPerThread{arrSize / amountOfThreads};
size_t minObjPerThread{std::hardware_destructive_interference_size / sizeof(void *)};
std::vector<std::thread> threads; std::vector<std::thread> threads;
threads.reserve(amountOfThreads); threads.reserve(amountOfThreads);
// Using an allocPool, we estimate that we want to allocate a lot of objects, therefore // We try to avoid false sharing by defining a minimum size.
// the amount per thread *should* be higher than a cache line. This means we should, for amountPerThread = minObjPerThread > amountPerThread ? minObjPerThread : amountPerThread;
// the most part, avoid false sharing. In the case that it isn't, then the total amount amountOfThreads = arrSize / amountPerThread;
// should be pretty low, therefore false sharing shouldn't matter.
for (size_t i{}; i < amountOfThreads; i++) for (size_t i{}; i < amountOfThreads; i++)
threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread); threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);
@ -76,7 +78,7 @@ private:
t.join(); t.join();
// Remainder // Remainder
initObjects(amount - (amount % amountOfThreads), amount % amountOfThreads); initObjects(arrSize - (arrSize % amountOfThreads), arrSize % amountOfThreads);
} }
void initObjects(size_t startIdx, size_t amount) { void initObjects(size_t startIdx, size_t amount) {

View File

@ -4,8 +4,10 @@
#include "tests.hpp" #include "tests.hpp"
int main() { int main() {
#ifdef _DEBUG
tests t; tests t;
t.runTests(); t.runTests();
#endif
auto startSlow{std::chrono::high_resolution_clock::now()}; auto startSlow{std::chrono::high_resolution_clock::now()};
stub *ptr{}; stub *ptr{};