allocPool: Try to avoid false sharing.

Benchmarking the initArray() function on a pool of 50 objects gives the following timings: - With false-sharing mitigations: 5508 microseconds - Without mitigations: 9075 microseconds
2024-03-11 19:44:38 -04:00 · 2024-03-11 19:44:38 -04:00 · 3c5c9b4a97
commit 3c5c9b4a97
parent b0cbb7455b
2 changed files with 15 additions and 11 deletions
--- a/allocPool.hpp
+++ b/allocPool.hpp
@ -6,6 +6,7 @@
 #include <cstddef>
 #include <cstring>
 #include <mutex>
 #include <new>
 #include <thread>
 #include <unordered_map>
 #include <vector>
@ -19,10 +20,10 @@ template<class T>
    requires std::default_initializable<T> && resetable<T>
 class allocPool {
 public:
-    explicit allocPool(size_t defaultAllocNumbers = 1000)
+    explicit allocPool(size_t allocNumbers)
-        : vec(defaultAllocNumbers), pivot{defaultAllocNumbers} {
+        : vec(allocNumbers), pivot{allocNumbers} {
        memset(&(vec[0]), 0, sizeof(vec[0]) * vec.size());
-        initArray(defaultAllocNumbers);
+        initArray(allocNumbers);
    }
    ~allocPool() {
@ -57,18 +58,19 @@ private:
    std::unordered_map<T *, size_t> positionMap;
    size_t pivot;
-    void initArray(size_t amount) {
+    void initArray(size_t arrSize) {
-        const auto amountOfThreads{std::thread::hardware_concurrency()};
+        size_t amountOfThreads{std::thread::hardware_concurrency()};
        assert(amountOfThreads);
-        const auto amountPerThread{amount / amountOfThreads};
+        size_t amountPerThread{arrSize / amountOfThreads};
        size_t minObjPerThread{std::hardware_destructive_interference_size / sizeof(void *)};
        std::vector<std::thread> threads;
        threads.reserve(amountOfThreads);
-        // Using an allocPool, we estimate that we want to allocate a lot of objects, therefore
+        // We try to avoid false sharing by defining a minimum size.
-        // the amount per thread *should* be higher than a cache line. This means we should, for
+        amountPerThread = minObjPerThread > amountPerThread ? minObjPerThread : amountPerThread;
-        // the most part, avoid false sharing. In the case that it isn't, then the total amount
+        amountOfThreads = arrSize / amountPerThread;
-        // should be pretty low, therefore false sharing shouldn't matter.
+
        for (size_t i{}; i < amountOfThreads; i++)
            threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);
@ -76,7 +78,7 @@ private:
            t.join();
        // Remainder
-        initObjects(amount - (amount % amountOfThreads), amount % amountOfThreads);
+        initObjects(arrSize - (arrSize % amountOfThreads), arrSize % amountOfThreads);
    }
    void initObjects(size_t startIdx, size_t amount) {
--- a/main.cpp
+++ b/main.cpp
@ -4,8 +4,10 @@
 #include "tests.hpp"
 int main() {
 #ifdef _DEBUG
    tests t;
    t.runTests();
 #endif
    auto startSlow{std::chrono::high_resolution_clock::now()};
    stub *ptr{};