From 2f1b34b4cd94360e6565000925e9ea02342fb616 Mon Sep 17 00:00:00 2001
From: Timothee Leclaire-Fournier
Date: Sat, 2 Mar 2024 12:48:49 -0500
Subject: [PATCH] Meta: Add more documentation and correct naming.

---
 README.md     | 25 +++++++++++++++++++++----
 allocPool.hpp | 12 +++++++++---
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 355c061..8de7375 100644
--- a/README.md
+++ b/README.md
@@ -5,17 +5,22 @@ avoid expensive allocations during runtime. This preallocates objects in the
 constructor (with threads) then offers you two functions: `getPtr()` and `returnPtr(ptr)`.
 
 Using C++ concepts, we can use templates and require the class given to have a
-default constructor and to have a .reset() function. It will be used to clean the
+default constructor and to have a `.reset()` function. It will be used to clean the
 objects before giving them to another caller.
 
-This pool uses a hashmap and a pivot to make returnPtr(ptr) extremely fast.
+We avoid false sharing by giving each thread a large amount of work, so cache lines
+are rarely shared between threads. While this pool uses a hashmap and a pivot to make
+`returnPtr(ptr)` extremely fast, the bottleneck during construction is the locking and
+unlocking of the hashmap's mutex. This lock is needed because a `std::unordered_map`
+cannot be written to concurrently, even at different hashes.
 
 It will automatically grow when the max capacity is reached, though there will be a
 performance penalty.
 
 ## Performance
 With a simple stub class and a pool of 10000 objects, using the pool to take a pointer
-and give it back takes 3 ms vs 19 ms when allocating and deallocating by hand.
+and give it back for each element is significantly faster than doing it by hand.
+
 ```
 class stub {
 public:
@@ -27,4 +32,16 @@ public:
 private:
     int i = 15;
 };
-```
\ No newline at end of file
+```
+```
+Time (milliseconds) required for allocations without pool: 21
+Time (milliseconds) required for allocations with pool: 3
+Time (milliseconds) required for real allocations when constructing pool: 9
+```
+
+This trivial example shows a clear performance improvement that would be much more
+pronounced if the allocation and construction of the objects were more complex.
+
+## Safety
+AddressSanitizer, LeakSanitizer and ThreadSanitizer have been used to ensure the safety
+of the class. Tests have been added to verify the correct behavior in all cases.
\ No newline at end of file
diff --git a/allocPool.hpp b/allocPool.hpp
index f57d437..0cadab6 100644
--- a/allocPool.hpp
+++ b/allocPool.hpp
@@ -59,13 +59,17 @@ private:
     void initArray(size_t amount) {
         const auto amountOfThreads{std::thread::hardware_concurrency()};
         assert(amountOfThreads);
-        const auto amountPerThreads{amount / amountOfThreads};
+        const auto amountPerThread{amount / amountOfThreads};
 
         std::vector<std::thread> threads;
         threads.reserve(amountOfThreads);
 
+        // An allocPool is expected to hold a lot of objects, so each thread's chunk
+        // *should* span more than a cache line and, for the most part, avoid false
+        // sharing. If it doesn't, the total amount of objects is small enough that
+        // false sharing shouldn't matter anyway.
         for (size_t i{}; i < amountOfThreads; i++)
-            threads.emplace_back(&allocPool::initObjects, this, i * amountPerThreads, amountPerThreads);
+            threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);
 
         for (auto &t: threads)
             t.join();
@@ -76,9 +80,11 @@ private:
 
     void initObjects(size_t startIdx, size_t amount) {
         for (size_t i{}; i < amount; i++) {
-            // TODO: Be more cache friendly by making a vector per thread, then doing memcpy into the original vector.
             vec[startIdx + i] = new T;
         }
+
+        // In the future, it should be possible to write a custom hashmap with sections
+        // with independent locks, or use a data structure which would be contiguous.
         std::lock_guard guard(positionMapMutex);
         for (size_t i{}; i < amount; i++) {
             positionMap[vec[startIdx + i]] = i;
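
A minimal usage sketch of the API the README describes. This is an illustration under
assumptions, not code from the patch: the constructor taking the number of objects to
preallocate, the raw-pointer return type of `getPtr()`, and the `value()` accessor on
the stub are guesses; only `allocPool`, `getPtr()`, `returnPtr(ptr)`, and the
default-constructor/`.reset()` requirements come from the patch itself.

```
// Usage sketch. Assumptions (not confirmed by the patch): the pool's constructor
// takes the number of objects to preallocate, and getPtr() hands out a raw T*
// that is later handed back through returnPtr().
#include "allocPool.hpp"

#include <cassert>

// Satisfies the documented requirements: default-constructible, with a reset()
// that cleans the object before it is handed to another caller.
class stub {
public:
    stub() = default;
    void reset() { i = 15; }
    int value() const { return i; }  // illustrative accessor, not in the README stub

private:
    int i = 15;
};

int main() {
    allocPool<stub> pool(10000);  // assumed ctor: preallocate 10000 stubs using threads

    stub *p = pool.getPtr();      // take a preallocated, already cleaned object
    assert(p->value() == 15);

    pool.returnPtr(p);            // the hashmap + pivot make the return cheap
}
```

The take/return cycle is the whole interface: `reset()` is what lets a returned object
be handed out again as if it were freshly constructed, without any new allocation.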