allocPool: Try to avoid false sharing.
A benchmark of the initArray() function with 50 objects, run with and without the false-sharing mitigations, gives the following timings. On Windows: ~5500 microseconds with mitigations and ~9000 microseconds without. On Linux: ~600 microseconds with mitigations and ~700 microseconds without.
This commit is contained in:
parent
b0cbb7455b
commit
7f58269866
@ -6,6 +6,7 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <new>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -19,10 +20,10 @@ template<class T>
|
|||||||
requires std::default_initializable<T> && resetable<T>
|
requires std::default_initializable<T> && resetable<T>
|
||||||
class allocPool {
|
class allocPool {
|
||||||
public:
|
public:
|
||||||
explicit allocPool(size_t defaultAllocNumbers = 1000)
|
explicit allocPool(size_t allocNumbers)
|
||||||
: vec(defaultAllocNumbers), pivot{defaultAllocNumbers} {
|
: vec(allocNumbers), pivot{allocNumbers} {
|
||||||
memset(&(vec[0]), 0, sizeof(vec[0]) * vec.size());
|
memset(&(vec[0]), 0, sizeof(vec[0]) * vec.size());
|
||||||
initArray(defaultAllocNumbers);
|
initArray(allocNumbers);
|
||||||
}
|
}
|
||||||
|
|
||||||
~allocPool() {
|
~allocPool() {
|
||||||
@ -57,18 +58,19 @@ private:
|
|||||||
std::unordered_map<T *, size_t> positionMap;
|
std::unordered_map<T *, size_t> positionMap;
|
||||||
size_t pivot;
|
size_t pivot;
|
||||||
|
|
||||||
void initArray(size_t amount) {
|
void initArray(size_t arrSize) {
|
||||||
const auto amountOfThreads{std::thread::hardware_concurrency()};
|
size_t amountOfThreads{std::thread::hardware_concurrency()};
|
||||||
assert(amountOfThreads);
|
assert(amountOfThreads);
|
||||||
const auto amountPerThread{amount / amountOfThreads};
|
size_t amountPerThread{arrSize / amountOfThreads};
|
||||||
|
size_t minObjPerThread{std::hardware_destructive_interference_size / sizeof(void *)};
|
||||||
|
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
threads.reserve(amountOfThreads);
|
threads.reserve(amountOfThreads);
|
||||||
|
|
||||||
// Using an allocPool, we estimate that we want to allocate a lot of objects, therefore
|
// We try to avoid false sharing by defining a minimum size.
|
||||||
// the amount per thread *should* be higher than a cache line. This means we should, for
|
amountPerThread = minObjPerThread > amountPerThread ? minObjPerThread : amountPerThread;
|
||||||
// the most part, avoid false sharing. In the case that it isn't, then the total amount
|
amountOfThreads = arrSize / amountPerThread;
|
||||||
// should be pretty low, therefore false sharing shouldn't matter.
|
|
||||||
for (size_t i{}; i < amountOfThreads; i++)
|
for (size_t i{}; i < amountOfThreads; i++)
|
||||||
threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);
|
threads.emplace_back(&allocPool::initObjects, this, i * amountPerThread, amountPerThread);
|
||||||
|
|
||||||
@ -76,7 +78,7 @@ private:
|
|||||||
t.join();
|
t.join();
|
||||||
|
|
||||||
// Remainder
|
// Remainder
|
||||||
initObjects(amount - (amount % amountOfThreads), amount % amountOfThreads);
|
initObjects(arrSize - (arrSize % amountOfThreads), arrSize % amountOfThreads);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initObjects(size_t startIdx, size_t amount) {
|
void initObjects(size_t startIdx, size_t amount) {
|
||||||
|
2
main.cpp
2
main.cpp
@ -4,8 +4,10 @@
|
|||||||
#include "tests.hpp"
|
#include "tests.hpp"
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
#ifdef _DEBUG
|
||||||
tests t;
|
tests t;
|
||||||
t.runTests();
|
t.runTests();
|
||||||
|
#endif
|
||||||
|
|
||||||
auto startSlow{std::chrono::high_resolution_clock::now()};
|
auto startSlow{std::chrono::high_resolution_clock::now()};
|
||||||
stub *ptr{};
|
stub *ptr{};
|
||||||
|
Loading…
Reference in New Issue
Block a user