Reputation: 147
Assume C++17 (some of this is deprecated in C++20)
I have written a class EventDB that stores a fixed set of shared_ptr.
class EventDB
{
public:
EventDB() = delete;
EventDB(const std::vector<std::shared_ptr<const EventInfo>>& init_events):
PointEvents(init_events.begin(),init_events.end())
{}
std::shared_ptr<const EventInfo> Swap(std::shared_ptr<const EventInfo> event)
{
auto old_it = PointEvents.find(event);
if(old_it == PointEvents.end())
return nullptr;
//cast away the constness of the iterator element
//is this OK, because we know we're not changing its hash/equality?
auto old_evt_addr = &const_cast<std::shared_ptr<const EventInfo>&>(*old_it);
return std::atomic_exchange(old_evt_addr,event);
}
private:
std::unordered_set<std::shared_ptr<const EventInfo>,EventPointHash,EventPointEq> PointEvents;
};
It provides a way to swap out the elements of the set using std::atomic_exchange. 'swapping out' an element of a set may seem pointless, but I provide custom hasher and equality for the set, so the swapped elements actually hold different data even though they're equivalent from the perspective of the set. The correctness of all this is the subject of a secondary question, because I could just replace it with a map if I needed to.
My main question is about thread safety - is EventDB thread safe, and if not, why not?
The secondary question alluded to above is how bad it is to cast away the constness of the set iterator so I can (atomically) modify the element. Am I breaking the rules of the language and relying on implementation specific behaviour? Or is this technically allowed?
For bonus kudos, what would I replace std::atomic_exchange with in C++20. I know there are proper atomic smart pointer, but could I convert between normal shared_ptr in this example?
Here's some self-contained code that compiles and works with g++ 9.3.0 GLIBCXX_3.4.28
#include <vector>
#include <string>
#include <iostream>
#include <thread>
#include <memory>
#include <limits>
#include <unordered_set>
enum class EventType : uint8_t
{
RED = 0,
BLUE = 1
};
class EventInfo
{
public:
EventInfo() = delete;
EventInfo(const EventType t, const size_t i, const std::string& p):
Type(t),Index(i),Payload(p)
{}
size_t GetIndex() const
{
return Index;
}
EventType GetEventType() const
{
return Type;
}
const std::string& GetPayload() const
{
return Payload;
}
private:
EventType Type;
size_t Index;
std::string Payload;
};
struct EventPointHash
{
size_t operator()(const std::shared_ptr<const EventInfo>& evt) const
{
if(!evt)
return std::numeric_limits<size_t>::max();
return (evt->GetIndex() << (sizeof(EventType)<<3)) + static_cast<size_t>(evt->GetEventType());
}
};
struct EventPointEq
{
bool operator()(const std::shared_ptr<const EventInfo>& lhs,
const std::shared_ptr<const EventInfo>& rhs) const
{
if(!lhs && !rhs) return true;
if(!lhs || !rhs) return false;
return (lhs->GetIndex() == rhs->GetIndex() && lhs->GetEventType() == rhs->GetEventType());
}
};
class EventDB
{
public:
EventDB() = delete;
EventDB(const std::vector<std::shared_ptr<const EventInfo>>& init_events):
PointEvents(init_events.begin(),init_events.end())
{}
std::shared_ptr<const EventInfo> Swap(std::shared_ptr<const EventInfo> event)
{
auto old_it = PointEvents.find(event);
if(old_it == PointEvents.end())
return nullptr;
//cast away the constness of the iterator element
//is this OK, because we know we're not changing its hash/equality?
auto old_evt_addr = &const_cast<std::shared_ptr<const EventInfo>&>(*old_it);
return std::atomic_exchange(old_evt_addr,event);
}
private:
std::unordered_set<std::shared_ptr<const EventInfo>,EventPointHash,EventPointEq> PointEvents;
};
int main()
{
//create a database to hold 100 events
std::vector<std::shared_ptr<const EventInfo>> init_events;
for(int i=0;i<100;i++)
{
init_events.emplace_back(std::make_shared<const EventInfo>(EventType::RED,i,"-1"));
}
EventDB DB(init_events);
//Access the element concurrently
std::vector<std::thread> threads;
for(int i = 0;i<5;i++)
{
threads.emplace_back([&]()
{
for(int j = 0;j<1000000;j++)
{
//replace a random element
auto event = std::make_shared<const EventInfo>(EventType::RED,rand()%100,std::to_string(j));
auto old_evt = DB.Swap(event);
//access the data - randomly print
if(old_evt && std::stoi(old_evt->GetPayload())%2000 == 0 && old_evt->GetIndex() == 66)
std::cout<<"Replaced "<<old_evt->GetPayload()<<" with "<<event->GetPayload()<<std::endl;
}
});
}
init_events.clear();
for(auto& t : threads)
t.join();
return 0;
}
Typical output:
Replaced 20000 with 20033
Replaced 134000 with 134002
Replaced 144000 with 143694
Replaced 144000 with 144435
Replaced 172000 with 174980
Replaced 252000 with 255578
Replaced 258000 with 252434
Replaced 368000 with 367261
Replaced 498000 with 497470
Replaced 584000 with 583205
Replaced 628000 with 619809
Replaced 722000 with 722603
Replaced 730000 with 722302
Replaced 780000 with 768508
Replaced 784000 with 784036
Replaced 816000 with 821799
Replaced 842000 with 844719
Replaced 970000 with 950851
Igor's answer pointed me to the data race. I was then able to easily modify the code to prove it in practice.
Adding a destructor that messed up the hash if a destroyed element was used, and then printing a message when find failed:
~EventInfo()
{
//these aren't used in the example
// - so they will mess up find when the race is lost
Index = 200;
Type = EventType::BLUE;
}
auto old_evt = DB.Swap(event);
if(!old_evt)
std::cout<<"BOOM"<<std::endl;
And sure enough:
BOOM
BOOM
BOOM
BOOM
This is my attemp to implement the fix suggested in Igor's answer
#include <vector>
#include <string>
#include <iostream>
#include <thread>
#include <memory>
#include <limits>
#include <unordered_map>
enum class EventType : uint8_t
{
RED = 0,
BLUE = 1
};
class EventInfo
{
public:
EventInfo() = delete;
EventInfo(const EventType t, const size_t i, const std::string& p):
Type(t),Index(i),Payload(p)
{}
size_t GetIndex() const
{
return Index;
}
EventType GetEventType() const
{
return Type;
}
const std::string& GetPayload() const
{
return Payload;
}
private:
EventType Type;
size_t Index;
std::string Payload;
};
struct EventPointHash
{
size_t operator()(const std::pair<EventType,size_t>& point) const
{
return (point.second << (sizeof(EventType)<<3)) + static_cast<size_t>(point.first);
}
};
class EventDB
{
public:
EventDB() = delete;
EventDB(const std::vector<std::shared_ptr<const EventInfo>>& init_events)
{
for(const auto& event : init_events)
PointEvents[{event->GetEventType(),event->GetIndex()}] = event;
}
std::shared_ptr<const EventInfo> Swap(const std::shared_ptr<const EventInfo> event)
{
auto old_it = PointEvents.find({event->GetEventType(),event->GetIndex()});
if(old_it == PointEvents.end())
return nullptr;
auto old_evt_addr = &(old_it->second);
return std::atomic_exchange(old_evt_addr,event);
}
private:
std::unordered_map<std::pair<EventType,size_t>,std::shared_ptr<const EventInfo>,EventPointHash> PointEvents;
};
int main()
{
//create a database to hold 100 events
std::vector<std::shared_ptr<const EventInfo>> init_events;
for(int i=0;i<100;i++)
{
init_events.emplace_back(std::make_shared<const EventInfo>(EventType::RED,i,"-1"));
}
EventDB DB(init_events);
init_events.clear();
//Access the element concurrently
std::vector<std::thread> threads;
for(int i = 0;i<5;i++)
{
threads.emplace_back([&]()
{
for(int j = 0;j<1000000;j++)
{
//replace a random element
auto event = std::make_shared<const EventInfo>(EventType::RED,rand()%100,std::to_string(j));
auto old_evt = DB.Swap(event);
if(!old_evt)
{
std::cout<<"BOOM"<<std::endl;
continue;
}
//access the data - randomly print
if(std::stoi(old_evt->GetPayload())%2000 == 0 && old_evt->GetIndex() == 66)
std::cout<<"Replaced "<<old_evt->GetPayload()<<" with "<<event->GetPayload()<<std::endl;
}
});
}
for(auto& t : threads)
t.join();
return 0;
}
Upvotes: 1
Views: 127
Reputation: 52621
There's a data race. While writing the set's elements is atomic via std::atomic_exchange
, reading them inside find
is not. find
may see a torn read if another thread swaps an element right from under it.
There's a subtler scenario: one thread has called PointEvents.find(event)
, and find
is currently reading the contents of some EventInfo
instance in the set (let's call it X
), to compute its hash or compare it to event
. At the same time, another thread performs Swap
on that same element and returns the shared pointer holding X
to the caller. The caller perhaps looks at X
briefly then allows the shared pointer to be destroyed, and X
together with it. find
then races with X
's destructor.
Consider separating fixed parts of EventInfo
that contribute to the hash and equality, from the payload part that can vary. Store them in std::unordered_map
, with fixed part as the key and the payload as the value. Then you can swap the payload without affecting find
.
Upvotes: 1