Skip to content

Commit

Permalink
Fix thread deadlock issue (sonic-net#211)
Browse files Browse the repository at this point in the history
* Fix thread deadlock isssue

Add separate lock for notification and reuse
global syncd lock to protect only db access

* Change mutex name to db_mutex

* Fix mutex name in header

* Change mutex on counters thread
  • Loading branch information
kcudnik authored Aug 17, 2017
1 parent 7e70b4d commit a45941d
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 13 deletions.
18 changes: 14 additions & 4 deletions syncd/syncd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "swss/tokenize.h"
#include <limits.h>

std::mutex g_mutex;
std::mutex g_db_mutex;

swss::RedisClient *g_redisClient = NULL;

Expand Down Expand Up @@ -148,6 +148,8 @@ void remove_rid_and_vid_from_local(
sai_object_id_t translate_rid_to_vid(
_In_ sai_object_id_t rid)
{
std::lock_guard<std::mutex> lock(g_db_mutex);

SWSS_LOG_ENTER();

if (rid == SAI_NULL_OBJECT_ID)
Expand Down Expand Up @@ -275,6 +277,8 @@ void translate_rid_to_vid_list(
sai_object_id_t translate_vid_to_rid(
_In_ sai_object_id_t vid)
{
std::lock_guard<std::mutex> lock(g_db_mutex);

SWSS_LOG_ENTER();

if (vid == SAI_NULL_OBJECT_ID)
Expand Down Expand Up @@ -397,6 +401,8 @@ void snoop_get_attr(

SWSS_LOG_DEBUG("%s", key.c_str());

std::lock_guard<std::mutex> lock(g_db_mutex);

g_redisClient->hset(key, attr_id, attr_value);
}

Expand Down Expand Up @@ -698,6 +704,8 @@ sai_status_t handle_generic(
std::string str_vid = sai_serialize_object_id(object_id);
std::string str_rid = sai_serialize_object_id(real_object_id);

std::lock_guard<std::mutex> lock(g_db_mutex);

g_redisClient->hset(VIDTORID, str_vid, str_rid);
g_redisClient->hset(RIDTOVID, str_rid, str_vid);

Expand Down Expand Up @@ -730,6 +738,8 @@ sai_status_t handle_generic(
std::string str_vid = sai_serialize_object_id(object_id);
std::string str_rid = sai_serialize_object_id(rid);

std::lock_guard<std::mutex> lock(g_db_mutex);

g_redisClient->hdel(VIDTORID, str_vid);
g_redisClient->hdel(RIDTOVID, str_rid);

Expand Down Expand Up @@ -986,6 +996,8 @@ void clearTempView()

// TODO optimize with lua script (this takes ~0.2s now)

std::lock_guard<std::mutex> lock(g_db_mutex);

for (const auto &key: g_redisClient->keys(pattern))
{
g_redisClient->del(key);
Expand Down Expand Up @@ -1379,8 +1391,6 @@ sai_status_t processBulkEvent(

sai_status_t processEvent(swss::ConsumerTable &consumer)
{
std::lock_guard<std::mutex> lock(g_mutex);

SWSS_LOG_ENTER();

swss::KeyOpFieldsValuesTuple kco;
Expand Down Expand Up @@ -1784,7 +1794,7 @@ bool handleRestartQuery(swss::NotificationConsumer &restartQuery)

bool isVeryFirstRun()
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_db_mutex);

SWSS_LOG_ENTER();

Expand Down
2 changes: 1 addition & 1 deletion syncd/syncd.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ extern "C" {

extern void exit_and_notify(int status) __attribute__ ((__noreturn__));

extern std::mutex g_mutex;
extern std::mutex g_db_mutex;
extern std::set<sai_object_id_t> g_defaultPriorityGroupsRids;
extern std::set<sai_object_id_t> g_defaultSchedulerGroupsRids;
extern std::set<sai_object_id_t> g_defaultQueuesRids;
Expand Down
3 changes: 2 additions & 1 deletion syncd/syncd_counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ void collectCounters(swss::Table &countersTable,
// collect counters should be under mutex
// sice configuration can change and we
// don't want that during counters collection
std::lock_guard<std::mutex> lock(g_mutex);

SWSS_LOG_ENTER();

Expand Down Expand Up @@ -51,6 +50,8 @@ void collectCounters(swss::Table &countersTable,
values.push_back(fvt);
}

std::lock_guard<std::mutex> lock(g_db_mutex);

countersTable.set(strPortId, values, "");
}
}
Expand Down
17 changes: 11 additions & 6 deletions syncd/syncd_notifications.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "syncd.h"
#include "sairedis.h"

// mutex to protect notification send call
std::mutex g_ntf_mutex;

void send_notification(
_In_ std::string op,
_In_ std::string data,
Expand Down Expand Up @@ -29,7 +32,7 @@ void send_notification(
void on_switch_state_change(
_In_ sai_switch_oper_status_t switch_oper_status)
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_ntf_mutex);

SWSS_LOG_ENTER();

Expand Down Expand Up @@ -104,14 +107,16 @@ void redisPutFdbEntryToAsicView(
std::string strAttrId = sai_serialize_attr_id(*meta);
std::string strAttrValue = sai_serialize_attr_value(*meta, attr);

std::lock_guard<std::mutex> lock(g_db_mutex);

g_redisClient->hset(key, strAttrId, strAttrValue);
}

void on_fdb_event(
_In_ uint32_t count,
_In_ sai_fdb_event_notification_data_t *data)
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_ntf_mutex);

SWSS_LOG_ENTER();

Expand Down Expand Up @@ -140,7 +145,7 @@ void on_port_state_change(
_In_ uint32_t count,
_In_ sai_port_oper_status_notification_t *data)
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_ntf_mutex);

SWSS_LOG_ENTER();

Expand All @@ -162,7 +167,7 @@ void on_port_event(
_In_ uint32_t count,
_In_ sai_port_event_notification_t *data)
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_ntf_mutex);

SWSS_LOG_ENTER();

Expand All @@ -180,7 +185,7 @@ void on_port_event(

void on_switch_shutdown_request()
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_ntf_mutex);

SWSS_LOG_ENTER();

Expand All @@ -193,7 +198,7 @@ void on_packet_event(
_In_ uint32_t attr_count,
_In_ const sai_attribute_t *attr_list)
{
std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_ntf_mutex);

SWSS_LOG_ENTER();

Expand Down
2 changes: 1 addition & 1 deletion syncd/syncd_reinit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,7 @@ void onSyncdStart(bool warmStart)
// id's for ports, this may cause race condition so we need
// to use a lock here to prevent that

std::lock_guard<std::mutex> lock(g_mutex);
std::lock_guard<std::mutex> lock(g_db_mutex);

SWSS_LOG_ENTER();

Expand Down

0 comments on commit a45941d

Please sign in to comment.