From 94091c7b960aa45923036cd4d79929dcb3178cdc Mon Sep 17 00:00:00 2001 From: George Joseph Date: Fri, 1 Sep 2017 04:17:02 -0600 Subject: [PATCH] stasis/control: Fix possible deadlock with swap channel If an error occurs during a bridge impart it's possible that the "bridge_after" callback might try to run before control_swap_channel_in_bridge has been signalled to continue. Since control_swap_channel_in_bridge is holding the control lock and the callback needs it, a deadlock will occur. * control_swap_channel_in_bridge now only holds the control lock while it's actually modifying the control structure and releases it while the bridge impart is running. * bridge_after_cb is now tolerant of impart failures. Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3 --- include/asterisk/bridge_after.h | 2 + main/bridge.c | 6 +- main/bridge_after.c | 28 ++++---- res/stasis/control.c | 124 +++++++++++++++++++------------- 4 files changed, 96 insertions(+), 64 deletions(-) diff --git a/include/asterisk/bridge_after.h b/include/asterisk/bridge_after.h index 53f30b9ad2..045168571e 100644 --- a/include/asterisk/bridge_after.h +++ b/include/asterisk/bridge_after.h @@ -45,6 +45,8 @@ enum ast_bridge_after_cb_reason { AST_BRIDGE_AFTER_CB_REASON_DEPART, /*! Was explicitly removed by external code. */ AST_BRIDGE_AFTER_CB_REASON_REMOVED, + /*! The channel failed to enter the bridge. */ + AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED, }; /*! diff --git a/main/bridge.c b/main/bridge.c index b732d5fc55..ab12ecf646 100644 --- a/main/bridge.c +++ b/main/bridge.c @@ -1758,12 +1758,13 @@ join_exit:; static void *bridge_channel_depart_thread(void *data) { struct ast_bridge_channel *bridge_channel = data; + int res = 0; if (bridge_channel->callid) { ast_callid_threadassoc_add(bridge_channel->callid); } - bridge_channel_internal_join(bridge_channel); + res = bridge_channel_internal_join(bridge_channel); /* * cleanup @@ -1775,7 +1776,8 @@ static void *bridge_channel_depart_thread(void *data) ast_bridge_features_destroy(bridge_channel->features); bridge_channel->features = NULL; - ast_bridge_discard_after_callback(bridge_channel->chan, AST_BRIDGE_AFTER_CB_REASON_DEPART); + ast_bridge_discard_after_callback(bridge_channel->chan, + res ? AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED : AST_BRIDGE_AFTER_CB_REASON_DEPART); /* If join failed there will be impart threads waiting. */ bridge_channel_impart_signal(bridge_channel->chan); ast_bridge_discard_after_goto(bridge_channel->chan); diff --git a/main/bridge_after.c b/main/bridge_after.c index d4aec75d0f..69c8f989b4 100644 --- a/main/bridge_after.c +++ b/main/bridge_after.c @@ -293,23 +293,23 @@ int ast_bridge_set_after_callback(struct ast_channel *chan, ast_bridge_after_cb return 0; } -const char *reason_strings[] = { - [AST_BRIDGE_AFTER_CB_REASON_DESTROY] = "Channel destroyed (hungup)", - [AST_BRIDGE_AFTER_CB_REASON_REPLACED] = "Callback was replaced", - [AST_BRIDGE_AFTER_CB_REASON_MASQUERADE] = "Channel masqueraded", - [AST_BRIDGE_AFTER_CB_REASON_DEPART] = "Channel was departed from bridge", - [AST_BRIDGE_AFTER_CB_REASON_REMOVED] = "Callback was removed", -}; - const char *ast_bridge_after_cb_reason_string(enum ast_bridge_after_cb_reason reason) { - if (reason < AST_BRIDGE_AFTER_CB_REASON_DESTROY - || AST_BRIDGE_AFTER_CB_REASON_REMOVED < reason - || !reason_strings[reason]) { - return "Unknown"; + switch (reason) { + case AST_BRIDGE_AFTER_CB_REASON_DESTROY: + return "Channel destroyed (hungup)"; + case AST_BRIDGE_AFTER_CB_REASON_REPLACED: + return "Callback was replaced"; + case AST_BRIDGE_AFTER_CB_REASON_MASQUERADE: + return "Channel masqueraded"; + case AST_BRIDGE_AFTER_CB_REASON_DEPART: + return "Channel was departed from bridge"; + case AST_BRIDGE_AFTER_CB_REASON_REMOVED: + return "Callback was removed"; + case AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED: + return "Channel failed joining the bridge"; } - - return reason_strings[reason]; + return "Unknown"; } struct after_bridge_goto_ds { diff --git a/res/stasis/control.c b/res/stasis/control.c index 503f111aad..085ca7eb92 100644 --- a/res/stasis/control.c +++ b/res/stasis/control.c @@ -983,14 +983,21 @@ static int bridge_channel_depart(struct stasis_app_control *control, return 0; } -static void bridge_after_cb(struct ast_channel *chan, void *data) +static void internal_bridge_after_cb(struct ast_channel *chan, void *data, + enum ast_bridge_after_cb_reason reason) { struct stasis_app_control *control = data; SCOPED_AO2LOCK(lock, control); struct ast_bridge_channel *bridge_channel; - ast_debug(3, "%s, %s: Channel leaving bridge\n", - ast_channel_uniqueid(chan), control->bridge->uniqueid); + ast_debug(3, "%s, %s: %s\n", + ast_channel_uniqueid(chan), control->bridge ? control->bridge->uniqueid : "unknown", + ast_bridge_after_cb_reason_string(reason)); + + if (reason == AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED) { + /* The impart actually failed so control->bridge isn't valid. */ + control->bridge = NULL; + } ast_assert(chan == control->channel); @@ -998,18 +1005,21 @@ static void bridge_after_cb(struct ast_channel *chan, void *data) ast_channel_pbx_set(control->channel, control->pbx); control->pbx = NULL; - app_unsubscribe_bridge(control->app, control->bridge); + if (control->bridge) { + app_unsubscribe_bridge(control->app, control->bridge); - /* No longer in the bridge */ - control->bridge = NULL; + /* No longer in the bridge */ + control->bridge = NULL; - /* Get the bridge channel so we don't depart from the wrong bridge */ - ast_channel_lock(chan); - bridge_channel = ast_channel_get_bridge_channel(chan); - ast_channel_unlock(chan); + /* Get the bridge channel so we don't depart from the wrong bridge */ + ast_channel_lock(chan); + bridge_channel = ast_channel_get_bridge_channel(chan); + ast_channel_unlock(chan); + + /* Depart this channel from the bridge using the command queue if possible */ + stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup); + } - /* Depart this channel from the bridge using the command queue if possible */ - stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup); if (stasis_app_channel_is_stasis_end_published(chan)) { /* The channel has had a StasisEnd published on it, but until now had remained in * the bridging system. This means that the channel moved from a Stasis bridge to a @@ -1027,12 +1037,19 @@ static void bridge_after_cb(struct ast_channel *chan, void *data) } } +static void bridge_after_cb(struct ast_channel *chan, void *data) +{ + struct stasis_app_control *control = data; + + internal_bridge_after_cb(control->channel, data, AST_BRIDGE_AFTER_CB_REASON_DEPART); +} + static void bridge_after_cb_failed(enum ast_bridge_after_cb_reason reason, void *data) { struct stasis_app_control *control = data; - bridge_after_cb(control->channel, data); + internal_bridge_after_cb(control->channel, data, reason); ast_debug(3, " reason: %s\n", ast_bridge_after_cb_reason_string(reason)); @@ -1171,46 +1188,57 @@ int control_swap_channel_in_bridge(struct stasis_app_control *control, struct as return -1; } - { - /* pbx and bridge are modified by the bridging impart thread. - * It shouldn't happen concurrently, but we still need to lock - * for the memory fence. + ao2_lock(control); + + /* Ensure the controlling application is subscribed early enough + * to receive the ChannelEnteredBridge message. This works in concert + * with the subscription handled in the Stasis application execution + * loop */ + app_subscribe_bridge(control->app, bridge); + + /* Save off the channel's PBX */ + ast_assert(control->pbx == NULL); + if (!control->pbx) { + control->pbx = ast_channel_pbx(chan); + ast_channel_pbx_set(chan, NULL); + } + + ast_assert(stasis_app_get_bridge(control) == NULL); + /* We need to set control->bridge here since bridge_after_cb may be run + * before ast_bridge_impart returns. bridge_after_cb gets a reason + * code so it can tell if the bridge is actually valid or not. + */ + control->bridge = bridge; + + /* We can't be holding the control lock while impart is running + * or we could create a deadlock with bridge_after_cb which also + * tries to lock control. + */ + ao2_unlock(control); + res = ast_bridge_impart(bridge, + chan, + swap, + NULL, /* features */ + AST_BRIDGE_IMPART_CHAN_DEPARTABLE); + if (res != 0) { + /* ast_bridge_impart failed before it could spawn the depart + * thread. The callbacks aren't called in this case. + * The impart could still fail even if ast_bridge_impart returned + * ok but that's handled by bridge_after_cb. */ - SCOPED_AO2LOCK(lock, control); - - /* Ensure the controlling application is subscribed early enough - * to receive the ChannelEnteredBridge message. This works in concert - * with the subscription handled in the Stasis application execution - * loop */ - app_subscribe_bridge(control->app, bridge); - - /* Save off the channel's PBX */ - ast_assert(control->pbx == NULL); - if (!control->pbx) { - control->pbx = ast_channel_pbx(chan); - ast_channel_pbx_set(chan, NULL); - } - - res = ast_bridge_impart(bridge, - chan, - swap, - NULL, /* features */ - AST_BRIDGE_IMPART_CHAN_DEPARTABLE); - if (res != 0) { - ast_log(LOG_ERROR, "Error adding channel to bridge\n"); - ast_channel_pbx_set(chan, control->pbx); - control->pbx = NULL; - return -1; - } - - ast_assert(stasis_app_get_bridge(control) == NULL); - control->bridge = bridge; - + ast_log(LOG_ERROR, "Error adding channel to bridge\n"); + ao2_lock(control); + ast_channel_pbx_set(chan, control->pbx); + control->pbx = NULL; + control->bridge = NULL; + ao2_unlock(control); + } else { ast_channel_lock(chan); set_interval_hook(chan); ast_channel_unlock(chan); } - return 0; + + return res; } int control_add_channel_to_bridge(struct stasis_app_control *control, struct ast_channel *chan, void *data)