From 0bc5829b88a58b4c7595e75c52eaee522286e47e Mon Sep 17 00:00:00 2001 From: Guillaume Roguez <guillaume.roguez@savoirfairelinux.com> Date: Mon, 17 Oct 2016 15:29:01 -0400 Subject: [PATCH] ice: fix ICE init failure with unreachable TURN This commit patches PJSIP to not terminate with failure an ICE stream transport during its initialization if a TURN is set and this one is unreachable. In such situation, as we don't try to negotiate with remaining candidates, we don't let the session have a chance to succeed with these candidates. The commit also fixes an "over-trying" of contact the TURN server: a contact phase is normaly done PJ_STUN_MAX_TRANSMIT_COUNT (7 currently) time with an incrementally sized duration between tries. But the whole process is made PJ_TURN_MAX_DNS_SRV_CNT time (was 4, changed for 1) even if it's always the same IP. This leds with our current settings to an very long time before contact phase timeout (around 2 minutes!). We also increase number of STUN servers per transport (PJ_ICE_MAX_STUN) to 3 as we need 2 entries for IPv4 and IPv6, and one set by user. Finaly, our code is changed to not depend on an timeout now for the Ring account ICE initialization (the one for SIP), to let TURN registring enough time to succeed. This fixes also blocking client issue when it want to cancel the call during this init phase. Change-Id: I1aa2e95c1668d4706213930526aaccaffbe0538d Tuleap: #1047 --- contrib/src/pjproject/fix_turn_fallback.patch | 39 +++++++++++++++++++ contrib/src/pjproject/ice_config.patch | 19 +++++---- contrib/src/pjproject/rules.mak | 5 ++- src/ice_transport.cpp | 2 +- src/ringdht/ringaccount.cpp | 22 +++++------ src/sip/sipcall.cpp | 9 +++-- 6 files changed, 67 insertions(+), 29 deletions(-) create mode 100644 contrib/src/pjproject/fix_turn_fallback.patch diff --git a/contrib/src/pjproject/fix_turn_fallback.patch b/contrib/src/pjproject/fix_turn_fallback.patch new file mode 100644 index 0000000000..2a54ee15cd --- /dev/null +++ b/contrib/src/pjproject/fix_turn_fallback.patch @@ -0,0 +1,39 @@ +--- a/pjnath/src/pjnath/turn_session.c 2016-09-19 18:21:09.073614574 -0400 ++++ b/pjnath/src/pjnath/turn_session.c 2016-09-19 18:21:30.648631620 -0400 +@@ -653,3 +653,3 @@ + +- cnt = PJ_TURN_MAX_DNS_SRV_CNT; ++ cnt = 1; + ai = (pj_addrinfo*) +--- a/pjnath/src/pjnath/ice_strans.c 2016-09-19 18:36:04.180104330 -0400 ++++ b/pjnath/src/pjnath/ice_strans.c 2016-09-19 18:37:10.614136809 -0400 +@@ -1304,2 +1304,5 @@ + ++ if (!comp->turn[n].sock) ++ continue; ++ + /* Gather remote addresses for this component */ +@@ -2013,4 +2016,21 @@ + if (comp->ice_st->state < PJ_ICE_STRANS_STATE_READY) { +- sess_fail(comp->ice_st, PJ_ICE_STRANS_OP_INIT, +- "TURN allocation failed", info.last_status); ++ PJ_LOG(4,(comp->ice_st->obj_name, ++ "TURN allocation failed, disable the associated candidate")); ++ pj_ice_sess_cand *cand = NULL; ++ unsigned i; ++ /* Wait until initialization completes */ ++ pj_grp_lock_acquire(comp->ice_st->grp_lock); ++ /* Find relayed candidate in the component */ ++ for (i=0; i<comp->cand_cnt; ++i) { ++ if (comp->cand_list[i].type == PJ_ICE_CAND_TYPE_RELAYED && ++ comp->cand_list[i].transport_id == data->transport_id) { ++ cand = &comp->cand_list[i]; ++ break; ++ } ++ } ++ pj_grp_lock_release(comp->ice_st->grp_lock); ++ pj_assert(cand != NULL); ++ // Make session init succeed but disable this TURN server to be used ++ cand->status = PJNATH_ESTUNTIMEDOUT; ++ sess_init_update(comp->ice_st); + } else if (comp->turn[tp_idx].err_cnt > 1) { diff --git a/contrib/src/pjproject/ice_config.patch b/contrib/src/pjproject/ice_config.patch index e958c0d2eb..a6398f4e29 100644 --- a/contrib/src/pjproject/ice_config.patch +++ b/contrib/src/pjproject/ice_config.patch @@ -1,23 +1,22 @@ --- a/pjnath/include/pjnath/config.h +++ b/pjnath/include/pjnath/config.h -@@ -231,5 +231,5 @@ - * Default: 16 - */ +@@ -233,3 +233,3 @@ #ifndef PJ_ICE_MAX_CAND -# define PJ_ICE_MAX_CAND 16 +# define PJ_ICE_MAX_CAND 32 #endif -@@ -250,5 +250,5 @@ - * the maximum number of components (PJ_ICE_MAX_COMP) value. - */ +@@ -254,3 +254,3 @@ + #ifndef PJ_ICE_MAX_STUN +-# define PJ_ICE_MAX_STUN 2 ++# define PJ_ICE_MAX_STUN 3 + #endif +@@ -274,3 +274,3 @@ #ifndef PJ_ICE_COMP_BITS -# define PJ_ICE_COMP_BITS 1 +# define PJ_ICE_COMP_BITS 2 #endif -@@ -301,5 +301,5 @@ - * Default: 32 - */ +@@ -325,3 +325,3 @@ #ifndef PJ_ICE_MAX_CHECKS -# define PJ_ICE_MAX_CHECKS 32 +# define PJ_ICE_MAX_CHECKS 150 - #endif + #endif \ No newline at end of file diff --git a/contrib/src/pjproject/rules.mak b/contrib/src/pjproject/rules.mak index da0e0fb4a0..95e6cbb45c 100644 --- a/contrib/src/pjproject/rules.mak +++ b/contrib/src/pjproject/rules.mak @@ -33,8 +33,8 @@ ifdef HAVE_IOS PJPROJECT_OPTIONS += --with-ssl=$(PREFIX) endif -PJPROJECT_EXTRA_CFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -std=gnu11 -PJPROJECT_EXTRA_CXXFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -std=gnu++11 +PJPROJECT_EXTRA_CFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -DPJ_ICE_MAX_STUN=3 +PJPROJECT_EXTRA_CXXFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -DPJ_ICE_MAX_STUN=3 -std=gnu++11 ifdef HAVE_WIN64 PJPROJECT_EXTRA_CFLAGS += -DPJ_WIN64=1 @@ -75,6 +75,7 @@ endif $(APPLY) $(SRC)/pjproject/ice_config.patch $(APPLY) $(SRC)/pjproject/multiple_listeners.patch $(APPLY) $(SRC)/pjproject/pj_ice_sess.patch + $(APPLY) $(SRC)/pjproject/fix_turn_fallback.patch $(UPDATE_AUTOCONFIG) $(MOVE) diff --git a/src/ice_transport.cpp b/src/ice_transport.cpp index eaf7f0422a..e0ad0452fe 100644 --- a/src/ice_transport.cpp +++ b/src/ice_transport.cpp @@ -937,7 +937,7 @@ IceTransportFactory::IceTransportFactory() // v2.4.5 of PJNATH has a default of 100ms but RFC 5389 since version 14 requires // a minimum of 500ms on fixed-line links. Our usual case is wireless links. // This solves too long ICE exchange by DHT. - // Using 500ms with default PJ_STUN_MAX_TRANSMIT_COUNT (7) gives around 31s before timeout. + // Using 500ms with default PJ_STUN_MAX_TRANSMIT_COUNT (7) gives around 33s before timeout. ice_cfg_.stun_cfg.rto_msec = 500; // Add local hosts (IPv4, IPv6) as stun candidates diff --git a/src/ringdht/ringaccount.cpp b/src/ringdht/ringaccount.cpp index ee14773644..3a48f2c023 100644 --- a/src/ringdht/ringaccount.cpp +++ b/src/ringdht/ringaccount.cpp @@ -81,7 +81,6 @@ using std::chrono::system_clock; static constexpr int ICE_COMPONENTS {1}; static constexpr int ICE_COMP_SIP_TRANSPORT {0}; -static constexpr int ICE_INIT_TIMEOUT {10}; static constexpr auto ICE_NEGOTIATION_TIMEOUT = std::chrono::seconds(60); static constexpr auto TLS_TIMEOUT = std::chrono::seconds(30); const constexpr auto EXPORT_KEY_RENEWAL_TIME = std::chrono::minutes(20); @@ -141,7 +140,7 @@ RingAccount::createIceTransport(const Args&... args) } auto ice = Manager::instance().getIceTransportFactory().createTransport(args...); - if (!ice or ice->waitForInitialization(ICE_INIT_TIMEOUT) <= 0) + if (!ice) throw std::runtime_error("ICE transport creation failed"); if (const auto& publicIP = getPublishedIpAddress()) { @@ -254,24 +253,24 @@ RingAccount::newOutgoingCall(const std::string& toUrl) call->addSubCall(dev_call); - auto iceInitTimeout = std::chrono::steady_clock::now() + std::chrono::seconds {ICE_INIT_TIMEOUT}; - - manager.addTask([sthis, weak_dev_call, ice, iceInitTimeout, toUri, dev] { + manager.addTask([sthis, weak_dev_call, ice, toUri, dev] { auto call = weak_dev_call.lock(); + // call aborted? if (not call) return false; - if (ice->isFailed() or std::chrono::steady_clock::now() >= iceInitTimeout) { - RING_DBG("ice init failed (or timeout)"); - call->onFailure(); + if (ice->isFailed()) { + RING_ERR("[call:%s] ice init failed", call->getCallId().c_str()); + call->onFailure(EIO); return false; } + // Loop until ICE transport is initialized. + // Note: we suppose that ICE init routine has a an internal timeout (bounded in time) + // and we let upper layers decide when the call shall be aborded (our first check upper). if (not ice->isInitialized()) - return true; // process task again! - - RING_WARN("ICE initialised"); + return true; // Next step: sent the ICE data to peer through DHT const dht::Value::Id callvid = udist(sthis->rand_); @@ -280,7 +279,6 @@ RingAccount::newOutgoingCall(const std::string& toUrl) dht::Value val { dht::IceCandidates(callvid, ice->getLocalAttributesAndCandidates()) }; val.id = vid; - //RING_WARN("ICE initialised"); sthis->dht_.putEncrypted( callkey, dev.dev, std::move(val), diff --git a/src/sip/sipcall.cpp b/src/sip/sipcall.cpp index ca0547e0cc..09382c6274 100644 --- a/src/sip/sipcall.cpp +++ b/src/sip/sipcall.cpp @@ -63,7 +63,7 @@ getVideoSettings() } #endif -static constexpr int DEFAULT_ICE_INIT_TIMEOUT {10}; // seconds +static constexpr int DEFAULT_ICE_INIT_TIMEOUT {35}; // seconds static constexpr int DEFAULT_ICE_NEGO_TIMEOUT {60}; // seconds // SDP media Ids @@ -718,8 +718,9 @@ SIPCall::setupLocalSDPFromIce() return; } - if (waitForIceInitialization(DEFAULT_ICE_INIT_TIMEOUT) <= 0) { - RING_ERR("[call:%s] Local ICE init failed", getCallId().c_str()); + // we need an initialized ICE to progress further + if (iceTransport_->waitForInitialization(DEFAULT_ICE_INIT_TIMEOUT) <= 0) { + RING_ERR("[call:%s] Medias' ICE init failed", getCallId().c_str()); return; } @@ -761,7 +762,7 @@ SIPCall::getAllRemoteCandidates() bool SIPCall::startIce() { - if (not iceTransport_ or iceTransport_->isFailed()) + if (not iceTransport_ or iceTransport_->isFailed() or not iceTransport_->isInitialized()) return false; if (iceTransport_->isStarted()) { RING_DBG("[call:%s] ICE already started", getCallId().c_str()); -- GitLab