From 0bc5829b88a58b4c7595e75c52eaee522286e47e Mon Sep 17 00:00:00 2001
From: Guillaume Roguez <guillaume.roguez@savoirfairelinux.com>
Date: Mon, 17 Oct 2016 15:29:01 -0400
Subject: [PATCH] ice: fix ICE init failure with unreachable TURN

This commit patches PJSIP to not terminate with failure an ICE stream
transport during its initialization if a TURN is set
and this one is unreachable.

In such situation, as we don't try to negotiate with
remaining candidates, we don't let the session have a chance
to succeed with these candidates.

The commit also fixes an "over-trying" of contact
the TURN server: a contact phase is normaly done
PJ_STUN_MAX_TRANSMIT_COUNT (7 currently) time with
an incrementally sized duration between tries.
But the whole process is made PJ_TURN_MAX_DNS_SRV_CNT
time (was 4, changed for 1) even if it's always
the same IP. This leds with our current settings
to an very long time before contact phase timeout
(around 2 minutes!).

We also increase number of STUN servers per transport
(PJ_ICE_MAX_STUN) to 3 as we need 2 entries for
IPv4 and IPv6, and one set by user.

Finaly, our code is changed to not depend on an timeout
now for the Ring account ICE initialization
(the one for SIP), to let TURN registring enough time
to succeed. This fixes also blocking client issue
when it want to cancel the call during this init phase.

Change-Id: I1aa2e95c1668d4706213930526aaccaffbe0538d
Tuleap: #1047
---
 contrib/src/pjproject/fix_turn_fallback.patch | 39 +++++++++++++++++++
 contrib/src/pjproject/ice_config.patch        | 19 +++++----
 contrib/src/pjproject/rules.mak               |  5 ++-
 src/ice_transport.cpp                         |  2 +-
 src/ringdht/ringaccount.cpp                   | 22 +++++------
 src/sip/sipcall.cpp                           |  9 +++--
 6 files changed, 67 insertions(+), 29 deletions(-)
 create mode 100644 contrib/src/pjproject/fix_turn_fallback.patch

diff --git a/contrib/src/pjproject/fix_turn_fallback.patch b/contrib/src/pjproject/fix_turn_fallback.patch
new file mode 100644
index 0000000000..2a54ee15cd
--- /dev/null
+++ b/contrib/src/pjproject/fix_turn_fallback.patch
@@ -0,0 +1,39 @@
+--- a/pjnath/src/pjnath/turn_session.c	2016-09-19 18:21:09.073614574 -0400
++++ b/pjnath/src/pjnath/turn_session.c	2016-09-19 18:21:30.648631620 -0400
+@@ -653,3 +653,3 @@
+ 
+-	cnt = PJ_TURN_MAX_DNS_SRV_CNT;
++	cnt = 1;
+ 	ai = (pj_addrinfo*)
+--- a/pjnath/src/pjnath/ice_strans.c	2016-09-19 18:36:04.180104330 -0400
++++ b/pjnath/src/pjnath/ice_strans.c	2016-09-19 18:37:10.614136809 -0400
+@@ -1304,2 +1304,5 @@
+ 
++		if (!comp->turn[n].sock)
++			continue;
++
+ 	    /* Gather remote addresses for this component */
+@@ -2013,4 +2016,21 @@
+ 	    if (comp->ice_st->state < PJ_ICE_STRANS_STATE_READY) {
+-		sess_fail(comp->ice_st, PJ_ICE_STRANS_OP_INIT,
+-			  "TURN allocation failed", info.last_status);
++			PJ_LOG(4,(comp->ice_st->obj_name,
++					  "TURN allocation failed, disable the associated candidate"));
++			pj_ice_sess_cand *cand = NULL;
++			unsigned i;
++			/* Wait until initialization completes */
++			pj_grp_lock_acquire(comp->ice_st->grp_lock);
++			/* Find relayed candidate in the component */
++			for (i=0; i<comp->cand_cnt; ++i) {
++				if (comp->cand_list[i].type == PJ_ICE_CAND_TYPE_RELAYED &&
++					comp->cand_list[i].transport_id == data->transport_id) {
++					cand = &comp->cand_list[i];
++					break;
++				}
++			}
++			pj_grp_lock_release(comp->ice_st->grp_lock);
++			pj_assert(cand != NULL);
++			// Make session init succeed but disable this TURN server to be used
++			cand->status = PJNATH_ESTUNTIMEDOUT;
++			sess_init_update(comp->ice_st);
+ 	    } else if (comp->turn[tp_idx].err_cnt > 1) {
diff --git a/contrib/src/pjproject/ice_config.patch b/contrib/src/pjproject/ice_config.patch
index e958c0d2eb..a6398f4e29 100644
--- a/contrib/src/pjproject/ice_config.patch
+++ b/contrib/src/pjproject/ice_config.patch
@@ -1,23 +1,22 @@
 --- a/pjnath/include/pjnath/config.h
 +++ b/pjnath/include/pjnath/config.h
-@@ -231,5 +231,5 @@
-  * Default: 16
-  */
+@@ -233,3 +233,3 @@
  #ifndef PJ_ICE_MAX_CAND
 -#   define PJ_ICE_MAX_CAND			    16
 +#   define PJ_ICE_MAX_CAND			    32
  #endif
-@@ -250,5 +250,5 @@
-  * the maximum number of components (PJ_ICE_MAX_COMP) value.
-  */
+@@ -254,3 +254,3 @@
+ #ifndef PJ_ICE_MAX_STUN
+-#   define PJ_ICE_MAX_STUN			    2
++#   define PJ_ICE_MAX_STUN			    3
+ #endif
+@@ -274,3 +274,3 @@
  #ifndef PJ_ICE_COMP_BITS
 -#   define PJ_ICE_COMP_BITS			    1
 +#   define PJ_ICE_COMP_BITS			    2
  #endif
-@@ -301,5 +301,5 @@
-  * Default: 32
-  */
+@@ -325,3 +325,3 @@
  #ifndef PJ_ICE_MAX_CHECKS
 -#   define PJ_ICE_MAX_CHECKS			    32
 +#   define PJ_ICE_MAX_CHECKS			    150
- #endif
+ #endif
\ No newline at end of file
diff --git a/contrib/src/pjproject/rules.mak b/contrib/src/pjproject/rules.mak
index da0e0fb4a0..95e6cbb45c 100644
--- a/contrib/src/pjproject/rules.mak
+++ b/contrib/src/pjproject/rules.mak
@@ -33,8 +33,8 @@ ifdef HAVE_IOS
 PJPROJECT_OPTIONS += --with-ssl=$(PREFIX)
 endif
 
-PJPROJECT_EXTRA_CFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -std=gnu11
-PJPROJECT_EXTRA_CXXFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -std=gnu++11
+PJPROJECT_EXTRA_CFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -DPJ_ICE_MAX_STUN=3
+PJPROJECT_EXTRA_CXXFLAGS = -DPJ_ICE_MAX_CAND=32 -DPJ_ICE_MAX_CHECKS=150 -DPJ_ICE_COMP_BITS=2 -DPJ_ICE_MAX_STUN=3 -std=gnu++11
 
 ifdef HAVE_WIN64
 PJPROJECT_EXTRA_CFLAGS += -DPJ_WIN64=1
@@ -75,6 +75,7 @@ endif
 	$(APPLY) $(SRC)/pjproject/ice_config.patch
 	$(APPLY) $(SRC)/pjproject/multiple_listeners.patch
 	$(APPLY) $(SRC)/pjproject/pj_ice_sess.patch
+	$(APPLY) $(SRC)/pjproject/fix_turn_fallback.patch
 	$(UPDATE_AUTOCONFIG)
 	$(MOVE)
 
diff --git a/src/ice_transport.cpp b/src/ice_transport.cpp
index eaf7f0422a..e0ad0452fe 100644
--- a/src/ice_transport.cpp
+++ b/src/ice_transport.cpp
@@ -937,7 +937,7 @@ IceTransportFactory::IceTransportFactory()
     // v2.4.5 of PJNATH has a default of 100ms but RFC 5389 since version 14 requires
     // a minimum of 500ms on fixed-line links. Our usual case is wireless links.
     // This solves too long ICE exchange by DHT.
-    // Using 500ms with default PJ_STUN_MAX_TRANSMIT_COUNT (7) gives around 31s before timeout.
+    // Using 500ms with default PJ_STUN_MAX_TRANSMIT_COUNT (7) gives around 33s before timeout.
     ice_cfg_.stun_cfg.rto_msec = 500;
 
     // Add local hosts (IPv4, IPv6) as stun candidates
diff --git a/src/ringdht/ringaccount.cpp b/src/ringdht/ringaccount.cpp
index ee14773644..3a48f2c023 100644
--- a/src/ringdht/ringaccount.cpp
+++ b/src/ringdht/ringaccount.cpp
@@ -81,7 +81,6 @@ using std::chrono::system_clock;
 
 static constexpr int ICE_COMPONENTS {1};
 static constexpr int ICE_COMP_SIP_TRANSPORT {0};
-static constexpr int ICE_INIT_TIMEOUT {10};
 static constexpr auto ICE_NEGOTIATION_TIMEOUT = std::chrono::seconds(60);
 static constexpr auto TLS_TIMEOUT = std::chrono::seconds(30);
 const constexpr auto EXPORT_KEY_RENEWAL_TIME = std::chrono::minutes(20);
@@ -141,7 +140,7 @@ RingAccount::createIceTransport(const Args&... args)
     }
 
     auto ice = Manager::instance().getIceTransportFactory().createTransport(args...);
-    if (!ice or ice->waitForInitialization(ICE_INIT_TIMEOUT) <= 0)
+    if (!ice)
         throw std::runtime_error("ICE transport creation failed");
 
     if (const auto& publicIP = getPublishedIpAddress()) {
@@ -254,24 +253,24 @@ RingAccount::newOutgoingCall(const std::string& toUrl)
 
                 call->addSubCall(dev_call);
 
-                auto iceInitTimeout = std::chrono::steady_clock::now() + std::chrono::seconds {ICE_INIT_TIMEOUT};
-
-                manager.addTask([sthis, weak_dev_call, ice, iceInitTimeout, toUri, dev] {
+                manager.addTask([sthis, weak_dev_call, ice, toUri, dev] {
                     auto call = weak_dev_call.lock();
 
+                    // call aborted?
                     if (not call)
                         return false;
 
-                    if (ice->isFailed() or std::chrono::steady_clock::now() >= iceInitTimeout) {
-                        RING_DBG("ice init failed (or timeout)");
-                        call->onFailure();
+                    if (ice->isFailed()) {
+                        RING_ERR("[call:%s] ice init failed", call->getCallId().c_str());
+                        call->onFailure(EIO);
                         return false;
                     }
 
+                    // Loop until ICE transport is initialized.
+                    // Note: we suppose that ICE init routine has a an internal timeout (bounded in time)
+                    // and we let upper layers decide when the call shall be aborded (our first check upper).
                     if (not ice->isInitialized())
-                        return true; // process task again!
-
-                    RING_WARN("ICE initialised");
+                        return true;
 
                     // Next step: sent the ICE data to peer through DHT
                     const dht::Value::Id callvid  = udist(sthis->rand_);
@@ -280,7 +279,6 @@ RingAccount::newOutgoingCall(const std::string& toUrl)
                     dht::Value val { dht::IceCandidates(callvid, ice->getLocalAttributesAndCandidates()) };
                     val.id = vid;
 
-                    //RING_WARN("ICE initialised");
                     sthis->dht_.putEncrypted(
                         callkey, dev.dev,
                         std::move(val),
diff --git a/src/sip/sipcall.cpp b/src/sip/sipcall.cpp
index ca0547e0cc..09382c6274 100644
--- a/src/sip/sipcall.cpp
+++ b/src/sip/sipcall.cpp
@@ -63,7 +63,7 @@ getVideoSettings()
 }
 #endif
 
-static constexpr int DEFAULT_ICE_INIT_TIMEOUT {10}; // seconds
+static constexpr int DEFAULT_ICE_INIT_TIMEOUT {35}; // seconds
 static constexpr int DEFAULT_ICE_NEGO_TIMEOUT {60}; // seconds
 
 // SDP media Ids
@@ -718,8 +718,9 @@ SIPCall::setupLocalSDPFromIce()
         return;
     }
 
-    if (waitForIceInitialization(DEFAULT_ICE_INIT_TIMEOUT) <= 0) {
-        RING_ERR("[call:%s] Local ICE init failed", getCallId().c_str());
+    // we need an initialized ICE to progress further
+    if (iceTransport_->waitForInitialization(DEFAULT_ICE_INIT_TIMEOUT) <= 0) {
+        RING_ERR("[call:%s] Medias' ICE init failed", getCallId().c_str());
         return;
     }
 
@@ -761,7 +762,7 @@ SIPCall::getAllRemoteCandidates()
 bool
 SIPCall::startIce()
 {
-    if (not iceTransport_ or iceTransport_->isFailed())
+    if (not iceTransport_ or iceTransport_->isFailed() or not iceTransport_->isInitialized())
         return false;
     if (iceTransport_->isStarted()) {
         RING_DBG("[call:%s] ICE already started", getCallId().c_str());
-- 
GitLab