diff --git a/src/media/video/video_receive_thread.cpp b/src/media/video/video_receive_thread.cpp
index 6a86e8f6877c29a246cb60044d59e6db1b892124..2924a326aa9af9d785d198d9f01753a901c4f4b2 100644
--- a/src/media/video/video_receive_thread.cpp
+++ b/src/media/video/video_receive_thread.cpp
@@ -52,6 +52,7 @@ VideoReceiveThread::VideoReceiveThread(const std::string& id,
     , stream_(sdp)
     , sdpContext_(stream_.str().size(), false, &readFunction, 0, 0, this)
     , sink_ {Manager::instance().createSinkClient(id)}
+    , isVideoConfigured_(false)
     , mtu_(mtu)
     , rotation_(0)
     , loop_(std::bind(&VideoReceiveThread::setup, this),
@@ -119,36 +120,6 @@ bool VideoReceiveThread::setup()
         // Now replace our custom AVIOContext with one that will read packets
         videoDecoder_->setIOContext(demuxContext_.get());
     }
-
-    if (videoDecoder_->setupVideo()) {
-        JAMI_ERR("decoder IO startup failed");
-        return false;
-    }
-
-    // Default size from input video
-    if (dstWidth_ == 0 and dstHeight_ == 0) {
-        dstWidth_ = videoDecoder_->getWidth();
-        dstHeight_ = videoDecoder_->getHeight();
-    }
-
-    if (not sink_->start()) {
-        JAMI_ERR("RX: sink startup failed");
-        return false;
-    }
-
-    auto conf = Manager::instance().getConferenceFromCallID(id_);
-    if (!conf)
-        exitConference();
-
-    // Send remote video codec in SmartInfo
-    Smartools::getInstance().setRemoteVideoCodec(videoDecoder_->getDecoderName(), id_);
-
-    // Send the resolution in smartInfo
-    Smartools::getInstance().setResolution(id_, dstWidth_, dstHeight_);
-
-    if (onSetupSuccess_)
-        onSetupSuccess_(MEDIA_VIDEO);
-
     return true;
 }
 
@@ -187,6 +158,9 @@ void VideoReceiveThread::addIOContext(SocketPair& socketPair)
 
 void VideoReceiveThread::decodeFrame()
 {
+    if (!configureVideoOutput()) {
+        return;
+    }
     auto status = videoDecoder_->decode();
     if (status == MediaDemuxer::Status::EndOfFile ||
         status == MediaDemuxer::Status::ReadError) {
@@ -198,6 +172,48 @@ void VideoReceiveThread::decodeFrame()
     }
 }
 
+// Completes video setup from decodeFrame(): sets up the decoder video, starts
+// the sink and fires onSetupSuccess_. Returns true once configured (later calls
+// return immediately); on a startup failure calls loop_.stop() and returns false.
+bool VideoReceiveThread::configureVideoOutput()
+{
+    if (isVideoConfigured_)
+        return true;
+    if (!loop_.isRunning())
+        return false;
+    // A truthy result from setupVideo() is treated as a failure here
+    if (videoDecoder_->setupVideo()) {
+        JAMI_ERR("decoder IO startup failed");
+        loop_.stop();
+        return false;
+    }
+
+    // Default size from input video
+    if (dstWidth_ == 0 and dstHeight_ == 0) {
+        dstWidth_ = videoDecoder_->getWidth();
+        dstHeight_ = videoDecoder_->getHeight();
+    }
+
+    if (not sink_->start()) {
+        JAMI_ERR("RX: sink startup failed");
+        loop_.stop();
+        return false;
+    }
+
+    auto conf = Manager::instance().getConferenceFromCallID(id_);
+    if (!conf)
+        exitConference();
+
+    // Send the remote video codec, then the resolution, in SmartInfo
+    Smartools::getInstance().setRemoteVideoCodec(videoDecoder_->getDecoderName(), id_);
+    Smartools::getInstance().setResolution(id_, dstWidth_, dstHeight_);
+
+    if (onSetupSuccess_)
+        onSetupSuccess_(MEDIA_VIDEO);
+    isVideoConfigured_ = true;
+    return true;
+}
+
 void VideoReceiveThread::enterConference()
 {
     if (!loop_.isRunning())
diff --git a/src/media/video/video_receive_thread.h b/src/media/video/video_receive_thread.h
index 757bd8829d1be9d25d8123867e37d96357c184c1..9f1379e8e1f5ebcddaa4ba3e4a26117314e65d8a 100644
--- a/src/media/video/video_receive_thread.h
+++ b/src/media/video/video_receive_thread.h
@@ -91,6 +91,7 @@ private:
     std::unique_ptr<MediaIOHandle> demuxContext_;
     std::shared_ptr<SinkClient> sink_;
     bool isReset_;
+    bool isVideoConfigured_;
     uint16_t mtu_;
     int rotation_;
     std::shared_ptr<AVBufferRef> displayMatrix_;
@@ -99,6 +100,7 @@ private:
     void decodeFrame();
     static int interruptCb(void *ctx);
     static int readFunction(void *opaque, uint8_t *buf, int buf_size);
+    bool configureVideoOutput();
 
     std::function<void(MediaType)> onSetupSuccess_;
 
diff --git a/src/sip/sipcall.cpp b/src/sip/sipcall.cpp
index 174b149d4f61e91b5596bc4c9ffa7e0f5e7e17d8..f90d5488b96529694a3535c378cb241fac1ec4ad 100644
--- a/src/sip/sipcall.cpp
+++ b/src/sip/sipcall.cpp
@@ -982,6 +982,15 @@ SIPCall::startAllMedia()
 
         rtp->setSuccessfulSetupCb([this](MediaType type){ rtpSetupSuccess(type); });
 
+#ifdef ENABLE_VIDEO
+        videortp_->setRequestKeyFrameCallback([wthis = weak()] {
+            runOnMainThread([wthis] {
+                if (auto this_ = wthis.lock())
+                    this_->requestKeyframe();
+            });
+        });
+#endif
+
         // Not restarting media loop on hold as it's a huge waste of CPU ressources
         // because of the audio loop
         if (getState() != CallState::HOLD) {
@@ -1006,15 +1015,6 @@ SIPCall::startAllMedia()
         }
     }
 
-#ifdef ENABLE_VIDEO
-    videortp_->setRequestKeyFrameCallback([wthis = weak()] {
-        runOnMainThread([wthis] {
-            if (auto this_ = wthis.lock())
-                this_->requestKeyframe();
-        });
-    });
-#endif
-
     if (not isSubcall() and peerHolding_ != peer_holding) {
         peerHolding_ = peer_holding;
         emitSignal<DRing::CallSignal::PeerHold>(getCallId(), peerHolding_);