diff --git a/src/media/audio/audio_sender.cpp b/src/media/audio/audio_sender.cpp
index 6dd17325281c485dd9f9d68e5454b2a638e4d7ab..c4dd029821a25e12301c8335f5cfe6f9a260faf0 100644
--- a/src/media/audio/audio_sender.cpp
+++ b/src/media/audio/audio_sender.cpp
@@ -72,10 +72,12 @@ AudioSender::setup(SocketPair& socketPair)
         RING_DBG("audioEncoder_->openOutput %s", dest_.c_str());
         audioEncoder_->openOutput(dest_, "rtp");
         audioEncoder_->setOptions(args_);
+        auto codec = std::static_pointer_cast<AccountAudioCodecInfo>(args_.codec);
+        auto ms = MediaStream("audio sender", codec->audioformat);
+        audioEncoder_->setOptions(ms);
         audioEncoder_->addStream(args_.codec->systemCodecInfo);
         audioEncoder_->setInitSeqVal(seqVal_);
         audioEncoder_->setIOContext(muxContext_->getContext());
-        audioEncoder_->startIO();
     } catch (const MediaEncoderException &e) {
         RING_ERR("%s", e.what());
         return false;
diff --git a/src/media/media_encoder.cpp b/src/media/media_encoder.cpp
index affc929814c50e47bb23ee64979e85893fc63787..5991909b1c7ef9b7eb2adb4e81faff01fc60ba4b 100644
--- a/src/media/media_encoder.cpp
+++ b/src/media/media_encoder.cpp
@@ -30,6 +30,7 @@
 #include "logger.h"
 #include "manager.h"
 #include "string_utils.h"
+#include "system_codec_container.h"
 
 #ifdef RING_ACCEL
 #include "video/accel.h"
@@ -75,74 +76,44 @@ MediaEncoder::~MediaEncoder()
 }
 
 void
-MediaEncoder::setDeviceOptions(const DeviceParams& args)
+MediaEncoder::setOptions(const MediaStream& opts)
 {
-    device_ = args;
-    // Make sure width and height are even (required by x264)
-    // This is especially for image/gif streaming, as video files and cameras usually have even resolutions
-    device_.width -= device_.width % 2;
-    device_.height -= device_.height % 2;
-    if (device_.width)
-        libav_utils::setDictValue(&options_, "width", ring::to_string(device_.width));
-    if (device_.height)
-        libav_utils::setDictValue(&options_, "height", ring::to_string(device_.height));
-    if (not device_.framerate)
-        device_.framerate = 30;
-    libav_utils::setDictValue(&options_, "framerate", ring::to_string(device_.framerate.real()));
+    if (!opts.isValid()) {
+        RING_ERR() << "Invalid options";
+        return;
+    }
+
+    if (opts.isVideo) {
+        videoOpts_ = opts;
+        // Make sure width and height are even (required by x264)
+        // This is especially for image/gif streaming, as video files and cameras usually have even resolutions
+        videoOpts_.width -= videoOpts_.width % 2;
+        videoOpts_.height -= videoOpts_.height % 2;
+        if (not videoOpts_.frameRate)
+            videoOpts_.frameRate = 30;
+    } else {
+        audioOpts_ = opts;
+    }
 }
 
 void
 MediaEncoder::setOptions(const MediaDescription& args)
 {
-    codec_ = args.codec;
-
     libav_utils::setDictValue(&options_, "payload_type", ring::to_string(args.payload_type));
     libav_utils::setDictValue(&options_, "max_rate", ring::to_string(args.codec->bitrate));
     libav_utils::setDictValue(&options_, "crf", ring::to_string(args.codec->quality));
 
-    if (args.codec->systemCodecInfo.mediaType == MEDIA_AUDIO) {
-        auto accountAudioCodec = std::static_pointer_cast<AccountAudioCodecInfo>(args.codec);
-        if (accountAudioCodec->audioformat.sample_rate)
-            libav_utils::setDictValue(&options_, "sample_rate",
-                        ring::to_string(accountAudioCodec->audioformat.sample_rate));
-
-        if (accountAudioCodec->audioformat.nb_channels)
-            libav_utils::setDictValue(&options_, "channels",
-                        ring::to_string(accountAudioCodec->audioformat.nb_channels));
-
-        if (accountAudioCodec->audioformat.sample_rate && accountAudioCodec->audioformat.nb_channels)
-            libav_utils::setDictValue(&options_, "frame_size",
-                        ring::to_string(static_cast<unsigned>(0.02 * accountAudioCodec->audioformat.sample_rate)));
-    }
-
     if (not args.parameters.empty())
         libav_utils::setDictValue(&options_, "parameters", args.parameters);
 }
 
 void
-MediaEncoder::setOptions(std::map<std::string, std::string> options)
+MediaEncoder::setMetadata(const std::string& title, const std::string& description)
 {
-    const auto& titleIt = options.find("title");
-    if (titleIt != options.end() and not titleIt->second.empty())
-        libav_utils::setDictValue(&outputCtx_->metadata, titleIt->first, titleIt->second);
-    const auto& descIt = options.find("description");
-    if (descIt != options.end() and not descIt->second.empty())
-        libav_utils::setDictValue(&outputCtx_->metadata, descIt->first, descIt->second);
-
-    auto bitrate = SystemCodecInfo::DEFAULT_MAX_BITRATE;
-    auto quality = SystemCodecInfo::DEFAULT_CODEC_QUALITY;
-    // ensure all options retrieved later on are in options_ (insert does nothing if key exists)
-    options.insert({"max_rate", ring::to_string(bitrate)});
-    options.insert({"crf", ring::to_string(quality)});
-    options.insert({"sample_rate", "8000"});
-    options.insert({"channels", "2"});
-    int sampleRate = atoi(options["sample_rate"].c_str());
-    options.insert({"frame_size", ring::to_string(static_cast<unsigned>(0.02*sampleRate))});
-    options.insert({"width", "320"});
-    options.insert({"height", "240"});
-    options.insert({"framerate", "30.00"});
-    for (const auto& it : options)
-        libav_utils::setDictValue(&options_, it.first, it.second);
+    if (not title.empty())
+        libav_utils::setDictValue(&outputCtx_->metadata, "title", title);
+    if (not description.empty())
+        libav_utils::setDictValue(&outputCtx_->metadata, "description", description);
 }
 
 void
@@ -166,7 +137,10 @@ MediaEncoder::getLastSeqValue()
 std::string
 MediaEncoder::getEncoderName() const
 {
-    return encoders_[currentStreamIdx_]->codec->name;
+    if (videoOpts_.isValid())
+        return videoCodec_;
+    else
+        return audioCodec_;
 }
 
 void
@@ -185,6 +159,32 @@ MediaEncoder::openOutput(const std::string& filename, const std::string& format)
 
 int
 MediaEncoder::addStream(const SystemCodecInfo& systemCodecInfo)
+{
+    if (systemCodecInfo.mediaType == MEDIA_AUDIO) {
+        audioCodec_ = systemCodecInfo.name;
+        return initStream(systemCodecInfo);
+    } else {
+        videoCodec_ = systemCodecInfo.name;
+        // TODO only support 1 audio stream and 1 video stream per encoder
+        if (audioOpts_.isValid())
+            return 1; // stream will be added to AVFormatContext after audio stream
+        else
+            return 0; // only a video stream
+    }
+}
+
+int
+MediaEncoder::initStream(const std::string& codecName)
+{
+    const auto codecInfo = getSystemCodecContainer()->searchCodecByName(codecName, MEDIA_ALL);
+    if (codecInfo)
+        return initStream(*codecInfo);
+    else
+        return -1;
+}
+
+int
+MediaEncoder::initStream(const SystemCodecInfo& systemCodecInfo)
 {
     AVCodec* outputCodec = nullptr;
     AVCodecContext* encoderCtx = nullptr;
@@ -192,7 +192,7 @@ MediaEncoder::addStream(const SystemCodecInfo& systemCodecInfo)
     if (systemCodecInfo.mediaType == MEDIA_VIDEO) {
         if (enableAccel_) {
             if (accel_ = video::HardwareAccel::setupEncoder(
-                static_cast<AVCodecID>(systemCodecInfo.avcodecId), device_.width, device_.height)) {
+                static_cast<AVCodecID>(systemCodecInfo.avcodecId), videoOpts_.width, videoOpts_.height)) {
                 outputCodec = avcodec_find_encoder_by_name(accel_->getCodecName().c_str());
             }
         } else {
@@ -337,10 +337,10 @@ MediaEncoder::addStream(const SystemCodecInfo& systemCodecInfo)
 }
 
 void
-MediaEncoder::setIOContext(AVIOContext* ioctx)
+MediaEncoder::openIOContext()
 {
-    if (ioctx) {
-        outputCtx_->pb = ioctx;
+    if (ioCtx_) {
+        outputCtx_->pb = ioCtx_;
         outputCtx_->packet_size = outputCtx_->pb->buffer_size;
     } else {
         int ret = 0;
@@ -352,7 +352,7 @@ MediaEncoder::setIOContext(AVIOContext* ioctx)
         if (!(outputCtx_->oformat->flags & AVFMT_NOFILE)) {
             if ((ret = avio_open(&outputCtx_->pb, filename, AVIO_FLAG_WRITE)) < 0) {
                 std::stringstream ss;
-                ss << "Could not set IO context for '" << filename << "': " << libav_utils::getError(ret);
+                ss << "Could not open IO context for '" << filename << "': " << libav_utils::getError(ret);
                 throw MediaEncoderException(ss.str().c_str());
             }
         }
@@ -362,6 +362,8 @@ MediaEncoder::setIOContext(AVIOContext* ioctx)
 void
 MediaEncoder::startIO()
 {
+    if (!outputCtx_->pb)
+        openIOContext();
     if (avformat_write_header(outputCtx_, options_ ? &options_ : nullptr)) {
         RING_ERR("Could not write header for output file... check codec parameters");
         throw MediaEncoderException("Failed to write output file header");
@@ -372,6 +374,7 @@ MediaEncoder::startIO()
 #else
     av_dump_format(outputCtx_, 0, outputCtx_->filename, 1);
 #endif
+    initialized_ = true;
 }
 
 #ifdef RING_VIDEO
@@ -379,6 +382,11 @@ int
 MediaEncoder::encode(VideoFrame& input, bool is_keyframe,
                      int64_t frame_number)
 {
+    if (!initialized_) {
+        initStream(videoCodec_);
+        startIO();
+    }
+
     /* Prepare a frame suitable to our encoder frame format,
      * keeping also the input aspect ratio.
      */
@@ -426,6 +434,13 @@ MediaEncoder::encode(VideoFrame& input, bool is_keyframe,
 int
 MediaEncoder::encodeAudio(AudioFrame& frame)
 {
+    if (!initialized_) {
+        // Initialize on first video frame, or first audio frame if no video stream
+        if (not videoOpts_.isValid())
+            startIO();
+        else
+            return 0;
+    }
     frame.pointer()->pts = sent_samples;
     sent_samples += frame.pointer()->nb_samples;
     encode(frame.pointer(), currentStreamIdx_);
@@ -435,6 +450,16 @@ MediaEncoder::encodeAudio(AudioFrame& frame)
 int
 MediaEncoder::encode(AVFrame* frame, int streamIdx)
 {
+    if (!initialized_) {
+        // Initialize on first video frame, or first audio frame if no video stream
+        bool isVideo = (frame->width > 0 && frame->height > 0);
+        if (isVideo or not videoOpts_.isValid()) {
+            initStream(videoCodec_);
+            startIO();
+        } else {
+            return 0;
+        }
+    }
     int ret = 0;
     AVCodecContext* encoderCtx = encoders_[streamIdx];
     AVPacket pkt;
@@ -536,34 +561,15 @@ MediaEncoder::prepareEncoderContext(AVCodec* outputCodec, bool is_video)
 
     if (is_video) {
         // resolution must be a multiple of two
-        if (device_.width && device_.height) {
-            encoderCtx->width = device_.width;
-            encoderCtx->height = device_.height;
-        } else {
-            encoderCtx->width = std::atoi(libav_utils::getDictValue(options_, "width"));
-            encoderCtx->height = std::atoi(libav_utils::getDictValue(options_, "height"));
-        }
+        encoderCtx->width = videoOpts_.width;
+        encoderCtx->height = videoOpts_.height;
 
         // satisfy ffmpeg: denominator must be 16bit or less value
         // time base = 1/FPS
-        if (device_.framerate) {
-            av_reduce(&encoderCtx->framerate.num, &encoderCtx->framerate.den,
-                      device_.framerate.numerator(), device_.framerate.denominator(),
-                      (1U << 16) - 1);
-            encoderCtx->time_base = av_inv_q(encoderCtx->framerate);
-        } else {
-            // get from options_, else default to 30 fps
-            auto v = libav_utils::getDictValue(options_, "framerate");
-            AVRational framerate = AVRational{30, 1};
-            if (v)
-                av_parse_ratio_quiet(&framerate, v, 120);
-            if (framerate.den == 0)
-                framerate.den = 1;
-            av_reduce(&encoderCtx->framerate.num, &encoderCtx->framerate.den,
-                      framerate.num, framerate.den,
-                      (1U << 16) - 1);
-            encoderCtx->time_base = av_inv_q(encoderCtx->framerate);
-        }
+        av_reduce(&encoderCtx->framerate.num, &encoderCtx->framerate.den,
+                  videoOpts_.frameRate.numerator(), videoOpts_.frameRate.denominator(),
+                  (1U << 16) - 1);
+        encoderCtx->time_base = av_inv_q(encoderCtx->framerate);
 
         // emit one intra frame every gop_size frames
         encoderCtx->max_b_frames = 0;
@@ -580,36 +586,21 @@ MediaEncoder::prepareEncoderContext(AVCodec* outputCodec, bool is_video)
         // encoderCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
     } else {
         encoderCtx->sample_fmt = AV_SAMPLE_FMT_S16;
-        auto v = libav_utils::getDictValue(options_, "sample_rate");
-        if (v) {
-            encoderCtx->sample_rate = atoi(v);
-            encoderCtx->time_base = AVRational{1, encoderCtx->sample_rate};
-        } else {
-            RING_WARN("[%s] No sample rate set", encoderName);
-            encoderCtx->sample_rate = 8000;
-        }
-
-        v = libav_utils::getDictValue(options_, "channels");
-        if (v) {
-            auto c = std::atoi(v);
-            if (c > 2 or c < 1) {
-                RING_WARN("[%s] Clamping invalid channel value %d", encoderName, c);
-                c = 1;
-            }
-            encoderCtx->channels = c;
+        encoderCtx->sample_rate = std::max(8000, audioOpts_.sampleRate);
+        encoderCtx->time_base = AVRational{1, encoderCtx->sample_rate};
+        if (audioOpts_.nbChannels > 2 || audioOpts_.nbChannels < 1) {
+            encoderCtx->channels = std::max(std::min(audioOpts_.nbChannels, 1), 2);
+            RING_ERR() << "[" << encoderName << "] Clamping invalid channel count: "
+                << audioOpts_.nbChannels << " -> " << encoderCtx->channels;
         } else {
-            RING_WARN("[%s] Channels not set", encoderName);
-            encoderCtx->channels = 1;
+            encoderCtx->channels = audioOpts_.nbChannels;
         }
-
-        encoderCtx->channel_layout = encoderCtx->channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
-
-        v = libav_utils::getDictValue(options_, "frame_size");
-        if (v) {
-            encoderCtx->frame_size = atoi(v);
-            RING_DBG("[%s] Frame size %d", encoderName, encoderCtx->frame_size);
+        encoderCtx->channel_layout = av_get_default_channel_layout(encoderCtx->channels);
+        if (audioOpts_.frameSize) {
+            encoderCtx->frame_size = audioOpts_.frameSize;
+            RING_DBG() << "[" << encoderName << "] Frame size " << encoderCtx->frame_size;
         } else {
-            RING_WARN("[%s] Frame size not set", encoderName);
+            RING_WARN() << "[" << encoderName << "] Frame size not set";
         }
     }
 
@@ -679,7 +670,10 @@ MediaEncoder::extractProfileLevelID(const std::string &parameters,
 bool
 MediaEncoder::useCodec(const ring::AccountCodecInfo* codec) const noexcept
 {
-    return codec_.get() == codec;
+    if (codec->systemCodecInfo.mediaType == MEDIA_VIDEO)
+        return videoCodec_ == codec->systemCodecInfo.name;
+    else
+        return audioCodec_ == codec->systemCodecInfo.name;
 }
 
 #ifdef RING_ACCEL
@@ -748,7 +742,6 @@ MediaEncoder::readConfig(AVDictionary** dict, AVCodecContext* encoderCtx)
                 return;
             }
             // If users want to change these, they should use the settings page.
-            std::vector<std::string> ignoredKeys = { "width", "height", "framerate", "sample_rate", "channels", "frame_size", "parameters" };
             for (Json::Value::const_iterator it = config.begin(); it != config.end(); ++it) {
                 Json::Value v = *it;
                 if (!it.key().isConvertibleTo(Json::ValueType::stringValue)
@@ -759,7 +752,7 @@ MediaEncoder::readConfig(AVDictionary** dict, AVCodecContext* encoderCtx)
                 const auto& key = it.key().asString();
                 const auto& value = v.asString();
                 // provides a way to override all AVCodecContext fields MediaEncoder sets
-                if (std::find(ignoredKeys.cbegin(), ignoredKeys.cend(), key) != ignoredKeys.cend())
+                if (key == "parameters") // Used by MediaEncoder for profile-level-id, ignore
                     continue;
                 else if (value.empty())
                     libav_utils::setDictValue(dict, key, nullptr);
diff --git a/src/media/media_encoder.h b/src/media/media_encoder.h
index 0a6d598de6a9a710309880a8b147b04b35f70eae..0c253543225d9ebe280c53b98a227aa51a665101 100644
--- a/src/media/media_encoder.h
+++ b/src/media/media_encoder.h
@@ -32,7 +32,6 @@
 #include "noncopyable.h"
 #include "media_buffer.h"
 #include "media_codec.h"
-#include "media_device.h"
 #include "media_stream.h"
 
 #include <map>
@@ -67,12 +66,11 @@ public:
     ~MediaEncoder();
 
     void openOutput(const std::string& filename, const std::string& format="");
-    void setDeviceOptions(const DeviceParams& args);
+    void setMetadata(const std::string& title, const std::string& description);
+    void setOptions(const MediaStream& opts);
     void setOptions(const MediaDescription& args);
-    void setOptions(std::map<std::string, std::string> options);
     int addStream(const SystemCodecInfo& codec);
-    void setIOContext(AVIOContext* ioctx);
-    void startIO();
+    void setIOContext(AVIOContext* ioctx) { ioCtx_ = ioctx; }
 
     bool send(AVPacket& packet, int streamIdx = -1);
 
@@ -91,8 +89,8 @@ public:
     /* getWidth and getHeight return size of the encoded frame.
      * Values have meaning only after openLiveOutput call.
      */
-    int getWidth() const { return device_.width; }
-    int getHeight() const { return device_.height; }
+    int getWidth() const { return videoOpts_.width; };
+    int getHeight() const { return videoOpts_.height; };
 
     void setInitSeqVal(uint16_t seqVal);
     uint16_t getLastSeqValue();
@@ -112,11 +110,17 @@ private:
     AVCodecContext* prepareEncoderContext(AVCodec* outputCodec, bool is_video);
     void forcePresetX264(AVCodecContext* encoderCtx);
     void extractProfileLevelID(const std::string &parameters, AVCodecContext *ctx);
+    int initStream(const std::string& codecName);
+    int initStream(const SystemCodecInfo& systemCodecInfo);
+    void openIOContext();
+    void startIO();
 
     std::vector<AVCodecContext*> encoders_;
     AVFormatContext *outputCtx_ = nullptr;
+    AVIOContext* ioCtx_ = nullptr;
     int currentStreamIdx_ = -1;
     unsigned sent_samples = 0;
+    bool initialized_ {false};
 
 #ifdef RING_VIDEO
     video::VideoScaler scaler_;
@@ -133,9 +137,11 @@ private:
 
 protected:
     void readConfig(AVDictionary** dict, AVCodecContext* encoderCtx);
-    AVDictionary *options_ = nullptr;
-    DeviceParams device_;
-    std::shared_ptr<const AccountCodecInfo> codec_;
+    AVDictionary* options_ = nullptr;
+    MediaStream videoOpts_;
+    MediaStream audioOpts_;
+    std::string videoCodec_;
+    std::string audioCodec_;
 };
 
 } // namespace ring
diff --git a/src/media/media_recorder.cpp b/src/media/media_recorder.cpp
index 68715e035631195d4de4d1ce7762e65198457acf..bdd25c66b306d79f9e9cc1cee631eca6e35abb43 100644
--- a/src/media/media_recorder.cpp
+++ b/src/media/media_recorder.cpp
@@ -243,8 +243,6 @@ MediaRecorder::initRecord()
     // need to get encoder parameters before calling openFileOutput
     // openFileOutput needs to be called before adding any streams
 
-    std::map<std::string, std::string> encoderOptions;
-
     std::stringstream timestampString;
     timestampString << std::put_time(&startTime_, "%Y-%m-%d %H:%M:%S");
 
@@ -254,13 +252,17 @@ MediaRecorder::initRecord()
         title_ = ss.str();
     }
     title_ = replaceAll(title_, "%TIMESTAMP", timestampString.str());
-    encoderOptions["title"] = title_;
 
     if (description_.empty()) {
         description_ = "Recorded with Jami https://jami.net";
     }
     description_ = replaceAll(description_, "%TIMESTAMP", timestampString.str());
-    encoderOptions["description"] = description_;
+
+    encoder_->setMetadata(title_, description_);
+    encoder_->openOutput(getPath());
+#ifdef RING_ACCEL
+    encoder_->enableAccel(false); // TODO recorder has problems with hardware encoding
+#endif
 
     videoFilter_.reset();
     if (hasVideo_) {
@@ -269,11 +271,7 @@ MediaRecorder::initRecord()
             RING_ERR() << "Could not retrieve video recorder stream properties";
             return -1;
         }
-        encoderOptions["width"] = std::to_string(videoStream.width);
-        encoderOptions["height"] = std::to_string(videoStream.height);
-        std::stringstream fps;
-        fps << videoStream.frameRate;
-        encoderOptions["framerate"] = fps.str();
+        encoder_->setOptions(videoStream);
     }
 
     audioFilter_.reset();
@@ -283,13 +281,9 @@ MediaRecorder::initRecord()
             RING_ERR() << "Could not retrieve audio recorder stream properties";
             return -1;
         }
-        encoderOptions["sample_rate"] = std::to_string(audioStream.sampleRate);
-        encoderOptions["channels"] = std::to_string(audioStream.nbChannels);
+        encoder_->setOptions(audioStream);
     }
 
-    encoder_->openOutput(getPath());
-    encoder_->setOptions(encoderOptions);
-
     if (hasVideo_) {
         auto videoCodec = std::static_pointer_cast<ring::SystemVideoCodecInfo>(
             getSystemCodecContainer()->searchCodecByName("VP8", ring::MEDIA_VIDEO));
@@ -310,13 +304,7 @@ MediaRecorder::initRecord()
         }
     }
 
-    try {
-        encoder_->setIOContext(nullptr);
-        encoder_->startIO();
-    } catch (const MediaEncoderException& e) {
-        RING_ERR() << "Could not start recorder: " << e.what();
-        return -1;
-    }
+    encoder_->setIOContext(nullptr);
 
     RING_DBG() << "Recording initialized";
     return 0;
diff --git a/src/media/video/video_sender.cpp b/src/media/video/video_sender.cpp
index 9ee13f8514cbb8c3c2ef79bae1d285a6b574cbef..205b24c1fc869546065f46c71fab477fcdba5d51 100644
--- a/src/media/video/video_sender.cpp
+++ b/src/media/video/video_sender.cpp
@@ -26,6 +26,7 @@
 #include "client/videomanager.h"
 #include "logger.h"
 #include "manager.h"
+#include "media_device.h"
 #include "smartools.h"
 #include "sip/sipcall.h"
 #ifdef RING_ACCEL
@@ -51,14 +52,12 @@ VideoSender::VideoSender(const std::string& dest, const DeviceParams& dev,
 {
     keyFrameFreq_ = dev.framerate.numerator() * KEY_FRAME_PERIOD;
     videoEncoder_->openOutput(dest, "rtp");
-    videoEncoder_->setDeviceOptions(dev);
+    auto opts = MediaStream("video sender", AV_PIX_FMT_YUV420P, 1 / (rational<int>)dev.framerate, dev.width, dev.height, 1, (rational<int>)dev.framerate);
+    videoEncoder_->setOptions(opts);
     videoEncoder_->setOptions(args);
     videoEncoder_->addStream(args.codec->systemCodecInfo);
     videoEncoder_->setInitSeqVal(seqVal);
     videoEncoder_->setIOContext(muxContext_->getContext());
-    videoEncoder_->startIO();
-
-    videoEncoder_->print_sdp();
 
     // Send local video codec in SmartInfo
     Smartools::getInstance().setLocalVideoCodec(videoEncoder_->getEncoderName());
@@ -72,7 +71,6 @@ VideoSender::~VideoSender()
     videoEncoder_->flush();
 }
 
-
 void
 VideoSender::encodeAndSendVideo(VideoFrame& input_frame)
 {
@@ -117,6 +115,10 @@ VideoSender::encodeAndSendVideo(VideoFrame& input_frame)
         if (videoEncoder_->encode(swFrame, is_keyframe, frameNumber_++) < 0)
             RING_ERR("encoding failed");
     }
+#ifdef DEBUG_SDP
+    if (frameNumber_ == 1) // video stream is lazy initialized, wait for first frame
+        videoEncoder_->print_sdp();
+#endif
 }
 
 void
diff --git a/src/media/video/video_sender.h b/src/media/video/video_sender.h
index b98c178b0b630da1fc38dc21658c6559aad3cc2b..220365c3776194dc37e3bee7ec1488ae06117565 100644
--- a/src/media/video/video_sender.h
+++ b/src/media/video/video_sender.h
@@ -34,6 +34,7 @@
 // Forward declarations
 namespace ring {
 class SocketPair;
+struct DeviceParams;
 struct AccountVideoCodecInfo;
 }