diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp
index 0d343641dd9d2d873b9f7a125d8adccbb492570c..5454afe8267990a7511f1223591d50593612fa81 100644
--- a/src/media/audio/audio_rtp_session.cpp
+++ b/src/media/audio/audio_rtp_session.cpp
@@ -205,7 +205,8 @@ AudioSender::getLastSeqValue()
 void
 AudioSender::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
-    audioEncoder_->startRecorder(rec);
+    if (audioEncoder_)
+        audioEncoder_->startRecorder(rec);
 }
 
 class AudioReceiveThread
@@ -383,7 +384,8 @@ AudioReceiveThread::startLoop()
 void
 AudioReceiveThread::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
-    audioDecoder_->startRecorder(rec);
+    if (audioDecoder_)
+        audioDecoder_->startRecorder(rec);
 }
 
 AudioRtpSession::AudioRtpSession(const std::string& id)
@@ -517,8 +519,10 @@ AudioRtpSession::setMuted(bool isMuted)
 void
 AudioRtpSession::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
-    receiveThread_->startRecorder(rec);
-    sender_->startRecorder(rec);
+    if (receiveThread_)
+        receiveThread_->startRecorder(rec);
+    if (sender_)
+        sender_->startRecorder(rec);
 }
 
 } // namespace ring
diff --git a/src/media/media_decoder.cpp b/src/media/media_decoder.cpp
index d3a4a3aadfac57d69514ca9375c2341aadd3831d..166c916fd39923b0f26b9d84b97225afa289406f 100644
--- a/src/media/media_decoder.cpp
+++ b/src/media/media_decoder.cpp
@@ -291,7 +291,7 @@ MediaDecoder::decode(VideoFrame& result)
 #endif
         if (auto rec = recorder_.lock()) {
             if (!recordingStarted_) {
-                auto ms = MediaStream("", avStream_);
+                auto ms = MediaStream("", avStream_, frame->pts);
                 ms.format = frame->format; // might not match avStream_ if accel is used
                 if (rec->addStream(true, true, ms) >= 0)
                     recordingStarted_ = true;
@@ -357,7 +357,7 @@ MediaDecoder::decode(const AudioFrame& decodedFrame)
 
         if (auto rec = recorder_.lock()) {
             if (!recordingStarted_) {
-                auto ms = MediaStream("", avStream_);
+                auto ms = MediaStream("", avStream_, frame->pts);
                 if (rec->addStream(false, true, ms) >= 0)
                     recordingStarted_ = true;
                 else
@@ -518,8 +518,6 @@ void
 MediaDecoder::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
     // recording will start once we can send an AVPacket to the recorder
-    if (inputDecoder_->type != AVMEDIA_TYPE_AUDIO)
-        return;
     recordingStarted_ = false;
     recorder_ = rec;
     if (auto r = recorder_.lock()) {
diff --git a/src/media/media_encoder.cpp b/src/media/media_encoder.cpp
index 91f3ab05e870f6cc7d21c8d58370f3edcf55540f..eab867310cbf89fcb68724aacae21e799f1e5389 100644
--- a/src/media/media_encoder.cpp
+++ b/src/media/media_encoder.cpp
@@ -453,7 +453,7 @@ MediaEncoder::encode(AVFrame* frame, int streamIdx)
     if (auto rec = recorder_.lock()) {
         bool isVideo = encoderCtx->codec_type == AVMEDIA_TYPE_VIDEO;
         if (!recordingStarted_) {
-            auto ms = MediaStream("", outputCtx_->streams[streamIdx]);
+            auto ms = MediaStream("", outputCtx_->streams[streamIdx], frame->pts);
             if (rec->addStream(isVideo, false, ms) >= 0)
                 recordingStarted_ = true;
             else
@@ -710,8 +710,6 @@ void
 MediaEncoder::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
     // recording will start once we can send an AVPacket to the recorder
-    if (encoders_[0]->codec_type != AVMEDIA_TYPE_AUDIO)
-        return;
     recordingStarted_ = false;
     recorder_ = rec;
     if (auto r = recorder_.lock()) {
diff --git a/src/media/media_recorder.cpp b/src/media/media_recorder.cpp
index 3d40cd4243f17fdd12a5fdabbbdcad9b495732aa..7d7cc16df9d0636df0ecbf9757d585c6654d7abd 100644
--- a/src/media/media_recorder.cpp
+++ b/src/media/media_recorder.cpp
@@ -30,44 +30,13 @@ extern "C" {
 }
 
 #include <algorithm>
+#include <iomanip>
 #include <sstream>
 #include <sys/types.h>
-#include <time.h>
+#include <ctime>
 
 namespace ring {
 
-static std::string
-createTimestamp()
-{
-    time_t rawtime = time(nullptr);
-    struct tm * timeinfo = localtime(&rawtime);
-    std::stringstream out;
-
-    // DATE
-    out << timeinfo->tm_year + 1900;
-    if (timeinfo->tm_mon < 9) // prefix jan-sep with 0
-        out << 0;
-    out << timeinfo->tm_mon + 1; // tm_mon is 0 based
-    if (timeinfo->tm_mday < 10) // make sure there's 2 digits
-        out << 0;
-    out << timeinfo->tm_mday;
-
-    out << '-';
-
-    // TIME
-    if (timeinfo->tm_hour < 10) // make sure there's 2 digits
-        out << 0;
-    out << timeinfo->tm_hour;
-    if (timeinfo->tm_min < 10) // make sure there's 2 digits
-        out << 0;
-    out << timeinfo->tm_min;
-    if (timeinfo->tm_sec < 10) // make sure there's 2 digits
-        out << 0;
-    out << timeinfo->tm_sec;
-
-    return out.str();
-}
-
 MediaRecorder::MediaRecorder()
 {}
 
@@ -80,7 +49,23 @@ MediaRecorder::~MediaRecorder()
 std::string
 MediaRecorder::getFilename() const
 {
-    return dir_ + filename_ + ".ogg";
+    if (audioOnly_)
+        return dir_ + filename_ + ".ogg";
+    else
+        return dir_ + filename_ + ".mkv";
+}
+
+void
+MediaRecorder::audioOnly(bool audioOnly)
+{
+    audioOnly_ = audioOnly;
+}
+
+void
+MediaRecorder::setMetadata(const std::string& title, const std::string& desc)
+{
+    title_ = title;
+    description_ = desc;
 }
 
 void
@@ -121,7 +106,12 @@ MediaRecorder::toggleRecording()
 int
 MediaRecorder::startRecording()
 {
-    filename_ = createTimestamp();
+    std::time_t t = std::time(nullptr);
+    startTime_ = *std::localtime(&t);
+    std::stringstream ss;
+    ss << std::put_time(&startTime_, "%Y%m%d-%H%M%S");
+    filename_ = ss.str();
+
     encoder_.reset(new MediaEncoder);
 
     RING_DBG() << "Start recording '" << getFilename() << "'";
@@ -142,66 +132,138 @@ MediaRecorder::stopRecording()
 int
 MediaRecorder::addStream(bool isVideo, bool fromPeer, MediaStream ms)
 {
-    // video not yet implemented
-    if (isVideo)
-        return 0;
+    if (audioOnly_ && isVideo) {
+        RING_ERR() << "Trying to add video stream to audio only recording";
+        return -1;
+    }
 
     // overwrite stream name for simplicity's sake
     std::string streamName;
-    ms.name = (fromPeer ? "a:peer" : "a:local");
-    ++nbReceivedAudioStreams_;
-    streamParams_[isVideo][fromPeer] = ms;
+    if (isVideo) {
+        ms.name = (fromPeer ? "v:main" : "v:overlay");
+        ++nbReceivedVideoStreams_;
+    } else {
+        ms.name = (fromPeer ? "a:1" : "a:2");
+        ++nbReceivedAudioStreams_;
+    }
+    streams_[isVideo][fromPeer] = ms;
 
     // wait until all streams are ready before writing to the file
-    if (nbExpectedStreams_ != nbReceivedAudioStreams_)
+    if (nbExpectedStreams_ != nbReceivedAudioStreams_ + nbReceivedVideoStreams_)
         return 0;
     else
         return initRecord();
 }
 
+// Rebases the frame's timestamp onto the recording timeline, then sends it to the encoder,
+// going through the mixing filter first when two streams of that media type are recorded.
+int
+MediaRecorder::recordData(AVFrame* frame, bool isVideo, bool fromPeer)
+{
+    std::lock_guard<std::mutex> lk(mutex_);
+    if (!isRecording_ || !isReady_)
+        return 0;
+
+    // get filter input name if frame needs filtering
+    std::string inputName;
+    if (isVideo && nbReceivedVideoStreams_ == 2)
+        inputName = (fromPeer ? "v:main" : "v:overlay");
+    if (!isVideo && nbReceivedAudioStreams_ == 2)
+        inputName = (fromPeer ? "a:1" : "a:2");
+
+    // a filter is only required when the frame has to go through one
+    int streamIdx = (isVideo ? videoIdx_ : audioIdx_);
+    auto filter = (isVideo ? videoFilter_.get() : audioFilter_.get());
+    if (streamIdx < 0 || (!inputName.empty() && !filter)) {
+        RING_ERR() << "Specified stream is invalid: "
+            << (fromPeer ? "remote " : "local ") << (isVideo ? "video" : "audio");
+        return -1;
+    }
+
+    // new reference because we are changing the timestamp
+    AVFrame* input = av_frame_clone(frame);
+    if (!input) // allocation failure
+        return -1;
+    const MediaStream& ms = streams_[isVideo][fromPeer];
+    // stream has to start at 0
+    input->pts = input->pts - ms.firstTimestamp;
+    // convert streams to rtp timestamps if not already done by FFmpeg
+    if (isVideo && !fromPeer) {
+        input->pts = input->pts / (ms.frameRate * ms.timeBase).real();
+    } else if (!isVideo) {
+        input->pts = input->pts / (ms.timeBase * ms.sampleRate).real();
+    }
+    if (inputName.empty()) // #nofilters
+        return sendToEncoder(input, streamIdx);
+    emptyFilterGraph(); // empty filter graph output before sending more frames
+    int err = filter->feedInput(input, inputName);
+    av_frame_free(&input); // release the clone itself, av_frame_unref only drops its buffers
+    return err;
+}
+
 int
 MediaRecorder::initRecord()
 {
     std::lock_guard<std::mutex> lk(mutex_);
 
-    // use peer parameters if possible, else fall back on local parameters
-    int sampleRate = streamParams_[false][true].sampleRate;
-    if (sampleRate == 0) sampleRate = streamParams_[false][false].sampleRate;
-    int nbChannels = streamParams_[false][true].nbChannels;
-    if (nbChannels == 0) nbChannels = streamParams_[false][false].nbChannels;
+    // need to get encoder parameters before calling openFileOutput
+    // openFileOutput needs to be called before adding any streams
 
-    std::map<std::string, std::string> options;
-    options["sample_rate"] = std::to_string(sampleRate);
-    options["channels"] = std::to_string(nbChannels);
+    std::map<std::string, std::string> encoderOptions;
 
-    encoder_->openFileOutput(getFilename(), options);
+    if (title_.empty()) {
+        std::stringstream ss;
+        ss << "Ring recording at " << std::put_time(&startTime_, "%Y-%m-%d %H:%M:%S");
+        title_ = ss.str();
+    }
+    encoderOptions["title"] = title_;
 
+    if (description_.empty()) {
+        std::stringstream ss;
+        ss << "Recorded at " << std::put_time(&startTime_, "%Y-%m-%d %H:%M:%S")
+            << " with Ring https://ring.cx";
+        description_ = ss.str();
+    }
+    encoderOptions["description"] = description_;
+
+    videoFilter_.reset();
+    if (nbReceivedVideoStreams_ > 0) {
+        auto videoStream = setupVideoOutput();
+        if (videoStream.format < 0) {
+            RING_ERR() << "Could not retrieve video recorder stream properties";
+            return -1;
+        }
+        encoderOptions["width"] = std::to_string(videoStream.width);
+        encoderOptions["height"] = std::to_string(videoStream.height);
+        std::stringstream fps;
+        fps << videoStream.frameRate;
+        encoderOptions["framerate"] = fps.str();
+    }
+
+    audioFilter_.reset();
     if (nbReceivedAudioStreams_ > 0) {
-        std::vector<MediaStream> params;
-        std::string aFilter;
-        switch (nbReceivedAudioStreams_) {
-        case 1:
-            if (streamParams_[false].count(true) > 0)
-                params.emplace_back(streamParams_[false][true]);
-            else
-                params.emplace_back(streamParams_[false][false]);
-            audioFilter_.reset(); // no filter needed
-            break;
-        case 2:
-            params.emplace_back(streamParams_[false][true]);
-            params.emplace_back(streamParams_[false][false]);
-            aFilter = "[a:local] [a:peer] amix, aresample=osr=48000:ocl=stereo:osf=s16";
-            audioFilter_.reset(new MediaFilter);
-            if (audioFilter_->initialize(aFilter, params) < 0) {
-                RING_ERR() << "Failed to initialize audio filter";
-                return -1;
-            }
-            break;
-        default:
-            RING_ERR() << "Recording more than 2 audio streams is not supported";
-            return AVERROR(ENOTSUP);
+        auto audioStream = setupAudioOutput();
+        if (audioStream.format < 0) {
+            RING_ERR() << "Could not retrieve audio recorder stream properties";
+            return -1;
         }
+        encoderOptions["sample_rate"] = std::to_string(audioStream.sampleRate);
+        encoderOptions["channels"] = std::to_string(audioStream.nbChannels);
+    }
+
+    encoder_->openFileOutput(getFilename(), encoderOptions);
+
+    if (nbReceivedVideoStreams_ > 0) {
+        auto videoCodec = std::static_pointer_cast<ring::SystemVideoCodecInfo>(
+            getSystemCodecContainer()->searchCodecByName("VP8", ring::MEDIA_VIDEO));
+        videoIdx_ = encoder_->addStream(*videoCodec.get());
+        if (videoIdx_ < 0) {
+            RING_ERR() << "Failed to add video stream to encoder";
+            return -1;
+        }
+    }
 
+    if (nbReceivedAudioStreams_ > 0) {
         auto audioCodec = std::static_pointer_cast<ring::SystemAudioCodecInfo>(
             getSystemCodecContainer()->searchCodecByName("opus", ring::MEDIA_AUDIO));
         audioIdx_ = encoder_->addStream(*audioCodec.get());
@@ -209,10 +271,11 @@ MediaRecorder::initRecord()
             RING_ERR() << "Failed to add audio stream to encoder";
             return -1;
         }
-    } else
-        audioFilter_.reset();
+    }
 
-    isReady_ = (nbReceivedAudioStreams_ > 0 && audioIdx_ >= 0); // has audio and valid stream index
+    // ready to start recording if audio stream index and video stream index are valid
+    isReady_ = (nbReceivedAudioStreams_ > 0 && audioIdx_ >= 0)
+        && (audioOnly_ || (nbReceivedVideoStreams_ > 0 && videoIdx_ >= 0));
     if (isReady_) {
         std::unique_ptr<MediaIOHandle> ioHandle;
         try {
@@ -230,50 +293,124 @@ MediaRecorder::initRecord()
     }
 }
 
-int
-MediaRecorder::recordData(AVFrame* frame, bool isVideo, bool fromPeer)
+// Picks the parameters the video encoder is opened with: those of the only valid stream,
+// or the output of the video filter when both video streams are recorded.
+MediaStream
+MediaRecorder::setupVideoOutput()
 {
-    // video not yet implemented
-    if (isVideo)
-        return 0;
+    MediaStream encoderStream;
+    const MediaStream& peer = streams_[true][true];
+    const MediaStream& local = streams_[true][false];
+
+    switch (nbReceivedVideoStreams_) {
+    case 1: // use a stream with a valid size
+        if (peer.width > 0 && peer.height > 0)
+            encoderStream = peer;
+        else if (local.width > 0 && local.height > 0)
+            encoderStream = local;
+        else
+            encoderStream.format = -1; // invalidate stream
+        break;
+    case 2: // overlay local video over peer video
+        videoFilter_.reset(new MediaFilter);
+        if (videoFilter_->initialize(buildVideoFilter(), std::vector<MediaStream>{peer, local}) < 0) {
+            RING_ERR() << "Failed to initialize video filter";
+            encoderStream.format = -1; // invalidate stream
+        } else {
+            encoderStream = videoFilter_->getOutputParams();
+        }
+        break;
+    default:
+        RING_ERR() << "Recording more than 2 video streams is not supported";
+        encoderStream.format = -1; // invalidate stream
+    }
 
-    std::lock_guard<std::mutex> lk(mutex_);
-    if (!isRecording_ || !isReady_)
-        return 0;
+    const char* pixFmt = av_get_pix_fmt_name(static_cast<AVPixelFormat>(encoderStream.format));
+    RING_DBG() << "Video recorder '"
+        << (encoderStream.name.empty() ? "(null)" : encoderStream.name) << "' properties: "
+        << (pixFmt ? pixFmt : "(none)") << ", " // name is NULL for an invalid format
+        << encoderStream.width << "x" << encoderStream.height << ", " << encoderStream.frameRate << " fps";
+    return encoderStream;
+}
 
-    int streamIdx = audioIdx_;
-    auto filter = audioFilter_.get();
-    if (streamIdx < 0 || !filter) {
-        RING_ERR() << "Specified stream is invalid: "
-            << (fromPeer ? "remote " : "local ") << (isVideo ? "video" : "audio");
-        return -1;
-    }
+// Describes the graph drawing [v:overlay] (1/5 of the output height) at the bottom right of [v:main].
+std::string
+MediaRecorder::buildVideoFilter()
+{
+    std::stringstream v;
 
-    std::string inputName;
-    if (!isVideo && nbReceivedAudioStreams_ == 2)
-        inputName = (fromPeer ? "a:peer" : "a:local");
+    const auto& p = streams_[true][true]; // bound by reference, only read below
+    const auto& l = streams_[true][false];
 
-    // new reference because we are changing the timestamp
-    AVFrame* input = av_frame_clone(frame);
-    input->pts = nextTimestamp_[isVideo][fromPeer];
-    nextTimestamp_[isVideo][fromPeer] += (isVideo ? 1 : input->nb_samples);
+    constexpr int minHeight = 720;
+    const auto newFps = std::max(p.frameRate, l.frameRate); // both inputs are brought to the higher rate
+    const bool needScale = (p.height < minHeight);
+    const int newHeight = (needScale ? minHeight : p.height);
 
-    if (inputName.empty()) // #nofilters
-        return sendToEncoder(input, streamIdx);
+    // NOTE -2 means preserve aspect ratio and have the new number be even
+    if (needScale)
+        v << "[v:main] fps=" << newFps << ", scale=-2:" << newHeight << " [v:m]; ";
+    else
+        v << "[v:main] fps=" << newFps << " [v:m]; ";
 
-    // empty filter graph output before sending more frames
-    emptyFilterGraph();
+    v << "[v:overlay] fps=" << newFps << ", scale=-2:" << newHeight / 5 << " [v:o]; ";
 
-    int err = filter->feedInput(input, inputName);
-    av_frame_unref(input);
+    v << "[v:m] [v:o] overlay=main_w-overlay_w-10:main_h-overlay_h-10" << ", format=pix_fmts=yuv420p";
 
-    return err;
+    return v.str();
+}
+
+// Picks the parameters the audio encoder is opened with: those of the only valid stream,
+// or the output of the mixing filter when both audio streams are recorded.
+MediaStream
+MediaRecorder::setupAudioOutput()
+{
+    MediaStream encoderStream;
+    const MediaStream& peer = streams_[false][true];
+    const MediaStream& local = streams_[false][false];
+    // resample to common audio format, so any player can play the file
+    const std::string aFilter = "[a:1] [a:2] amix, aresample=osr=48000:ocl=stereo:osf=s16";
+
+    switch (nbReceivedAudioStreams_) {
+    case 1: // use a stream with a valid sample rate and channel count
+        if (peer.sampleRate > 0 && peer.nbChannels > 0)
+            encoderStream = peer;
+        else if (local.sampleRate > 0 && local.nbChannels > 0)
+            encoderStream = local;
+        else
+            encoderStream.format = -1; // invalidate stream
+        break;
+    case 2: // mix both audio streams
+        audioFilter_.reset(new MediaFilter);
+        if (audioFilter_->initialize(aFilter, std::vector<MediaStream>{peer, local}) < 0) {
+            RING_ERR() << "Failed to initialize audio filter";
+            encoderStream.format = -1; // invalidate stream
+        } else {
+            encoderStream = audioFilter_->getOutputParams();
+        }
+        break;
+    default:
+        RING_ERR() << "Recording more than 2 audio streams is not supported";
+        encoderStream.format = -1; // invalidate stream
+        break;
+    }
+
+    const char* sampleFmt = av_get_sample_fmt_name(static_cast<AVSampleFormat>(encoderStream.format));
+    RING_DBG() << "Audio recorder '"
+        << (encoderStream.name.empty() ? "(null)" : encoderStream.name) << "' properties: "
+        << (sampleFmt ? sampleFmt : "(none)") << ", " // name is NULL for an invalid format
+        << encoderStream.sampleRate << " Hz, "
+        << encoderStream.nbChannels << " channels";
+    return encoderStream;
 }
 
 void
 MediaRecorder::emptyFilterGraph()
 {
     AVFrame* output;
+    if (videoIdx_ >= 0 && videoFilter_) // no video filter exists when a single video stream is recorded
+        while ((output = videoFilter_->readOutput()))
+            sendToEncoder(output, videoIdx_);
     if (audioIdx_ >= 0)
         while ((output = audioFilter_->readOutput()))
             sendToEncoder(output, audioIdx_);
@@ -300,6 +437,7 @@ MediaRecorder::flush()
     if (!isRecording_ || encoder_->getStreamCount() <= 0)
         return 0;
 
+    emptyFilterGraph();
     encoder_->flush();
 
     return 0;
diff --git a/src/media/media_recorder.h b/src/media/media_recorder.h
index 59538c6330293a35c06e2e7138289bcaa4dc57af..23462ca853dded22eac7dd99e763b31cf25ff156 100644
--- a/src/media/media_recorder.h
+++ b/src/media/media_recorder.h
@@ -44,6 +44,12 @@ class MediaRecorder {
 
         std::string getFilename() const;
 
+        void audioOnly(bool audioOnly);
+
+        // default title is: "Ring recording at %Y-%m-%d %H:%M:%S"
+        // default description is: "Recorded at %Y-%m-%d %H:%M:%S with Ring https://ring.cx"
+        void setMetadata(const std::string& title, const std::string& desc);
+
         void setRecordingPath(const std::string& dir);
 
         // adjust nb of streams before recording
@@ -66,27 +72,36 @@ class MediaRecorder {
         NON_COPYABLE(MediaRecorder);
 
         int initRecord();
+        MediaStream setupVideoOutput();
+        std::string buildVideoFilter();
+        MediaStream setupAudioOutput();
         void emptyFilterGraph();
         int sendToEncoder(AVFrame* frame, int streamIdx);
         int flush();
 
         std::unique_ptr<MediaEncoder> encoder_;
+        std::unique_ptr<MediaFilter> videoFilter_;
         std::unique_ptr<MediaFilter> audioFilter_;
 
         std::mutex mutex_; // protect against concurrent file writes
 
         // isVideo is first key, fromPeer is second
-        std::map<bool, std::map<bool, MediaStream>> streamParams_;
-        std::map<bool, std::map<bool, int64_t>> nextTimestamp_;
+        std::map<bool, std::map<bool, MediaStream>> streams_;
 
+        std::tm startTime_;
         std::string dir_;
         std::string filename_;
+        std::string title_;
+        std::string description_;
 
         unsigned nbExpectedStreams_ = 0;
+        unsigned nbReceivedVideoStreams_ = 0;
         unsigned nbReceivedAudioStreams_ = 0;
+        int videoIdx_ = -1;
         int audioIdx_ = -1;
         bool isRecording_ = false;
         bool isReady_ = false;
+        bool audioOnly_ = false;
 };
 
 }; // namespace ring
diff --git a/src/media/media_stream.h b/src/media/media_stream.h
index 556d236deafbf5eabac8bb82dbb9c50a5ae08635..d074093e1b302ef6bda59fb63ea250ac4a73c02e 100644
--- a/src/media/media_stream.h
+++ b/src/media/media_stream.h
@@ -33,6 +33,7 @@ struct MediaStream {
     int format {-1};
     bool isVideo {false};
     rational<int> timeBase;
+    int64_t firstTimestamp {0};
     int width {0};
     int height {0};
     rational<int> aspectRatio;
@@ -65,10 +66,16 @@ struct MediaStream {
     {}
 
     MediaStream(std::string name, AVStream* st)
+        : MediaStream(name, st, 0)
+    {
+    }
+
+    MediaStream(std::string name, AVStream* st, int64_t firstTimestamp)
         : name(name)
     {
         format = st->codecpar->format;
         timeBase = st->time_base;
+        this->firstTimestamp = firstTimestamp;
         switch (st->codecpar->codec_type) {
         case AVMEDIA_TYPE_VIDEO:
             isVideo = true;
diff --git a/src/media/recordable.cpp b/src/media/recordable.cpp
index 6f043ede6c37ee25304c3ff268028dc709914e45..df1fe59db0f250c2ecaa754fa72abb9769cf9576 100644
--- a/src/media/recordable.cpp
+++ b/src/media/recordable.cpp
@@ -53,6 +53,7 @@ Recordable::toggleRecording()
     if (!recording_ || !recorder_) {
         recorder_.reset();
         recorder_ = std::make_shared<MediaRecorder>();
+        recorder_->audioOnly(isAudioOnly_);
         recorder_->setRecordingPath(Manager::instance().audioPreference.getRecordPath());
     }
     recording_ = recorder_->toggleRecording();
diff --git a/src/media/video/accel.cpp b/src/media/video/accel.cpp
index ac88b59d1a56a445033be7a06a34b78063e13e6e..ae2d96422010d6112ecb044781df7c5b1d8c6dc7 100644
--- a/src/media/video/accel.cpp
+++ b/src/media/video/accel.cpp
@@ -69,9 +69,11 @@ transferFrameData(HardwareAccel accel, AVCodecContext* /*codecCtx*/, VideoFrame&
     auto container = std::unique_ptr<VideoFrame>(new VideoFrame());
     auto output = container->pointer();
 
+    auto pts = input->pts;
     // most hardware accelerations output NV12, so skip extra conversions
     output->format = AV_PIX_FMT_NV12;
     int ret = av_hwframe_transfer_data(output, input, 0);
+    output->pts = pts;
 
     // move output into input so the caller receives extracted image data
     // but we have to delete input's data first
diff --git a/src/media/video/video_receive_thread.cpp b/src/media/video/video_receive_thread.cpp
index 9eeebb6a53e093ddb10112dca415cccbc5770b1b..e118bd4e75aaf0846d45f5650a71bcd87a7a7bfd 100644
--- a/src/media/video/video_receive_thread.cpp
+++ b/src/media/video/video_receive_thread.cpp
@@ -241,7 +241,8 @@ VideoReceiveThread::triggerKeyFrameRequest()
 void
 VideoReceiveThread::startRecorder(std::shared_ptr<ring::MediaRecorder>& rec)
 {
-    videoDecoder_->startRecorder(rec);
+    if (videoDecoder_)
+        videoDecoder_->startRecorder(rec);
 }
 
 }} // namespace ring::video
diff --git a/src/media/video/video_rtp_session.cpp b/src/media/video/video_rtp_session.cpp
index 312ea266d461e22a0d6e11f68dd1411c606f85f0..55abc26d74ac19157707fe2339b8b051e28e86a6 100644
--- a/src/media/video/video_rtp_session.cpp
+++ b/src/media/video/video_rtp_session.cpp
@@ -565,11 +565,15 @@ VideoRtpSession::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
     // video recording needs to start with keyframes
     const constexpr int keyframes = 3;
-    receiveThread_->startRecorder(rec);
-    sender_->startRecorder(rec);
+    if (receiveThread_)
+        receiveThread_->startRecorder(rec);
+    if (sender_)
+        sender_->startRecorder(rec);
     for (int i = 0; i < keyframes; ++i) {
-        receiveThread_->triggerKeyFrameRequest();
-        sender_->forceKeyFrame();
+        if (receiveThread_)
+            receiveThread_->triggerKeyFrameRequest();
+        if (sender_)
+            sender_->forceKeyFrame();
     }
 }
 
diff --git a/src/media/video/video_sender.cpp b/src/media/video/video_sender.cpp
index 41f7e5681063fb0140c36e4bad47a721db8ddca5..ca58aae12926939ebbfd9658acf6116e67d0b3d9 100644
--- a/src/media/video/video_sender.cpp
+++ b/src/media/video/video_sender.cpp
@@ -109,7 +109,8 @@ VideoSender::useCodec(const ring::AccountVideoCodecInfo* codec) const
 void
 VideoSender::startRecorder(std::shared_ptr<MediaRecorder>& rec)
 {
-    videoEncoder_->startRecorder(rec);
+    if (videoEncoder_)
+        videoEncoder_->startRecorder(rec);
 }
 
 }} // namespace ring::video
diff --git a/src/sip/sipcall.cpp b/src/sip/sipcall.cpp
index a86e62a68e6bd1863c1227e4aae7101542bc75cb..20bb887f79c81cdeb25f97e1af32f0eaf3b75a84 100644
--- a/src/sip/sipcall.cpp
+++ b/src/sip/sipcall.cpp
@@ -1154,9 +1154,14 @@ SIPCall::toggleRecording()
 {
     const bool startRecording = Call::toggleRecording();
     if (startRecording) {
-        avformatrtp_->startRecorder(recorder_);
+        std::stringstream ss;
+        ss << "Ring call between " << getSIPAccount().getUserUri() << " and "
+            << (!peerRegistredName_.empty() ? peerRegistredName_ : getPeerNumber());
+        recorder_->setMetadata(ss.str(), ""); // use default description
+        if (avformatrtp_)
+            avformatrtp_->startRecorder(recorder_);
 #ifdef RING_VIDEO
-        if (!isAudioOnly_)
+        if (!isAudioOnly_ && videortp_)
             videortp_->startRecorder(recorder_);
 #endif
     }