diff --git a/src/media/media_encoder.cpp b/src/media/media_encoder.cpp
index b4ea60b312e0812e86acbfc39d36781ed9218f85..9736f2c6fabd4c928922c897bead5d9768ddfdc2 100644
--- a/src/media/media_encoder.cpp
+++ b/src/media/media_encoder.cpp
@@ -341,6 +341,15 @@ MediaEncoder::startIO()
 #endif
 }
 
+// seq: frame number for video, number of samples sent so far for audio
+// sampleFreq: frame rate for video, sample rate for audio
+// clock: stream time base (the unit in which the returned pts is expressed)
+int64_t
+getNextTimestamp(int64_t seq, rational<int64_t> sampleFreq, rational<int64_t> clock)
+{
+    return (seq / (sampleFreq * clock)).real<int64_t>();
+}
+
 #ifdef RING_VIDEO
 int
 MediaEncoder::encode(VideoFrame& input, bool is_keyframe,
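The new helper converts a running sequence count into a timestamp in the stream's time base by evaluating seq / (sampleFreq * clock). The standalone sketch below walks through that arithmetic with a hypothetical two-field Rational standing in for ring's rational<int64_t>; the names and sample values are illustrative only, not taken from the patch.

    // Hypothetical stand-in for ring's rational<int64_t>, just to show the math.
    #include <cstdint>
    #include <iostream>

    struct Rational { std::int64_t num, den; };

    Rational operator*(Rational a, Rational b) { return {a.num * b.num, a.den * b.den}; }

    // Same formula as getNextTimestamp(): seq / (sampleFreq * clock), truncated.
    std::int64_t nextTimestamp(std::int64_t seq, Rational sampleFreq, Rational clock)
    {
        Rational unit = sampleFreq * clock;   // sequence units per time-base tick, as a fraction
        return seq * unit.den / unit.num;
    }

    int main()
    {
        // 30 fps video in the common 1/90000 RTP time base: each frame is 3000 ticks.
        std::cout << nextTimestamp(1,  {30, 1}, {1, 90000}) << '\n';    // 3000
        std::cout << nextTimestamp(30, {30, 1}, {1, 90000}) << '\n';    // 90000, i.e. one second
        // 48 kHz audio with time_base == 1/sample_rate: pts equals the samples sent.
        std::cout << nextTimestamp(960, {48000, 1}, {1, 48000}) << '\n'; // 960
    }

Under those stream parameters, video pts advances by 3000 ticks per frame and audio pts equals the number of samples sent, which is what the encoder hunks below now produce per stream.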
@@ -354,7 +363,8 @@ MediaEncoder::encode(VideoFrame& input, bool is_keyframe,
     scaler_.scale_with_aspect(input, scaledFrame_);
 
     auto frame = scaledFrame_.pointer();
-    frame->pts = frame_number;
+    AVStream* st = outputCtx_->streams[currentStreamIdx_];
+    frame->pts = getNextTimestamp(frame_number, st->avg_frame_rate, st->time_base);
 
     if (is_keyframe) {
         frame->pict_type = AV_PICTURE_TYPE_I;
@@ -412,7 +422,8 @@ int MediaEncoder::encode_audio(const AudioBuffer &buffer)
         frame->channels = buffer.channels();
         frame->sample_rate = sample_rate;
 
-        frame->pts = sent_samples;
+        AVStream* st = outputCtx_->streams[currentStreamIdx_];
+        frame->pts = getNextTimestamp(sent_samples, st->codecpar->sample_rate, st->time_base);
         sent_samples += frame->nb_samples;
 
         const auto buffer_size = \
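For reference, essentially the same values (up to rounding) could be obtained with FFmpeg's own rescaling helpers. This is only an alternative sketch under the same assumptions about st->avg_frame_rate, st->codecpar->sample_rate and st->time_base; it is not what the patch itself does.

    // Alternative sketch using FFmpeg's rescaling helpers (not the patch's approach).
    extern "C" {
    #include <libavutil/mathematics.h>
    #include <libavutil/rational.h>
    }

    // Video: each frame lasts 1/avg_frame_rate seconds; rescale into time-base ticks.
    int64_t videoPts(int64_t frame_number, AVRational avg_frame_rate, AVRational time_base)
    {
        return av_rescale_q(frame_number, av_inv_q(avg_frame_rate), time_base);
    }

    // Audio: each sample lasts 1/sample_rate seconds.
    int64_t audioPts(int64_t sent_samples, int sample_rate, AVRational time_base)
    {
        return av_rescale_q(sent_samples, AVRational{1, sample_rate}, time_base);
    }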
diff --git a/src/media/media_recorder.cpp b/src/media/media_recorder.cpp
index 7d7cc16df9d0636df0ecbf9757d585c6654d7abd..fbb481a337a01c9b631c4b74918e2406ee54c136 100644
--- a/src/media/media_recorder.cpp
+++ b/src/media/media_recorder.cpp
@@ -182,12 +182,6 @@ MediaRecorder::recordData(AVFrame* frame, bool isVideo, bool fromPeer)
     const MediaStream& ms = streams_[isVideo][fromPeer];
     // stream has to start at 0
     input->pts = input->pts - ms.firstTimestamp;
-    // convert streams to rtp timestamps if not already done by FFmpeg
-    if (isVideo && !fromPeer) {
-        input->pts = input->pts / (ms.frameRate * ms.timeBase).real();
-    } else if (!isVideo) {
-        input->pts = input->pts / (ms.timeBase * ms.sampleRate).real();
-    }
 
     if (inputName.empty()) // #nofilters
         return sendToEncoder(input, streamIdx);
@@ -228,7 +222,7 @@ MediaRecorder::initRecord()
 
     videoFilter_.reset();
     if (nbReceivedVideoStreams_ > 0) {
-        auto videoStream = setupVideoOutput();
+        const MediaStream& videoStream = setupVideoOutput();
         if (videoStream.format < 0) {
             RING_ERR() << "Could not retrieve video recorder stream properties";
             return -1;
@@ -242,7 +236,7 @@ MediaRecorder::initRecord()
 
     audioFilter_.reset();
     if (nbReceivedAudioStreams_ > 0) {
-        auto audioStream = setupAudioOutput();
+        const MediaStream& audioStream = setupAudioOutput();
         if (audioStream.format < 0) {
             RING_ERR() << "Could not retrieve audio recorder stream properties";
             return -1;
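The two hunks above replace auto with an explicit const MediaStream& binding. If setupVideoOutput()/setupAudioOutput() return a reference (their signatures are not visible in this diff), auto deduces a plain MediaStream and silently copies it; the minimal sketch below, with made-up names, illustrates that deduction difference.

    #include <iostream>
    #include <string>

    struct Stream { std::string name = "video"; int format = 1; };

    const Stream& getStream()            // stand-in for a setup function returning a reference
    {
        static Stream s;
        return s;
    }

    int main()
    {
        auto copy = getStream();             // auto drops the reference: a full copy of Stream
        const Stream& ref = getStream();     // binds to the stored object, no copy

        copy.name = "local copy";                        // mutates only the copy
        std::cout << getStream().name << '\n';           // still "video"
        std::cout << (&ref == &getStream()) << '\n';     // 1: same object
    }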
@@ -338,8 +332,8 @@ MediaRecorder::buildVideoFilter()
 {
     std::stringstream v;
 
-    const auto p = streams_[true][true];
-    const auto l = streams_[true][false];
+    const MediaStream& p = streams_[true][true];
+    const MediaStream& l = streams_[true][false];
 
     const constexpr int minHeight = 720;
     const auto newFps = std::max(p.frameRate, l.frameRate);