From e6ed1e1faba10f303bb16e6ac33a41f6bef06411 Mon Sep 17 00:00:00 2001
From: philippegorley <philippe.gorley@savoirfairelinux.com>
Date: Fri, 13 Jul 2018 15:37:13 -0400
Subject: [PATCH] audiobuffer: add conversion to AVFrame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moves the AudioBuffer to AVFrame conversion logic outside of the
MediaEncoder, so the latter only deals with AVFrame.

Does not copy the buffer, the resulting AVFrame will simply point to the
existing buffer.

This is the first in a series of patches attempting to rid Ring of its
RingBuffer/RingBufferPool system.

Change-Id: I17c9fcdb96dc945c3eeace647303339d558bcf38
Reviewed-by: Adrien Béraud <adrien.beraud@savoirfairelinux.com>
---
 src/media/audio/audio_rtp_session.cpp | 13 ++--
 src/media/audio/audiobuffer.cpp       | 43 ++++++++++++
 src/media/audio/audiobuffer.h         |  4 ++
 src/media/media_encoder.cpp           | 99 +++++----------------------
 src/media/media_encoder.h             |  2 +-
 5 files changed, 76 insertions(+), 85 deletions(-)

diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp
index c51bdf4b93..8082949c34 100644
--- a/src/media/audio/audio_rtp_session.cpp
+++ b/src/media/audio/audio_rtp_session.cpp
@@ -175,6 +175,7 @@ AudioSender::process()
 
     Smartools::getInstance().setLocalAudioCodec(audioEncoder_->getEncoderName());
 
+    AudioBuffer buffer;
     if (mainBuffFormat.sample_rate != accountAudioCodec->audioformat.sample_rate) {
         if (not resampler_) {
             RING_DBG("Creating audio resampler");
@@ -183,12 +184,16 @@ AudioSender::process()
         resampledData_.setFormat(accountAudioCodec->audioformat);
         resampledData_.resize(samplesToGet);
         resampler_->resample(micData_, resampledData_);
-        if (audioEncoder_->encode_audio(resampledData_) < 0)
-            RING_ERR("encoding failed");
+        buffer = resampledData_;
     } else {
-        if (audioEncoder_->encode_audio(micData_) < 0)
-            RING_ERR("encoding failed");
+        buffer = micData_;
     }
+
+    if (muteState_) // audio is muted, set samples to 0
+        buffer.reset();
+
+    if (audioEncoder_->encodeAudio(buffer.toAVFrame()) < 0)
+        RING_ERR("encoding failed");
 }
 void
 AudioSender::setMuted(bool isMuted)
diff --git a/src/media/audio/audiobuffer.cpp b/src/media/audio/audiobuffer.cpp
index 7eaa832b79..9a8ce6d3ff 100644
--- a/src/media/audio/audiobuffer.cpp
+++ b/src/media/audio/audiobuffer.cpp
@@ -18,6 +18,7 @@
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
  */
 
+#include "libav_deps.h"
 #include "audiobuffer.h"
 #include "logger.h"
 #include <string.h>
@@ -289,4 +290,46 @@ size_t AudioBuffer::copy(AudioSample* in, size_t sample_num, size_t pos_out /* =
     return sample_num;
 }
 
+AVFrame*
+AudioBuffer::toAVFrame() const
+{
+    const constexpr AVSampleFormat fmt = AV_SAMPLE_FMT_S16;
+
+    // NOTE: don't use std::vector, avcodec_fill_audio_frame makes the frame point to our array
+    // instead of copying it, resulting in a heap use after free once we return
+    AudioSample** samples;
+    int err = av_samples_alloc_array_and_samples(reinterpret_cast<uint8_t***>(&samples),
+        nullptr, channels(), frames(), fmt, 0);
+    if (err < 0) {
+        RING_ERR() << "Failed to allocate audio buffer";
+        return nullptr;
+    }
+
+    interleave(*samples);
+
+    AVFrame* frame = av_frame_alloc();
+    if (!frame) {
+        RING_ERR() << "Failed to allocate frame";
+        av_freep(samples);
+        return nullptr;
+    }
+
+    frame->nb_samples = frames();
+    frame->format = fmt;
+    frame->channel_layout = av_get_default_channel_layout(channels());
+    frame->channels = channels();
+    frame->sample_rate = getSampleRate();
+
+    const auto bufSize = av_samples_get_buffer_size(nullptr,
+        frame->channels, frame->nb_samples, fmt, 0);
+    if (avcodec_fill_audio_frame(frame, frame->channels, fmt,
+        reinterpret_cast<uint8_t*>(*samples), bufSize, 0) < 0) {
+        RING_ERR() << "Failed to fill audio frame";
+        av_frame_free(&frame);
+        return nullptr;
+    }
+
+    return frame;
+}
+
 } // namespace ring
diff --git a/src/media/audio/audiobuffer.h b/src/media/audio/audiobuffer.h
index bbfbbe7646..9606b34925 100644
--- a/src/media/audio/audiobuffer.h
+++ b/src/media/audio/audiobuffer.h
@@ -34,6 +34,8 @@
 
 #include <ciso646> // fix windows compiler bug
 
+struct AVFrame;
+
 namespace ring {
 
 /**
@@ -343,6 +345,8 @@ class AudioBuffer {
          */
         size_t copy(AudioSample* in, size_t sample_num, size_t pos_out = 0);
 
+        AVFrame* toAVFrame() const;
+
     private:
         int sampleRate_;
 
diff --git a/src/media/media_encoder.cpp b/src/media/media_encoder.cpp
index d26ea2e47d..f5f3d0aa76 100644
--- a/src/media/media_encoder.cpp
+++ b/src/media/media_encoder.cpp
@@ -378,91 +378,30 @@ MediaEncoder::encode(VideoFrame& input, bool is_keyframe,
 }
 #endif // RING_VIDEO
 
-int MediaEncoder::encode_audio(const AudioBuffer &buffer)
+int MediaEncoder::encodeAudio(AVFrame* frame)
 {
-    const int needed_bytes = av_samples_get_buffer_size(nullptr, buffer.channels(),
-                                                        buffer.frames(),
-                                                        AV_SAMPLE_FMT_S16, 0);
-    if (needed_bytes < 0) {
-        RING_ERR("Couldn't calculate buffer size");
-        return -1;
-    }
-
-    std::vector<AudioSample> samples (needed_bytes / sizeof(AudioSample));
-    AudioSample* sample_data = samples.data();
-
-    AudioSample *offset_ptr = sample_data;
-    int nb_frames = buffer.frames();
-
-    AVCodecContext* encoderCtx = encoders_[currentStreamIdx_];
-
-    if (not is_muted) {
-        //only fill buffer with samples if not muted
-        buffer.interleave(sample_data);
-    } else {
-        //otherwise filll buffer with zero
-        buffer.fillWithZero(sample_data);
-    }
-    const auto layout = buffer.channels() == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
-    const auto sample_rate = buffer.getSampleRate();
-
-    while (nb_frames > 0) {
-        AVFrame* frame = av_frame_alloc();
-        if (!frame)
-            return -1;
-
-        if (encoderCtx->frame_size)
-            frame->nb_samples = std::min<int>(nb_frames,
-                                              encoderCtx->frame_size);
-        else
-            frame->nb_samples = nb_frames;
-
-        frame->format = AV_SAMPLE_FMT_S16;
-        frame->channel_layout = layout;
-        frame->channels = buffer.channels();
-        frame->sample_rate = sample_rate;
-
-        frame->pts = getNextTimestamp(sent_samples, encoderCtx->sample_rate, encoderCtx->time_base);
-        sent_samples += frame->nb_samples;
-
-        const auto buffer_size = \
-            av_samples_get_buffer_size(nullptr, buffer.channels(),
-                                       frame->nb_samples, AV_SAMPLE_FMT_S16, 0);
-
-        int err = avcodec_fill_audio_frame(frame, buffer.channels(),
-                                           AV_SAMPLE_FMT_S16,
-                                           reinterpret_cast<const uint8_t *>(offset_ptr),
-                                           buffer_size, 0);
-        if (err < 0) {
-            RING_ERR() << "Failed to fill audio frame of size" << buffer_size << " with "
-                << frame->nb_samples << " samples: " << libav_utils::getError(err);
-            av_frame_free(&frame);
-            return -1;
-        }
-
-        nb_frames -= frame->nb_samples;
-        offset_ptr += frame->nb_samples * buffer.channels();
-
-        if (auto rec = recorder_.lock()) {
-            if (!recordingStarted_) {
-                auto ms = MediaStream("", encoders_[currentStreamIdx_], frame->pts);
-                if (rec->addStream(false, false, ms) >= 0) {
-                    recordingStarted_ = true;
-                } else {
-                    recorder_ = std::weak_ptr<MediaRecorder>();
-                }
+    auto enc = encoders_[currentStreamIdx_];
+    frame->pts = getNextTimestamp(sent_samples, enc->sample_rate, enc->time_base);
+    sent_samples += frame->nb_samples;
+    if (auto rec = recorder_.lock()) {
+        if (!recordingStarted_) {
+            auto ms = MediaStream("", encoders_[currentStreamIdx_], frame->pts);
+            if (rec->addStream(false, false, ms) >= 0) {
+                recordingStarted_ = true;
+            } else {
+                recorder_ = std::weak_ptr<MediaRecorder>();
             }
-            if (recordingStarted_)
-                rec->recordData(frame, false, false);
-        } else {
-            recordingStarted_ = false;
-            recorder_ = std::weak_ptr<MediaRecorder>();
         }
-
-        encode(frame, currentStreamIdx_);
-        av_frame_free(&frame);
+        if (recordingStarted_)
+            rec->recordData(frame, false, false);
+    } else {
+        recordingStarted_ = false;
+        recorder_ = std::weak_ptr<MediaRecorder>();
     }
 
+    encode(frame, currentStreamIdx_);
+    av_frame_free(&frame);
+
     return 0;
 }
 
diff --git a/src/media/media_encoder.h b/src/media/media_encoder.h
index 1ccc784070..9ec7437b8d 100644
--- a/src/media/media_encoder.h
+++ b/src/media/media_encoder.h
@@ -75,7 +75,7 @@ public:
     int encode(VideoFrame &input, bool is_keyframe, int64_t frame_number);
 #endif // RING_VIDEO
 
-    int encode_audio(const AudioBuffer &input);
+    int encodeAudio(AVFrame* frame);
 
     // frame should be ready to be sent to the encoder at this point
     int encode(AVFrame* frame, int streamIdx);
-- 
GitLab