From 20b631fb78d2883aaeebac3c16b0f9db3d127a01 Mon Sep 17 00:00:00 2001
From: philippegorley <philippe.gorley@savoirfairelinux.com>
Date: Wed, 1 Aug 2018 16:52:39 -0400
Subject: [PATCH] audio: refactor resampler class

Using MediaFilter would make the resampling time longer and more
unpredictable than directly using libswresample.

Adds libswresample as a dependency.
Simplifies Resampler class.
Resampler detects changes in input and output formats automatically.
Changes AudioFile to use Resampler instead of MediaFilter.

Change-Id: I24919e8fa514dbb4a38408e338016976e7424136
---
 configure.ac                                 |  2 +
 src/media/Makefile.am                        |  6 +-
 src/media/audio/audio_rtp_session.cpp        |  2 +-
 src/media/audio/audiolayer.cpp               |  6 +-
 src/media/audio/resampler.cpp                | 97 ++++++++++----------
 src/media/audio/resampler.h                  | 45 +++++----
 src/media/audio/sound/audiofile.cpp          | 30 ++----
 src/media/media_decoder.cpp                  |  2 +-
 test/unitTest/media/audio/test_resampler.cpp |  8 +-
 9 files changed, 90 insertions(+), 108 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9b12d4b8b7..ad3c4687f8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -446,6 +446,8 @@ PKG_CHECK_MODULES(LIBAVFILTER, libavfilter >= 5.40.101,, AC_MSG_ERROR([Missing l
 
 PKG_CHECK_MODULES(LIBSWSCALE, libswscale >= 3.1.101,, AC_MSG_ERROR([Missing libswscale development files]))
 
+PKG_CHECK_MODULES(LIBSWRESAMPLE, libswresample >= 1.2.101,, AC_MSG_ERROR([Missing libswresample development files]))
+
 dnl Video is default-enabled
 AC_ARG_ENABLE([video], AS_HELP_STRING([--disable-video], [Disable video]))
 
diff --git a/src/media/Makefile.am b/src/media/Makefile.am
index 5ea98de8d5..e0648bb187 100644
--- a/src/media/Makefile.am
+++ b/src/media/Makefile.am
@@ -49,12 +49,12 @@ libmedia_la_libADD = \
 	./video/libvideo.la
 endif
 
-libmedia_la_LDFLAGS = @LIBAVCODEC_LIBS@ @LIBAVFORMAT_LIBS@ @LIBAVDEVICE_LIBS@ @LIBAVFILTER_LIBS@ @LIBSWSCALE_LIBS@ @LIBAVUTIL_LIBS@
+libmedia_la_LDFLAGS = @LIBAVCODEC_LIBS@ @LIBAVFORMAT_LIBS@ @LIBAVDEVICE_LIBS@ @LIBAVFILTER_LIBS@ @LIBSWRESAMPLE_LIBS@ @LIBSWSCALE_LIBS@ @LIBAVUTIL_LIBS@
 
 if HAVE_WIN32
 libmedia_la_LDFLAGS += -lws2_32 -lwsock32 -lshlwapi
 endif
 
-AM_CFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWSCALE_CFLAGS@
+AM_CFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWRESAMPLE_CFLAGS@ @LIBSWSCALE_CFLAGS@
 
-AM_CXXFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWSCALE_CFLAGS@
+AM_CXXFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWRESAMPLE_CFLAGS@ @LIBSWSCALE_CFLAGS@
diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp
index e8f95109b2..6ed280a817 100644
--- a/src/media/audio/audio_rtp_session.cpp
+++ b/src/media/audio/audio_rtp_session.cpp
@@ -179,7 +179,7 @@ AudioSender::process()
     if (mainBuffFormat.sample_rate != accountAudioCodec->audioformat.sample_rate) {
         if (not resampler_) {
             RING_DBG("Creating audio resampler");
-            resampler_.reset(new Resampler(accountAudioCodec->audioformat));
+            resampler_.reset(new Resampler);
         }
         resampledData_.setFormat(accountAudioCodec->audioformat);
         resampledData_.resize(samplesToGet);
diff --git a/src/media/audio/audiolayer.cpp b/src/media/audio/audiolayer.cpp
index 63205d5658..63759fbf58 100644
--- a/src/media/audio/audiolayer.cpp
+++ b/src/media/audio/audiolayer.cpp
@@ -41,8 +41,8 @@ AudioLayer::AudioLayer(const AudioPreference &pref)
     , audioFormat_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
     , audioInputFormat_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
     , urgentRingBuffer_("urgentRingBuffer_id", SIZEBUF, audioFormat_)
-    , resampler_(new Resampler{audioFormat_.sample_rate})
-    , inputResampler_(new Resampler{audioInputFormat_.sample_rate})
+    , resampler_(new Resampler)
+    , inputResampler_(new Resampler)
     , lastNotificationTime_()
 {
     urgentRingBuffer_.createReadOffset(RingBufferPool::DEFAULT_ID);
@@ -57,13 +57,11 @@ void AudioLayer::hardwareFormatAvailable(AudioFormat playback)
     RING_DBG("Hardware audio format available : %s", playback.toString().c_str());
     audioFormat_ = Manager::instance().hardwareAudioFormatChanged(playback);
     urgentRingBuffer_.setFormat(audioFormat_);
-    resampler_->setFormat(audioFormat_);
 }
 
 void AudioLayer::hardwareInputFormatAvailable(AudioFormat capture)
 {
     RING_DBG("Hardware input audio format available : %s", capture.toString().c_str());
-    inputResampler_->setFormat(capture);
 }
 
 void AudioLayer::devicesChanged()
diff --git a/src/media/audio/resampler.cpp b/src/media/audio/resampler.cpp
index c02285f6c3..abdf639cbe 100644
--- a/src/media/audio/resampler.cpp
+++ b/src/media/audio/resampler.cpp
@@ -19,82 +19,81 @@
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
  */
 
+#include "libav_deps.h"
 #include "logger.h"
+#include "media_buffer.h"
 #include "media_filter.h"
 #include "media_stream.h"
 #include "resampler.h"
 #include "ring_types.h"
 
+extern "C" {
+#include <libswresample/swresample.h>
+}
+
 namespace ring {
 
-Resampler::Resampler(AudioFormat format)
-    : format_(format)
-{
-    setFormat(format);
-}
+Resampler::Resampler()
+    : swrCtx_(swr_alloc())
+{}
 
-Resampler::Resampler(unsigned sample_rate, unsigned channels)
-    : format_(sample_rate, channels)
+Resampler::~Resampler()
 {
-    setFormat(format_);
+    swr_free(&swrCtx_);
 }
 
-Resampler::~Resampler() = default;
-
 void
-Resampler::reinitFilter(const MediaStream& inputParams)
+Resampler::reinit(const AudioFormat& in, const int inSampleFmt,
+                  const AudioFormat& out, const int outSampleFmt)
 {
-    filter_.reset(new MediaFilter());
-    std::stringstream aformat;
-    aformat << "aformat=sample_fmts=s16:channel_layouts="
-        << av_get_default_channel_layout(format_.nb_channels)
-        << ":sample_rates=" << format_.sample_rate;
-    if (filter_->initialize(aformat.str(), inputParams) < 0) {
-        RING_ERR() << "Failed to initialize resampler";
-        filter_.reset();
-    }
+    av_opt_set_int(swrCtx_, "ich", 0, 0);
+    av_opt_set_int(swrCtx_, "icl", av_get_default_channel_layout(in.nb_channels), 0);
+    av_opt_set_int(swrCtx_, "isr", in.sample_rate, 0);
+    av_opt_set_sample_fmt(swrCtx_, "isf", static_cast<AVSampleFormat>(inSampleFmt), 0);
+    av_opt_set_int(swrCtx_, "och", 0, 0);
+    av_opt_set_int(swrCtx_, "ocl", av_get_default_channel_layout(out.nb_channels), 0);
+    av_opt_set_int(swrCtx_, "osr", out.sample_rate, 0);
+    av_opt_set_sample_fmt(swrCtx_, "osf", static_cast<AVSampleFormat>(outSampleFmt), 0);
+    // Report a failed swr_init rather than silently discarding its return value.
+    if (swr_init(swrCtx_) < 0)
+        RING_ERR() << "Failed to initialize resampler";
 }
 
-void
-Resampler::setFormat(AudioFormat format)
+int
+Resampler::resample(const AVFrame* input, AVFrame* output)
 {
-    format_ = format;
-    if (filter_)
-        reinitFilter(filter_->getInputParams());
+    // Compare exactly: a bitwise test matches every negative error code and recurses endlessly.
+    const int ret = swr_convert_frame(swrCtx_, output, input);
+    const int bothChanged = AVERROR_INPUT_CHANGED | AVERROR_OUTPUT_CHANGED; // both flags OR-ed
+    if (ret == AVERROR_INPUT_CHANGED || ret == AVERROR_OUTPUT_CHANGED || ret == bothChanged) {
+        reinit(AudioFormat{(unsigned)input->sample_rate, (unsigned)input->channels}, input->format,
+               AudioFormat{(unsigned)output->sample_rate, (unsigned)output->channels}, output->format);
+        return resample(input, output);
+    }
+    if (ret < 0)
+        RING_ERR() << "Failed to resample frame";
+    return ret < 0 ? -1 : 0;
 }
 
 void
 Resampler::resample(const AudioBuffer& dataIn, AudioBuffer& dataOut)
 {
     auto input = dataIn.toAVFrame();
-    MediaStream currentParams("resampler", static_cast<AVSampleFormat>(input->format),
-        0, input->sample_rate, input->channels);
-    if (filter_) {
-        const auto& ms = filter_->getInputParams();
-        if (ms.sampleRate != input->sample_rate || ms.nbChannels != input->channels) {
-            RING_WARN() << "Resampler settings changed, reinitializing";
-            reinitFilter(currentParams);
-        }
-    } else {
-        reinitFilter(currentParams);
-    }
+    AudioFrame resampled;
+    auto output = resampled.pointer();
+    output->sample_rate = dataOut.getSampleRate();
+    output->channel_layout = av_get_default_channel_layout(dataOut.channels());
+    output->format = AV_SAMPLE_FMT_S16;
 
-    auto frame = filter_->apply(input);
-    av_frame_free(&input);
-    if (!frame) {
-        RING_ERR() << "Resampling failed, this may produce a glitch in the audio";
+    if (resample(input, output) < 0) {
+        av_frame_free(&input);
         return;
     }
 
-    dataOut.setFormat(format_);
-    dataOut.resize(frame->nb_samples);
-    if (static_cast<AVSampleFormat>(frame->format) == AV_SAMPLE_FMT_FLTP)
-        dataOut.convertFloatPlanarToSigned16(frame->extended_data,
-            frame->nb_samples, frame->channels);
-    else if (static_cast<AVSampleFormat>(frame->format) == AV_SAMPLE_FMT_S16)
-        dataOut.deinterleave(reinterpret_cast<const AudioSample*>(frame->extended_data[0]),
-            frame->nb_samples, frame->channels);
-    av_frame_free(&frame);
+    dataOut.resize(output->nb_samples);
+    dataOut.deinterleave(reinterpret_cast<const AudioSample*>(output->extended_data[0]),
+        output->nb_samples, output->channels);
+    av_frame_free(&input);
 }
 
 } // namespace ring
diff --git a/src/media/audio/resampler.h b/src/media/audio/resampler.h
index 57e9d01ac9..57dc99bc65 100644
--- a/src/media/audio/resampler.h
+++ b/src/media/audio/resampler.h
@@ -21,51 +21,48 @@
 
 #pragma once
 
-#include <memory>
-
 #include "audiobuffer.h"
 #include "noncopyable.h"
 #include "ring_types.h"
 
-namespace ring {
+struct AVFrame;
+struct SwrContext;
 
-class MediaFilter;
-struct MediaStream;
+namespace ring {
 
+/**
+ * Wrapper class for libswresample
+ */
 class Resampler {
     public:
-        /**
-         * Resampler is used for several situations:
-        * streaming conversion (RTP, IAX), audiolayer conversion,
-        * audio files conversion. Parameters are used to compute
-        * internal buffer size. Resampler must be reinitialized
-        * every time these parameters change
-        */
-        Resampler(AudioFormat outFormat);
-        Resampler(unsigned sample_rate, unsigned channels=1);
-        // empty dtor, needed for unique_ptr
+        Resampler();
         ~Resampler();
 
         /**
-         * Change the converter sample rate and channel number.
-         * Internal state is lost.
+         * Resample from @input format to @output format.
+         * NOTE: sample_rate, channel_layout, and format should be set on @output
          */
-        void setFormat(AudioFormat format);
+        int resample(const AVFrame* input, AVFrame* output);
 
         /**
-         * resample from the samplerate1 to the samplerate2
-         * @param dataIn Input buffer
-         * @param dataOut Output buffer
+         * Resample from @dataIn format to @dataOut format.
+         *
+         * NOTE: This is a wrapper for resample(AVFrame*, AVFrame*)
          */
         void resample(const AudioBuffer& dataIn, AudioBuffer& dataOut);
 
     private:
         NON_COPYABLE(Resampler);
 
-        void reinitFilter(const MediaStream& inputParams);
+        /**
+         * Reinitializes the resampler when new settings are detected. It is never called as long
+         * as the input and output formats stay the same from one call to the next, because the
+         * first initialization is done by swr_convert_frame.
+         */
+        void reinit(const AudioFormat& in, const int inSampleFmt,
+                    const AudioFormat& out, const int outSampleFmt);
 
-        AudioFormat format_; // number of channels and max output frequency
-        std::unique_ptr<MediaFilter> filter_;
+        SwrContext* swrCtx_; // incomplete type, cannot be a unique_ptr
 };
 
 } // namespace ring
diff --git a/src/media/audio/sound/audiofile.cpp b/src/media/audio/sound/audiofile.cpp
index ae172cf341..195b83b219 100644
--- a/src/media/audio/sound/audiofile.cpp
+++ b/src/media/audio/sound/audiofile.cpp
@@ -69,32 +69,22 @@ AudioFile::AudioFile(const std::string &fileName, unsigned int sampleRate) :
     if (decoder->setupFromAudioData() < 0)
         throw AudioFileException("Decoder setup failed: " + fileName);
 
-    const auto& ms = decoder->getStream();
-
-    auto filter = std::make_unique<MediaFilter>();
-    // aformat=sample_fmts=s16:channel_layouts=stereo
-    if (filter->initialize("aformat=sample_fmts=s16:channel_layouts=stereo|mono:sample_rates="
-        + std::to_string(getFormat().sample_rate), ms) < 0)
-        throw AudioFileException("Failed to create resampler");
-
+    auto resampler = std::make_unique<Resampler>();
     auto buf = std::make_unique<AudioBuffer>(0, getFormat());
     bool done = false;
     while (!done) {
-        AudioFrame frame;
-        AVFrame* resampled;
-        switch (decoder->decode(frame)) {
+        AudioFrame input;
+        AudioFrame output;
+        auto resampled = output.pointer();
+        switch (decoder->decode(input)) {
         case MediaDecoder::Status::FrameFinished:
-            // TODO move this code to Resampler class with conditional resampling
-            if (filter->feedInput(frame.pointer()) < 0)
-                throw AudioFileException("Frame could not be resampled");
-            if (!(resampled = filter->readOutput()))
+            resampled->sample_rate = getFormat().sample_rate;
+            resampled->channel_layout = av_get_default_channel_layout(getFormat().nb_channels);
+            resampled->format = AV_SAMPLE_FMT_S16;
+            if (resampler->resample(input.pointer(), resampled) < 0)
                 throw AudioFileException("Frame could not be resampled");
-            if (buf->append(resampled) < 0) {
-                av_frame_free(&resampled);
+            if (buf->append(resampled) < 0)
                 throw AudioFileException("Error while decoding: " + fileName);
-            } else {
-                av_frame_free(&resampled);
-            }
             break;
         case MediaDecoder::Status::DecodeError:
         case MediaDecoder::Status::ReadError:
diff --git a/src/media/media_decoder.cpp b/src/media/media_decoder.cpp
index e34fa381fe..a3061bb4be 100644
--- a/src/media/media_decoder.cpp
+++ b/src/media/media_decoder.cpp
@@ -463,7 +463,7 @@ MediaDecoder::writeToRingBuffer(const AudioFrame& decodedFrame,
     if ((unsigned)libav_frame->sample_rate != outFormat.sample_rate) {
         if (!resampler_) {
             RING_DBG("Creating audio resampler");
-            resampler_.reset(new Resampler(outFormat));
+            resampler_.reset(new Resampler);
         }
         resamplingBuff_.setFormat({(unsigned) outFormat.sample_rate, (unsigned) decoderCtx_->channels});
         resamplingBuff_.resize(libav_frame->nb_samples);
diff --git a/test/unitTest/media/audio/test_resampler.cpp b/test/unitTest/media/audio/test_resampler.cpp
index 503f43312f..e5ae3adbc5 100644
--- a/test/unitTest/media/audio/test_resampler.cpp
+++ b/test/unitTest/media/audio/test_resampler.cpp
@@ -44,8 +44,6 @@ private:
     CPPUNIT_TEST(testResample);
     CPPUNIT_TEST_SUITE_END();
 
-    void writeWav(); // writes a minimal wav file to test decoding
-
     std::unique_ptr<Resampler> resampler_;
 };
 
@@ -71,12 +69,10 @@ ResamplerTest::testResample()
     const constexpr AudioFormat infmt(44100, 1);
     const constexpr AudioFormat outfmt(48000, 2);
 
-    resampler_.reset(new Resampler(none));
-
-    resampler_->setFormat(outfmt);
+    resampler_.reset(new Resampler);
 
     AudioBuffer inbuf(1024, infmt);
-    AudioBuffer outbuf;
+    AudioBuffer outbuf(0, outfmt);
 
     resampler_->resample(inbuf, outbuf);
     CPPUNIT_ASSERT(outbuf.getFormat().sample_rate == 48000);
-- 
GitLab