From 20b631fb78d2883aaeebac3c16b0f9db3d127a01 Mon Sep 17 00:00:00 2001 From: philippegorley <philippe.gorley@savoirfairelinux.com> Date: Wed, 1 Aug 2018 16:52:39 -0400 Subject: [PATCH] audio: refactor resampler class Using MediaFilter would make the resampling time longer and more unpredictable than directly using libswresample. Adds libswresample as a dependency. Simplifies Resampler class. Resampler detects changes in input and output formats automatically. Changes Audiofile to use Resampler instead of MediaFilter. Change-Id: I24919e8fa514dbb4a38408e338016976e7424136 --- configure.ac | 2 + src/media/Makefile.am | 6 +- src/media/audio/audio_rtp_session.cpp | 2 +- src/media/audio/audiolayer.cpp | 6 +- src/media/audio/resampler.cpp | 97 ++++++++++---------- src/media/audio/resampler.h | 45 +++++---- src/media/audio/sound/audiofile.cpp | 30 ++---- src/media/media_decoder.cpp | 2 +- test/unitTest/media/audio/test_resampler.cpp | 8 +- 9 files changed, 90 insertions(+), 108 deletions(-) diff --git a/configure.ac b/configure.ac index 9b12d4b8b7..ad3c4687f8 100644 --- a/configure.ac +++ b/configure.ac @@ -446,6 +446,8 @@ PKG_CHECK_MODULES(LIBAVFILTER, libavfilter >= 5.40.101,, AC_MSG_ERROR([Missing l PKG_CHECK_MODULES(LIBSWSCALE, libswscale >= 3.1.101,, AC_MSG_ERROR([Missing libswscale development files])) +PKG_CHECK_MODULES(LIBSWRESAMPLE, libswresample >= 1.2.101,, AC_MSG_ERROR([Missing libswresample development files])) + dnl Video is default-enabled AC_ARG_ENABLE([video], AS_HELP_STRING([--disable-video], [Disable video])) diff --git a/src/media/Makefile.am b/src/media/Makefile.am index 5ea98de8d5..e0648bb187 100644 --- a/src/media/Makefile.am +++ b/src/media/Makefile.am @@ -49,12 +49,12 @@ libmedia_la_libADD = \ ./video/libvideo.la endif -libmedia_la_LDFLAGS = @LIBAVCODEC_LIBS@ @LIBAVFORMAT_LIBS@ @LIBAVDEVICE_LIBS@ @LIBAVFILTER_LIBS@ @LIBSWSCALE_LIBS@ @LIBAVUTIL_LIBS@ +libmedia_la_LDFLAGS = @LIBAVCODEC_LIBS@ @LIBAVFORMAT_LIBS@ @LIBAVDEVICE_LIBS@ @LIBAVFILTER_LIBS@ 
@LIBSWRESAMPLE_LIBS@ @LIBSWSCALE_LIBS@ @LIBAVUTIL_LIBS@ if HAVE_WIN32 libmedia_la_LDFLAGS += -lws2_32 -lwsock32 -lshlwapi endif -AM_CFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWSCALE_CFLAGS@ +AM_CFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWRESAMPLE_CFLAGS@ @LIBSWSCALE_CFLAGS@ -AM_CXXFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWSCALE_CFLAGS@ +AM_CXXFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWRESAMPLE_CFLAGS@ @LIBSWSCALE_CFLAGS@ diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp index e8f95109b2..6ed280a817 100644 --- a/src/media/audio/audio_rtp_session.cpp +++ b/src/media/audio/audio_rtp_session.cpp @@ -179,7 +179,7 @@ AudioSender::process() if (mainBuffFormat.sample_rate != accountAudioCodec->audioformat.sample_rate) { if (not resampler_) { RING_DBG("Creating audio resampler"); - resampler_.reset(new Resampler(accountAudioCodec->audioformat)); + resampler_.reset(new Resampler); } resampledData_.setFormat(accountAudioCodec->audioformat); resampledData_.resize(samplesToGet); diff --git a/src/media/audio/audiolayer.cpp b/src/media/audio/audiolayer.cpp index 63205d5658..63759fbf58 100644 --- a/src/media/audio/audiolayer.cpp +++ b/src/media/audio/audiolayer.cpp @@ -41,8 +41,8 @@ AudioLayer::AudioLayer(const AudioPreference &pref) , audioFormat_(Manager::instance().getRingBufferPool().getInternalAudioFormat()) , audioInputFormat_(Manager::instance().getRingBufferPool().getInternalAudioFormat()) , urgentRingBuffer_("urgentRingBuffer_id", SIZEBUF, audioFormat_) - , resampler_(new Resampler{audioFormat_.sample_rate}) - , inputResampler_(new Resampler{audioInputFormat_.sample_rate}) + , resampler_(new Resampler) + , inputResampler_(new Resampler) , lastNotificationTime_() { urgentRingBuffer_.createReadOffset(RingBufferPool::DEFAULT_ID); 
@@ -57,13 +57,11 @@ void AudioLayer::hardwareFormatAvailable(AudioFormat playback) RING_DBG("Hardware audio format available : %s", playback.toString().c_str()); audioFormat_ = Manager::instance().hardwareAudioFormatChanged(playback); urgentRingBuffer_.setFormat(audioFormat_); - resampler_->setFormat(audioFormat_); } void AudioLayer::hardwareInputFormatAvailable(AudioFormat capture) { RING_DBG("Hardware input audio format available : %s", capture.toString().c_str()); - inputResampler_->setFormat(capture); } void AudioLayer::devicesChanged() diff --git a/src/media/audio/resampler.cpp b/src/media/audio/resampler.cpp index c02285f6c3..abdf639cbe 100644 --- a/src/media/audio/resampler.cpp +++ b/src/media/audio/resampler.cpp @@ -19,82 +19,81 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ +#include "libav_deps.h" #include "logger.h" +#include "media_buffer.h" #include "media_filter.h" #include "media_stream.h" #include "resampler.h" #include "ring_types.h" +extern "C" { +#include <libswresample/swresample.h> +} + namespace ring { -Resampler::Resampler(AudioFormat format) - : format_(format) -{ - setFormat(format); -} +Resampler::Resampler() + : swrCtx_(swr_alloc()) +{} -Resampler::Resampler(unsigned sample_rate, unsigned channels) - : format_(sample_rate, channels) +Resampler::~Resampler() { - setFormat(format_); + swr_free(&swrCtx_); } -Resampler::~Resampler() = default; - void -Resampler::reinitFilter(const MediaStream& inputParams) +Resampler::reinit(const AudioFormat& in, const int inSampleFmt, + const AudioFormat& out, const int outSampleFmt) { - filter_.reset(new MediaFilter()); - std::stringstream aformat; - aformat << "aformat=sample_fmts=s16:channel_layouts=" - << av_get_default_channel_layout(format_.nb_channels) - << ":sample_rates=" << format_.sample_rate; - if (filter_->initialize(aformat.str(), inputParams) < 0) { - RING_ERR() << "Failed to initialize resampler"; - filter_.reset(); - } + av_opt_set_int(swrCtx_, "ich", 
0, 0); + av_opt_set_int(swrCtx_, "icl", av_get_default_channel_layout(in.nb_channels), 0); + av_opt_set_int(swrCtx_, "isr", in.sample_rate, 0); + av_opt_set_sample_fmt(swrCtx_, "isf", static_cast<AVSampleFormat>(inSampleFmt), 0); + + av_opt_set_int(swrCtx_, "och", 0, 0); + av_opt_set_int(swrCtx_, "ocl", av_get_default_channel_layout(out.nb_channels), 0); + av_opt_set_int(swrCtx_, "osr", out.sample_rate, 0); + av_opt_set_sample_fmt(swrCtx_, "osf", static_cast<AVSampleFormat>(outSampleFmt), 0); + + swr_init(swrCtx_); } -void -Resampler::setFormat(AudioFormat format) +int +Resampler::resample(const AVFrame* input, AVFrame* output) { - format_ = format; - if (filter_) - reinitFilter(filter_->getInputParams()); + int ret = swr_convert_frame(swrCtx_, output, input); + if (ret == AVERROR_INPUT_CHANGED || ret == AVERROR_OUTPUT_CHANGED || ret == (AVERROR_INPUT_CHANGED | AVERROR_OUTPUT_CHANGED)) { /* compare by value: the codes are negative, so a bitwise AND is non-zero for every error */ + reinit(AudioFormat{(unsigned)input->sample_rate, (unsigned)input->channels}, input->format, + AudioFormat{(unsigned)output->sample_rate, (unsigned)output->channels}, output->format); + ret = swr_convert_frame(swrCtx_, output, input); /* retry exactly once; recursing here could never terminate on a persistent error */ + } + if (ret < 0) { + RING_ERR() << "Failed to resample frame"; + return -1; + } + return 0; } void Resampler::resample(const AudioBuffer& dataIn, AudioBuffer& dataOut) { auto input = dataIn.toAVFrame(); - MediaStream currentParams("resampler", static_cast<AVSampleFormat>(input->format), - 0, input->sample_rate, input->channels); - if (filter_) { - const auto& ms = filter_->getInputParams(); - if (ms.sampleRate != input->sample_rate || ms.nbChannels != input->channels) { - RING_WARN() << "Resampler settings changed, reinitializing"; - reinitFilter(currentParams); - } - } else { - reinitFilter(currentParams); - } + AudioFrame resampled; + auto output = resampled.pointer(); + output->sample_rate = dataOut.getSampleRate(); + output->channel_layout = av_get_default_channel_layout(dataOut.channels()); + output->format = AV_SAMPLE_FMT_S16; - auto frame = filter_->apply(input); - av_frame_free(&input); - if (!frame) { - 
RING_ERR() << "Resampling failed, this may produce a glitch in the audio"; + if (resample(input, output) < 0) { + av_frame_free(&input); return; } - dataOut.setFormat(format_); - dataOut.resize(frame->nb_samples); - if (static_cast<AVSampleFormat>(frame->format) == AV_SAMPLE_FMT_FLTP) - dataOut.convertFloatPlanarToSigned16(frame->extended_data, - frame->nb_samples, frame->channels); - else if (static_cast<AVSampleFormat>(frame->format) == AV_SAMPLE_FMT_S16) - dataOut.deinterleave(reinterpret_cast<const AudioSample*>(frame->extended_data[0]), - frame->nb_samples, frame->channels); - av_frame_free(&frame); + dataOut.resize(output->nb_samples); + dataOut.deinterleave(reinterpret_cast<const AudioSample*>(output->extended_data[0]), + output->nb_samples, output->channels); + av_frame_free(&input); } } // namespace ring diff --git a/src/media/audio/resampler.h b/src/media/audio/resampler.h index 57e9d01ac9..57dc99bc65 100644 --- a/src/media/audio/resampler.h +++ b/src/media/audio/resampler.h @@ -21,51 +21,48 @@ #pragma once -#include <memory> - #include "audiobuffer.h" #include "noncopyable.h" #include "ring_types.h" -namespace ring { +struct AVFrame; +struct SwrContext; -class MediaFilter; -struct MediaStream; +namespace ring { +/** + * Wrapper class for libswresample + */ class Resampler { public: - /** - * Resampler is used for several situations: - * streaming conversion (RTP, IAX), audiolayer conversion, - * audio files conversion. Parameters are used to compute - * internal buffer size. Resampler must be reinitialized - * every time these parameters change - */ - Resampler(AudioFormat outFormat); - Resampler(unsigned sample_rate, unsigned channels=1); - // empty dtor, needed for unique_ptr + Resampler(); ~Resampler(); /** - * Change the converter sample rate and channel number. - * Internal state is lost. + * Resample from @input format to @output format. 
+ * NOTE: sample_rate, channel_layout, and format should be set on @output */ - void setFormat(AudioFormat format); + int resample(const AVFrame* input, AVFrame* output); /** - * resample from the samplerate1 to the samplerate2 - * @param dataIn Input buffer - * @param dataOut Output buffer + * Resample from @dataIn format to @dataOut format. + * + * NOTE: This is a wrapper for resample(AVFrame*, AVFrame*) */ void resample(const AudioBuffer& dataIn, AudioBuffer& dataOut); private: NON_COPYABLE(Resampler); - void reinitFilter(const MediaStream& inputParams); + /** + * Reinitializes the resampler when new settings are detected. As long as both input and + * output buffers always have the same formats, will never be called, as the first + * initialization is done in swr_convert_frame. + */ + void reinit(const AudioFormat& in, const int inSampleFmt, + const AudioFormat& out, const int outSampleFmt); - AudioFormat format_; // number of channels and max output frequency - std::unique_ptr<MediaFilter> filter_; + SwrContext* swrCtx_; // incomplete type, cannot be a unique_ptr }; } // namespace ring diff --git a/src/media/audio/sound/audiofile.cpp b/src/media/audio/sound/audiofile.cpp index ae172cf341..195b83b219 100644 --- a/src/media/audio/sound/audiofile.cpp +++ b/src/media/audio/sound/audiofile.cpp @@ -69,32 +69,22 @@ AudioFile::AudioFile(const std::string &fileName, unsigned int sampleRate) : if (decoder->setupFromAudioData() < 0) throw AudioFileException("Decoder setup failed: " + fileName); - const auto& ms = decoder->getStream(); - - auto filter = std::make_unique<MediaFilter>(); - // aformat=sample_fmts=s16:channel_layouts=stereo - if (filter->initialize("aformat=sample_fmts=s16:channel_layouts=stereo|mono:sample_rates=" - + std::to_string(getFormat().sample_rate), ms) < 0) - throw AudioFileException("Failed to create resampler"); - + auto resampler = std::make_unique<Resampler>(); auto buf = std::make_unique<AudioBuffer>(0, getFormat()); bool done = false; while 
(!done) { - AudioFrame frame; - AVFrame* resampled; - switch (decoder->decode(frame)) { + AudioFrame input; + AudioFrame output; + auto resampled = output.pointer(); + switch (decoder->decode(input)) { case MediaDecoder::Status::FrameFinished: - // TODO move this code to Resampler class with conditional resampling - if (filter->feedInput(frame.pointer()) < 0) - throw AudioFileException("Frame could not be resampled"); - if (!(resampled = filter->readOutput())) + resampled->sample_rate = getFormat().sample_rate; + resampled->channel_layout = av_get_default_channel_layout(getFormat().nb_channels); + resampled->format = AV_SAMPLE_FMT_S16; + if (resampler->resample(input.pointer(), resampled) < 0) throw AudioFileException("Frame could not be resampled"); - if (buf->append(resampled) < 0) { - av_frame_free(&resampled); + if (buf->append(resampled) < 0) throw AudioFileException("Error while decoding: " + fileName); - } else { - av_frame_free(&resampled); - } break; case MediaDecoder::Status::DecodeError: case MediaDecoder::Status::ReadError: diff --git a/src/media/media_decoder.cpp b/src/media/media_decoder.cpp index e34fa381fe..a3061bb4be 100644 --- a/src/media/media_decoder.cpp +++ b/src/media/media_decoder.cpp @@ -463,7 +463,7 @@ MediaDecoder::writeToRingBuffer(const AudioFrame& decodedFrame, if ((unsigned)libav_frame->sample_rate != outFormat.sample_rate) { if (!resampler_) { RING_DBG("Creating audio resampler"); - resampler_.reset(new Resampler(outFormat)); + resampler_.reset(new Resampler); } resamplingBuff_.setFormat({(unsigned) outFormat.sample_rate, (unsigned) decoderCtx_->channels}); resamplingBuff_.resize(libav_frame->nb_samples); diff --git a/test/unitTest/media/audio/test_resampler.cpp b/test/unitTest/media/audio/test_resampler.cpp index 503f43312f..e5ae3adbc5 100644 --- a/test/unitTest/media/audio/test_resampler.cpp +++ b/test/unitTest/media/audio/test_resampler.cpp @@ -44,8 +44,6 @@ private: CPPUNIT_TEST(testResample); CPPUNIT_TEST_SUITE_END(); - void 
writeWav(); // writes a minimal wav file to test decoding - std::unique_ptr<Resampler> resampler_; }; @@ -71,12 +69,10 @@ ResamplerTest::testResample() const constexpr AudioFormat infmt(44100, 1); const constexpr AudioFormat outfmt(48000, 2); - resampler_.reset(new Resampler(none)); - - resampler_->setFormat(outfmt); + resampler_.reset(new Resampler); AudioBuffer inbuf(1024, infmt); - AudioBuffer outbuf; + AudioBuffer outbuf(0, outfmt); resampler_->resample(inbuf, outbuf); CPPUNIT_ASSERT(outbuf.getFormat().sample_rate == 48000); -- GitLab