diff --git a/contrib/src/ffmpeg/rules.mak b/contrib/src/ffmpeg/rules.mak
index 119ffb6a3fdfc33f95aad71b9b1f543036839c46..29ad7a3d81e37c5510a1e3e6b3b44d824d773db3 100644
--- a/contrib/src/ffmpeg/rules.mak
+++ b/contrib/src/ffmpeg/rules.mak
@@ -74,6 +74,10 @@ FFMPEGCONF += \
 FFMPEGCONF += \
 	--enable-decoder=flac \
 	--enable-decoder=vorbis \
+	--enable-decoder=aac \
+	--enable-decoder=ac3 \
+	--enable-decoder=eac3 \
+	--enable-decoder=mp3 \
 	--enable-decoder=pcm_u24be \
 	--enable-decoder=pcm_u24le \
 	--enable-decoder=pcm_u32be \
diff --git a/src/media/audio/audio_input.cpp b/src/media/audio/audio_input.cpp
index 96a9bc3f7627ce2d8b4739a8dc863993b6373f4e..ff2aa95c7fe0e794bced8523866020a9b56520b1 100644
--- a/src/media/audio/audio_input.cpp
+++ b/src/media/audio/audio_input.cpp
@@ -19,9 +19,12 @@
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
  */
 
-#include "dring/media_const.h"
+#include "audio_frame_resizer.h"
 #include "audio_input.h"
+#include "dring/media_const.h"
+#include "fileutils.h" // access
 #include "manager.h"
+#include "media_decoder.h"
 #include "resampler.h"
 #include "ringbufferpool.h"
 #include "smartools.h"
@@ -37,7 +40,10 @@ static constexpr auto MS_PER_PACKET = std::chrono::milliseconds(20);
 AudioInput::AudioInput(const std::string& id) :
     id_(id),
     format_(Manager::instance().getRingBufferPool().getInternalAudioFormat()),
+    frameSize_(format_.sample_rate * MS_PER_PACKET.count() / 1000),
     resampler_(new Resampler),
+    resizer_(new AudioFrameResizer(format_, frameSize_,
+       [this](std::shared_ptr<AudioFrame>&& f){ frameResized(std::move(f)); })),
     loop_([] { return true; },
           [this] { process(); },
           [] {})
@@ -62,10 +68,17 @@ AudioInput::process()
             RING_DBG() << "Switching audio input to '" << devOpts_.input << "'";
     }
 
-    auto frame = std::make_shared<AudioFrame>();
-    if (!nextFromDevice(*frame))
-        return; // no frame
+    // send frame to resizer, frameResized will be called when it can be output
+    if (decodingFile_)
+        nextFromFile();
+    else
+        nextFromDevice();
+}
 
+void
+AudioInput::frameResized(std::shared_ptr<AudioFrame>&& ptr)
+{
+    std::shared_ptr<AudioFrame> frame = std::move(ptr);
     auto ms = MediaStream("a:local", format_, sent_samples);
     frame->pointer()->pts = sent_samples;
     sent_samples += frame->pointer()->nb_samples;
@@ -73,8 +86,8 @@ AudioInput::process()
     notify(std::static_pointer_cast<MediaFrame>(frame));
 }
 
-bool
-AudioInput::nextFromDevice(AudioFrame& frame)
+void
+AudioInput::nextFromDevice()
 {
     auto& mainBuffer = Manager::instance().getRingBufferPool();
     auto bufferFormat = mainBuffer.getInternalAudioFormat();
@@ -85,7 +98,7 @@ AudioInput::nextFromDevice(AudioFrame& frame)
 
     if (mainBuffer.availableForGet(id_) < samplesToGet
         && not mainBuffer.waitForDataAvailable(id_, samplesToGet, MS_PER_PACKET)) {
-        return false;
+        return;
     }
 
     // getData resets the format to internal hardware format, will have to be resampled
@@ -93,7 +106,7 @@ AudioInput::nextFromDevice(AudioFrame& frame)
     micData_.resize(samplesToGet);
     const auto samples = mainBuffer.getData(micData_, id_);
     if (samples != samplesToGet)
-        return false;
+        return;
 
     if (muteState_) // audio is muted, set samples to 0
         micData_.reset();
@@ -108,8 +121,56 @@ AudioInput::nextFromDevice(AudioFrame& frame)
     }
 
     auto audioFrame = resampled.toAVFrame();
-    frame.copyFrom(*audioFrame);
-    return true;
+    resizer_->enqueue(std::move(audioFrame));
+}
+
+// Pulls the next frame from decoder_ and, when a complete frame was decoded,
+// hands it to resizer_ in the internal format_ (resampling first if needed).
+// Does nothing when no decoder is set. On RestartRequired/EOFError the decoder
+// is recreated through createDecoder(); read/decode errors are only logged.
+void
+AudioInput::nextFromFile()
+{
+    if (!decoder_)
+        return;
+
+    auto frame = std::make_unique<AudioFrame>();
+    const auto ret = decoder_->decode(*frame);
+
+    std::lock_guard<std::mutex> lk(fmtMutex_);
+    switch(ret) {
+    case MediaDecoder::Status::ReadError:
+    case MediaDecoder::Status::DecodeError:
+        RING_ERR() << "Failed to decode frame";
+        break;
+    case MediaDecoder::Status::RestartRequired:
+    case MediaDecoder::Status::EOFError:
+        createDecoder();
+        break;
+    case MediaDecoder::Status::FrameFinished: {
+        // the frame's properties are only looked at here, once the decoder has
+        // reported a finished frame
+        const auto* decoded = frame->pointer();
+        const AudioFormat inFmt(static_cast<unsigned>(decoded->sample_rate),
+                                static_cast<unsigned>(decoded->channels),
+                                static_cast<AVSampleFormat>(decoded->format));
+        if (inFmt != format_) {
+            // describe the wanted output, resample into it, then keep the result
+            AudioFrame out;
+            out.pointer()->format = format_.sampleFormat;
+            out.pointer()->sample_rate = format_.sample_rate;
+            out.pointer()->channel_layout = av_get_default_channel_layout(format_.nb_channels);
+            out.pointer()->channels = format_.nb_channels;
+            resampler_->resample(frame->pointer(), out.pointer());
+            frame->copyFrom(out);
+        }
+        resizer_->enqueue(std::move(frame));
+        break;
+    }
+    case MediaDecoder::Status::Success:
+    default:
+        break;
+    }
 }
 
 bool
@@ -122,6 +172,33 @@ AudioInput::initDevice(const std::string& device)
     return true;
 }
 
+// Selects the audio file at 'path' as input.
+// Resets devOpts_ to describe the file (loop = "1") and creates its decoder.
+// Returns false when the file is not readable or when no decoder could be
+// created for it; file decoding is only enabled (decodingFile_) on success.
+bool
+AudioInput::initFile(const std::string& path)
+{
+    if (access(path.c_str(), R_OK) != 0) {
+        RING_ERR() << "File '" << path << "' not available";
+        return false;
+    }
+
+    devOpts_ = {};
+    devOpts_.input = path;
+    devOpts_.loop = "1";
+    createDecoder(); // sets devOpts_'s sample rate and number of channels
+    if (!decoder_) {
+        // createDecoder() logs the reason and leaves decoder_ empty on failure;
+        // nextFromFile() does nothing without a decoder, so report the failure
+        // instead of switching process() over to file decoding
+        return false;
+    }
+
+    decodingFile_ = true;
+    return true; // all required info found
+}
+
 std::shared_future<DeviceParams>
 AudioInput::switchInput(const std::string& resource)
 {
@@ -135,6 +201,9 @@ AudioInput::switchInput(const std::string& resource)
 
     RING_DBG() << "Switching audio source to match '" << resource << "'";
 
+    decoder_.reset();
+    decodingFile_ = false;
+
     currentResource_ = resource;
     devOptsFound_ = false;
 
@@ -159,7 +228,13 @@ AudioInput::switchInput(const std::string& resource)
         return {};
 
     const auto suffix = resource.substr(pos + sep.size());
-    if (initDevice(suffix))
+    bool ready = false;
+    if (prefix == DRing::Media::VideoProtocolPrefix::FILE)
+        ready = initFile(suffix);
+    else
+        ready = initDevice(suffix);
+
+    if (ready)
         foundDevOpts(devOpts_);
 
     switchPending_ = true;
@@ -176,11 +251,59 @@ AudioInput::foundDevOpts(const DeviceParams& params)
     }
 }
 
+// Drops the current decoder and tries to build a new one for devOpts_.input.
+// On success decoder_ is set and devOpts_.channel/framerate are filled in from
+// the decoded stream; on failure (or empty input) decoder_ stays empty.
+// foundDevOpts(devOpts_) is called exactly once on every path.
+void
+AudioInput::createDecoder()
+{
+    decoder_.reset();
+
+    // declared at function scope: a decoder that failed to open is released at
+    // function exit, i.e. after foundDevOpts() has been called
+    std::unique_ptr<MediaDecoder> candidate;
+
+    if (not devOpts_.input.empty()) {
+        // NOTE createDecoder is currently only used for files, which require rate emulation
+        candidate = std::make_unique<MediaDecoder>();
+        candidate->emulateRate();
+        candidate->setInterruptCallback(
+            [](void* opaque) -> int {
+                // non-zero as soon as this input is no longer capturing
+                return not static_cast<AudioInput*>(opaque)->isCapturing();
+            },
+            this);
+
+        if (candidate->openInput(devOpts_) < 0) {
+            RING_ERR() << "Could not open input '" << devOpts_.input << "'";
+        } else if (candidate->setupFromAudioData() < 0) {
+            RING_ERR() << "Could not setup decoder for '" << devOpts_.input << "'";
+        } else {
+            auto stream = candidate->getStream(devOpts_.input);
+            devOpts_.channel = stream.nbChannels;
+            devOpts_.framerate = stream.sampleRate;
+            RING_DBG() << "Created audio decoder: " << stream;
+            decoder_ = std::move(candidate);
+        }
+    }
+
+    foundDevOpts(devOpts_);
+}
+
 void
 AudioInput::setFormat(const AudioFormat& fmt)
 {
     std::lock_guard<std::mutex> lk(fmtMutex_);
     format_ = fmt;
+    // samples in one MS_PER_PACKET-long packet at the new sample rate
+    frameSize_ = format_.sample_rate * MS_PER_PACKET.count() / 1000;
+
+    // replace the resizer with one built for the new format and frame size
+    auto onFrame = [this](std::shared_ptr<AudioFrame>&& resized) {
+        frameResized(std::move(resized));
+    };
+    resizer_.reset(new AudioFrameResizer(format_, frameSize_, std::move(onFrame)));
 }
 
 void
diff --git a/src/media/audio/audio_input.h b/src/media/audio/audio_input.h
index 5d442fb2bc800175cadbfa03fffcd943e4429776..071adf2b447abbce8cf8554ef12b43e8eb1826cd 100644
--- a/src/media/audio/audio_input.h
+++ b/src/media/audio/audio_input.h
@@ -33,6 +33,9 @@
 
 namespace ring {
 
+class AudioFrameResizer;
+class MediaDecoder;
+class MediaRecorder;
 struct MediaStream;
 class Resampler;
 
@@ -50,8 +53,12 @@ public:
     MediaStream getInfo() const;
 
 private:
-    bool nextFromDevice(AudioFrame& frame);
+    void nextFromDevice();
+    void nextFromFile();
     bool initDevice(const std::string& device);
+    bool initFile(const std::string& path);
+    void createDecoder();
+    void frameResized(std::shared_ptr<AudioFrame>&& ptr);
 
     std::string id_;
     AudioBuffer micData_;
@@ -59,8 +66,12 @@ private:
     uint64_t sent_samples = 0;
     mutable std::mutex fmtMutex_ {};
     AudioFormat format_;
+    int frameSize_;
 
     std::unique_ptr<Resampler> resampler_;
+    std::unique_ptr<AudioFrameResizer> resizer_;
+    std::weak_ptr<MediaRecorder> recorder_;
+    std::unique_ptr<MediaDecoder> decoder_;
 
     std::string currentResource_;
     std::atomic_bool switchPending_ {false};
@@ -69,6 +80,7 @@ private:
     std::shared_future<DeviceParams> futureDevOpts_;
     std::atomic_bool devOptsFound_ {false};
     void foundDevOpts(const DeviceParams& params);
+    std::atomic_bool decodingFile_ {false};
 
     ThreadLoop loop_;
     void process();
diff --git a/src/media/audio/audio_rtp_session.h b/src/media/audio/audio_rtp_session.h
index eea60715645c256b87d2600d787c71d827a1fb66..9710c5a7e9721e1701b7c83aaa82a56c499f5ba7 100644
--- a/src/media/audio/audio_rtp_session.h
+++ b/src/media/audio/audio_rtp_session.h
@@ -24,8 +24,8 @@
 
 #include "audiobuffer.h"
 #include "media_device.h"
-#include "threadloop.h"
 #include "rtp_session.h"
+#include "threadloop.h"
 
 #include <string>
 #include <memory>