diff --git a/daemon/configure.ac b/daemon/configure.ac
index 6f6ef4a9a7e697d25f75db38114fc348aefc7aee..1214c8dc84b4289a4148c6c979f845d13031411e 100644
--- a/daemon/configure.ac
+++ b/daemon/configure.ac
@@ -366,6 +366,7 @@ AS_IF([test "x$enable_video" != "xno"],
     ],
     [
      AM_CONDITIONAL(SFL_VIDEO, false)
+     AC_DEFINE_UNQUOTED([USE_CCRTP], 1, [Use ccrtp instead of libavformat])
      ]);
 
 LIBCCRTP_MIN_VERSION=1.3.0
diff --git a/daemon/src/audio/audiortp/Makefile.am b/daemon/src/audio/audiortp/Makefile.am
index 642f365a1ad8c65078b48cfd7e4bff82ec8d047f..360b36dc01e8eb17a4c8414f6cb587d4901e998f 100644
--- a/daemon/src/audio/audiortp/Makefile.am
+++ b/daemon/src/audio/audiortp/Makefile.am
@@ -8,6 +8,17 @@ endif
 
 libaudiortp_la_SOURCES = \
 		$(SFL_ZRTP_SRC) \
+		base64.c base64.h
+
+if SFL_VIDEO
+libaudiortp_la_SOURCES += \
+		avformat_rtp_session.cpp \
+		avformat_rtp_session.h
+
+AM_CXXFLAGS = @LIBAVFORMAT_CFLAGS@
+
+else
+libaudiortp_la_SOURCES += \
 		audio_rtp_session.cpp \
 		audio_symmetric_rtp_session.cpp \
 		audio_rtp_stream.cpp \
@@ -19,5 +30,8 @@ libaudiortp_la_SOURCES = \
 		audio_rtp_stream.h \
 		audio_rtp_factory.h \
 		audio_symmetric_rtp_session.h \
-		audio_srtp_session.h \
-		base64.c base64.h
+		audio_srtp_session.h
+endif
+
+# FIXME
+AM_CPPFLAGS += -I$(top_srcdir)/src
diff --git a/daemon/src/audio/audiortp/TODO b/daemon/src/audio/audiortp/TODO
new file mode 100644
index 0000000000000000000000000000000000000000..06bf60ae98d7519436f509de8c85c2292fe632da
--- /dev/null
+++ b/daemon/src/audio/audiortp/TODO
@@ -0,0 +1,32 @@
+Tested and Working
+------------------
+* Opus
+* PCMU
+* PCMA
+* speex narrowband
+* G722
+* Mono and stereo input
+
+Needs to be implemented
+-----------------------
+* SRTP
+* DTMF over RTP
+* RTP + STUN
+* Rename Video{Encoder,Decoder} to AV{Encoder,Decoder}
+* Drop CCRTP and its dependencies and its dependents for real
+
+Needs to be fixed:
+------------------
+* speex wideband fails with "Invalid data found when processing input"
+* speex ultraband fails with "Invalid data found when processing input"
+
+Might work if libavcodec is built with support for it (untested):
+-----------------------------------------------------------------
+* ILBC
+* g726
+
+Can't work:
+-----------
+* gsm
+* g729
+* libavformat is MISSING RTP mux/demux for gsm and g729, see is_supported(enum AVCodecID id) in libavformat/rtpenc.c
diff --git a/daemon/src/audio/audiortp/avformat_rtp_session.cpp b/daemon/src/audio/audiortp/avformat_rtp_session.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..78c11db8ff4b740f3cd622c7ab2a6d245c8c3159
--- /dev/null
+++ b/daemon/src/audio/audiortp/avformat_rtp_session.cpp
@@ -0,0 +1,489 @@
+/*
+ *  Copyright (C) 2014 Savoir-Faire Linux Inc.
+ *  Author: Tristan Matthews <tristan.matthews@savoirfairelinux.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ *
+ *  Additional permission under GNU GPL version 3 section 7:
+ *
+ *  If you modify this program, or any covered work, by linking or
+ *  combining it with the OpenSSL project's OpenSSL library (or a
+ *  modified version of that library), containing parts covered by the
+ *  terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc.
+ *  grants you additional permission to convey the resulting work.
+ *  Corresponding Source for a non-source form of such a combination
+ *  shall include the source code for the parts of OpenSSL used as well
+ *  as that of the covered work.
+ */
+#include "avformat_rtp_session.h"
+
+#include "logger.h"
+#include "noncopyable.h"
+#include "sip/sdp.h"
+#include "video/socket_pair.h"
+#include "video/video_base.h"
+#include "video/video_encoder.h"
+#include "video/video_decoder.h"
+#include "video/libav_deps.h"
+#include "audio/audiobuffer.h"
+#include "audio/ringbufferpool.h"
+#include "audio/resampler.h"
+#include "manager.h"
+#include <sstream>
+
+namespace sfl {
+using sfl_video::SocketPair;
+using sfl_video::VideoEncoder;
+using sfl_video::VideoIOHandle;
+using sfl_video::VideoEncoderException;
+
+class AudioSender { // encodes one call's outgoing audio and sends it over RTP from its own ThreadLoop
+    public:
+        AudioSender(const std::string& id,
+                    std::map<std::string, std::string> txArgs,
+                    sfl_video::SocketPair& socketPair);
+        ~AudioSender();
+
+    private:
+        NON_COPYABLE(AudioSender);
+
+        bool waitForDataEncode(const std::chrono::milliseconds& max_wait) const;
+        bool setup(sfl_video::SocketPair& socketPair);
+
+        std::string id_; // call id, used as the key into the ring buffer pool
+        std::map<std::string, std::string> args_; // encoder options; the constructor adds "frame_size"
+        const AudioFormat format_; // built from args_["sample_rate"] and args_["channels"]
+        std::unique_ptr<sfl_video::VideoEncoder> audioEncoder_;
+        std::unique_ptr<sfl_video::VideoIOHandle> muxContext_; // IO context obtained from the socket pair in setup()
+        std::unique_ptr<sfl::Resampler> resampler_; // created in process() the first time resampling is needed
+        const double secondsPerPacket_ {0.02}; // 20 ms
+
+        ThreadLoop loop_; // runs setup(), process() and cleanup() below
+        void process();
+        void cleanup();
+};
+
+// Derives the audio format and frame size from the options, then starts the worker loop.
+AudioSender::AudioSender(const std::string& id, std::map<std::string, std::string> txArgs, SocketPair& socketPair)
+    : id_(id)
+    , args_(txArgs)
+    , format_(std::atoi(args_["sample_rate"].c_str()), std::atoi(args_["channels"].c_str()))
+    , loop_([this, &socketPair] { return setup(socketPair); },
+            [this] { process(); },
+            [this] { cleanup(); })
+{
+    std::ostringstream frameSize;
+    frameSize << secondsPerPacket_ * format_.sample_rate; // samples per packet
+    args_["frame_size"] = frameSize.str();
+    loop_.start();
+}
+
+AudioSender::~AudioSender()
+{
+    loop_.join(); // presumably blocks until the worker loop has ended — TODO confirm against ThreadLoop
+}
+
+// ThreadLoop setup hook: creates the encoder and its IO context, opens the
+// "rtp" output and logs the resulting SDP. Returns false if the encoder throws.
+bool
+AudioSender::setup(SocketPair& socketPair)
+{
+    const std::string& codecName = args_["codec"];
+    const std::string& destination = args_["destination"];
+
+    audioEncoder_.reset(new VideoEncoder);
+    muxContext_.reset(socketPair.createIOContext());
+
+    try {
+        audioEncoder_->setOptions(args_);
+        audioEncoder_->openOutput(codecName.c_str(), "rtp", destination.c_str(), NULL, false);
+        audioEncoder_->setIOContext(muxContext_);
+        audioEncoder_->startIO();
+    } catch (const VideoEncoderException& err) {
+        SFL_ERR("%s", err.what());
+        return false;
+    }
+
+    std::string description;
+    audioEncoder_->print_sdp(description);
+    SFL_WARN("\n%s", description.c_str());
+    return true;
+}
+
+void
+AudioSender::cleanup()
+{
+    audioEncoder_.reset(); // the encoder goes first: setup() handed it muxContext_
+    muxContext_.reset();
+}
+
+// One loop iteration: pull one packet's worth of samples for this call from the
+// ring buffer pool, adapt channels/sample rate to the encoder format, encode.
+void
+AudioSender::process()
+{
+    auto& pool = Manager::instance().getRingBufferPool();
+    auto mainBuffFormat = pool.getInternalAudioFormat();
+    double resampleFactor = mainBuffFormat.sample_rate / (double) format_.sample_rate;
+
+    // number of samples, at the internal rate, that make up one outgoing packet
+    const size_t samplesToGet = resampleFactor * secondsPerPacket_ * format_.sample_rate;
+
+    if (pool.availableForGet(id_) < samplesToGet)
+        return;
+
+    // FIXME
+    AudioBuffer micData(samplesToGet, mainBuffFormat);
+    const size_t samples = pool.getData(micData, id_);
+    micData.setChannelNum(format_.nb_channels, true); // down/upmix as needed
+
+    if (samples != samplesToGet) {
+        // both counts are size_t, so they must be printed with %zu, not %d
+        SFL_ERR("Asked for %zu samples from bindings on call '%s', got %zu",
+                samplesToGet, id_.c_str(), samples);
+        return;
+    }
+
+    if (mainBuffFormat.sample_rate != format_.sample_rate) {
+        if (not resampler_) {
+            SFL_DBG("Creating audio resampler");
+            resampler_.reset(new Resampler(format_));
+        }
+        AudioBuffer resampledData(samplesToGet, format_);
+        resampler_->resample(micData, resampledData);
+        if (audioEncoder_->encode_audio(resampledData) < 0)
+            SFL_ERR("encoding failed");
+    } else if (audioEncoder_->encode_audio(micData) < 0) {
+        SFL_ERR("encoding failed");
+    }
+
+    // wait (for at most one packet duration) for the next packet's samples
+    const int millisecondsPerPacket = secondsPerPacket_ * 1000;
+    waitForDataEncode(std::chrono::milliseconds(millisecondsPerPacket));
+}
+
+// Waits up to max_wait for one packet's worth of samples to be available for this call.
+bool
+AudioSender::waitForDataEncode(const std::chrono::milliseconds& max_wait) const
+{
+    auto& pool = Manager::instance().getRingBufferPool();
+    const auto internalFormat = pool.getInternalAudioFormat();
+    const auto rateRatio = (double) internalFormat.sample_rate / format_.sample_rate;
+    const size_t wanted = rateRatio * secondsPerPacket_ * format_.sample_rate;
+    return pool.waitForDataAvailable(id_, wanted, max_wait);
+}
+
+class AudioReceiveThread // decodes one call's incoming audio and writes it to the call's ring buffer
+{
+    public:
+        AudioReceiveThread(const std::string &id, const std::string &sdp);
+        ~AudioReceiveThread();
+        void addIOContext(sfl_video::SocketPair &socketPair); // sets demuxContext_
+        void startLoop(); // starts loop_; the constructor does not
+
+    private:
+        NON_COPYABLE(AudioReceiveThread);
+
+        static constexpr int SDP_BUFFER_SIZE = 8192; // first argument given to sdpContext_'s VideoIOHandle
+        static constexpr auto SDP_FILENAME = "dummyFilename"; // name passed to openInput() in setup()
+
+        std::map<std::string, std::string> args_; // decoder options; setup() sets "sdp_flags"
+
+        static int interruptCb(void *ctx);
+        static int readFunction(void *opaque, uint8_t *buf, int buf_size);
+
+        void openDecoder(); // NOTE(review): no definition is visible in this change — confirm it is needed
+        bool decodeFrame(); // NOTE(review): no definition is visible in this change — confirm it is needed
+
+        /*-----------------------------------------------------------------*/
+        /* These variables should be used in thread (i.e. process()) only! */
+        /*-----------------------------------------------------------------*/
+        const std::string id_;
+        std::istringstream stream_; // SDP text, read through readFunction()
+        std::unique_ptr<sfl_video::VideoDecoder> audioDecoder_;
+        std::unique_ptr<sfl_video::VideoIOHandle> sdpContext_; // IO context backed by readFunction()
+        std::unique_ptr<sfl_video::VideoIOHandle> demuxContext_; // IO context created from the socket pair
+        std::shared_ptr<sfl::RingBuffer> ringbuffer_; // looked up by id_ in setup()
+
+        ThreadLoop loop_; // runs setup(), process() and cleanup() below
+        bool setup();
+        void process();
+        void cleanup();
+};
+
+AudioReceiveThread::AudioReceiveThread(const std::string& id, const std::string& sdp)
+    : id_(id)
+    , stream_(sdp) // SDP text, later consumed through readFunction()
+    , sdpContext_(new VideoIOHandle(SDP_BUFFER_SIZE, false, &readFunction, 0, 0, this)) // readFunction() casts its opaque argument back to this object
+    , loop_(std::bind(&AudioReceiveThread::setup, this),
+            std::bind(&AudioReceiveThread::process, this),
+            std::bind(&AudioReceiveThread::cleanup, this)) // not started here; see startLoop()
+{}
+
+AudioReceiveThread::~AudioReceiveThread()
+{
+    loop_.join(); // presumably blocks until the worker loop has ended — TODO confirm against ThreadLoop
+}
+
+
+bool
+AudioReceiveThread::setup() // ThreadLoop setup hook; process() also calls it again after a decode error
+{
+    audioDecoder_.reset(new sfl_video::VideoDecoder());
+    audioDecoder_->setInterruptCallback(interruptCb, this);
+    // custom_io so the SDP demuxer will not open any UDP connections
+    args_["sdp_flags"] = "custom_io";
+    EXIT_IF_FAIL(not stream_.str().empty(), "No SDP loaded");
+    audioDecoder_->setIOContext(sdpContext_.get()); // the SDP text is served by readFunction() through sdpContext_
+    audioDecoder_->setOptions(args_);
+    EXIT_IF_FAIL(not audioDecoder_->openInput(SDP_FILENAME, "sdp"),
+                 "Could not open input \"%s\"", SDP_FILENAME);
+    // Now replace our custom AVIOContext with one that will read packets
+    audioDecoder_->setIOContext(demuxContext_.get()); // NOTE(review): null unless addIOContext() ran first
+    // NOTE(review): when re-run, stream_ was already consumed by readFunction() — confirm the SDP can be read again
+    EXIT_IF_FAIL(not audioDecoder_->setupFromAudioData(),
+                 "decoder IO startup failed");
+    // the checks above pass when openInput()/setupFromAudioData() return a falsy value
+    ringbuffer_ = Manager::instance().getRingBufferPool().getRingBuffer(id_);
+    return true;
+}
+
+// One loop iteration: decode one frame and push it to this call's ring buffer.
+// A decode error triggers a full decoder re-setup; a read error, or a failed
+// re-setup, stops the loop.
+void
+AudioReceiveThread::process()
+{
+    sfl::AudioFormat mainBuffFormat = Manager::instance().getRingBufferPool().getInternalAudioFormat();
+    std::unique_ptr<AVFrame, void(*)(AVFrame*)> decodedFrame(av_frame_alloc(), [](AVFrame*p){av_frame_free(&p);});
+
+    switch (audioDecoder_->decode_audio(decodedFrame.get())) {
+        case sfl_video::VideoDecoder::Status::FrameFinished:
+            audioDecoder_->writeToRingBuffer(decodedFrame.get(), *ringbuffer_,
+                                             mainBuffFormat);
+            return;
+
+        case sfl_video::VideoDecoder::Status::DecodeError:
+            SFL_WARN("decoding failure, trying to reset decoder...");
+            // setup() already runs and checks setupFromAudioData() (which it
+            // treats as successful when the result is falsy). Calling it a
+            // second time here and treating a falsy result as fatal made
+            // every successful recovery stop the loop.
+            if (not setup()) {
+                SFL_ERR("fatal error, rx thread re-setup failed");
+                loop_.stop();
+            }
+            break;
+
+        case sfl_video::VideoDecoder::Status::ReadError:
+            SFL_ERR("fatal error, read failed");
+            loop_.stop();
+            break;
+
+        default:
+            break;
+    }
+}
+
+void
+AudioReceiveThread::cleanup()
+{
+    audioDecoder_.reset(); // the decoder goes first: setup() pointed it at demuxContext_
+    demuxContext_.reset();
+}
+
+// Read callback given to sdpContext_: copies up to buf_size bytes of the SDP text into buf.
+int
+AudioReceiveThread::readFunction(void* opaque, uint8_t* buf, int buf_size)
+{
+    auto& sdpText = static_cast<AudioReceiveThread*>(opaque)->stream_;
+    return sdpText.read(reinterpret_cast<char*>(buf), buf_size).gcount();
+}
+
+// Interrupt callback used by libav internally to break out of blocking calls:
+// returns non-zero once the loop is no longer running.
+int
+AudioReceiveThread::interruptCb(void* data)
+{
+    return not static_cast<AudioReceiveThread*>(data)->loop_.isRunning();
+}
+
+void
+AudioReceiveThread::addIOContext(SocketPair& socketPair)
+{
+    demuxContext_.reset(socketPair.createIOContext()); // setup() later hands this context to the decoder
+}
+
+void
+AudioReceiveThread::startLoop()
+{
+    loop_.start(); // startReceiver() calls this only after addIOContext()
+}
+
+AVFormatRtpSession::AVFormatRtpSession(const std::string& id,
+                                       const std::map<std::string, std::string>& txArgs)
+    : id_(id), txArgs_(txArgs) // txArgs_ is this session's own copy; updateDestination()/updateSDP() modify it
+{
+    // don't move this into the initializer list or Cthulus will emerge
+    ringbuffer_ = Manager::instance().getRingBufferPool().createRingBuffer(id_); // ring buffer keyed by the call id
+}
+
+AVFormatRtpSession::~AVFormatRtpSession()
+{
+    stop(); // shuts down the receive thread, the sender and the socket pair
+}
+
+// Refreshes the stored receiving SDP and the sending_/receiving_ flags from the
+// negotiated audio description, then reloads the outgoing encoder settings.
+void
+AVFormatRtpSession::updateSDP(const Sdp& sdp)
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+    const std::string desc(sdp.getIncomingAudioDescription());
+    const auto mentions = [&desc](const char* token) {
+        return desc.find(token) != std::string::npos;
+    };
+    const auto setDirection = [this](bool send, bool receive) { sending_ = send; receiving_ = receive; };
+
+    // remember the description whenever it differs from the stored one
+    if (not desc.empty() and desc != receivingSDP_) {
+        receivingSDP_ = desc;
+        SFL_WARN("Updated incoming SDP to:\n%s",
+                receivingSDP_.c_str());
+    }
+
+    // first matching keyword wins; with no direction keyword the flags keep their values
+    if (mentions("sendrecv")) {
+        SFL_DBG("Sending and receiving audio");
+        setDirection(true, true);
+    } else if (desc.empty() or mentions("inactive")) {
+        SFL_DBG("Audio is inactive");
+        setDirection(false, false);
+    } else if (mentions("sendonly")) {
+        SFL_DBG("Receiving audio disabled, audio set to sendonly");
+        setDirection(true, false);
+    } else if (mentions("recvonly")) {
+        SFL_DBG("Sending audio disabled, audio set to recvonly");
+        setDirection(false, true);
+    }
+
+    // even if it says sendrecv or recvonly, our peer may disable audio by
+    // setting the port to 0
+    if (mentions("m=audio 0")) {
+        SFL_DBG("Receiving audio disabled, port was set to 0");
+        receiving_ = false;
+    }
+
+    if (sending_)
+        sending_ = sdp.getOutgoingAudioSettings(txArgs_);
+}
+
+// Sets the outgoing destination to rtp://destination:port; a change is refused
+// while a sender exists, and a port of 0 disables sending.
+void
+AVFormatRtpSession::updateDestination(const std::string& destination,
+                                      unsigned int port)
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+    if (destination.empty()) {
+        SFL_WARN("Destination is empty, ignoring");
+        return;
+    }
+    std::ostringstream uri;
+    uri << "rtp://" << destination << ":" << port;
+    const std::string target(uri.str());
+    std::string& current = txArgs_["destination"];
+    if (target != current) {
+        if (sender_) {
+            SFL_WARN("Audio is already being sent");
+            return;
+        }
+        current = target;
+        SFL_DBG("updated dest to %s", current.c_str());
+    }
+
+    if (port == 0) {
+        SFL_DBG("Sending audio disabled, port was set to 0");
+        sending_ = false;
+    }
+}
+
+// (Re)creates the audio sender when sending is enabled; sending is switched
+// off again if a VideoEncoderException escapes the construction.
+void
+AVFormatRtpSession::startSender()
+{
+    if (sending_) {
+        if (sender_)
+            SFL_WARN("Restarting audio sender");
+        try {
+            sender_.reset(new AudioSender(id_, txArgs_, *socketPair_));
+        } catch (const VideoEncoderException& err) {
+            SFL_ERR("%s", err.what());
+            sending_ = false;
+        }
+    }
+}
+
+// Starts (or restarts) the receive thread when receiving is enabled, otherwise drops it.
+void
+AVFormatRtpSession::startReceiver()
+{
+    if (not receiving_) {
+        SFL_DBG("Audio receiving disabled");
+        receiveThread_.reset();
+        return;
+    }
+    if (receiveThread_) SFL_WARN("restarting audio receiver"); // this is the audio path, not video
+    receiveThread_.reset(new AudioReceiveThread(id_, receivingSDP_));
+    receiveThread_->addIOContext(*socketPair_);
+    receiveThread_->startLoop();
+}
+
+// (Re)starts the session on localPort: tears down any running sender/receiver,
+// then, if audio is active, opens a new socket pair and starts both ends.
+void
+AVFormatRtpSession::start(int localPort)
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+    // The sender/receiver threads use the current socket pair, so they must be
+    // stopped before it is replaced below (mutex_ is recursive, stop() relocks).
+    stop();
+    if (not sending_ and not receiving_)
+        return;
+    try {
+        socketPair_.reset(new SocketPair(txArgs_["destination"].c_str(), localPort));
+    } catch (const std::runtime_error &e) {
+        SFL_ERR("Socket creation failed on port %d: %s", localPort, e.what());
+        return;
+    }
+    startSender();
+    startReceiver();
+}
+
+void
+AVFormatRtpSession::stop()
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+
+    if (socketPair_)
+        socketPair_->interrupt(); // presumably unblocks pending socket I/O so the threads below can exit — TODO confirm
+
+    receiveThread_.reset(); // both users of the socket pair are destroyed before the pair itself
+    sender_.reset();
+    socketPair_.reset();
+}
+
+} // end namespace sfl
diff --git a/daemon/src/audio/audiortp/avformat_rtp_session.h b/daemon/src/audio/audiortp/avformat_rtp_session.h
new file mode 100644
index 0000000000000000000000000000000000000000..de1e1c9ffb5af6c01b88a1f43a73a7ae84b4e79d
--- /dev/null
+++ b/daemon/src/audio/audiortp/avformat_rtp_session.h
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (C) 2014 Savoir-Faire Linux Inc.
+ *  Author: Tristan Matthews <tristan.matthews@savoirfairelinux.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ *
+ *  Additional permission under GNU GPL version 3 section 7:
+ *
+ *  If you modify this program, or any covered work, by linking or
+ *  combining it with the OpenSSL project's OpenSSL library (or a
+ *  modified version of that library), containing parts covered by the
+ *  terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc.
+ *  grants you additional permission to convey the resulting work.
+ *  Corresponding Source for a non-source form of such a combination
+ *  shall include the source code for the parts of OpenSSL used as well
+ *  as that of the covered work.
+ */
+
+#ifndef AVFORMAT_RTP_SESSION_H__
+#define AVFORMAT_RTP_SESSION_H__
+
+#include "threadloop.h"
+#include "audio/audiobuffer.h"
+#include "noncopyable.h"
+
+#include <map>
+#include <string>
+#include <memory>
+#include <mutex>
+
+namespace sfl_video {
+class SocketPair;
+class VideoEncoder;
+}
+
+class Sdp;
+class ThreadLoop;
+
+namespace sfl {
+
+class RingBuffer;
+class Resampler;
+class AudioSender;
+class AudioReceiveThread;
+
+// Audio RTP session for one call: owns the socket pair, the sender and the receive thread; public methods lock mutex_.
+class AVFormatRtpSession {
+    public:
+        AVFormatRtpSession(const std::string& id,
+                           const std::map<std::string, std::string>& txArgs);
+        ~AVFormatRtpSession();
+
+        void start(int localPort);
+        void stop();
+        void updateDestination(const std::string& destination, unsigned int port);
+        void updateSDP(const Sdp &sdp);
+
+    private:
+        NON_COPYABLE(AVFormatRtpSession);
+        void startSender();
+        void startReceiver();
+
+        std::string id_;
+        std::map<std::string, std::string> txArgs_;
+        std::string receivingSDP_;
+        std::unique_ptr<sfl_video::SocketPair> socketPair_;
+        std::unique_ptr<AudioSender> sender_;
+        std::unique_ptr<AudioReceiveThread> receiveThread_;
+        std::shared_ptr<sfl::RingBuffer> ringbuffer_;
+        std::recursive_mutex mutex_;
+        bool sending_ {false}; // start() may read both flags before updateSDP() has set them
+        bool receiving_ {false};
+};
+
+}
+
+#endif // AVFORMAT_RTP_SESSION_H__
diff --git a/daemon/src/client/callmanager.cpp b/daemon/src/client/callmanager.cpp
index f5b8ae8ac744355c3e7211f176268c2be7cf8177..feea893e1a409fdc30e3ac5af8288d534b6127f2 100644
--- a/daemon/src/client/callmanager.cpp
+++ b/daemon/src/client/callmanager.cpp
@@ -278,7 +278,7 @@ CallManager::startTone(int32_t start, int32_t type)
 // for conferencing in order to get
 // the right pointer for the given
 // callID.
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
 sfl::AudioZrtpSession *
 CallManager::getAudioZrtpSession(const std::string& callID)
 {
@@ -299,7 +299,7 @@ CallManager::getAudioZrtpSession(const std::string& callID)
 void
 CallManager::setSASVerified(const std::string& callID)
 {
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
     try {
         sfl::AudioZrtpSession * zSession;
         zSession = getAudioZrtpSession(callID);
@@ -314,7 +314,7 @@ CallManager::setSASVerified(const std::string& callID)
 void
 CallManager::resetSASVerified(const std::string& callID)
 {
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
     try {
         sfl::AudioZrtpSession * zSession;
         zSession = getAudioZrtpSession(callID);
@@ -329,7 +329,7 @@ CallManager::resetSASVerified(const std::string& callID)
 void
 CallManager::setConfirmGoClear(const std::string& callID)
 {
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
     try {
         sfl::AudioZrtpSession * zSession;
         zSession = getAudioZrtpSession(callID);
@@ -344,7 +344,7 @@ CallManager::setConfirmGoClear(const std::string& callID)
 void
 CallManager::requestGoClear(const std::string& callID)
 {
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
     try {
         sfl::AudioZrtpSession * zSession;
         zSession = getAudioZrtpSession(callID);
@@ -359,7 +359,7 @@ CallManager::requestGoClear(const std::string& callID)
 void
 CallManager::acceptEnrollment(const std::string& callID, bool accepted)
 {
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
     try {
         sfl::AudioZrtpSession * zSession;
         zSession = getAudioZrtpSession(callID);
diff --git a/daemon/src/client/callmanager.h b/daemon/src/client/callmanager.h
index 2d65a712695755dd016de1b695e653e64ceb2454..81b345098698b015eb655fbf9ff958f94e4b6818 100644
--- a/daemon/src/client/callmanager.h
+++ b/daemon/src/client/callmanager.h
@@ -156,7 +156,7 @@ class CallManager
 
     private:
 
-#if HAVE_ZRTP
+#if USE_CCRTP && HAVE_ZRTP
         sfl::AudioZrtpSession * getAudioZrtpSession(const std::string& callID);
 #endif
 
diff --git a/daemon/src/ringdht/ringaccount.cpp b/daemon/src/ringdht/ringaccount.cpp
index 94e768119a04945abaeb362fac279ef0997f348d..940219afbfe98e507e0fbe211c8235b7f1163f83 100644
--- a/daemon/src/ringdht/ringaccount.cpp
+++ b/daemon/src/ringdht/ringaccount.cpp
@@ -180,6 +180,7 @@ RingAccount::createOutgoingCall(const std::shared_ptr<SIPCall>& call, const std:
     std::vector<sfl::AudioCodec *> audioCodecs;
     audioCodecs.push_back(ac);
 
+#if USE_CCRTP
     try {
         call->getAudioRtp().initConfig();
         call->getAudioRtp().initSession();
@@ -192,6 +193,7 @@ RingAccount::createOutgoingCall(const std::shared_ptr<SIPCall>& call, const std:
     } catch (...) {
         throw VoipLinkException("Could not start rtp session for early media");
     }
+#endif
 
     // Building the local SDP offer
     auto& sdp = call->getSDP();
diff --git a/daemon/src/sip/sdp.cpp b/daemon/src/sip/sdp.cpp
index a9d4c84886b8a5ee2955e2136910856178adf18d..509bff3dfe197d7427635f3bd634e527c8e6ad18 100644
--- a/daemon/src/sip/sdp.cpp
+++ b/daemon/src/sip/sdp.cpp
@@ -289,7 +289,8 @@ Sdp::setMediaDescriptorLines(bool audio)
             enc_name = codec->getMimeSubtype();
             clock_rate = codec->getSDPClockRate();
             channels = codec->getSDPChannels();
-            // G722 require G722/8000 media description even if it is 16000 codec
+            // G722 requires G722/8000 media description even though it's @ 16000 Hz
+            // See http://tools.ietf.org/html/rfc3551#section-4.5.2
             if (codec->getPayloadType () == 9)
                 clock_rate = 8000;
         } else {
@@ -656,6 +657,47 @@ string Sdp::getIncomingVideoDescription() const
     return sessionStr;
 }
 
+// Returns the active local SDP as text with every non-audio media deactivated
+// and the first "m=video" line removed; "" if it has no audio or on failure.
+// FIXME: we should probably give the audio decoder thread the original SDP
+// and deal with the streams properly at that level
+std::string Sdp::getIncomingAudioDescription() const
+{
+    pjmedia_sdp_session *audioSession = pjmedia_sdp_session_clone(memPool_, activeLocalSession_);
+    if (!audioSession) {
+        SFL_ERR("Could not clone SDP");
+        return "";
+    }
+    // deactivate non-audio media
+    bool hasAudio = false;
+    for (unsigned i = 0; i < audioSession->media_count; i++) {
+        pjmedia_sdp_media *media = audioSession->media[i];
+        if (pj_stricmp2(&media->desc.media, "audio") == 0)
+            hasAudio = true;
+        else if (pjmedia_sdp_media_deactivate(memPool_, media) != PJ_SUCCESS)
+            SFL_ERR("Could not deactivate media");
+    }
+    if (not hasAudio) {
+        SFL_DBG("No audio present in active local SDP");
+        return "";
+    }
+
+    char buffer[4096];
+    const int printed = pjmedia_sdp_print(audioSession, buffer, sizeof(buffer));
+    if (printed < 0) { // -1 means the buffer was too small; don't use it as a length
+        SFL_ERR("Could not print SDP");
+        return "";
+    }
+    std::string sessionStr(buffer, printed);
+
+    // FIXME: find a way to get rid of the "m=video..." line with PJSIP
+    const size_t videoPos = sessionStr.find("m=video");
+    const size_t lineStart = videoPos == std::string::npos ? videoPos : sessionStr.rfind('\n', videoPos);
+    if (lineStart != std::string::npos) // erase only when a video line preceded by a newline exists
+        sessionStr.erase(lineStart, sessionStr.find('\n', videoPos) - lineStart);
+    return sessionStr;
+}
+
 std::string Sdp::getOutgoingVideoCodec() const
 {
     string str("a=rtpmap:");
@@ -669,6 +711,64 @@ std::string Sdp::getOutgoingVideoCodec() const
     return string(codec_buf);
 }
 
+// FIXME: merge these into a single parsing function, lot of repetition here
+// Returns the encoding name from the remote rtpmap line of the outgoing audio
+// payload (the text before the first '/'), or "" if it cannot be parsed.
+std::string Sdp::getOutgoingAudioCodec() const
+{
+    std::ostringstream keyword;
+    keyword << "a=rtpmap:" << getOutgoingAudioPayload();
+    const string rtpmapLine(getLineFromSession(activeRemoteSession_, keyword.str()));
+
+    char name[32] = "";
+    sscanf(rtpmapLine.c_str(), "a=rtpmap:%*d %31[^/]", name);
+    return string(name);
+}
+
+// Returns the clock rate field of the remote rtpmap line for the outgoing
+// audio payload, or "8000" when the line has no '/' followed by text.
+std::string Sdp::getOutgoingAudioRate() const
+{
+    // e.g. opus/48000/2, g722/16000
+    std::ostringstream keyword;
+    keyword << "a=rtpmap:" << getOutgoingAudioPayload();
+    const string rtpmapLine(getLineFromSession(activeRemoteSession_, keyword.str()));
+
+    const auto slash = rtpmapLine.find('/');
+    // note the unsigned arithmetic: for an empty line size() - 1 wraps around
+    // to the largest value, which npos can never be smaller than
+    if (not (slash < rtpmapLine.size() - 1)) {
+        const char *DEFAULT_RATE = "8000";
+        SFL_ERR("No rate found in SDP, defaulting to %s", DEFAULT_RATE);
+        return DEFAULT_RATE;
+    }
+
+    // keep what lies between the first '/' and the next one (or the end);
+    // substr() treats a count of npos as "everything that is left"
+    const auto rest = rtpmapLine.substr(slash + 1);
+    return rest.substr(0, rest.find('/'));
+}
+
+// Returns the channel count field of the remote rtpmap line for the outgoing
+// audio payload (e.g. "2" for opus/48000/2), or "1" when the line has no
+// second '/' followed by text.
+std::string Sdp::getOutgoingAudioChannels() const
+{
+    std::ostringstream keyword;
+    keyword << "a=rtpmap:" << getOutgoingAudioPayload();
+    const string rtpmapLine(getLineFromSession(activeRemoteSession_, keyword.str()));
+
+    const auto lastSlash = rtpmapLine.rfind('/');
+    const bool hasChannelField = std::count(rtpmapLine.begin(), rtpmapLine.end(), '/') > 1
+                                 and lastSlash < rtpmapLine.size() - 1;
+    if (hasChannelField)
+        return rtpmapLine.substr(lastSlash + 1);
+
+    const char *DEFAULT_CHANNELS = "1";
+    SFL_ERR("No channels found in SDP, defaulting to %s", DEFAULT_CHANNELS);
+    return DEFAULT_CHANNELS;
+}
+
 static vector<map<string, string> >::const_iterator
 findCodecInList(const vector<map<string, string> > &codecs, const string &codec)
 {
@@ -702,6 +802,16 @@ Sdp::getOutgoingVideoPayload() const
     return payload_num;
 }
 
+// Payload type listed first on the remote "m=audio" line, or 0 if it cannot be parsed.
+int
+Sdp::getOutgoingAudioPayload() const
+{
+    const string mediaLine(getLineFromSession(activeRemoteSession_, "m=audio"));
+    int payload = 0;
+    const int parsed = sscanf(mediaLine.c_str(), "m=audio %*d %*s %d", &payload);
+    return parsed == 1 ? payload : 0;
+}
+
 void
 Sdp::getProfileLevelID(const pjmedia_sdp_session *session,
                        std::string &profile, int payload) const
@@ -870,3 +980,47 @@ bool Sdp::getOutgoingVideoSettings(map<string, string> &args) const
 #endif
     return false;
 }
+
+#ifndef USE_CCRTP
+// Fills args with the encoder settings for the negotiated outgoing audio codec:
+// "codec", "payload_type", "sample_rate" and "channels". Returns false, possibly
+// after setting some of those keys, when a piece of information is missing.
+bool Sdp::getOutgoingAudioSettings(map<string, string> &args) const
+{
+    const string codec(getOutgoingAudioCodec());
+    if (codec.empty())
+        return false;
+
+    // look up the encoder registered for this SDP codec name
+    const string encoder(libav_utils::encodersMap()[codec]);
+    if (encoder.empty()) {
+        SFL_DBG("Couldn't find encoder for \"%s\"\n", codec.c_str());
+        return false;
+    }
+    args["codec"] = encoder;
+
+    // the payload type is passed on as text
+    std::ostringstream payloadText;
+    payloadText << getOutgoingAudioPayload();
+    args["payload_type"] = payloadText.str();
+
+    const string rate(getOutgoingAudioRate());
+    if (rate.empty()) {
+        SFL_DBG("Couldn't find rate for \"%s\"\n", codec.c_str());
+        return false;
+    }
+
+    // G722 requires G722/8000 media description even though it's @ 16000 Hz
+    // See http://tools.ietf.org/html/rfc3551#section-4.5.2
+    args["sample_rate"] = (codec == "G722") ? string("16000") : rate;
+
+    const string channels(getOutgoingAudioChannels());
+    if (channels.empty()) {
+        SFL_DBG("Couldn't find channels for \"%s\"\n", codec.c_str());
+        return false;
+    }
+    args["channels"] = channels;
+
+    return true;
+}
+#endif
diff --git a/daemon/src/sip/sdp.h b/daemon/src/sip/sdp.h
index 1d28d43b814556e2a61ba267b09c4ea8f1a2b7af..eae0a82031f4d919cc374216d6dbd28f81711ee3 100644
--- a/daemon/src/sip/sdp.h
+++ b/daemon/src/sip/sdp.h
@@ -118,6 +118,12 @@ class Sdp {
          */
         std::string getIncomingVideoDescription() const;
 
+        /**
+         * Returns a string version of the negotiated SDP fields which pertain
+         * to audio.
+         */
+        std::string getIncomingAudioDescription() const;
+
         /*
          * On building an invite outside a dialog, build the local offer and create the
          * SDP negotiator instance with it.
@@ -217,6 +223,10 @@ class Sdp {
             return localVideoDataPort_;
         }
 
+        unsigned int getLocalAudioPort() const { // accessor for localAudioDataPort_, mirroring the accessor above that returns localVideoDataPort_
+            return localAudioDataPort_;
+        }
+
         void addAttributeToLocalAudioMedia(const char *attr);
         void removeAttributeFromLocalAudioMedia(const char *attr);
         void addAttributeToLocalVideoMedia(const char *attr);
@@ -258,6 +268,7 @@ class Sdp {
         // Sets @param settings with appropriate values and returns true if
         // we are sending video, false otherwise
         bool getOutgoingVideoSettings(std::map<std::string, std::string> &settings) const;
+        bool getOutgoingAudioSettings(std::map<std::string, std::string> &settings) const;
 
     private:
         NON_COPYABLE(Sdp);
@@ -265,8 +276,12 @@ class Sdp {
 
         std::string getLineFromSession(const pjmedia_sdp_session *sess, const std::string &keyword) const;
         std::string getOutgoingVideoCodec() const;
+        std::string getOutgoingAudioCodec() const;
+        std::string getOutgoingAudioRate() const;
+        std::string getOutgoingAudioChannels() const;
         std::string getOutgoingVideoField(const std::string &codec, const char *key) const;
         int getOutgoingVideoPayload() const;
+        int getOutgoingAudioPayload() const;
         void getProfileLevelID(const pjmedia_sdp_session *session, std::string &dest, int payload) const;
         void updateRemoteIP(unsigned index);
 
diff --git a/daemon/src/sip/sipaccount.cpp b/daemon/src/sip/sipaccount.cpp
index e032de192350c1f3d5eb17cead9a83cebf2b0599..d5e1a86e87301aeb7c50994810ebcfe506915b5a 100644
--- a/daemon/src/sip/sipaccount.cpp
+++ b/daemon/src/sip/sipaccount.cpp
@@ -235,6 +235,7 @@ SIPAccount::newOutgoingCall(const std::string& id, const std::string& toUrl)
     std::vector<sfl::AudioCodec *> audioCodecs;
     audioCodecs.push_back(ac);
 
+#if USE_CCRTP
     try {
         call->getAudioRtp().initConfig();
         call->getAudioRtp().initSession();
@@ -247,6 +248,7 @@ SIPAccount::newOutgoingCall(const std::string& id, const std::string& toUrl)
     } catch (...) {
         throw VoipLinkException("Could not start rtp session for early media");
     }
+#endif
 
     // Building the local SDP offer
     auto& sdp = call->getSDP();
diff --git a/daemon/src/sip/sipcall.cpp b/daemon/src/sip/sipcall.cpp
index 977107ac26b8b63cacf6c593dc716105696ea664..2232cfec83cdecb826c12f27a9c0b1aa2ca7e934 100644
--- a/daemon/src/sip/sipcall.cpp
+++ b/daemon/src/sip/sipcall.cpp
@@ -43,7 +43,11 @@
 #include "manager.h"
 #include "array_size.h"
 
+#if USE_CCRTP
 #include "audio/audiortp/audio_rtp_factory.h" // for AudioRtpFactoryException
+#else
+#include "audio/audiortp/avformat_rtp_session.h"
+#endif
 
 #if HAVE_INSTANT_MESSAGING
 #include "im/instant_messaging.h"
@@ -73,7 +77,9 @@ static void
 dtmfSend(SIPCall &call, char code, const std::string &dtmf)
 {
     if (dtmf == SIPAccount::OVERRTP_STR) {
+#if USE_CCRTP
         call.getAudioRtp().sendDtmfDigit(code);
+#endif
         return;
     } else if (dtmf != SIPAccount::SIPINFO_STR) {
         SFL_WARN("Unknown DTMF type %s, defaulting to %s instead",
@@ -100,7 +106,11 @@ dtmfSend(SIPCall &call, char code, const std::string &dtmf)
 
 SIPCall::SIPCall(SIPAccountBase& account, const std::string& id, Call::CallType type)
     : Call(account, id, type)
+#if USE_CCRTP
     , audiortp_(this)
+#else
+    , avformatrtp_(new sfl::AVFormatRtpSession(id, /* FIXME: These are video! */ getSettings()))
+#endif
 #ifdef SFL_VIDEO
     // The ID is used to associate video streams to calls
     , videortp_(id, getSettings())
@@ -130,7 +140,11 @@ void
 SIPCall::stopRtpIfCurrent()
 {
     if (Manager::instance().isCurrentCall(*this)) {
+#if USE_CCRTP
         getAudioRtp().stop();
+#else
+        avformatrtp_->stop();
+#endif
 #ifdef SFL_VIDEO
         getVideoRtp().stop();
 #endif
@@ -242,6 +256,7 @@ SIPCall::sendSIPInfo(const char *const body, const char *const subtype)
 void
 SIPCall::updateSDPFromSTUN()
 {
+#if USE_CCRTP
     auto& account = getSIPAccount();
     std::vector<long> socketDescriptors(getAudioRtp().getSocketDescriptors());
 
@@ -258,6 +273,7 @@ SIPCall::updateSDPFromSTUN()
     } catch (const std::runtime_error &e) {
         SFL_ERR("%s", e.what());
     }
+#endif
 }
 
 void SIPCall::answer()
@@ -359,7 +375,11 @@ SIPCall::refuse()
     if (!isIncoming() or getConnectionState() == Call::CONNECTED or !inv)
         return;
 
+#if USE_CCRTP
     getAudioRtp().stop();
+#else
+    avformatrtp_->stop();
+#endif
 
     pjsip_tx_data *tdata;
 
@@ -553,8 +573,12 @@ SIPCall::onhold()
     if (not setState(Call::HOLD))
         return;
 
+#if USE_CCRTP
     audiortp_.saveLocalContext();
     audiortp_.stop();
+#else
+    avformatrtp_->stop();
+#endif
 #ifdef SFL_VIDEO
     videortp_.stop();
 #endif
@@ -576,6 +600,7 @@ SIPCall::onhold()
 void
 SIPCall::offhold()
 {
+#if USE_CCRTP
     auto& account = getSIPAccount();
 
     try {
@@ -594,6 +619,7 @@ SIPCall::offhold()
     } catch (const sfl::AudioRtpFactoryException &) {
         throw VoipLinkException("Socket problem in offhold");
     }
+#endif
 }
 
 void
@@ -631,6 +657,7 @@ SIPCall::internalOffHold(const std::function<void()> &SDPUpdateFunc)
         throw std::runtime_error("Could not instantiate any codecs");
     }
 
+#if USE_CCRTP
     audiortp_.initConfig();
     audiortp_.initSession();
 
@@ -640,6 +667,7 @@ SIPCall::internalOffHold(const std::function<void()> &SDPUpdateFunc)
     audiortp_.restoreLocalContext();
     audiortp_.initLocalCryptoInfoOnOffHold();
     audiortp_.start(audioCodecs);
+#endif
 
     sdp_->removeAttributeFromLocalAudioMedia("sendrecv");
     sdp_->removeAttributeFromLocalAudioMedia("sendonly");
diff --git a/daemon/src/sip/sipcall.h b/daemon/src/sip/sipcall.h
index eb141666a1b85f6399cac3f072028010e6055afa..c20bdf6f60843edda9a1e3f9e7ff677233bb9d54 100644
--- a/daemon/src/sip/sipcall.h
+++ b/daemon/src/sip/sipcall.h
@@ -39,7 +39,9 @@
 #endif
 
 #include "call.h"
+#if USE_CCRTP
 #include "audio/audiortp/audio_rtp_factory.h"
+#endif
 #ifdef SFL_VIDEO
 #include "video/video_rtp_session.h"
 #endif
@@ -59,6 +61,10 @@ class Sdp;
 class SIPAccountBase;
 class SipTransport;
 
+namespace sfl {
+class AVFormatRtpSession;
+}
+
 /**
  * @file sipcall.h
  * @brief SIPCall are SIP implementation of a normal Call
@@ -90,18 +96,28 @@ class SIPCall : public Call
             return *sdp_;
         }
 
+#if USE_CCRTP
         /**
          * Returns a pointer to the AudioRtp object
          */
-        sfl::AudioRtpFactory & getAudioRtp() {
+        sfl::AudioRtpFactory& getAudioRtp() {
             return audiortp_;
         }
+#else
+
+        /**
+         * Returns a reference to the AVFormatRtpSession held by this call (dereferences avformatrtp_)
+         */
+        sfl::AVFormatRtpSession& getAVFormatRTP() const {
+            return *avformatrtp_;
+        }
+#endif
 
 #ifdef SFL_VIDEO
         /**
          * Returns a pointer to the VideoRtp object
          */
-        sfl_video::VideoRtpSession &getVideoRtp () {
+        sfl_video::VideoRtpSession& getVideoRtp () {
             return videortp_;
         }
 #endif
@@ -196,10 +212,14 @@ class SIPCall : public Call
 
         int SIPSessionReinvite();
 
+#if USE_CCRTP
         /**
          * Audio Rtp Session factory
          */
         sfl::AudioRtpFactory audiortp_;
+#else
+        std::unique_ptr<sfl::AVFormatRtpSession> avformatrtp_;
+#endif
 
 #ifdef SFL_VIDEO
         /**
diff --git a/daemon/src/sip/sipvoiplink.cpp b/daemon/src/sip/sipvoiplink.cpp
index 9ee2577960892614c3e53e61ed81359a2620e685..2be08be206dfbf7af2355350f35fc559d151c48a 100644
--- a/daemon/src/sip/sipvoiplink.cpp
+++ b/daemon/src/sip/sipvoiplink.cpp
@@ -65,6 +65,10 @@
 
 #include "audio/audiolayer.h"
 
+#ifndef USE_CCRTP
+#include "audio/audiortp/avformat_rtp_session.h"
+#endif
+
 #ifdef SFL_VIDEO
 #include "video/video_rtp_session.h"
 #include "client/videomanager.h"
@@ -323,19 +327,24 @@ transaction_request_cb(pjsip_rx_data *rdata)
     call->initRecFilename(peerNumber);
     call->setCallMediaLocal(addrToUse);
     call->getSDP().setPublishedIP(addrSdp);
+#if USE_CCRTP
     call->getAudioRtp().initConfig();
+#endif
     call->setTransport(transport);
 
+#if USE_CCRTP
     try {
         call->getAudioRtp().initSession();
     } catch (const ost::Socket::Error &err) {
         SFL_ERR("AudioRtp socket error");
         return PJ_FALSE;
     }
+#endif
 
     if (account->isStunEnabled())
         call->updateSDPFromSTUN();
 
+#if USE_CCRTP
     if (body and body->len > 0 and call->getAudioRtp().isSdesEnabled()) {
         std::string sdpOffer(static_cast<const char*>(body->data), body->len);
         size_t start = sdpOffer.find("a=crypto:");
@@ -367,6 +376,7 @@ transaction_request_cb(pjsip_rx_data *rdata)
 #endif
         }
     }
+#endif
 
     call->getSDP().receiveOffer(r_sdp, account->getActiveAudioCodecs(), account->getActiveVideoCodecs());
 
@@ -379,7 +389,9 @@ transaction_request_cb(pjsip_rx_data *rdata)
 
     std::vector<sfl::AudioCodec *> audioCodecs;
     audioCodecs.push_back(ac);
+#if USE_CCRTP
     call->getAudioRtp().start(audioCodecs);
+#endif
 
     pjsip_dialog *dialog = 0;
 
@@ -1020,14 +1032,22 @@ sdp_media_update_cb(pjsip_inv_session *inv, pj_status_t status)
     // Update connection information
     sdp.setMediaTransportInfoFromRemoteSdp();
 
+#if USE_CCRTP
     auto& audioRTP = call->getAudioRtp();
     try {
         audioRTP.updateDestinationIpAddress();
     } catch (const AudioRtpFactoryException &e) {
         SFL_ERR("%s", e.what());
     }
-
     audioRTP.setDtmfPayloadType(sdp.getTelephoneEventType());
+#else
+    call->getAVFormatRTP().updateSDP(sdp);
+    call->getAVFormatRTP().updateDestination(sdp.getRemoteIP(), sdp.getRemoteAudioPort());
+    auto localAudioPort = sdp.getLocalAudioPort();
+    if (!localAudioPort)
+        localAudioPort = sdp.getRemoteAudioPort();
+    call->getAVFormatRTP().start(localAudioPort);
+#endif
 
 #ifdef SFL_VIDEO
     auto& videoRTP = call->getVideoRtp();
@@ -1039,9 +1059,9 @@ sdp_media_update_cb(pjsip_inv_session *inv, pj_status_t status)
 
     // Get the crypto attribute containing srtp's cryptographic context (keys, cipher)
     CryptoOffer crypto_offer;
-    call->getSDP().getRemoteSdpCryptoFromOffer(remoteSDP, crypto_offer);
+    sdp.getRemoteSdpCryptoFromOffer(remoteSDP, crypto_offer);
 
-#if HAVE_SDES
+#if USE_CCRTP && HAVE_SDES
     bool nego_success = false;
 
     if (!crypto_offer.empty()) {
@@ -1113,8 +1133,10 @@ sdp_media_update_cb(pjsip_inv_session *inv, pj_status_t status)
             }
         }
 
+#if USE_CCRTP
         if (not audioCodecs.empty())
             call->getAudioRtp().updateSessionMedia(audioCodecs);
+#endif
     } catch (const SdpException &e) {
         SFL_ERR("%s", e.what());
     } catch (const std::exception &rtpException) {
diff --git a/daemon/src/video/libav_deps.h b/daemon/src/video/libav_deps.h
index 2f9e9af8ab81673c26993c55f99f11f2a2a506e7..21402b956ffde1be08dbdde35b24d9d771e9c202 100644
--- a/daemon/src/video/libav_deps.h
+++ b/daemon/src/video/libav_deps.h
@@ -66,6 +66,7 @@ extern "C" {
 #endif
 #include <libavutil/pixdesc.h>
 #include <libavutil/opt.h>
+#include <libavutil/channel_layout.h>
 #include <libavutil/mathematics.h> // for av_rescale_q (old libav support)
 #include <libavutil/imgutils.h>
 #include <libavutil/intreadwrite.h>
@@ -96,4 +97,17 @@ static inline const AVPixFmtDescriptor *av_pix_fmt_desc_get(enum AVPixelFormat p
 #define avcodec_free_frame(x) av_freep(x)
 #endif
 
+// Older libavutil (especially Fedora < 20 and Ubuntu < 14.10): alias the av_frame_* names to the legacy avcodec_*/avpicture_* calls.
+#define USE_OLD_AVU ! LIBAVUTIL_VERSION_CHECK(52, 8, 0, 19, 100)
+// NOTE: the av_frame_get_buffer() fallback ignores its second argument and only allocates picture planes (avpicture_alloc).
+#if USE_OLD_AVU
+#define av_frame_alloc avcodec_alloc_frame
+#define av_frame_free avcodec_free_frame
+#define av_frame_unref avcodec_get_frame_defaults
+#define av_frame_get_buffer(x, y) avpicture_alloc((AVPicture *)(x), \
+                                                  (AVPixelFormat)(x)->format, \
+                                                  (x)->width, (x)->height)
+#endif
+
+
 #endif // __LIBAV_DEPS_H__
diff --git a/daemon/src/video/libav_utils.cpp b/daemon/src/video/libav_utils.cpp
index bb1fb5603d7e806fa665f0b669a30771da510a03..8f82c4f28828de40755e0c87c8dacf7a30db0e68 100644
--- a/daemon/src/video/libav_utils.cpp
+++ b/daemon/src/video/libav_utils.cpp
@@ -125,6 +125,12 @@ static void init_once()
     encoders_["VP8"]         = "libvpx";
     encoders_["MP4V-ES"]     = "mpeg4";
 
+    encoders_["PCMA"]        = "pcm_alaw";
+    encoders_["PCMU"]        = "pcm_mulaw";
+    encoders_["opus"]        = "libopus";
+    encoders_["G722"]        = "g722";
+    encoders_["speex"]       = "libspeex";
+
     //FFmpeg needs to be modified to allow us to send configuration
     //inline, with CODEC_FLAG_GLOBAL_HEADER
     //encoders["THEORA"]        = "libtheora";
diff --git a/daemon/src/video/video_base.cpp b/daemon/src/video/video_base.cpp
index e75cc2356aa7a4d38c9bf3b098f12b2cd924a229..e58d457e844bb16fdbecd90850802d6281374f39 100644
--- a/daemon/src/video/video_base.cpp
+++ b/daemon/src/video/video_base.cpp
@@ -34,18 +34,6 @@
 #include "video_base.h"
 #include "logger.h"
 
-// Especially for Fedora < 20 and UBUNTU < 14.10
-#define USE_OLD_AVU ! LIBAVUTIL_VERSION_CHECK(52, 8, 0, 19, 100)
-
-#if USE_OLD_AVU
-#define av_frame_alloc avcodec_alloc_frame
-#define av_frame_free avcodec_free_frame
-#define av_frame_unref avcodec_get_frame_defaults
-#define av_frame_get_buffer(x, y) avpicture_alloc((AVPicture *)(x), \
-                                                  (AVPixelFormat)(x)->format, \
-                                                  (x)->width, (x)->height)
-#endif
-
 namespace sfl_video {
 
 /*=== VideoPacket  ===========================================================*/
diff --git a/daemon/src/video/video_decoder.cpp b/daemon/src/video/video_decoder.cpp
index faa29befbc30071fcc9c6fa61baa844796b33dd7..b541495630a1ff78b1ef04bd349b71a5bd6ff8af 100644
--- a/daemon/src/video/video_decoder.cpp
+++ b/daemon/src/video/video_decoder.cpp
@@ -32,6 +32,9 @@
 // libav_deps.h must be included first
 #include "libav_deps.h"
 #include "video_decoder.h"
+#include "audio/audiobuffer.h"
+#include "audio/ringbuffer.h"
+#include "audio/resampler.h"
 #include "logger.h"
 
 #include <iostream>
@@ -116,6 +119,82 @@ void VideoDecoder::setInterruptCallback(int (*cb)(void*), void *opaque)
 void VideoDecoder::setIOContext(VideoIOHandle *ioctx)
 { inputCtx_->pb = ioctx->getContext(); }
 
+int VideoDecoder::setupFromAudioData()
+{
+    int ret;
+    // Audio counterpart of setupFromVideoData(): probe inputCtx_, pick the first audio stream and open its decoder; returns 0 on success, -1 on failure.
+    if (decoderCtx_)
+        avcodec_close(decoderCtx_);
+
+    // Increase analyze time to solve synchronization issues between callers.
+    static const unsigned MAX_ANALYZE_DURATION = 30; // time in seconds
+
+    inputCtx_->max_analyze_duration = MAX_ANALYZE_DURATION * AV_TIME_BASE;
+
+    SFL_DBG("Finding stream info");
+#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 8, 0)
+    ret = av_find_stream_info(inputCtx_);
+#else
+    ret = avformat_find_stream_info(inputCtx_, NULL);
+#endif
+
+    if (ret < 0) {
+        // workaround for this bug:
+        // http://patches.libav.org/patch/22541/
+        if (ret == -1)
+            ret = AVERROR_INVALIDDATA;
+        char errBuf[64] = {0};
+        // print nothing for unknown errors
+        if (av_strerror(ret, errBuf, sizeof errBuf) < 0)
+            errBuf[0] = '\0';
+
+        // always fail here
+        SFL_ERR("Could not find stream info: %s", errBuf);
+        return -1;
+    }
+
+    // find the first audio stream from the input (the scan is skipped when streamIndex_ is already set)
+    for (size_t i = 0; streamIndex_ == -1 && i < inputCtx_->nb_streams; ++i)
+        if (inputCtx_->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
+            streamIndex_ = i;
+
+    if (streamIndex_ == -1) {
+        SFL_ERR("Could not find audio stream");
+        return -1;
+    }
+
+    // Get a pointer to the codec context for the audio stream
+    decoderCtx_ = inputCtx_->streams[streamIndex_]->codec;
+    if (decoderCtx_ == 0) {
+        SFL_ERR("Decoder context is NULL");
+        return -1;
+    }
+
+    // find the decoder for the audio stream
+    inputDecoder_ = avcodec_find_decoder(decoderCtx_->codec_id);
+    if (!inputDecoder_) {
+        SFL_ERR("Unsupported codec");
+        return -1;
+    }
+
+    decoderCtx_->thread_count = 1;
+    if (emulateRate_) {
+        SFL_DBG("Using framerate emulation");
+        startTime_ = av_gettime();
+    }
+
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+    decoderCtx_->refcounted_frames = 1;
+#endif
+    ret = avcodec_open2(decoderCtx_, inputDecoder_, NULL);
+    if (ret) {
+        SFL_ERR("Could not open codec");
+        return -1;
+    }
+
+    return 0;
+}
+
 int VideoDecoder::setupFromVideoData()
 {
     int ret;
@@ -248,6 +327,63 @@ VideoDecoder::decode(VideoFrame& result, VideoPacket& video_packet)
     return Status::Success;
 }
 
+VideoDecoder::Status
+VideoDecoder::decode_audio(AVFrame *decoded_frame)
+{
+    // Reads one packet from inputCtx_ and decodes it into decoded_frame; returns FrameFinished once a whole frame is available.
+    AVPacket inpacket;
+    av_init_packet(&inpacket);
+    inpacket.data = NULL;
+    inpacket.size = 0;
+
+    int ret = av_read_frame(inputCtx_, &inpacket);
+    if (ret == AVERROR(EAGAIN)) {
+        return Status::Success;
+    } else if (ret == AVERROR_EOF) {
+        return Status::EOFError;
+    } else if (ret < 0) {
+        char errbuf[64];
+        av_strerror(ret, errbuf, sizeof(errbuf));
+        SFL_ERR("Couldn't read frame: %s\n", errbuf);
+        return Status::ReadError;
+    }
+
+    // Only packets from the audio stream are decoded; others fall through to Success.
+    int frameFinished = 0;
+    int len = 1;
+    if (inpacket.stream_index == streamIndex_)
+        len = avcodec_decode_audio4(decoderCtx_, decoded_frame,
+                                    &frameFinished, &inpacket);
+    // av_read_frame() succeeded, so the packet must be released on every path.
+    av_free_packet(&inpacket);
+    if (len <= 0)
+        return Status::DecodeError;
+
+    if (frameFinished) {
+        if (emulateRate_) {
+            if (decoded_frame->pkt_dts != AV_NOPTS_VALUE) {
+                const auto now = std::chrono::system_clock::now();
+                const std::chrono::duration<double> seconds = now - lastFrameClock_;
+                const double dTB = av_q2d(inputCtx_->streams[streamIndex_]->time_base);
+                const double dts_diff = dTB * (decoded_frame->pkt_dts - lastDts_); // DTS gap since the previous frame, in seconds
+                const double usDelay = 1e6 * (dts_diff - seconds.count()); // part of that gap not yet elapsed in wall-clock time, in microseconds
+                if (usDelay > 0.0) {
+#if LIBAVUTIL_VERSION_CHECK(51, 34, 0, 61, 100)
+                    av_usleep(usDelay);
+#else
+                    usleep(usDelay);
+#endif
+                }
+                lastFrameClock_ = now;
+                lastDts_ = decoded_frame->pkt_dts;
+            }
+        }
+        return Status::FrameFinished;
+    }
+
+    return Status::Success;
+}
+
 VideoDecoder::Status
 VideoDecoder::flush(VideoFrame& result)
 {
@@ -278,4 +414,32 @@ int VideoDecoder::getHeight() const
 int VideoDecoder::getPixelFormat() const
 { return libav_utils::sfl_pixel_format(decoderCtx_->pix_fmt); }
 
+void VideoDecoder::writeToRingBuffer(AVFrame* decoded_frame,
+                                     sfl::RingBuffer& rb,
+                                     const sfl::AudioFormat outFormat)
+{
+    const sfl::AudioFormat decoderFormat = {
+        (unsigned) decoded_frame->sample_rate,
+        (unsigned) decoderCtx_->channels
+    };
+    // Push the decoded frame into rb: de-interleave it into an AudioBuffer, then resample to outFormat.sample_rate only if the rates differ.
+    sfl::AudioBuffer out(decoded_frame->nb_samples, decoderFormat);
+    // NOTE(review): data[0] is read as interleaved SFLAudioSample; assumes the decoder emits packed samples of that type - confirm.
+    out.deinterleave(reinterpret_cast<const SFLAudioSample*>(decoded_frame->data[0]),
+                     decoded_frame->nb_samples, decoderCtx_->channels);
+    if ((unsigned)decoded_frame->sample_rate != outFormat.sample_rate) {
+        if (!resampler_) { // created on the first rate mismatch, then reused
+            SFL_DBG("Creating audio resampler");
+            resampler_.reset(new sfl::Resampler(outFormat));
+        }
+        sfl::AudioBuffer resampledData(decoded_frame->nb_samples,
+                                       {(unsigned) outFormat.sample_rate,
+                                        (unsigned) decoderCtx_->channels}); // keeps the decoder's channel count; only the rate follows outFormat
+        resampler_->resample(out, resampledData); // NOTE(review): presumably resizes resampledData to the resampled length - verify
+        rb.put(resampledData);
+    } else {
+        rb.put(out);
+    }
+}
+
 }
diff --git a/daemon/src/video/video_decoder.h b/daemon/src/video/video_decoder.h
index 69e092733a7978932e827960ebe0f771d1999d49..6b2ad115781415a79375f2b772ab0741650bc593 100644
--- a/daemon/src/video/video_decoder.h
+++ b/daemon/src/video/video_decoder.h
@@ -38,6 +38,14 @@
 
 #include <map>
 #include <string>
+#include <memory>
+
+namespace sfl {
+    class AudioBuffer;
+    class AudioFormat;
+    class RingBuffer;
+    class Resampler;
+}
 
 class AVCodecContext;
 class AVStream;
@@ -65,7 +73,11 @@ namespace sfl_video {
         int openInput(const std::string &source_str,
                       const std::string &format_str);
         int setupFromVideoData();
+        int setupFromAudioData();
         Status decode(VideoFrame&, VideoPacket&);
+        Status decode_audio(AVFrame* frame);
+        void writeToRingBuffer(AVFrame* frame, sfl::RingBuffer& rb,
+                               const sfl::AudioFormat outFormat);
         Status flush(VideoFrame&);
 
         int getWidth() const;
@@ -80,6 +92,7 @@ namespace sfl_video {
         AVCodec *inputDecoder_ = nullptr;
         AVCodecContext *decoderCtx_ = nullptr;
         AVFormatContext *inputCtx_ = nullptr;
+        std::unique_ptr<sfl::Resampler> resampler_;
         int streamIndex_ = -1;
         bool emulateRate_ = false;
         int64_t startTime_;
diff --git a/daemon/src/video/video_encoder.cpp b/daemon/src/video/video_encoder.cpp
index 724c57ac476e3959a1aa9c927890047ddc47dbf7..08f79898b01aef29e2c6804068221f8e7df767e3 100644
--- a/daemon/src/video/video_encoder.cpp
+++ b/daemon/src/video/video_encoder.cpp
@@ -31,10 +31,12 @@
 
 #include "libav_deps.h"
 #include "video_encoder.h"
+#include "audio/audiobuffer.h"
 #include "logger.h"
 
 #include <iostream>
 #include <sstream>
+#include <algorithm>
 
 
 namespace sfl_video {
@@ -78,17 +80,28 @@ void VideoEncoder::setOptions(const std::map<std::string, std::string>& options)
     const char *value;
 
     value = extract(options, "width");
-    if (!value)
-        throw VideoEncoderException("width option not set");
-    av_dict_set(&options_, "width", value, 0);
+    if (value)
+        av_dict_set(&options_, "width", value, 0);
 
     value = extract(options, "height");
-    if (!value)
-        throw VideoEncoderException("height option not set");
-    av_dict_set(&options_, "height", value, 0);
+    if (value)
+        av_dict_set(&options_, "height", value, 0);
 
     value = extract(options, "bitrate") ? : "";
-    av_dict_set(&options_, "bitrate", value, 0);
+    if (value)
+        av_dict_set(&options_, "bitrate", value, 0);
+
+    value = extract(options, "sample_rate") ? : "";
+    if (value)
+        av_dict_set(&options_, "sample_rate", value, 0);
+
+    value = extract(options, "channels") ? : "";
+    if (value)
+        av_dict_set(&options_, "channels", value, 0);
+
+    value = extract(options, "frame_size") ? : "";
+    if (value)
+        av_dict_set(&options_, "frame_size", value, 0);
 
     value = extract(options, "framerate");
     if (value)
@@ -105,7 +118,7 @@ void VideoEncoder::setOptions(const std::map<std::string, std::string>& options)
 
 void
 VideoEncoder::openOutput(const char *enc_name, const char *short_name,
-                             const char *filename, const char *mime_type)
+                         const char *filename, const char *mime_type, bool is_video)
 {
     AVOutputFormat *oformat = av_guess_format(short_name, filename, mime_type);
 
@@ -126,7 +139,7 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name,
         throw VideoEncoderException("No output encoder");
     }
 
-    prepareEncoderContext();
+    prepareEncoderContext(is_video);
 
     /* let x264 preset override our encoder settings */
     if (!strcmp(enc_name, "libx264")) {
@@ -161,26 +174,28 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name,
 
     stream_->codec = encoderCtx_;
 
-    // allocate buffers for both scaled (pre-encoder) and encoded frames
-    const int width = encoderCtx_->width;
-    const int height = encoderCtx_->height;
-    const int format = libav_utils::sfl_pixel_format((int)encoderCtx_->pix_fmt);
-    scaledFrameBufferSize_ = scaledFrame_.getSize(width, height, format);
-    if (scaledFrameBufferSize_ <= FF_MIN_BUFFER_SIZE)
-        throw VideoEncoderException("buffer too small");
+    if (is_video) {
+        // allocate buffers for both scaled (pre-encoder) and encoded frames
+        const int width = encoderCtx_->width;
+        const int height = encoderCtx_->height;
+        const int format = libav_utils::sfl_pixel_format((int)encoderCtx_->pix_fmt);
+        scaledFrameBufferSize_ = scaledFrame_.getSize(width, height, format);
+        if (scaledFrameBufferSize_ <= FF_MIN_BUFFER_SIZE)
+            throw VideoEncoderException("buffer too small");
 
 #if (LIBAVCODEC_VERSION_MAJOR < 54)
-    encoderBufferSize_ = scaledFrameBufferSize_; // seems to be ok
-    encoderBuffer_ = (uint8_t*) av_malloc(encoderBufferSize_);
-    if (!encoderBuffer_)
-        throw VideoEncoderException("Could not allocate encoder buffer");
+        encoderBufferSize_ = scaledFrameBufferSize_; // seems to be ok
+        encoderBuffer_ = (uint8_t*) av_malloc(encoderBufferSize_);
+        if (!encoderBuffer_)
+            throw VideoEncoderException("Could not allocate encoder buffer");
 #endif
 
-    scaledFrameBuffer_ = (uint8_t*) av_malloc(scaledFrameBufferSize_);
-    if (!scaledFrameBuffer_)
-        throw VideoEncoderException("Could not allocate scaled frame buffer");
+        scaledFrameBuffer_ = (uint8_t*) av_malloc(scaledFrameBufferSize_);
+        if (!scaledFrameBuffer_)
+            throw VideoEncoderException("Could not allocate scaled frame buffer");
 
-    scaledFrame_.setDestination(scaledFrameBuffer_, width, height, format);
+        scaledFrame_.setDestination(scaledFrameBuffer_, width, height, format);
+    }
 }
 
 void VideoEncoder::setInterruptCallback(int (*cb)(void*), void *opaque)
@@ -306,6 +321,96 @@ int VideoEncoder::encode(VideoFrame &input, bool is_keyframe, int64_t frame_numb
     return ret;
 }
 
+int VideoEncoder::encode_audio(const sfl::AudioBuffer &buffer)
+{
+    // Interleaves buffer as S16, encodes it in chunks of at most encoderCtx_->frame_size samples and muxes each packet; 0 on success, negative on error.
+    const int needed_bytes = av_samples_get_buffer_size(NULL, buffer.channels(), buffer.frames(), AV_SAMPLE_FMT_S16, 0);
+    if (needed_bytes < 0) {
+        SFL_ERR("Couldn't calculate buffer size");
+        return -1;
+    }
+    SFLAudioSample *sample_data = reinterpret_cast<SFLAudioSample*>(av_malloc(needed_bytes));
+    if (!sample_data)
+        return -1;
+
+    SFLAudioSample *offset_ptr = sample_data;
+    int nb_frames = buffer.frames();
+
+    buffer.interleave(sample_data);
+    const auto layout = buffer.channels() == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+    const auto sample_rate = buffer.getSampleRate();
+
+    while (nb_frames > 0) {
+        AVFrame *frame = av_frame_alloc(); // matches the av_frame_free() calls below
+        if (!frame) {
+            av_freep(&sample_data);
+            return -1;
+        }
+
+        if (encoderCtx_->frame_size)
+            frame->nb_samples = std::min<int>(nb_frames, encoderCtx_->frame_size);
+        else
+            frame->nb_samples = nb_frames;
+
+        frame->format = AV_SAMPLE_FMT_S16;
+        frame->channel_layout = layout;
+        frame->sample_rate = sample_rate;
+
+        const auto buffer_size = av_samples_get_buffer_size(NULL, buffer.channels(), frame->nb_samples, AV_SAMPLE_FMT_S16, 0);
+        // Hand the current chunk (starting at offset_ptr) to the frame.
+        int err = avcodec_fill_audio_frame(frame, buffer.channels(), AV_SAMPLE_FMT_S16,
+                    reinterpret_cast<const uint8_t *>(offset_ptr), buffer_size, 0);
+        if (err < 0) {
+            char errbuf[128];
+            av_strerror(err, errbuf, sizeof(errbuf));
+            SFL_ERR("Couldn't fill audio frame: %s: %d %d", errbuf, frame->nb_samples, buffer_size);
+            av_freep(&sample_data);
+            av_frame_free(&frame);
+            return -1;
+        }
+        nb_frames -= frame->nb_samples;
+        offset_ptr += frame->nb_samples * buffer.channels();
+
+        AVPacket pkt;
+        av_init_packet(&pkt);
+        pkt.data = NULL; // packet data will be allocated by the encoder
+        pkt.size = 0;
+
+        int got_packet = 0;
+        int ret = avcodec_encode_audio2(encoderCtx_, &pkt, frame, &got_packet);
+        if (ret < 0) {
+            print_averror("avcodec_encode_audio2", ret);
+            av_free_packet(&pkt);
+            av_freep(&sample_data);
+            av_frame_free(&frame);
+            return ret;
+        }
+
+        if (pkt.size and got_packet) {
+            if (pkt.pts != AV_NOPTS_VALUE)
+                pkt.pts = av_rescale_q(pkt.pts, encoderCtx_->time_base, stream_->time_base);
+            if (pkt.dts != AV_NOPTS_VALUE)
+                pkt.dts = av_rescale_q(pkt.dts, encoderCtx_->time_base, stream_->time_base);
+
+            pkt.stream_index = stream_->index;
+
+            // write the compressed frame; a failure here is logged but does not abort the call
+            ret = av_write_frame(outputCtx_, &pkt);
+            if (ret < 0)
+                print_averror("av_write_frame", ret);
+        }
+
+        av_free_packet(&pkt);
+        av_frame_free(&frame);
+    }
+
+    //SFL_WARN("%d", *std::max_element(sample_data, sample_data + needed_bytes / 2));
+
+    av_freep(&sample_data);
+
+    return 0;
+}
+
 int VideoEncoder::flush()
 {
     AVPacket pkt;
@@ -370,7 +475,7 @@ void VideoEncoder::print_sdp(std::string &sdp_)
     SFL_DBG("Sending SDP: \n%s", sdp_.c_str());
 }
 
-void VideoEncoder::prepareEncoderContext()
+void VideoEncoder::prepareEncoderContext(bool is_video)
 {
 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 12, 0)
     encoderCtx_ = avcodec_alloc_context();
@@ -385,26 +490,61 @@ void VideoEncoder::prepareEncoderContext()
                                                     NULL, 0)->value);
     SFL_DBG("Using bitrate %d", encoderCtx_->bit_rate);
 
-    // resolution must be a multiple of two
-    char *width = av_dict_get(options_, "width", NULL, 0)->value;
-    dstWidth_ = encoderCtx_->width = width ? atoi(width) : 0;
-    char *height = av_dict_get(options_, "height", NULL, 0)->value;
-    dstHeight_ = encoderCtx_->height = height ? atoi(height) : 0;
-
-    const char *framerate = av_dict_get(options_, "framerate",
-                                        NULL, 0)->value;
-    const int DEFAULT_FPS = 30;
-    const int fps = framerate ? atoi(framerate) : DEFAULT_FPS;
-    encoderCtx_->time_base = (AVRational) {1, fps};
-    // emit one intra frame every gop_size frames
-    encoderCtx_->max_b_frames = 0;
-    encoderCtx_->pix_fmt = PIXEL_FORMAT(YUV420P); // TODO: option me !
-
-    // Fri Jul 22 11:37:59 EDT 2011:tmatth:XXX: DON'T set this, we want our
-    // pps and sps to be sent in-band for RTP
-    // This is to place global headers in extradata instead of every
-    // keyframe.
-    // encoderCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER;
+    if (is_video) {
+        // resolution must be a multiple of two; an unset dimension becomes 0
+        auto width = av_dict_get(options_, "width", NULL, 0);
+        dstWidth_ = encoderCtx_->width = width ? atoi(width->value) : 0;
+        auto height = av_dict_get(options_, "height", NULL, 0);
+        dstHeight_ = encoderCtx_->height = height ? atoi(height->value) : 0;
+
+        // av_dict_get() returns NULL for a missing key, so test the entry itself
+        auto framerate = av_dict_get(options_, "framerate", NULL, 0);
+        const int DEFAULT_FPS = 30;
+        const int fps = framerate ? atoi(framerate->value) : DEFAULT_FPS;
+        encoderCtx_->time_base = (AVRational) {1, fps};
+        // never emit B-frames
+        encoderCtx_->max_b_frames = 0;
+        encoderCtx_->pix_fmt = PIXEL_FORMAT(YUV420P); // TODO: option me !
+
+        // Fri Jul 22 11:37:59 EDT 2011:tmatth:XXX: DON'T set this, we want our
+        // pps and sps to be sent in-band for RTP
+        // This is to place global headers in extradata instead of every
+        // keyframe.
+        // encoderCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER;
+
+    } else { // audio: sample format, rate, channel count/layout and frame size
+        encoderCtx_->sample_fmt = AV_SAMPLE_FMT_S16;
+        auto v = av_dict_get(options_, "sample_rate", NULL, 0);
+        if (v) {
+            encoderCtx_->sample_rate = atoi(v->value);
+        } else {
+            SFL_WARN("No sample rate set");
+            encoderCtx_->sample_rate = 8000; // fallback when the option is absent
+        }
+
+        v = av_dict_get(options_, "channels", NULL, 0);
+        if (v) {
+            auto c = std::atoi(v->value);
+            if (c > 2 or c < 1) { // only mono and stereo are accepted
+                SFL_WARN("Clamping invalid channel value %d", c);
+                c = 1; // fall back to mono
+            }
+            encoderCtx_->channels = c;
+        } else {
+            SFL_WARN("Channels not set");
+            encoderCtx_->channels = 1;
+        }
+
+        encoderCtx_->channel_layout = encoderCtx_->channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+
+        v = av_dict_get(options_, "frame_size", NULL, 0);
+        if (v) {
+            encoderCtx_->frame_size = atoi(v->value);
+            SFL_DBG("Frame size %d", encoderCtx_->frame_size);
+        } else {
+            SFL_WARN("Frame size not set"); // frame_size is left untouched here
+        }
+    }
 }
 
 void VideoEncoder::forcePresetX264()
diff --git a/daemon/src/video/video_encoder.h b/daemon/src/video/video_encoder.h
index 95c32d6fc78e34d01985799d6c90b6ec21d102de..92e0d58322a873b521927cfbd93c6101ef585e8f 100644
--- a/daemon/src/video/video_encoder.h
+++ b/daemon/src/video/video_encoder.h
@@ -44,6 +44,10 @@ class AVStream;
 class AVFormatContext;
 class AVCodec;
 
+namespace sfl {
+    class AudioBuffer; // forward declaration; taken by const reference in encode_audio()
+}
+
 namespace sfl_video {
 
 class VideoEncoderException : public std::runtime_error {
@@ -61,9 +65,10 @@ public:
     void setInterruptCallback(int (*cb)(void*), void *opaque);
     void setIOContext(const std::unique_ptr<VideoIOHandle> &ioctx);
     void openOutput(const char *enc_name, const char *short_name,
-                   const char *filename, const char *mime_type);
+                   const char *filename, const char *mime_type, bool is_video);
     void startIO();
     int encode(VideoFrame &input, bool is_keyframe, int64_t frame_number);
+    int encode_audio(const sfl::AudioBuffer &input);
     int flush();
     void print_sdp(std::string &sdp_);
 
@@ -76,7 +81,7 @@ public:
 private:
     NON_COPYABLE(VideoEncoder);
     void setScaleDest(void *data, int width, int height, int pix_fmt);
-    void prepareEncoderContext();
+    void prepareEncoderContext(bool is_video);
     void forcePresetX264();
     void extractProfileLevelID(const std::string &parameters, AVCodecContext *ctx);
 
diff --git a/daemon/src/video/video_sender.cpp b/daemon/src/video/video_sender.cpp
index ee4387ab74d18f9bdfeb1a09ea9613b5a9f45ac5..c109bd6cc97d856669802e0af40565983aff0338 100644
--- a/daemon/src/video/video_sender.cpp
+++ b/daemon/src/video/video_sender.cpp
@@ -54,7 +54,7 @@ VideoSender::VideoSender(std::map<string, string> args,
 
     /* Encoder setup (may throw VideoEncoderException) */
     videoEncoder_->setOptions(args);
-    videoEncoder_->openOutput(enc_name, "rtp", dest, NULL);
+    videoEncoder_->openOutput(enc_name, "rtp", dest, NULL, true);
     videoEncoder_->setIOContext(muxContext_);
     videoEncoder_->startIO();