diff --git a/daemon/bin/main.cpp b/daemon/bin/main.cpp
index 20831ad78b9f2f72abad64f2e8fcb2db76b5bb16..26f293ac602203e4d5eca206779b786abf8e9d08 100644
--- a/daemon/bin/main.cpp
+++ b/daemon/bin/main.cpp
@@ -173,7 +173,7 @@ int main(int argc, char *argv [])
 
 #ifdef TOP_BUILDDIR
     if (!getenv("CODECS_PATH"))
-        setenv("CODECS_PATH", TOP_BUILDDIR "/src/audio/codecs", 1);
+        setenv("CODECS_PATH", TOP_BUILDDIR "/src/media/audio/codecs", 1);
 #endif
 
     print_title();
diff --git a/daemon/bin/osxmain.cpp b/daemon/bin/osxmain.cpp
index e19d5aea9f4a33ae644c3edd2d25ef3a3b1c0e58..61c001507919797aa254a5885a386caa642cbc2c 100644
--- a/daemon/bin/osxmain.cpp
+++ b/daemon/bin/osxmain.cpp
@@ -210,7 +210,7 @@ int main(int argc, char *argv [])
 
 #ifdef TOP_BUILDDIR
     if (!getenv("CODECS_PATH"))
-        setenv("CODECS_PATH", TOP_BUILDDIR "/src/audio/codecs", 1);
+        setenv("CODECS_PATH", TOP_BUILDDIR "/src/media/audio/codecs", 1);
 #endif
 
     print_title();
diff --git a/daemon/src/media/Makefile.am b/daemon/src/media/Makefile.am
index 3cdc6f3b690f49c8712ee871833465bee4d200b1..b245d41e7d7b2f1a53fe1fd7c1777b1678e77b18 100644
--- a/daemon/src/media/Makefile.am
+++ b/daemon/src/media/Makefile.am
@@ -9,11 +9,19 @@ SUBDIRS += video
 endif
 
 libmedia_la_SOURCES = \
-	libav_utils.cpp
+	libav_utils.cpp \
+	socket_pair.cpp \
+	media_decoder.cpp \
+	media_encoder.cpp \
+	media_io_handle.cpp
 
 noinst_HEADERS = \
 	libav_utils.h \
-	libav_deps.h
+	libav_deps.h \
+	socket_pair.h \
+	media_decoder.h \
+	media_encoder.h \
+	media_io_handle.h
 
 libmedia_la_LIBADD = \
 	./audio/libaudio.la
diff --git a/daemon/src/media/audio/audiortp/avformat_rtp_session.cpp b/daemon/src/media/audio/audiortp/avformat_rtp_session.cpp
index 09b30db2c0cc3f0e04225def00bd41e1d7f3835d..17396842c2f77ffb64af64d944242291e08ac033 100644
--- a/daemon/src/media/audio/audiortp/avformat_rtp_session.cpp
+++ b/daemon/src/media/audio/audiortp/avformat_rtp_session.cpp
@@ -32,11 +32,16 @@
 #include "logger.h"
 #include "noncopyable.h"
 #include "sip/sdp.h"
-#include "video/socket_pair.h"
+
+#ifdef RING_VIDEO
 #include "video/video_base.h"
-#include "video/video_encoder.h"
-#include "video/video_decoder.h"
+#endif //RING_VIDEO
+
+#include "socket_pair.h"
 #include "libav_deps.h"
+#include "media_encoder.h"
+#include "media_decoder.h"
+#include "media_io_handle.h"
 #include "audio/audiobuffer.h"
 #include "audio/ringbufferpool.h"
 #include "audio/resampler.h"
@@ -44,29 +49,32 @@
 #include <sstream>
 
 namespace ring {
-using ring::video::SocketPair;
-using ring::video::VideoEncoder;
-using ring::video::VideoIOHandle;
-using ring::video::VideoEncoderException;
+
+using ring::MediaEncoder;
+using ring::MediaDecoder;
+using ring::MediaEncoderException;
+using ring::MediaIOHandle;
 
 class AudioSender {
     public:
         AudioSender(const std::string& id,
                     std::map<std::string, std::string> txArgs,
-                    ring::video::SocketPair& socketPair);
+                    SocketPair& socketPair);
         ~AudioSender();
 
     private:
         NON_COPYABLE(AudioSender);
 
         bool waitForDataEncode(const std::chrono::milliseconds& max_wait) const;
-        bool setup(ring::video::SocketPair& socketPair);
+        bool setup(SocketPair& socketPair);
 
         std::string id_;
         std::map<std::string, std::string> args_;
         const AudioFormat format_;
-        std::unique_ptr<ring::video::VideoEncoder> audioEncoder_;
-        std::unique_ptr<ring::video::VideoIOHandle> muxContext_;
+        std::unique_ptr<ring::MediaEncoder> audioEncoder_;
+#ifdef RING_VIDEO
+        std::unique_ptr<ring::MediaIOHandle> muxContext_;
+#endif
         std::unique_ptr<ring::Resampler> resampler_;
         const double secondsPerPacket_ {0.02}; // 20 ms
 
@@ -101,16 +109,20 @@ AudioSender::setup(SocketPair& socketPair)
     auto enc_name = args_["codec"].c_str();
     auto dest = args_["destination"].c_str();
 
-    audioEncoder_.reset(new VideoEncoder);
+    audioEncoder_.reset(new MediaEncoder);
+#ifdef RING_VIDEO
     muxContext_.reset(socketPair.createIOContext());
+#endif // RING_VIDEO
 
     try {
         /* Encoder setup */
         audioEncoder_->setOptions(args_);
         audioEncoder_->openOutput(enc_name, "rtp", dest, NULL, false);
+#ifdef RING_VIDEO
         audioEncoder_->setIOContext(muxContext_);
+#endif // RING_VIDEO
         audioEncoder_->startIO();
-    } catch (const VideoEncoderException &e) {
+    } catch (const MediaEncoderException &e) {
         RING_ERR("%s", e.what());
         return false;
     }
@@ -126,7 +138,9 @@ void
 AudioSender::cleanup()
 {
     audioEncoder_.reset();
+#ifdef RING_VIDEO
     muxContext_.reset();
+#endif // RING_VIDEO
 }
 
 void
@@ -190,7 +204,7 @@ class AudioReceiveThread
     public:
         AudioReceiveThread(const std::string &id, const std::string &sdp);
         ~AudioReceiveThread();
-        void addIOContext(ring::video::SocketPair &socketPair);
+        void addIOContext(SocketPair &socketPair);
         void startLoop();
 
     private:
@@ -211,9 +225,10 @@ class AudioReceiveThread
         /*-----------------------------------------------------------------*/
         const std::string id_;
         std::istringstream stream_;
-        std::unique_ptr<ring::video::VideoDecoder> audioDecoder_;
-        std::unique_ptr<ring::video::VideoIOHandle> sdpContext_;
-        std::unique_ptr<ring::video::VideoIOHandle> demuxContext_;
+        std::unique_ptr<ring::MediaDecoder> audioDecoder_;
+        std::unique_ptr<ring::MediaIOHandle> sdpContext_;
+        std::unique_ptr<ring::MediaIOHandle> demuxContext_;
+
         std::shared_ptr<ring::RingBuffer> ringbuffer_;
 
         ThreadLoop loop_;
@@ -225,7 +240,7 @@ class AudioReceiveThread
 AudioReceiveThread::AudioReceiveThread(const std::string& id, const std::string& sdp)
     : id_(id)
     , stream_(sdp)
-    , sdpContext_(new VideoIOHandle(sdp.size(), false, &readFunction, 0, 0, this))
+    , sdpContext_(new MediaIOHandle(sdp.size(), false, &readFunction, 0, 0, this))
     , loop_(std::bind(&AudioReceiveThread::setup, this),
             std::bind(&AudioReceiveThread::process, this),
             std::bind(&AudioReceiveThread::cleanup, this))
@@ -240,7 +255,7 @@ AudioReceiveThread::~AudioReceiveThread()
 bool
 AudioReceiveThread::setup()
 {
-    audioDecoder_.reset(new ring::video::VideoDecoder());
+    audioDecoder_.reset(new ring::MediaDecoder());
     audioDecoder_->setInterruptCallback(interruptCb, this);
     // custom_io so the SDP demuxer will not open any UDP connections
     args_["sdp_flags"] = "custom_io";
@@ -267,12 +282,12 @@ AudioReceiveThread::process()
 
     switch (audioDecoder_->decode_audio(decodedFrame.get())) {
 
-        case ring::video::VideoDecoder::Status::FrameFinished:
+        case ring::MediaDecoder::Status::FrameFinished:
             audioDecoder_->writeToRingBuffer(decodedFrame.get(), *ringbuffer_,
                                              mainBuffFormat);
             return;
 
-        case ring::video::VideoDecoder::Status::DecodeError:
+        case ring::MediaDecoder::Status::DecodeError:
             RING_WARN("decoding failure, trying to reset decoder...");
             if (not setup()) {
                 RING_ERR("fatal error, rx thread re-setup failed");
@@ -286,7 +301,7 @@ AudioReceiveThread::process()
             }
             break;
 
-        case ring::video::VideoDecoder::Status::ReadError:
+        case ring::MediaDecoder::Status::ReadError:
             RING_ERR("fatal error, read failed");
             loop_.stop();
             break;
@@ -430,7 +445,7 @@ AVFormatRtpSession::startSender()
 
     try {
         sender_.reset(new AudioSender(id_, txArgs_, *socketPair_));
-    } catch (const VideoEncoderException &e) {
+    } catch (const MediaEncoderException &e) {
         RING_ERR("%s", e.what());
         sending_ = false;
     }
diff --git a/daemon/src/media/audio/audiortp/avformat_rtp_session.h b/daemon/src/media/audio/audiortp/avformat_rtp_session.h
index 1d1fef0336e85da9d28636b4a6ae1bfe963d9d69..a8c56c417abc1f6be2467e14408a4956776c6acb 100644
--- a/daemon/src/media/audio/audiortp/avformat_rtp_session.h
+++ b/daemon/src/media/audio/audiortp/avformat_rtp_session.h
@@ -40,16 +40,13 @@
 #include <memory>
 #include <mutex>
 
-namespace ring { namespace video {
-class SocketPair;
-class VideoEncoder;
-}}
-
 class Sdp;
 class ThreadLoop;
 
 namespace ring {
 
+class MediaEncoder;
+class SocketPair;
 class RingBuffer;
 class Resampler;
 class AudioSender;
@@ -78,7 +75,7 @@ class AVFormatRtpSession {
         std::string id_;
         std::map<std::string, std::string> txArgs_;
         std::string receivingSDP_;
-        std::unique_ptr<ring::video::SocketPair> socketPair_;
+        std::unique_ptr<ring::SocketPair> socketPair_;
         std::unique_ptr<AudioSender> sender_;
         std::unique_ptr<AudioReceiveThread> receiveThread_;
         std::shared_ptr<ring::RingBuffer> ringbuffer_;
diff --git a/daemon/src/media/video/video_decoder.cpp b/daemon/src/media/media_decoder.cpp
similarity index 92%
rename from daemon/src/media/video/video_decoder.cpp
rename to daemon/src/media/media_decoder.cpp
index 821ced2e2c95cd2cbe831806bd6d0335476ac1a7..4a14fef7885303e6ff74f660e3d356533d4ce536 100644
--- a/daemon/src/media/video/video_decoder.cpp
+++ b/daemon/src/media/media_decoder.cpp
@@ -31,7 +31,8 @@
 
 // libav_deps.h must be included first
 #include "libav_deps.h"
-#include "video_decoder.h"
+#include "media_decoder.h"
+#include "media_io_handle.h"
 #include "audio/audiobuffer.h"
 #include "audio/ringbuffer.h"
 #include "audio/resampler.h"
@@ -40,18 +41,18 @@
 #include <iostream>
 #include <unistd.h>
 
-namespace ring { namespace video {
+namespace ring {
 
 using std::string;
 
-VideoDecoder::VideoDecoder() :
+MediaDecoder::MediaDecoder() :
     inputCtx_(avformat_alloc_context()),
     startTime_(AV_NOPTS_VALUE),
     lastDts_(AV_NOPTS_VALUE)
 {
 }
 
-VideoDecoder::~VideoDecoder()
+MediaDecoder::~MediaDecoder()
 {
     if (decoderCtx_)
         avcodec_close(decoderCtx_);
@@ -66,7 +67,7 @@ VideoDecoder::~VideoDecoder()
 }
 
 void
-VideoDecoder::extract(const std::map<std::string, std::string>& map, const std::string& key)
+MediaDecoder::extract(const std::map<std::string, std::string>& map, const std::string& key)
 {
     auto iter = map.find(key);
 
@@ -75,7 +76,7 @@ VideoDecoder::extract(const std::map<std::string, std::string>& map, const std::
 }
 
 void
-VideoDecoder::setOptions(const std::map<std::string, std::string>& options)
+MediaDecoder::setOptions(const std::map<std::string, std::string>& options)
 {
     extract(options, "framerate");
     extract(options, "video_size");
@@ -84,7 +85,7 @@ VideoDecoder::setOptions(const std::map<std::string, std::string>& options)
     extract(options, "sdp_flags");
 }
 
-int VideoDecoder::openInput(const std::string &source_str,
+int MediaDecoder::openInput(const std::string &source_str,
                             const std::string &format_str)
 {
     AVInputFormat *iformat = av_find_input_format(format_str.c_str());
@@ -106,7 +107,7 @@ int VideoDecoder::openInput(const std::string &source_str,
     return ret;
 }
 
-void VideoDecoder::setInterruptCallback(int (*cb)(void*), void *opaque)
+void MediaDecoder::setInterruptCallback(int (*cb)(void*), void *opaque)
 {
     if (cb) {
         inputCtx_->interrupt_callback.callback = cb;
@@ -116,10 +117,10 @@ void VideoDecoder::setInterruptCallback(int (*cb)(void*), void *opaque)
     }
 }
 
-void VideoDecoder::setIOContext(VideoIOHandle *ioctx)
+void MediaDecoder::setIOContext(MediaIOHandle *ioctx)
 { inputCtx_->pb = ioctx->getContext(); }
 
-int VideoDecoder::setupFromAudioData()
+int MediaDecoder::setupFromAudioData()
 {
     int ret;
 
@@ -197,7 +198,8 @@ int VideoDecoder::setupFromAudioData()
     return 0;
 }
 
-int VideoDecoder::setupFromVideoData()
+#ifdef RING_VIDEO
+int MediaDecoder::setupFromVideoData()
 {
     int ret;
 
@@ -277,8 +279,8 @@ int VideoDecoder::setupFromVideoData()
     return 0;
 }
 
-VideoDecoder::Status
-VideoDecoder::decode(VideoFrame& result, VideoPacket& video_packet)
+MediaDecoder::Status
+MediaDecoder::decode(ring::video::VideoFrame& result, ring::video::VideoPacket& video_packet)
 {
     AVPacket *inpacket = video_packet.get();
     int ret = av_read_frame(inputCtx_, inpacket);
@@ -328,9 +330,10 @@ VideoDecoder::decode(VideoFrame& result, VideoPacket& video_packet)
 
     return Status::Success;
 }
+#endif // RING_VIDEO
 
-VideoDecoder::Status
-VideoDecoder::decode_audio(AVFrame *decoded_frame)
+MediaDecoder::Status
+MediaDecoder::decode_audio(AVFrame *decoded_frame)
 {
     AVPacket inpacket;
     memset(&inpacket, 0, sizeof(inpacket));
@@ -386,8 +389,9 @@ VideoDecoder::decode_audio(AVFrame *decoded_frame)
     return Status::Success;
 }
 
-VideoDecoder::Status
-VideoDecoder::flush(VideoFrame& result)
+#ifdef RING_VIDEO
+MediaDecoder::Status
+MediaDecoder::flush(ring::video::VideoFrame& result)
 {
     AVPacket inpacket;
     memset(&inpacket, 0, sizeof(inpacket));
@@ -406,17 +410,18 @@ VideoDecoder::flush(VideoFrame& result)
 
     return Status::Success;
 }
+#endif // RING_VIDEO
 
-int VideoDecoder::getWidth() const
+int MediaDecoder::getWidth() const
 { return decoderCtx_->width; }
 
-int VideoDecoder::getHeight() const
+int MediaDecoder::getHeight() const
 { return decoderCtx_->height; }
 
-int VideoDecoder::getPixelFormat() const
+int MediaDecoder::getPixelFormat() const
 { return libav_utils::sfl_pixel_format(decoderCtx_->pix_fmt); }
 
-void VideoDecoder::writeToRingBuffer(AVFrame* decoded_frame,
+void MediaDecoder::writeToRingBuffer(AVFrame* decoded_frame,
                                      ring::RingBuffer& rb,
                                      const ring::AudioFormat outFormat)
 {
@@ -444,4 +449,4 @@ void VideoDecoder::writeToRingBuffer(AVFrame* decoded_frame,
     }
 }
 
-}}
+}
diff --git a/daemon/src/media/video/video_decoder.h b/daemon/src/media/media_decoder.h
similarity index 81%
rename from daemon/src/media/video/video_decoder.h
rename to daemon/src/media/media_decoder.h
index c58713124af363c09998f8d0cf52a628a3935af9..42fee0429aa3b2f2933bc6febc6983274bdf57d4 100644
--- a/daemon/src/media/video/video_decoder.h
+++ b/daemon/src/media/media_decoder.h
@@ -29,33 +29,47 @@
  *  as that of the covered work.
  */
 
-#ifndef __VIDEO_DECODER_H__
-#define __VIDEO_DECODER_H__
+#ifndef __MEDIA_DECODER_H__
+#define __MEDIA_DECODER_H__
+
+#include "config.h"
+
+#ifdef RING_VIDEO
+#include "video/video_base.h"
+#include "video/video_scaler.h"
+#endif // RING_VIDEO
 
-#include "video_base.h"
-#include "video_scaler.h"
 #include "noncopyable.h"
 
 #include <map>
 #include <string>
 #include <memory>
-
-namespace ring {
-    class AudioBuffer;
-    class AudioFormat;
-    class RingBuffer;
-    class Resampler;
-}
+#include <chrono>
 
 class AVCodecContext;
 class AVStream;
+class AVFrame;
+class AVDictionary;
 class AVFormatContext;
 class AVCodec;
 
+#ifdef RING_VIDEO
 namespace ring { namespace video {
+    class VideoFrame;
+    class VideoPacket;
+}}
+#endif // RING_VIDEO
 
-    class VideoDecoder {
-    public:
+namespace ring {
+    class AudioBuffer;
+    class AudioFormat;
+    class RingBuffer;
+    class Resampler;
+    class MediaIOHandle;
+
+    class MediaDecoder {
+
+public:
         enum class Status {
             Success,
             FrameFinished,
@@ -64,21 +78,25 @@ namespace ring { namespace video {
             DecodeError
         };
 
-        VideoDecoder();
-        ~VideoDecoder();
+        MediaDecoder();
+        ~MediaDecoder();
 
         void emulateRate() { emulateRate_ = true; }
         void setInterruptCallback(int (*cb)(void*), void *opaque);
-        void setIOContext(VideoIOHandle *ioctx);
         int openInput(const std::string &source_str,
                       const std::string &format_str);
+
+        void setIOContext(MediaIOHandle *ioctx);
+#ifdef RING_VIDEO
         int setupFromVideoData();
+        Status decode(ring::video::VideoFrame&, ring::video::VideoPacket&);
+        Status flush(ring::video::VideoFrame&);
+ #endif // RING_VIDEO
+
         int setupFromAudioData();
-        Status decode(VideoFrame&, VideoPacket&);
         Status decode_audio(AVFrame* frame);
         void writeToRingBuffer(AVFrame* frame, ring::RingBuffer& rb,
                                const ring::AudioFormat outFormat);
-        Status flush(VideoFrame&);
 
         int getWidth() const;
         int getHeight() const;
@@ -87,7 +105,7 @@ namespace ring { namespace video {
         void setOptions(const std::map<std::string, std::string>& options);
 
     private:
-        NON_COPYABLE(VideoDecoder);
+        NON_COPYABLE(MediaDecoder);
 
         AVCodec *inputDecoder_ = nullptr;
         AVCodecContext *decoderCtx_ = nullptr;
@@ -104,6 +122,6 @@ namespace ring { namespace video {
     protected:
         AVDictionary *options_ = nullptr;
     };
-}}
+}
 
-#endif // __VIDEO_DECODER_H__
+#endif // __MEDIA_DECODER_H__
diff --git a/daemon/src/media/video/video_encoder.cpp b/daemon/src/media/media_encoder.cpp
similarity index 92%
rename from daemon/src/media/video/video_encoder.cpp
rename to daemon/src/media/media_encoder.cpp
index 4ff26d4eb4b59ae1e998378cfeb76f397ececa49..7efc040328548cc8a374759c94a56975f383c6eb 100644
--- a/daemon/src/media/video/video_encoder.cpp
+++ b/daemon/src/media/media_encoder.cpp
@@ -30,7 +30,8 @@
  */
 
 #include "libav_deps.h"
-#include "video_encoder.h"
+#include "media_encoder.h"
+#include "media_io_handle.h"
 #include "audio/audiobuffer.h"
 #include "logger.h"
 
@@ -39,17 +40,19 @@
 #include <algorithm>
 
 
-namespace ring { namespace video {
+namespace ring {
 
 using std::string;
 
-VideoEncoder::VideoEncoder() :
-    outputCtx_(avformat_alloc_context()),
-    scaler_(),
-    scaledFrame_()
+MediaEncoder::MediaEncoder() :
+    outputCtx_(avformat_alloc_context()) // first: declared before scaler_ in the header
+#ifdef RING_VIDEO
+    , scaler_()
+    , scaledFrame_()
+#endif // RING_VIDEO
 {}
 
-VideoEncoder::~VideoEncoder()
+MediaEncoder::~MediaEncoder()
 {
     if (outputCtx_ and outputCtx_->priv_data)
         av_write_trailer(outputCtx_);
@@ -75,7 +78,7 @@ extract(const std::map<std::string, std::string>& map, const std::string& key)
     return iter->second.c_str();
 }
 
-void VideoEncoder::setOptions(const std::map<std::string, std::string>& options)
+void MediaEncoder::setOptions(const std::map<std::string, std::string>& options)
 {
     const char *value;
 
@@ -117,14 +120,14 @@ void VideoEncoder::setOptions(const std::map<std::string, std::string>& options)
 }
 
 void
-VideoEncoder::openOutput(const char *enc_name, const char *short_name,
+MediaEncoder::openOutput(const char *enc_name, const char *short_name,
                          const char *filename, const char *mime_type, bool is_video)
 {
     AVOutputFormat *oformat = av_guess_format(short_name, filename, mime_type);
 
     if (!oformat) {
         RING_ERR("Unable to find a suitable output format for %s", filename);
-        throw VideoEncoderException("No output format");
+        throw MediaEncoderException("No output format");
     }
 
     outputCtx_->oformat = oformat;
@@ -136,7 +139,7 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name,
     outputEncoder_ = avcodec_find_encoder_by_name(enc_name);
     if (!outputEncoder_) {
         RING_ERR("Encoder \"%s\" not found!", enc_name);
-        throw VideoEncoderException("No output encoder");
+        throw MediaEncoderException("No output encoder");
     }
 
     prepareEncoderContext(is_video);
@@ -161,7 +164,7 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name,
     ret = avcodec_open2(encoderCtx_, outputEncoder_, NULL);
 #endif
     if (ret)
-        throw VideoEncoderException("Could not open encoder");
+        throw MediaEncoderException("Could not open encoder");
 
     // add video stream to outputformat context
 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 8, 0)
@@ -170,10 +173,10 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name,
     stream_ = avformat_new_stream(outputCtx_, 0);
 #endif
     if (!stream_)
-        throw VideoEncoderException("Could not allocate stream");
+        throw MediaEncoderException("Could not allocate stream");
 
     stream_->codec = encoderCtx_;
-
+#ifdef RING_VIDEO
     if (is_video) {
         // allocate buffers for both scaled (pre-encoder) and encoded frames
         const int width = encoderCtx_->width;
@@ -181,24 +184,25 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name,
         const int format = libav_utils::sfl_pixel_format((int)encoderCtx_->pix_fmt);
         scaledFrameBufferSize_ = scaledFrame_.getSize(width, height, format);
         if (scaledFrameBufferSize_ <= FF_MIN_BUFFER_SIZE)
-            throw VideoEncoderException("buffer too small");
+            throw MediaEncoderException("buffer too small");
 
 #if (LIBAVCODEC_VERSION_MAJOR < 54)
         encoderBufferSize_ = scaledFrameBufferSize_; // seems to be ok
         encoderBuffer_ = (uint8_t*) av_malloc(encoderBufferSize_);
         if (!encoderBuffer_)
-            throw VideoEncoderException("Could not allocate encoder buffer");
+            throw MediaEncoderException("Could not allocate encoder buffer");
 #endif
 
         scaledFrameBuffer_ = (uint8_t*) av_malloc(scaledFrameBufferSize_);
         if (!scaledFrameBuffer_)
-            throw VideoEncoderException("Could not allocate scaled frame buffer");
+            throw MediaEncoderException("Could not allocate scaled frame buffer");
 
         scaledFrame_.setDestination(scaledFrameBuffer_, width, height, format);
     }
+#endif // RING_VIDEO
 }
 
-void VideoEncoder::setInterruptCallback(int (*cb)(void*), void *opaque)
+void MediaEncoder::setInterruptCallback(int (*cb)(void*), void *opaque)
 {
     if (cb) {
         outputCtx_->interrupt_callback.callback = cb;
@@ -208,18 +212,18 @@ void VideoEncoder::setInterruptCallback(int (*cb)(void*), void *opaque)
     }
 }
 
-void VideoEncoder::setIOContext(const std::unique_ptr<VideoIOHandle> &ioctx)
+void MediaEncoder::setIOContext(const std::unique_ptr<MediaIOHandle> &ioctx)
 {
     outputCtx_->pb = ioctx->getContext();
     outputCtx_->packet_size = outputCtx_->pb->buffer_size;
 }
 
 void
-VideoEncoder::startIO()
+MediaEncoder::startIO()
 {
     if (avformat_write_header(outputCtx_, options_ ? &options_ : NULL)) {
         RING_ERR("Could not write header for output file... check codec parameters");
-        throw VideoEncoderException("Failed to write output file header");
+        throw MediaEncoderException("Failed to write output file header");
     }
 
     av_dump_format(outputCtx_, 0, outputCtx_->filename, 1);
@@ -233,7 +237,8 @@ print_averror(const char *funcname, int err)
     RING_ERR("%s failed: %s", funcname, errbuf);
 }
 
-int VideoEncoder::encode(VideoFrame &input, bool is_keyframe, int64_t frame_number)
+#ifdef RING_VIDEO
+int MediaEncoder::encode(ring::video::VideoFrame &input, bool is_keyframe, int64_t frame_number)
 {
     /* Prepare a frame suitable to our encoder frame format,
      * keeping also the input aspect ratio.
@@ -320,8 +325,9 @@ int VideoEncoder::encode(VideoFrame &input, bool is_keyframe, int64_t frame_numb
 
     return ret;
 }
+#endif // RING_VIDEO
 
-int VideoEncoder::encode_audio(const ring::AudioBuffer &buffer)
+int MediaEncoder::encode_audio(const ring::AudioBuffer &buffer)
 {
     const int needed_bytes = av_samples_get_buffer_size(NULL, buffer.channels(), buffer.frames(), AV_SAMPLE_FMT_S16, 0);
     if (needed_bytes < 0) {
@@ -411,7 +417,7 @@ int VideoEncoder::encode_audio(const ring::AudioBuffer &buffer)
     return 0;
 }
 
-int VideoEncoder::flush()
+int MediaEncoder::flush()
 {
     AVPacket pkt;
     memset(&pkt, 0, sizeof(pkt));
@@ -457,7 +463,7 @@ int VideoEncoder::flush()
     return ret;
 }
 
-void VideoEncoder::print_sdp(std::string &sdp_)
+void MediaEncoder::print_sdp(std::string &sdp_)
 {
     /* theora sdp can be huge */
     const size_t sdp_size = outputCtx_->streams[0]->codec->extradata_size \
@@ -475,7 +481,7 @@ void VideoEncoder::print_sdp(std::string &sdp_)
     RING_DBG("Sending SDP: \n%s", sdp_.c_str());
 }
 
-void VideoEncoder::prepareEncoderContext(bool is_video)
+void MediaEncoder::prepareEncoderContext(bool is_video)
 {
 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 12, 0)
     encoderCtx_ = avcodec_alloc_context();
@@ -549,7 +555,7 @@ void VideoEncoder::prepareEncoderContext(bool is_video)
     }
 }
 
-void VideoEncoder::forcePresetX264()
+void MediaEncoder::forcePresetX264()
 {
     const char *speedPreset = "ultrafast";
     if (av_opt_set(encoderCtx_->priv_data, "preset", speedPreset, 0))
@@ -559,7 +565,7 @@ void VideoEncoder::forcePresetX264()
         RING_WARN("Failed to set x264 tune '%s'", tune);
 }
 
-void VideoEncoder::extractProfileLevelID(const std::string &parameters,
+void MediaEncoder::extractProfileLevelID(const std::string &parameters,
                                          AVCodecContext *ctx)
 {
     // From RFC3984:
@@ -607,4 +613,4 @@ void VideoEncoder::extractProfileLevelID(const std::string &parameters,
     RING_DBG("Using profile %x and level %d", ctx->profile, ctx->level);
 }
 
-}}
+}
diff --git a/daemon/src/media/video/video_encoder.h b/daemon/src/media/media_encoder.h
similarity index 78%
rename from daemon/src/media/video/video_encoder.h
rename to daemon/src/media/media_encoder.h
index 524d45ae9508868bc5b08205d8c3e005bf8ba275..1360c82fd638edfadf31424ebd8ff05cea9dad4c 100644
--- a/daemon/src/media/video/video_encoder.h
+++ b/daemon/src/media/media_encoder.h
@@ -29,45 +29,57 @@
  *  as that of the covered work.
  */
 
-#ifndef __VIDEO_ENCODER_H__
-#define __VIDEO_ENCODER_H__
+#ifndef __MEDIA_ENCODER_H__
+#define __MEDIA_ENCODER_H__
+
+#include "config.h"
+
+#ifdef RING_VIDEO
+#include "video/video_base.h"
+#include "video/video_scaler.h"
+#endif
 
-#include "video_base.h"
-#include "video_scaler.h"
 #include "noncopyable.h"
 
 #include <map>
+#include <memory>
 #include <string>
 
 class AVCodecContext;
 class AVStream;
 class AVFormatContext;
+class AVDictionary;
 class AVCodec;
 
 namespace ring {
     class AudioBuffer;
+    class MediaIOHandle;
 }
 
-namespace ring { namespace video {
+namespace ring {
 
-class VideoEncoderException : public std::runtime_error {
+class MediaEncoderException : public std::runtime_error {
     public:
-        VideoEncoderException(const char *msg) : std::runtime_error(msg) {}
+        MediaEncoderException(const char *msg) : std::runtime_error(msg) {}
 };
 
-class VideoEncoder {
+class MediaEncoder {
 public:
-    VideoEncoder();
-    ~VideoEncoder();
+    MediaEncoder();
+    ~MediaEncoder();
 
     void setOptions(const std::map<std::string, std::string>& options);
 
     void setInterruptCallback(int (*cb)(void*), void *opaque);
-    void setIOContext(const std::unique_ptr<VideoIOHandle> &ioctx);
     void openOutput(const char *enc_name, const char *short_name,
                    const char *filename, const char *mime_type, bool is_video);
     void startIO();
-    int encode(VideoFrame &input, bool is_keyframe, int64_t frame_number);
+    void setIOContext(const std::unique_ptr<MediaIOHandle> &ioctx);
+
+#ifdef RING_VIDEO
+    int encode(ring::video::VideoFrame &input, bool is_keyframe, int64_t frame_number);
+#endif // RING_VIDEO
+
     int encode_audio(const ring::AudioBuffer &input);
     int flush();
     void print_sdp(std::string &sdp_);
@@ -79,7 +91,7 @@ public:
     int getHeight() const { return dstHeight_; }
 
 private:
-    NON_COPYABLE(VideoEncoder);
+    NON_COPYABLE(MediaEncoder);
     void setScaleDest(void *data, int width, int height, int pix_fmt);
     void prepareEncoderContext(bool is_video);
     void forcePresetX264();
@@ -89,8 +101,11 @@ private:
     AVCodecContext *encoderCtx_ = nullptr;
     AVFormatContext *outputCtx_ = nullptr;
     AVStream *stream_ = nullptr;
-    VideoScaler scaler_;
-    VideoFrame scaledFrame_;
+
+#ifdef RING_VIDEO
+    ring::video::VideoScaler scaler_;
+    ring::video::VideoFrame scaledFrame_;
+#endif // RING_VIDEO
 
     uint8_t *scaledFrameBuffer_ = nullptr;
     int scaledFrameBufferSize_ = 0;
@@ -106,6 +121,6 @@ protected:
     AVDictionary *options_ = nullptr;
 };
 
-}}
+}
 
-#endif // __VIDEO_ENCODER_H__
+#endif // __MEDIA_ENCODER_H__
diff --git a/daemon/src/media/media_io_handle.cpp b/daemon/src/media/media_io_handle.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5ba2bb555f14cecc73827934babe253c228ecf81
--- /dev/null
+++ b/daemon/src/media/media_io_handle.cpp
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (C) 2013 Savoir-Faire Linux Inc.
+ *  Author: Guillaume Roguez <Guillaume.Roguez@savoirfairelinux.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA  02110-1301 USA.
+ *
+ *  Additional permission under GNU GPL version 3 section 7:
+ *
+ *  If you modify this program, or any covered work, by linking or
+ *  combining it with the OpenSSL project's OpenSSL library (or a
+ *  modified version of that library), containing parts covered by the
+ *  terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc.
+ *  grants you additional permission to convey the resulting work.
+ *  Corresponding Source for a non-source form of such a combination
+ *  shall include the source code for the parts of OpenSSL used as well
+ *  as that of the covered work.
+ */
+
+#include "libav_deps.h"
+#include "media_io_handle.h"
+
+namespace ring {
+
+// Wraps a freshly av_malloc'ed buffer and the given callbacks in an AVIOContext.
+MediaIOHandle::MediaIOHandle(ssize_t buffer_size, bool writeable,
+                             io_readcallback read_cb, io_writecallback write_cb,
+                             io_seekcallback seek_cb, void *opaque)
+    : ctx_(0), buf_(0)
+{
+    buf_ = static_cast<unsigned char *>(av_malloc(buffer_size));
+    ctx_ = avio_alloc_context(buf_, buffer_size, writeable, opaque, read_cb,
+                              write_cb, seek_cb);
+    if (ctx_) // avio_alloc_context() returns NULL on allocation failure
+        ctx_->max_packet_size = buffer_size;
+}
+
+// libavformat may replace the buffer; free the one the context holds now.
+MediaIOHandle::~MediaIOHandle() { av_free(ctx_ ? ctx_->buffer : buf_); av_free(ctx_); }
+
+}
diff --git a/daemon/src/media/media_io_handle.h b/daemon/src/media/media_io_handle.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d1d6374a61465ebf0c1c9e4667f4e47414e0c60
--- /dev/null
+++ b/daemon/src/media/media_io_handle.h
@@ -0,0 +1,67 @@
+/*
+ *  Copyright (C) 2013 Savoir-Faire Linux Inc.
+ *  Author: Guillaume Roguez <Guillaume.Roguez@savoirfairelinux.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA  02110-1301 USA.
+ *
+ *  Additional permission under GNU GPL version 3 section 7:
+ *
+ *  If you modify this program, or any covered work, by linking or
+ *  combining it with the OpenSSL project's OpenSSL library (or a
+ *  modified version of that library), containing parts covered by the
+ *  terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc.
+ *  grants you additional permission to convey the resulting work.
+ *  Corresponding Source for a non-source form of such a combination
+ *  shall include the source code for the parts of OpenSSL used as well
+ *  as that of the covered work.
+ */
+
+#ifndef __MEDIA_IO_HANDLE_H__
+#define __MEDIA_IO_HANDLE_H__
+
+#include "noncopyable.h"
+
+#ifndef AVFORMAT_AVIO_H
+class AVIOContext;
+#endif
+
+typedef int(*io_readcallback)(void *opaque, uint8_t *buf, int buf_size);      // read hook handed to avio_alloc_context()
+typedef int(*io_writecallback)(void *opaque, uint8_t *buf, int buf_size);     // write hook handed to avio_alloc_context()
+typedef int64_t(*io_seekcallback)(void *opaque, int64_t offset, int whence);  // seek hook handed to avio_alloc_context()
+
+namespace ring {
+// Wrapper that allocates an AVIOContext plus its I/O buffer and frees both on destruction.
+class MediaIOHandle {
+public:
+    MediaIOHandle(ssize_t buffer_size,      // size in bytes of the I/O buffer to allocate
+                  bool writeable,           // forwarded as the write flag of avio_alloc_context()
+                  io_readcallback read_cb,  // read hook forwarded to avio_alloc_context()
+                  io_writecallback write_cb, // write hook forwarded to avio_alloc_context()
+                  io_seekcallback seek_cb,  // seek hook forwarded to avio_alloc_context()
+                  void *opaque);            // forwarded as the opaque argument of avio_alloc_context()
+    ~MediaIOHandle();
+
+    AVIOContext* getContext() { return ctx_; } // returned pointer is still freed by the destructor
+
+private:
+    NON_COPYABLE(MediaIOHandle);  // presumably disables copying (macro from noncopyable.h) -- confirm
+    AVIOContext *ctx_;            // context created by avio_alloc_context()
+    unsigned char *buf_;          // buffer obtained from av_malloc() in the constructor
+};
+
+}
+
+#endif // __MEDIA_IO_HANDLE_H__
diff --git a/daemon/src/media/video/socket_pair.cpp b/daemon/src/media/socket_pair.cpp
similarity index 98%
rename from daemon/src/media/video/socket_pair.cpp
rename to daemon/src/media/socket_pair.cpp
index 9fd08d2e3f259074d92bb2290dae579e24e396de..cd9d989e65cce4538601b97e826cbec9decf8737 100644
--- a/daemon/src/media/video/socket_pair.cpp
+++ b/daemon/src/media/socket_pair.cpp
@@ -133,8 +133,9 @@ udp_socket_create(sockaddr_storage *addr, socklen_t *addr_len, int local_port)
     return udp_fd;
 }
 
-namespace ring { namespace video {
+namespace ring {
 
+using std::string;
 static const int RTP_BUFFER_SIZE = 1472;
 
 SocketPair::SocketPair(const char *uri, int localPort)
@@ -210,9 +211,9 @@ void SocketPair::openSockets(const char *uri, int local_rtp_port)
              local_rtp_port, local_rtcp_port, rtp_port, rtcp_port);
 }
 
-VideoIOHandle* SocketPair::createIOContext()
+MediaIOHandle* SocketPair::createIOContext()
 {
-    return new VideoIOHandle(RTP_BUFFER_SIZE, true,
+    return new MediaIOHandle(RTP_BUFFER_SIZE, true,
                              &readCallback, &writeCallback, 0,
                              reinterpret_cast<void*>(this));
 }
@@ -384,4 +385,4 @@ retry:
     return ret < 0 ? errno : ret;
 }
 
-}}
+}
diff --git a/daemon/src/media/video/socket_pair.h b/daemon/src/media/socket_pair.h
similarity index 88%
rename from daemon/src/media/video/socket_pair.h
rename to daemon/src/media/socket_pair.h
index c2614a3f2f236c3b704b442f43a2f841766733f5..bb5f51df5bc363bcb8644cc2b2173fc21b922696 100644
--- a/daemon/src/media/video/socket_pair.h
+++ b/daemon/src/media/socket_pair.h
@@ -32,27 +32,28 @@
 #ifndef SOCKET_PAIR_H_
 #define SOCKET_PAIR_H_
 
-#include "video_base.h"
+#include "media_io_handle.h"
 
 #include <sys/socket.h>
 #include <mutex>
 #include <stdint.h>
+#include <memory>
 
 namespace ring {
-class IceSocket;
-};
 
-namespace ring { namespace video {
+class IceSocket;
 
 class SocketPair {
     public:
         SocketPair(const char *uri, int localPort);
-        SocketPair(std::unique_ptr<ring::IceSocket> rtp_sock,
-                   std::unique_ptr<ring::IceSocket> rtcp_sock);
+        SocketPair(std::unique_ptr<IceSocket> rtp_sock,
+                   std::unique_ptr<IceSocket> rtcp_sock);
         ~SocketPair();
 
         void interrupt();
-        VideoIOHandle* createIOContext();
+
+        MediaIOHandle* createIOContext();
+
         void openSockets(const char *uri, int localPort);
         void closeSockets();
 
@@ -68,8 +69,8 @@ class SocketPair {
         int writeRtpData(void *buf, int buf_size);
         int writeRtcpData(void *buf, int buf_size);
 
-        std::unique_ptr<ring::IceSocket> rtp_sock_;
-        std::unique_ptr<ring::IceSocket> rtcp_sock_;
+        std::unique_ptr<IceSocket> rtp_sock_;
+        std::unique_ptr<IceSocket> rtcp_sock_;
 
         std::mutex rtcpWriteMutex_;
 
@@ -82,6 +83,6 @@ class SocketPair {
         bool interrupted_ {false};
 };
 
-}}
+}
 
 #endif  // SOCKET_PAIR_H_
diff --git a/daemon/src/media/video/Makefile.am b/daemon/src/media/video/Makefile.am
index ab5d484149f8b8cf94958a9b7dc4ceb336687079..899fc5ea936d8280455ff7b09cbc5b8ef074b607 100644
--- a/daemon/src/media/video/Makefile.am
+++ b/daemon/src/media/video/Makefile.am
@@ -8,10 +8,7 @@ libvideo_la_SOURCES = \
 	video_device_monitor.cpp video_device_monitor.h \
 	video_base.cpp video_base.h \
 	video_scaler.cpp video_scaler.h \
-	video_decoder.cpp video_decoder.h \
-	video_encoder.cpp video_encoder.h \
 	video_mixer.cpp video_mixer.h \
-	socket_pair.cpp socket_pair.h \
 	shm_sink.cpp shm_sink.h \
 	video_input.cpp video_input.h \
 	video_receive_thread.cpp video_receive_thread.h \
diff --git a/daemon/src/media/video/video_base.cpp b/daemon/src/media/video/video_base.cpp
index eec2ccfd05635d58ce372cec857dfcbf892e468a..25a85c924fb5261b89ef1a357a65060fe9cef749 100644
--- a/daemon/src/media/video/video_base.cpp
+++ b/daemon/src/media/video/video_base.cpp
@@ -45,24 +45,6 @@ VideoPacket::VideoPacket() : packet_(static_cast<AVPacket *>(av_mallocz(sizeof(A
 
 VideoPacket::~VideoPacket() { av_free_packet(packet_); av_free(packet_); }
 
-/*=== VideoIOHandle  =========================================================*/
-
-VideoIOHandle::VideoIOHandle(ssize_t buffer_size,
-                             bool writeable,
-                             io_readcallback read_cb,
-                             io_writecallback write_cb,
-                             io_seekcallback seek_cb,
-                             void *opaque) : ctx_(0), buf_(0)
-
-{
-    buf_ = static_cast<unsigned char *>(av_malloc(buffer_size));
-    ctx_ = avio_alloc_context(buf_, buffer_size, writeable, opaque, read_cb,
-                              write_cb, seek_cb);
-    ctx_->max_packet_size = buffer_size;
-}
-
-VideoIOHandle::~VideoIOHandle() { av_free(ctx_); av_free(buf_); }
-
 /*=== VideoFrame =============================================================*/
 
 VideoFrame::VideoFrame()
diff --git a/daemon/src/media/video/video_base.h b/daemon/src/media/video/video_base.h
index 3a74eaa0758b546c8c3af76d4f0748025c3287b7..42505ee58034b894aa89cc0d330b3e481a501911 100644
--- a/daemon/src/media/video/video_base.h
+++ b/daemon/src/media/video/video_base.h
@@ -59,10 +59,6 @@ template <typename T> class Observer;
 template <typename T> class Observable;
 class VideoFrame;
 
-typedef int(*io_readcallback)(void *opaque, uint8_t *buf, int buf_size);
-typedef int(*io_writecallback)(void *opaque, uint8_t *buf, int buf_size);
-typedef int64_t(*io_seekcallback)(void *opaque, int64_t offset, int whence);
-
 /*=== Observable =============================================================*/
 
 template <typename T>
@@ -138,26 +134,6 @@ private:
     AVPacket *packet_;
 };
 
-/*=== VideoIOHandle  =========================================================*/
-
-class VideoIOHandle {
-public:
-    VideoIOHandle(ssize_t buffer_size,
-                  bool writeable,
-                  io_readcallback read_cb,
-                  io_writecallback write_cb,
-                  io_seekcallback seek_cb,
-                  void *opaque);
-    ~VideoIOHandle();
-
-    AVIOContext* getContext() { return ctx_; }
-
-private:
-    NON_COPYABLE(VideoIOHandle);
-    AVIOContext *ctx_;
-    unsigned char *buf_;
-};
-
 /*=== VideoFrame =============================================================*/
 
 class VideoFrame {
diff --git a/daemon/src/media/video/video_input.cpp b/daemon/src/media/video/video_input.cpp
index ab245815f987232d1b0f44013ceb3305a36f56fb..8b49d15a29fe56f50b4b9071e5e052f401f88291 100644
--- a/daemon/src/media/video/video_input.cpp
+++ b/daemon/src/media/video/video_input.cpp
@@ -29,9 +29,12 @@
  *  as that of the covered work.
  */
 
+
+#ifdef RING_VIDEO
 #include "video_input.h"
-#include "video_decoder.h"
+#endif // RING_VIDEO
 
+#include "media_decoder.h"
 #include "manager.h"
 #include "client/videomanager.h"
 #include "logger.h"
@@ -116,18 +119,18 @@ bool VideoInput::captureFrame()
     const auto ret = decoder_->decode(getNewFrame(), pkt);
 
     switch (ret) {
-        case VideoDecoder::Status::FrameFinished:
+        case MediaDecoder::Status::FrameFinished:
             break;
 
-        case VideoDecoder::Status::ReadError:
-        case VideoDecoder::Status::DecodeError:
+        case MediaDecoder::Status::ReadError:
+        case MediaDecoder::Status::DecodeError:
             loop_.stop();
             // fallthrough
-        case VideoDecoder::Status::Success:
+        case MediaDecoder::Status::Success:
             return false;
 
             // Play in loop
-        case VideoDecoder::Status::EOFError:
+        case MediaDecoder::Status::EOFError:
             deleteDecoder();
             createDecoder();
             return false;
@@ -143,7 +146,7 @@ VideoInput::createDecoder()
     if (input_.empty())
         return;
 
-    decoder_ = new VideoDecoder();
+    decoder_ = new ring::MediaDecoder();
 
     decoder_->setOptions(decOpts_);
     if (emulateRate_)
@@ -244,7 +247,7 @@ VideoInput::initFile(std::string path)
     } else {
         RING_WARN("Guessing file type for %s", path.c_str());
         // FIXME: proper parsing of FPS etc. should be done in
-        // VideoDecoder, not here.
+        // MediaDecoder, not here.
         decOpts_["framerate"] = "25";
     }
 
diff --git a/daemon/src/media/video/video_input.h b/daemon/src/media/video/video_input.h
index 149a4811a6cde44a95deacfadbd8a9535cb48934..ac1338740bfec6b751567e72c1fa38f8f0dcd79e 100644
--- a/daemon/src/media/video/video_input.h
+++ b/daemon/src/media/video/video_input.h
@@ -36,15 +36,19 @@
 
 #include "noncopyable.h"
 #include "shm_sink.h"
-#include "video_decoder.h"
 #include "threadloop.h"
 
 #include <map>
 #include <atomic>
 #include <string>
 
+namespace ring {
+    class MediaDecoder;
+}
+
 namespace ring { namespace video {
 
+
 class VideoInput : public VideoGenerator
 {
 public:
@@ -63,7 +67,7 @@ private:
 
     std::string sinkID_     = "local";
 
-    VideoDecoder *decoder_  = nullptr;
+    ring::MediaDecoder *decoder_  = nullptr;
     SHMSink sink_;
     std::atomic<bool> switchPending_ = {false};
 
diff --git a/daemon/src/media/video/video_receive_thread.cpp b/daemon/src/media/video/video_receive_thread.cpp
index edb262e21924d46bb5f685c53b16a867243107c6..cccac927cb9d8239fbb95e1f63fede100fe72d6d 100644
--- a/daemon/src/media/video/video_receive_thread.cpp
+++ b/daemon/src/media/video/video_receive_thread.cpp
@@ -31,7 +31,6 @@
  */
 
 #include "libav_deps.h"
-
 #include "video_receive_thread.h"
 #include "socket_pair.h"
 #include "manager.h"
@@ -78,7 +77,7 @@ VideoReceiveThread::startLoop()
 // main thread to block while this executes, so it happens in the video thread.
 bool VideoReceiveThread::setup()
 {
-    videoDecoder_ = new VideoDecoder();
+    videoDecoder_ = new MediaDecoder();
 
     dstWidth_ = atoi(args_["width"].c_str());
     dstHeight_ = atoi(args_["height"].c_str());
@@ -174,7 +173,7 @@ int VideoReceiveThread::readFunction(void *opaque, uint8_t *buf, int buf_size)
     return is.gcount();
 }
 
-void VideoReceiveThread::addIOContext(SocketPair &socketPair)
+void VideoReceiveThread::addIOContext(ring::SocketPair &socketPair)
 {
     demuxContext_ = socketPair.createIOContext();
 }
@@ -185,11 +184,11 @@ bool VideoReceiveThread::decodeFrame()
     const auto ret = videoDecoder_->decode(getNewFrame(), pkt);
 
     switch (ret) {
-        case VideoDecoder::Status::FrameFinished:
+        case MediaDecoder::Status::FrameFinished:
             publishFrame();
             return true;
 
-        case VideoDecoder::Status::DecodeError:
+        case MediaDecoder::Status::DecodeError:
             RING_WARN("decoding failure, trying to reset decoder...");
             delete videoDecoder_;
             if (!setup()) {
@@ -206,7 +205,7 @@ bool VideoReceiveThread::decodeFrame()
                 requestKeyFrameCallback_(id_);
             break;
 
-        case VideoDecoder::Status::ReadError:
+        case MediaDecoder::Status::ReadError:
             RING_ERR("fatal error, read failed");
             loop_.stop();
 
diff --git a/daemon/src/media/video/video_receive_thread.h b/daemon/src/media/video/video_receive_thread.h
index b03469ca0f727fcf2b9aa8adb7819d6a12100086..cd858e9adaf975b2cc5d6dc32880b6fb8efa3ae9 100644
--- a/daemon/src/media/video/video_receive_thread.h
+++ b/daemon/src/media/video/video_receive_thread.h
@@ -31,7 +31,8 @@
 #ifndef _VIDEO_RECEIVE_THREAD_H_
 #define _VIDEO_RECEIVE_THREAD_H_
 
-#include "video_decoder.h"
+#include "media_decoder.h"
+#include "media_io_handle.h"
 #include "shm_sink.h"
 #include "threadloop.h"
 #include "noncopyable.h"
@@ -42,10 +43,12 @@
 #include <sstream>
 #include <memory>
 
-namespace ring { namespace video {
+namespace ring {
 
 class SocketPair;
 
+    namespace video {
+
 class VideoReceiveThread : public VideoGenerator {
 public:
     VideoReceiveThread(const std::string &id,
@@ -71,13 +74,13 @@ private:
     /*-------------------------------------------------------------*/
     /* These variables should be used in thread (i.e. run()) only! */
     /*-------------------------------------------------------------*/
-    VideoDecoder *videoDecoder_;
+    MediaDecoder *videoDecoder_;
     int dstWidth_;
     int dstHeight_;
     const std::string id_;
     std::istringstream stream_;
-    VideoIOHandle sdpContext_;
-    VideoIOHandle *demuxContext_;
+    MediaIOHandle sdpContext_;
+    MediaIOHandle *demuxContext_;
     SHMSink sink_;
 
     void (*requestKeyFrameCallback_)(const std::string &);
diff --git a/daemon/src/media/video/video_rtp_session.cpp b/daemon/src/media/video/video_rtp_session.cpp
index 1d1b1127b1cbf9c855606ef1a1657eaa54ede8c7..dae710221edec927513eb484f0896c4bdb3ba9ff 100644
--- a/daemon/src/media/video/video_rtp_session.cpp
+++ b/daemon/src/media/video/video_rtp_session.cpp
@@ -47,6 +47,7 @@
 
 namespace ring { namespace video {
 
+using ring::SocketPair;
 using std::map;
 using std::string;
 
@@ -142,7 +143,7 @@ void VideoRtpSession::startSender()
 
         try {
             sender_.reset(new VideoSender(txArgs_, *socketPair_));
-        } catch (const VideoEncoderException &e) {
+        } catch (const MediaEncoderException &e) {
             RING_ERR("%s", e.what());
             sending_ = false;
         }
diff --git a/daemon/src/media/video/video_rtp_session.h b/daemon/src/media/video/video_rtp_session.h
index eaaa4a1ec382a394ad3605c1118e23f6feffdc5f..9fe90fe674cbc5fabf46aa73324f946eafa19694 100644
--- a/daemon/src/media/video/video_rtp_session.h
+++ b/daemon/src/media/video/video_rtp_session.h
@@ -48,7 +48,8 @@ class Sdp;
 class Conference;
 
 namespace ring {
-class IceSocket;
+    class IceSocket;
+    class SocketPair;
 };
 
 namespace ring { namespace video {
@@ -82,7 +83,7 @@ private:
     // all public methods must be locked internally before use
     std::recursive_mutex mutex_ = {};
 
-    std::unique_ptr<SocketPair> socketPair_ = nullptr;
+    std::unique_ptr<ring::SocketPair> socketPair_ = nullptr;
     std::unique_ptr<VideoSender> sender_ = nullptr;
     std::unique_ptr<VideoReceiveThread> receiveThread_ = nullptr;
     std::map<std::string, std::string> txArgs_;
diff --git a/daemon/src/media/video/video_sender.cpp b/daemon/src/media/video/video_sender.cpp
index 9e7583ba207679e945a77d8efc6f4d0185436c58..06d8ae07be51a2b5ca465db424dcd99bfeaacbfc 100644
--- a/daemon/src/media/video/video_sender.cpp
+++ b/daemon/src/media/video/video_sender.cpp
@@ -47,7 +47,7 @@ using std::string;
 VideoSender::VideoSender(std::map<string, string> args,
                          SocketPair& socketPair) :
     muxContext_(socketPair.createIOContext()),
-    videoEncoder_(new VideoEncoder)
+    videoEncoder_(new MediaEncoder)
 {
     const char *enc_name = args["codec"].c_str();
     const char *dest = args["destination"].c_str();
diff --git a/daemon/src/media/video/video_sender.h b/daemon/src/media/video/video_sender.h
index f594a1733e5dd619c526fa25efca9a26e63bb6a1..78aea60cd51631d32d34b8a156ab3d3bcebd683b 100644
--- a/daemon/src/media/video/video_sender.h
+++ b/daemon/src/media/video/video_sender.h
@@ -33,7 +33,8 @@
 #define __VIDEO_SENDER_H__
 
 #include "noncopyable.h"
-#include "video_encoder.h"
+#include "media_encoder.h"
+#include "media_io_handle.h"
 #include "video_mixer.h"
 
 #include <map>
@@ -41,9 +42,11 @@
 #include <memory>
 #include <atomic>
 
-namespace ring { namespace video {
+namespace ring {
+    class SocketPair;
+}
 
-class SocketPair;
+namespace ring { namespace video {
 
 class VideoSender : public VideoFramePassiveReader
 {
@@ -64,8 +67,8 @@ private:
     void encodeAndSendVideo(VideoFrame&);
 
     // encoder MUST be deleted before muxContext
-    std::unique_ptr<VideoIOHandle> muxContext_ = nullptr;
-    std::unique_ptr<VideoEncoder> videoEncoder_ = nullptr;
+    std::unique_ptr<MediaIOHandle> muxContext_ = nullptr;
+    std::unique_ptr<MediaEncoder> videoEncoder_ = nullptr;
 
     std::atomic<int> forceKeyFrame_ = { 0 };
     int64_t frameNumber_ = 0;