diff --git a/src/connectivity/sip_utils.h b/src/connectivity/sip_utils.h
index d0013e6b9c32f0f848a35c759766396796f84333..85b8c20de8b8823c41d51efc7b3acf2ba0cde347 100644
--- a/src/connectivity/sip_utils.h
+++ b/src/connectivity/sip_utils.h
@@ -20,8 +20,8 @@
 
 #pragma once
 
+#include "connectivity/ip_utils.h"
 #include "media/media_codec.h"
-#include "media/audio/audiobuffer.h"
 #include "noncopyable.h"
 
 #include <utility>
diff --git a/src/media/audio/audio-processing/audio_processor.h b/src/media/audio/audio-processing/audio_processor.h
index 97cbedb2d041ba58560b71d316aeb24cfb645f40..f6cac50e07e1a29625daab5f3c2a84a7370d435d 100644
--- a/src/media/audio/audio-processing/audio_processor.h
+++ b/src/media/audio/audio-processing/audio_processor.h
@@ -21,7 +21,7 @@
 #include "noncopyable.h"
 #include "media/audio/audio_frame_resizer.h"
 #include "media/audio/resampler.h"
-#include "media/audio/audiobuffer.h"
+#include "media/audio/audio_format.h"
 #include "media/libav_deps.h"
 #include "logger.h"
 
diff --git a/src/media/audio/audio-processing/speex.h b/src/media/audio/audio-processing/speex.h
index 122b35423c96c5f903cb0c9e1e7fb787b84ed14c..018dbb9e27f7bc44132f73d3b5b82ce75397cad4 100644
--- a/src/media/audio/audio-processing/speex.h
+++ b/src/media/audio/audio-processing/speex.h
@@ -21,6 +21,7 @@
 #pragma once
 
 #include "audio_processor.h"
+#include "media/audio/audiobuffer.h"
 
 // typedef speex C structs
 extern "C" {
diff --git a/src/media/audio/audio_format.h b/src/media/audio/audio_format.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c1b9e1c9355b1f73ff96f0691c111cfa65002f7
--- /dev/null
+++ b/src/media/audio/audio_format.h
@@ -0,0 +1,83 @@
+/*
+ *  Copyright (C) 2004-2023 Savoir-faire Linux Inc.
+ *
+ *  Author: Adrien Beraud <adrien.beraud@wisdomvibes.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ */
+
+#pragma once
+
+extern "C" {
+#include <libavutil/samplefmt.h>
+}
+
+#include <fmt/core.h>
+#include <sstream>
+#include <string>
+#include <cstddef> // for size_t
+
+namespace jami {
+
+/**
+ * Describes raw audio data: sample rate (Hz), channel count and libav sample format.
+ */
+struct AudioFormat
+{
+    unsigned sample_rate;
+    unsigned nb_channels;
+    AVSampleFormat sampleFormat;
+
+    /** The sample format defaults to AV_SAMPLE_FMT_S16 when not given. */
+    constexpr AudioFormat(unsigned sr, unsigned c, AVSampleFormat f = AV_SAMPLE_FMT_S16)
+        : sample_rate(sr), nb_channels(c), sampleFormat(f)
+    {}
+
+    /** Formats are equal when rate, channel count and sample format all match. */
+    inline bool operator==(const AudioFormat& b) const
+    {
+        return ((b.sample_rate == sample_rate) && (b.nb_channels == nb_channels)
+                && (b.sampleFormat == sampleFormat));
+    }
+
+    inline bool operator!=(const AudioFormat& b) const { return !(*this == b); }
+
+    /** Renders the format as "{<sample format>, <n> channels, <rate>Hz}". */
+    inline std::string toString() const
+    {
+        // av_get_sample_fmt_name() returns NULL for an unrecognized format; never hand fmt a null string.
+        const char* name = av_get_sample_fmt_name(sampleFormat);
+        return fmt::format("{{{}, {} channels, {}Hz}}", name ? name : "unknown", nb_channels, sample_rate);
+    }
+
+    /** Returns bytes necessary to hold one frame of audio data (one sample for every channel). */
+    inline size_t getBytesPerFrame() const { return av_get_bytes_per_sample(sampleFormat) * nb_channels; }
+
+    /** Bytes per second (default), or bytes necessary to hold delay_ms milliseconds of audio data. */
+    inline size_t getBandwidth(unsigned delay_ms = 1000) const
+    {
+        return (getBytesPerFrame() * sample_rate * delay_ms) / 1000;
+    }
+
+    static const constexpr unsigned DEFAULT_SAMPLE_RATE = 48000;
+
+    /** Preset formats; each one relies on the constructor's default sample format. */
+    static const constexpr AudioFormat DEFAULT() { return AudioFormat {16000, 1}; }
+    static const constexpr AudioFormat NONE() { return AudioFormat {0, 0}; }
+    static const constexpr AudioFormat MONO() { return AudioFormat {DEFAULT_SAMPLE_RATE, 1}; }
+    static const constexpr AudioFormat STEREO() { return AudioFormat {DEFAULT_SAMPLE_RATE, 2}; }
+};
+
+} // namespace jami
diff --git a/src/media/audio/audio_frame_resizer.cpp b/src/media/audio/audio_frame_resizer.cpp
index 0fc4bad9373844bc363c7bcddf840fbdea41518a..2275aa8f97b0c9b74b9d063eaae310dee9ef41e9 100644
--- a/src/media/audio/audio_frame_resizer.cpp
+++ b/src/media/audio/audio_frame_resizer.cpp
@@ -97,8 +97,7 @@ AudioFrameResizer::enqueue(std::shared_ptr<AudioFrame>&& frame)
     auto f = frame->pointer();
     AudioFormat format(f->sample_rate, f->ch_layout.nb_channels, (AVSampleFormat) f->format);
     if (format != format_) {
-        JAMI_ERR() << "Expected " << format_ << ", but got "
-                   << AudioFormat(f->sample_rate, f->ch_layout.nb_channels, (AVSampleFormat) f->format);
+        JAMI_WARNING("Expected {} but got {}", format_.toString(), format.toString());
         setFormat(format, frameSize_);
     }
 
diff --git a/src/media/audio/audio_frame_resizer.h b/src/media/audio/audio_frame_resizer.h
index b4fecdccef299645578fc87f4ab5506614176601..daddbc36300192aa76d8f601f3838f379fc44f3e 100644
--- a/src/media/audio/audio_frame_resizer.h
+++ b/src/media/audio/audio_frame_resizer.h
@@ -20,7 +20,7 @@
 
 #pragma once
 
-#include "audiobuffer.h"
+#include "audio_format.h"
 #include "media/media_buffer.h"
 #include "noncopyable.h"
 
diff --git a/src/media/audio/audio_input.h b/src/media/audio/audio_input.h
index 035422659b5fd38e938007538ed2d55bf5c96aeb..e9d1d8126579977863e755ebc07dacfcb0e5f160 100644
--- a/src/media/audio/audio_input.h
+++ b/src/media/audio/audio_input.h
@@ -26,7 +26,7 @@
 #include <mutex>
 #include <chrono>
 
-#include "media/audio/audiobuffer.h"
+#include "audio_format.h"
 #include "media/media_device.h"
 #include "media/media_buffer.h"
 #include "observer.h"
diff --git a/src/media/audio/audio_receive_thread.h b/src/media/audio/audio_receive_thread.h
index 02486f56c57717d4847034436ec07190734bb918..4f1f3400e3cd53ffd2dfd22de30c628ecb3db0c4 100644
--- a/src/media/audio/audio_receive_thread.h
+++ b/src/media/audio/audio_receive_thread.h
@@ -20,7 +20,7 @@
  */
 #pragma once
 
-#include "audiobuffer.h"
+#include "audio_format.h"
 #include "media/media_buffer.h"
 #include "media/media_device.h"
 #include "media/media_codec.h"
diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp
index d835e1e8a0d9cbd0a0af4ea53c94a640cba575a8..7a18ed3320cb3c2c52c18749a577ab42638312b7 100644
--- a/src/media/audio/audio_rtp_session.cpp
+++ b/src/media/audio/audio_rtp_session.cpp
@@ -37,7 +37,6 @@
 #include "media_device.h"
 
 #include "audio/audio_input.h"
-#include "audio/audiobuffer.h"
 #include "audio/ringbufferpool.h"
 #include "audio/resampler.h"
 #include "client/videomanager.h"
diff --git a/src/media/audio/audio_rtp_session.h b/src/media/audio/audio_rtp_session.h
index 0f94d1193df43ab5e0f156b087c7ba6cd250691c..e517b9968ed19bcd1a28f2011532d9a5e174e381 100644
--- a/src/media/audio/audio_rtp_session.h
+++ b/src/media/audio/audio_rtp_session.h
@@ -21,7 +21,6 @@
 
 #pragma once
 
-#include "audiobuffer.h"
 #include "media/media_device.h"
 #include "media/rtp_session.h"
 #include "media/media_stream.h"
diff --git a/src/media/audio/audio_sender.cpp b/src/media/audio/audio_sender.cpp
index a645c95180509b7f52dfc41f0f3f33083c6c750a..09173ddadd14fa9e49b487ed7cb265f267a2afd5 100644
--- a/src/media/audio/audio_sender.cpp
+++ b/src/media/audio/audio_sender.cpp
@@ -49,8 +49,6 @@ AudioSender::~AudioSender()
 {
     audioEncoder_.reset();
     muxContext_.reset();
-    micData_.clear();
-    resampledData_.clear();
 }
 
 bool
diff --git a/src/media/audio/audio_sender.h b/src/media/audio/audio_sender.h
index 858eba8c3c87cf5a932e482da6b0a28883094880..edfd6f19a64070519d11b20e56a48e44f954b8e6 100644
--- a/src/media/audio/audio_sender.h
+++ b/src/media/audio/audio_sender.h
@@ -20,7 +20,6 @@
  */
 #pragma once
 
-#include "audiobuffer.h"
 #include "media_buffer.h"
 #include "media_codec.h"
 #include "noncopyable.h"
@@ -65,8 +64,6 @@ private:
 
     uint64_t sent_samples = 0;
 
-    AudioBuffer micData_;
-    AudioBuffer resampledData_;
     const uint16_t seqVal_;
     uint16_t mtu_;
 
diff --git a/src/media/audio/audiobuffer.h b/src/media/audio/audiobuffer.h
index 9a90c067e5babc6e77a172e86292364fbe4304f6..8d76b2940bc375f50272e9230b5a047ce51871e5 100644
--- a/src/media/audio/audiobuffer.h
+++ b/src/media/audio/audiobuffer.h
@@ -25,12 +25,12 @@
 #endif
 
 extern "C" {
-#include <libavutil/samplefmt.h>
 struct AVFrame;
 }
 
 #include "ring_types.h"
 #include "media/media_buffer.h"
+#include "audio_format.h"
 
 #include <fmt/core.h>
 
@@ -42,56 +42,8 @@ struct AVFrame;
 
 namespace jami {
 
-/**
- * Structure to hold sample rate and channel number associated with audio data.
- */
-struct AudioFormat
-{
-    unsigned sample_rate;
-    unsigned nb_channels;
-    AVSampleFormat sampleFormat;
-
-    constexpr AudioFormat(unsigned sr, unsigned c, AVSampleFormat f = AV_SAMPLE_FMT_S16)
-        : sample_rate(sr)
-        , nb_channels(c)
-        , sampleFormat(f)
-    {}
-
-    inline bool operator==(const AudioFormat& b) const
-    {
-        return ((b.sample_rate == sample_rate) && (b.nb_channels == nb_channels)
-                && (b.sampleFormat == sampleFormat));
-    }
-
-    inline bool operator!=(const AudioFormat& b) const { return !(*this == b); }
-
-    inline std::string toString() const
-    {
-        return fmt::format("{{{}, {} channels, {}Hz}}", av_get_sample_fmt_name(sampleFormat), nb_channels, sample_rate);
-    }
-
-    /**
-     * Returns bytes necessary to hold one frame of audio data.
-     */
-    inline size_t getBytesPerFrame() const { return av_get_bytes_per_sample(sampleFormat) * nb_channels; }
-
-    /**
-     * Bytes per second (default), or bytes necessary
-     * to hold delay_ms milliseconds of audio data.
-     */
-    inline size_t getBandwidth(unsigned delay_ms = 1000) const
-    {
-        return (getBytesPerFrame() * sample_rate * delay_ms) / 1000;
-    }
-
-    static const constexpr unsigned DEFAULT_SAMPLE_RATE = 48000;
-    static const constexpr AudioFormat DEFAULT() { return AudioFormat {16000, 1}; }
-    static const constexpr AudioFormat NONE() { return AudioFormat {0, 0}; }
-    static const constexpr AudioFormat MONO() { return AudioFormat {DEFAULT_SAMPLE_RATE, 1}; }
-    static const constexpr AudioFormat STEREO() { return AudioFormat {DEFAULT_SAMPLE_RATE, 2}; }
-};
-
-std::ostream& operator<<(std::ostream& stream, const AudioFormat& f);
+using AudioSample [[deprecated("Use AudioFrame and AVSampleFormat")]]
+    = int16_t;
 
 class
 [[deprecated("Use AudioFrame instead")]] AudioBuffer
diff --git a/src/media/audio/audiolayer.h b/src/media/audio/audiolayer.h
index 8a1bc3ad15652d80b1a9657b07a4251c8ebb9002..748bf403f4c442dcc87a556541756098862ed117 100644
--- a/src/media/audio/audiolayer.h
+++ b/src/media/audio/audiolayer.h
@@ -26,6 +26,7 @@
 #include "noncopyable.h"
 #include "audio_frame_resizer.h"
 #include "audio-processing/audio_processor.h"
+#include "audiobuffer.h"
 
 #include <chrono>
 #include <mutex>
diff --git a/src/media/audio/resampler.cpp b/src/media/audio/resampler.cpp
index 211fc135fa56663325e4db703a3eaa42736bcbe9..6f502d6c9f9ec4ef7353c7c6c98bab8c2bf0970a 100644
--- a/src/media/audio/resampler.cpp
+++ b/src/media/audio/resampler.cpp
@@ -23,6 +23,7 @@
 #include "libav_deps.h"
 #include "logger.h"
 #include "resampler.h"
+#include "audiobuffer.h"
 
 extern "C" {
 #include <libswresample/swresample.h>
diff --git a/src/media/audio/resampler.h b/src/media/audio/resampler.h
index bb943e225242a600bfec3b7b9f909547ce726b96..141e0759b06f1bbd6ddbfc4c119f977f8a526e9a 100644
--- a/src/media/audio/resampler.h
+++ b/src/media/audio/resampler.h
@@ -22,7 +22,7 @@
 
 #pragma once
 
-#include "audiobuffer.h"
+#include "audio_format.h"
 #include "media/media_buffer.h"
 #include "noncopyable.h"
 
@@ -33,6 +33,8 @@ struct SwrContext;
 
 namespace jami {
 
+class AudioBuffer;
+
 /**
  * @brief Wrapper class for libswresample
  */
diff --git a/src/media/audio/ringbuffer.h b/src/media/audio/ringbuffer.h
index 99c2610140a0d993e04dde464505f9d33cb249f7..582e4c8bfcc561289b26dc8c935851c839382543 100644
--- a/src/media/audio/ringbuffer.h
+++ b/src/media/audio/ringbuffer.h
@@ -23,7 +23,7 @@
 
 #pragma once
 
-#include "audiobuffer.h"
+#include "audio_format.h"
 #include "noncopyable.h"
 #include "audio_frame_resizer.h"
 #include "resampler.h"
diff --git a/src/media/audio/ringbufferpool.h b/src/media/audio/ringbufferpool.h
index ba74464e5efd774fc232ffde171c09d5c9bde610..76c64554ff468c335a3411f9a7036c16e3d75929 100644
--- a/src/media/audio/ringbufferpool.h
+++ b/src/media/audio/ringbufferpool.h
@@ -21,7 +21,8 @@
 
 #pragma once
 
-#include "audiobuffer.h"
+#include "audio_format.h"
+#include "media_buffer.h"
 #include "noncopyable.h"
 
 #include <map>
diff --git a/src/media/media_attribute.h b/src/media/media_attribute.h
index 5053321fea4e137d86aa077efbd709ffeac30eaa..8e52b9278c73c69d0b987bffa8cae7f00f470ca3 100644
--- a/src/media/media_attribute.h
+++ b/src/media/media_attribute.h
@@ -25,6 +25,7 @@
 #endif
 
 #include "media/media_codec.h"
+#include "jami.h"
 
 namespace jami {
 
diff --git a/src/media/media_buffer.h b/src/media/media_buffer.h
index d86ebe86301cc56dbb3aeff1d5f3ad34953aebff..9f5409f7fc4f5df13e9835346bf0c89c7b5a3d02 100644
--- a/src/media/media_buffer.h
+++ b/src/media/media_buffer.h
@@ -36,9 +36,6 @@ using MediaFrame = libjami::MediaFrame;
 using AudioFrame = libjami::AudioFrame;
 using MediaObserver = std::function<void(std::shared_ptr<MediaFrame>&&)>;
 
-[[deprecated("Use AudioFrame and AVSampleFormat")]]
-typedef int16_t AudioSample;
-
 #ifdef ENABLE_VIDEO
 
 using VideoFrame = libjami::VideoFrame;
diff --git a/src/media/media_codec.h b/src/media/media_codec.h
index a2ce4b4b55dba918abca2723bcfdabefd03a020e..76637bebe3aeb484d36afd8dce4114fd776fb459 100644
--- a/src/media/media_codec.h
+++ b/src/media/media_codec.h
@@ -25,12 +25,14 @@
 #include <config.h>
 #endif
 
-#include "audio/audiobuffer.h" // for AudioFormat
+#include "audio/audio_format.h"
+
 #include <dhtnet/ip_utils.h>
 #include <cctype>
 #include <string>
 #include <vector>
 #include <map>
+#include <memory>
 #include <iostream>
 #include <unistd.h>
 
diff --git a/src/media/media_decoder.cpp b/src/media/media_decoder.cpp
index f86f257905cf40ef404fa29de3f6897e3684694f..9f3f8d35de2c07587af4f3b04cea2dc83ae57f89 100644
--- a/src/media/media_decoder.cpp
+++ b/src/media/media_decoder.cpp
@@ -25,7 +25,6 @@
 #include "media_device.h"
 #include "media_buffer.h"
 #include "media_io_handle.h"
-#include "audio/audiobuffer.h"
 #include "audio/ringbuffer.h"
 #include "audio/resampler.h"
 #include "decoder_finder.h"
diff --git a/src/media/media_decoder.h b/src/media/media_decoder.h
index 8384ff89d0ace10c8ef2da07d1d2672792a04e13..dd8e5418b961a8a080060fce9260de02ac965500 100644
--- a/src/media/media_decoder.h
+++ b/src/media/media_decoder.h
@@ -36,10 +36,11 @@
 #endif
 #include "logger.h"
 
-#include "audio/audiobuffer.h"
+#include "audio/audio_format.h"
 
 #include "media_device.h"
 #include "media_stream.h"
+#include "media_buffer.h"
 #include "noncopyable.h"
 
 #include <map>
diff --git a/src/media/media_filter.cpp b/src/media/media_filter.cpp
index 66f18163a0d40219c051e0e8465816840d71cd9a..bd4cf9c2c5d7d9d231a1e0821d121e22b6920d67 100644
--- a/src/media/media_filter.cpp
+++ b/src/media/media_filter.cpp
@@ -21,6 +21,7 @@
 #include "libav_deps.h" // MUST BE INCLUDED FIRST
 #include "logger.h"
 #include "media_filter.h"
+#include "media_buffer.h"
 
 extern "C" {
 #include <libavfilter/buffersink.h>
diff --git a/src/media/media_filter.h b/src/media/media_filter.h
index 2c2f0dff6bcaff26e58e419b3244c4261939adf0..9721430c050a50b2e85515f761b2fe7b845ff8ab 100644
--- a/src/media/media_filter.h
+++ b/src/media/media_filter.h
@@ -26,6 +26,7 @@
 
 #include "media_stream.h"
 #include "noncopyable.h"
+#include "video/video_base.h"
 
 #include <map>
 #include <string>
diff --git a/src/media/media_stream.h b/src/media/media_stream.h
index 34c0ef7578ea62aa2d060da48a5de1bf561b25c0..8162554b522e63f401424f2e0937e5b7a0af387b 100644
--- a/src/media/media_stream.h
+++ b/src/media/media_stream.h
@@ -23,7 +23,7 @@
 #include "libav_deps.h"
 #include "logger.h"
 #include "rational.h"
-#include "audio/audiobuffer.h"
+#include "audio/audio_format.h"
 
 #include <string>