diff --git a/src/media/audio/audio-processing/speex.cpp b/src/media/audio/audio-processing/speex.cpp
index cc3ce74462b9ec0fa9b9e7b561154329a4e79a42..f875eedeeaed83c81374a25f0450fdad6bc42921 100644
--- a/src/media/audio/audio-processing/speex.cpp
+++ b/src/media/audio/audio-processing/speex.cpp
@@ -34,19 +34,19 @@ extern "C" {
 
 namespace jami {
 
 SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
-    : AudioProcessor(format, frameSize)
+    : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_S16), frameSize)
     , echoState(speex_echo_state_init_mc((int) frameSize,
                                          (int) frameSize * 16,
-                                         (int) format.nb_channels,
-                                         (int) format.nb_channels),
+                                         (int) format_.nb_channels,
+                                         (int) format_.nb_channels),
                 &speex_echo_state_destroy)
-    , iProcBuffer(frameSize_, format)
+    , procBuffer(std::make_unique<AudioFrame>(format_.withSampleFormat(AV_SAMPLE_FMT_S16P), frameSize_))
 {
     JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d",
              frameSize,
              frameDurationMs_,
-             format.nb_channels);
+             format_.nb_channels);
     // set up speex echo state
     speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
 
@@ -66,10 +72,10 @@ SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
 
     // set up speex preprocess states, one for each channel
     // note that they are not enabled here, but rather in the enable* functions
-    for (unsigned int i = 0; i < format.nb_channels; i++) {
+    for (unsigned int i = 0; i < format_.nb_channels; i++) {
         auto channelPreprocessorState
             = SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize,
-                                                                  (int) format.sample_rate),
+                                                                  (int) format_.sample_rate),
                                       &speex_preprocess_state_destroy);
 
         // set max noise suppression level
@@ -184,11 +190,11 @@ SpeexAudioProcessor::getProcessed()
         return {};
     }
 
-    auto processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
-
+    std::shared_ptr<AudioFrame> processed;
     if (shouldAEC) {
         // we want to echo cancel
         // multichannel, output into processed
+        processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
         speex_echo_cancellation(echoState.get(),
                                 (int16_t*) record->pointer()->data[0],
                                 (int16_t*) playback->pointer()->data[0],
@@ -198,16 +204,7 @@ SpeexAudioProcessor::getProcessed()
         processed = record;
     }
 
-    // deinterleave processed into channels
-    std::vector<int16_t*> procData {format_.nb_channels};
-    iProcBuffer.deinterleave((const AudioSample*) processed->pointer()->data[0],
-                             frameSize_,
-                             format_.nb_channels);
-
-    // point procData to correct channels
-    for (unsigned int channel = 0; channel < format_.nb_channels; channel++) {
-        procData[channel] = iProcBuffer.getChannel(channel)->data();
-    }
+    deinterleaveResampler.resample(processed->pointer(), procBuffer->pointer());
 
     // overall voice activity
     bool overallVad = false;
@@ -218,7 +215,8 @@
     int channel = 0;
     for (auto& channelPreprocessorState : preprocessorStates) {
         // preprocesses in place, returns voice activity boolean
-        channelVad = speex_preprocess_run(channelPreprocessorState.get(), procData[channel]);
+        channelVad = speex_preprocess_run(channelPreprocessorState.get(),
+                                          (int16_t*) procBuffer->pointer()->data[channel]);
 
         // boolean OR
         overallVad |= channelVad;
@@ -226,12 +223,10 @@ SpeexAudioProcessor::getProcessed()
         channel += 1;
     }
 
-    // reinterleave into processed
-    iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);
+    interleaveResampler.resample(procBuffer->pointer(), processed->pointer());
 
     // add stabilized voice activity to the AudioFrame
     processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);
-
     return processed;
 }
 
diff --git a/src/media/audio/audio-processing/speex.h b/src/media/audio/audio-processing/speex.h
index 018dbb9e27f7bc44132f73d3b5b82ce75397cad4..5a6323e74b02c900405b8bb1da2f4b28344cbfdd 100644
--- a/src/media/audio/audio-processing/speex.h
+++ b/src/media/audio/audio-processing/speex.h
@@ -21,7 +21,7 @@
 #pragma once
 
 #include "audio_processor.h"
-#include "media/audio/audiobuffer.h"
+#include "media/audio/resampler.h"
 
 // typedef speex C structs
 extern "C" {
@@ -57,7 +56,9 @@ private:
     // one for each channel
     std::vector<SpeexPreprocessStatePtr> preprocessorStates;
 
-    AudioBuffer iProcBuffer;
+    std::unique_ptr<AudioFrame> procBuffer {};
+    Resampler deinterleaveResampler;
+    Resampler interleaveResampler;
 
     // if we should do echo cancellation
     bool shouldAEC {false};
diff --git a/src/media/audio/audio-processing/webrtc.cpp b/src/media/audio/audio-processing/webrtc.cpp
index ea6c48fadd5d2ff264c970e7c1f129634dd87833..cb768a9fbdac1e5b113068388ca465e177b93477 100644
--- a/src/media/audio/audio-processing/webrtc.cpp
+++ b/src/media/audio/audio-processing/webrtc.cpp
@@ -23,12 +23,6 @@
 
 namespace jami {
 
-inline AudioFormat
-audioFormatToFloatPlanar(AudioFormat format)
-{
-    return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_FLTP};
-}
-
 inline size_t
 webrtcFrameSize(AudioFormat format)
 {
@@ -38,19 +32,19 @@ webrtcFrameSize(AudioFormat format)
 constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError;
 
 WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned /* frameSize */)
-    : AudioProcessor(audioFormatToFloatPlanar(format), webrtcFrameSize(format))
+    : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_FLTP), webrtcFrameSize(format))
 {
     JAMI_LOG("[webrtc-ap] WebRTCAudioProcessor, frame size = {:d} (={:d} ms), channels = {:d}",
              frameSize_,
              frameDurationMs_,
-             format.nb_channels);
+             format_.nb_channels);
     webrtc::Config config;
     config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
     config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
 
     apm.reset(webrtc::AudioProcessing::Create(config));
 
-    webrtc::StreamConfig streamConfig((int) format.sample_rate, (int) format.nb_channels);
+    webrtc::StreamConfig streamConfig((int) format_.sample_rate, (int) format_.nb_channels);
     webrtc::ProcessingConfig pconfig = {
         streamConfig, /* input stream */
         streamConfig, /* output stream */
diff --git a/src/media/audio/audio_format.h b/src/media/audio/audio_format.h
index 8c1b9e1c9355b1f73ff96f0691c111cfa65002f7..fb5253877ee0274337ac257d647caee1fed1a730 100644
--- a/src/media/audio/audio_format.h
+++ b/src/media/audio/audio_format.h
@@ -59,6 +59,11 @@ struct AudioFormat
         return fmt::format("{{{}, {} channels, {}Hz}}", av_get_sample_fmt_name(sampleFormat), nb_channels, sample_rate);
     }
 
+    inline AudioFormat withSampleFormat(AVSampleFormat format) const
+    {
+        return {sample_rate, nb_channels, format};
+    }
+
     /**
      * Returns bytes necessary to hold one frame of audio data.
      */